* [PATCH 13/13] lib/fonts: Remove internal symbols and macros from public header file
From: Thomas Zimmermann @ 2026-02-18 8:16 UTC (permalink / raw)
To: gregkh, deller, sam
Cc: linux-fbdev, dri-devel, linux-kernel, Thomas Zimmermann
In-Reply-To: <20260218083855.10743-1-tzimmermann@suse.de>
Define access macros for font_data_t in fonts.c. Define struct font_data
and declare most of the font symbols in the internal header font.h, where
they can only be seen by the font code. Add config guards around font
symbols.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
include/linux/font.h | 32 +++++------------------
lib/fonts/font.h | 52 ++++++++++++++++++++++++++++++++++++++
lib/fonts/font_10x18.c | 2 +-
lib/fonts/font_6x10.c | 3 ++-
lib/fonts/font_6x11.c | 2 +-
lib/fonts/font_6x8.c | 3 ++-
lib/fonts/font_7x14.c | 2 +-
lib/fonts/font_8x16.c | 3 ++-
lib/fonts/font_8x8.c | 2 +-
lib/fonts/font_acorn_8x8.c | 2 +-
lib/fonts/font_mini_4x6.c | 2 +-
lib/fonts/font_pearl_8x8.c | 2 +-
lib/fonts/font_sun12x22.c | 3 ++-
lib/fonts/font_sun8x16.c | 3 ++-
lib/fonts/font_ter10x18.c | 4 ++-
lib/fonts/font_ter16x32.c | 4 ++-
lib/fonts/fonts.c | 8 +++++-
17 files changed, 88 insertions(+), 41 deletions(-)
create mode 100644 lib/fonts/font.h
diff --git a/include/linux/font.h b/include/linux/font.h
index 4ff956a1cd0a..6e9a4c93b47b 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -92,20 +92,12 @@ struct font_desc {
#define FONT6x8_IDX 12
#define TER10x18_IDX 13
-extern const struct font_desc font_vga_8x8,
- font_vga_8x16,
- font_pearl_8x8,
- font_vga_6x11,
- font_7x14,
- font_10x18,
- font_sun_8x16,
- font_sun_12x22,
- font_acorn_8x8,
- font_mini_4x6,
- font_6x10,
- font_ter_16x32,
- font_6x8,
- font_ter_10x18;
+#if defined(CONFIG_FONT_8x8)
+extern const struct font_desc font_vga_8x8;
+#endif
+#if defined(CONFIG_FONT_8x16)
+extern const struct font_desc font_vga_8x16;
+#endif
/* Find a font with a specific name */
@@ -120,16 +112,4 @@ extern const struct font_desc *get_default_font(int xres, int yres,
/* Max. length for the name of a predefined font */
#define MAX_FONT_NAME 32
-/* Extra word getters */
-#define REFCOUNT(fd) (((int *)(fd))[-1])
-#define FNTSIZE(fd) (((int *)(fd))[-2])
-#define FNTSUM(fd) (((int *)(fd))[-4])
-
-#define FONT_EXTRA_WORDS 4
-
-struct font_data {
- unsigned int extra[FONT_EXTRA_WORDS];
- unsigned char data[];
-} __packed;
-
#endif /* _VIDEO_FONT_H */
diff --git a/lib/fonts/font.h b/lib/fonts/font.h
new file mode 100644
index 000000000000..00f65a3da5c2
--- /dev/null
+++ b/lib/fonts/font.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LIB_FONTS_FONT_H
+#define _LIB_FONTS_FONT_H
+
+#include <linux/font.h>
+
+#if defined(CONFIG_FONT_PEARL_8x8)
+extern const struct font_desc font_pearl_8x8;
+#endif
+#if defined(CONFIG_FONT_6x11)
+extern const struct font_desc font_vga_6x11;
+#endif
+#if defined(CONFIG_FONT_7x14)
+extern const struct font_desc font_7x14;
+#endif
+#if defined(CONFIG_FONT_10x18)
+extern const struct font_desc font_10x18;
+#endif
+#if defined(CONFIG_FONT_SUN8x16)
+extern const struct font_desc font_sun_8x16;
+#endif
+#if defined(CONFIG_FONT_SUN12x22)
+extern const struct font_desc font_sun_12x22;
+#endif
+#if defined(CONFIG_FONT_ACORN_8x8)
+extern const struct font_desc font_acorn_8x8;
+#endif
+#if defined(CONFIG_FONT_MINI_4x6)
+extern const struct font_desc font_mini_4x6;
+#endif
+#if defined(CONFIG_FONT_6x10)
+extern const struct font_desc font_6x10;
+#endif
+#if defined(CONFIG_FONT_TER16x32)
+extern const struct font_desc font_ter_16x32;
+#endif
+#if defined(CONFIG_FONT_6x8)
+extern const struct font_desc font_6x8;
+#endif
+#if defined(CONFIG_FONT_TER10x18)
+extern const struct font_desc font_ter_10x18;
+#endif
+
+#define FONT_EXTRA_WORDS 4
+
+struct font_data {
+ unsigned int extra[FONT_EXTRA_WORDS];
+ unsigned char data[];
+} __packed;
+
+#endif
diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c
index 5d940db626e7..10edebc4bb74 100644
--- a/lib/fonts/font_10x18.c
+++ b/lib/fonts/font_10x18.c
@@ -4,7 +4,7 @@
* by Jurriaan Kalkman 06-2005 *
********************************/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 9216
diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c
index e65df019e0d2..660d3a371b30 100644
--- a/lib/fonts/font_6x10.c
+++ b/lib/fonts/font_6x10.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
#define FONTDATAMAX 2560
diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c
index bd76b3f6b635..671487ccc172 100644
--- a/lib/fonts/font_6x11.c
+++ b/lib/fonts/font_6x11.c
@@ -5,7 +5,7 @@
/* */
/**********************************************/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX (11*256)
diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c
index 06ace7792521..5811ee07f4d8 100644
--- a/lib/fonts/font_6x8.c
+++ b/lib/fonts/font_6x8.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
#define FONTDATAMAX 2048
diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c
index a2f561c9fa04..0c7475d643c8 100644
--- a/lib/fonts/font_7x14.c
+++ b/lib/fonts/font_7x14.c
@@ -4,7 +4,7 @@
/* by Jurriaan Kalkman 05-2005 */
/**************************************/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 3584
diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c
index 06ae14088514..523e95c75569 100644
--- a/lib/fonts/font_8x16.c
+++ b/lib/fonts/font_8x16.c
@@ -5,9 +5,10 @@
/* */
/**********************************************/
-#include <linux/font.h>
#include <linux/module.h>
+#include "font.h"
+
#define FONTDATAMAX 4096
static const struct font_data fontdata_8x16 = {
diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c
index 69570b8c31af..e5b697fc9675 100644
--- a/lib/fonts/font_8x8.c
+++ b/lib/fonts/font_8x8.c
@@ -5,7 +5,7 @@
/* */
/**********************************************/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 2048
diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c
index af5fa72aa8b7..36c51016769d 100644
--- a/lib/fonts/font_acorn_8x8.c
+++ b/lib/fonts/font_acorn_8x8.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Acorn-like font definition, with PC graphics characters */
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 2048
diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c
index cc21dc70cfd1..dc919c160dde 100644
--- a/lib/fonts/font_mini_4x6.c
+++ b/lib/fonts/font_mini_4x6.c
@@ -39,7 +39,7 @@ __END__;
MSBit to LSBit = left to right.
*/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 1536
diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c
index ae98ca17982e..2438b374acea 100644
--- a/lib/fonts/font_pearl_8x8.c
+++ b/lib/fonts/font_pearl_8x8.c
@@ -10,7 +10,7 @@
/* */
/**********************************************/
-#include <linux/font.h>
+#include "font.h"
#define FONTDATAMAX 2048
diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c
index 91daf5ab8b6b..2afbc144bea8 100644
--- a/lib/fonts/font_sun12x22.c
+++ b/lib/fonts/font_sun12x22.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
#define FONTDATAMAX 11264
diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c
index 81bb4eeae04e..2b7b2d8e548a 100644
--- a/lib/fonts/font_sun8x16.c
+++ b/lib/fonts/font_sun8x16.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
+#include "font.h"
#define FONTDATAMAX 4096
diff --git a/lib/fonts/font_ter10x18.c b/lib/fonts/font_ter10x18.c
index 80356e9d56c7..3f30b4a211ab 100644
--- a/lib/fonts/font_ter10x18.c
+++ b/lib/fonts/font_ter10x18.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
#include <linux/module.h>
+#include "font.h"
+
#define FONTDATAMAX 9216
static const struct font_data fontdata_ter10x18 = {
diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c
index 5baedc573dd6..93616cffe642 100644
--- a/lib/fonts/font_ter16x32.c
+++ b/lib/fonts/font_ter16x32.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/font.h>
+
#include <linux/module.h>
+#include "font.h"
+
#define FONTDATAMAX 16384
static const struct font_data fontdata_ter16x32 = {
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index 1830e6ae9c87..198aae869be2 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -13,7 +13,6 @@
*/
#include <linux/container_of.h>
-#include <linux/font.h>
#include <linux/kd.h>
#include <linux/module.h>
#include <linux/overflow.h>
@@ -26,12 +25,19 @@
#include <asm/setup.h>
#endif
+#include "font.h"
+
#define console_font_pitch(font) DIV_ROUND_UP((font)->width, 8)
/*
* Helpers for font_data_t
*/
+/* Extra word getters */
+#define REFCOUNT(fd) (((int *)(fd))[-1])
+#define FNTSIZE(fd) (((int *)(fd))[-2])
+#define FNTSUM(fd) (((int *)(fd))[-4])
+
static struct font_data *to_font_data_struct(font_data_t *fd)
{
return container_of(fd, struct font_data, data[0]);
--
2.52.0
^ permalink raw reply related
* [PATCH] staging: fbtft: fb_tinylcd: replace udelay with usleep_range
From: tomasz.unger @ 2026-02-18 17:55 UTC (permalink / raw)
To: andy, gregkh
Cc: dri-devel, linux-fbdev, linux-staging, linux-kernel, Tomasz Unger
In-Reply-To: <20260218175517.87544-1-tomasz.unger.ref@yahoo.pl>
From: Tomasz Unger <tomasz.unger@yahoo.pl>
Signed-off-by: Tomasz Unger <tomasz.unger@yahoo.pl>
---
drivers/staging/fbtft/fb_tinylcd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/fbtft/fb_tinylcd.c b/drivers/staging/fbtft/fb_tinylcd.c
index 9469248f2c50..89ee7b9d621e 100644
--- a/drivers/staging/fbtft/fb_tinylcd.c
+++ b/drivers/staging/fbtft/fb_tinylcd.c
@@ -41,7 +41,7 @@ static int init_display(struct fbtft_par *par)
0x00, 0x35, 0x33, 0x00, 0x00, 0x00);
write_reg(par, MIPI_DCS_SET_PIXEL_FORMAT, 0x55);
write_reg(par, MIPI_DCS_EXIT_SLEEP_MODE);
- udelay(250);
+ usleep_range(250, 275);
write_reg(par, MIPI_DCS_SET_DISPLAY_ON);
return 0;
--
2.53.0
^ permalink raw reply related
* [PATCH] staging: fbtft: fb_ra8875: replace udelay with usleep_range
From: tomasz.unger @ 2026-02-18 17:47 UTC (permalink / raw)
To: andy, gregkh
Cc: dri-devel, linux-fbdev, linux-staging, linux-kernel, Tomasz Unger
In-Reply-To: <20260218174737.86994-1-tomasz.unger.ref@yahoo.pl>
From: Tomasz Unger <tomasz.unger@yahoo.pl>
Signed-off-by: Tomasz Unger <tomasz.unger@yahoo.pl>
---
drivers/staging/fbtft/fb_ra8875.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/fbtft/fb_ra8875.c b/drivers/staging/fbtft/fb_ra8875.c
index 0ab1de6647d0..ec4ce534e409 100644
--- a/drivers/staging/fbtft/fb_ra8875.c
+++ b/drivers/staging/fbtft/fb_ra8875.c
@@ -210,7 +210,7 @@ static void write_reg8_bus8(struct fbtft_par *par, int len, ...)
}
len--;
- udelay(100);
+ usleep_range(100, 110);
if (len) {
buf = (u8 *)par->buf;
@@ -231,7 +231,7 @@ static void write_reg8_bus8(struct fbtft_par *par, int len, ...)
/* restore user spi-speed */
par->fbtftops.write = fbtft_write_spi;
- udelay(100);
+ usleep_range(100, 110);
}
static int write_vmem16_bus8(struct fbtft_par *par, size_t offset, size_t len)
--
2.53.0
^ permalink raw reply related
* [PATCH] staging: fbtft: fb_upd161704: replace udelay with usleep_range
From: tomasz.unger @ 2026-02-18 18:03 UTC (permalink / raw)
To: andy, gregkh
Cc: dri-devel, linux-fbdev, linux-staging, linux-kernel, Tomasz Unger
In-Reply-To: <20260218180347.88034-1-tomasz.unger.ref@yahoo.pl>
From: Tomasz Unger <tomasz.unger@yahoo.pl>
Signed-off-by: Tomasz Unger <tomasz.unger@yahoo.pl>
---
drivers/staging/fbtft/fb_upd161704.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/staging/fbtft/fb_upd161704.c b/drivers/staging/fbtft/fb_upd161704.c
index c680160d6380..62862f021b28 100644
--- a/drivers/staging/fbtft/fb_upd161704.c
+++ b/drivers/staging/fbtft/fb_upd161704.c
@@ -32,27 +32,27 @@ static int init_display(struct fbtft_par *par)
/* oscillator start */
write_reg(par, 0x003A, 0x0001); /*Oscillator 0: stop, 1: operation */
- udelay(100);
+ usleep_range(100, 110);
/* y-setting */
write_reg(par, 0x0024, 0x007B); /* amplitude setting */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0025, 0x003B); /* amplitude setting */
write_reg(par, 0x0026, 0x0034); /* amplitude setting */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0027, 0x0004); /* amplitude setting */
write_reg(par, 0x0052, 0x0025); /* circuit setting 1 */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0053, 0x0033); /* circuit setting 2 */
write_reg(par, 0x0061, 0x001C); /* adjustment V10 positive polarity */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0062, 0x002C); /* adjustment V9 negative polarity */
write_reg(par, 0x0063, 0x0022); /* adjustment V34 positive polarity */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0064, 0x0027); /* adjustment V31 negative polarity */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0065, 0x0014); /* adjustment V61 negative polarity */
- udelay(10);
+ usleep_range(10, 12);
write_reg(par, 0x0066, 0x0010); /* adjustment V61 negative polarity */
/* Basical clock for 1 line (BASECOUNT[7:0]) number specified */
@@ -60,7 +60,7 @@ static int init_display(struct fbtft_par *par)
/* Power supply setting */
write_reg(par, 0x0019, 0x0000); /* DC/DC output setting */
- udelay(200);
+ usleep_range(200, 220);
write_reg(par, 0x001A, 0x1000); /* DC/DC frequency setting */
write_reg(par, 0x001B, 0x0023); /* DC/DC rising setting */
write_reg(par, 0x001C, 0x0C01); /* Regulator voltage setting */
--
2.53.0
^ permalink raw reply related
* [PATCH v10 2/8] gpu: Move DRM buddy allocator one level up (part two)
From: Joel Fernandes @ 2026-02-18 20:55 UTC (permalink / raw)
To: linux-kernel, David Airlie, Simona Vetter, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, Jonathan Corbet, Shuah Khan,
Matthew Auld, Arun Pravin, Christian Koenig, Alex Deucher,
Jani Nikula, Joonas Lahtinen, Rodrigo Vivi, Tvrtko Ursulin,
Huang Rui, Matthew Brost, Thomas Hellström, Helge Deller
Cc: Danilo Krummrich, Miguel Ojeda, Dave Airlie, Gary Guo,
Daniel Almeida, Koen Koning, dri-devel, nouveau, rust-for-linux,
Joel Fernandes, linux-doc, amd-gfx, intel-gfx, intel-xe,
linux-fbdev
In-Reply-To: <20260218205507.689429-1-joelagnelf@nvidia.com>
Move the DRM buddy allocator one level up so that it can be used by GPU
drivers (example, nova-core) that have usecases other than DRM (such as
VFIO vGPU support). Modify the API, structures and Kconfigs to use
"gpu_buddy" terminology. Adapt the drivers and tests to use the new API.
The commit cannot be split due to bisectability, however no functional
change is intended. Verified by running K-UNIT tests and build tested
various configurations.
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
[airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done
elsewhere and only the buddy tests seem to be using it].
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
Documentation/gpu/drm-mm.rst | 6 +
MAINTAINERS | 8 +-
drivers/gpu/Kconfig | 13 +
drivers/gpu/Makefile | 1 +
drivers/gpu/buddy.c | 556 +++++++++---------
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
.../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 12 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 79 +--
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 18 +-
drivers/gpu/drm/drm_buddy.c | 77 +++
drivers/gpu/drm/i915/i915_scatterlist.c | 8 +-
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 55 +-
drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 4 +-
.../drm/i915/selftests/intel_memory_region.c | 20 +-
.../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 4 +-
drivers/gpu/drm/ttm/tests/ttm_mock_manager.c | 18 +-
drivers/gpu/drm/ttm/tests/ttm_mock_manager.h | 2 +-
drivers/gpu/drm/xe/xe_res_cursor.h | 34 +-
drivers/gpu/drm/xe/xe_svm.c | 12 +-
drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 71 +--
drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 2 +-
drivers/gpu/tests/Makefile | 2 +-
drivers/gpu/tests/gpu_buddy_test.c | 412 ++++++-------
drivers/gpu/tests/gpu_random.c | 16 +-
drivers/gpu/tests/gpu_random.h | 18 +-
drivers/video/Kconfig | 1 +
include/drm/drm_buddy.h | 18 +
include/linux/gpu_buddy.h | 120 ++--
30 files changed, 853 insertions(+), 739 deletions(-)
create mode 100644 drivers/gpu/Kconfig
create mode 100644 drivers/gpu/drm/drm_buddy.c
create mode 100644 include/drm/drm_buddy.h
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index ceee0e663237..32fb506db05b 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -532,6 +532,12 @@ Buddy Allocator Function References (GPU buddy)
.. kernel-doc:: drivers/gpu/buddy.c
:export:
+DRM Buddy Specific Logging Function References
+----------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_buddy.c
+ :export:
+
DRM Cache Handling and Fast WC memcpy()
=======================================
diff --git a/MAINTAINERS b/MAINTAINERS
index dc82a6bd1a61..14b4f9af0e36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8905,15 +8905,17 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/gpu/drm/ttm/
F: include/drm/ttm/
-DRM BUDDY ALLOCATOR
+GPU BUDDY ALLOCATOR
M: Matthew Auld <matthew.auld@intel.com>
M: Arun Pravin <arunpravin.paneerselvam@amd.com>
R: Christian Koenig <christian.koenig@amd.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
-F: drivers/gpu/drm/drm_buddy.c
-F: drivers/gpu/drm/tests/drm_buddy_test.c
+F: drivers/gpu/drm_buddy.c
+F: drivers/gpu/buddy.c
+F: drivers/gpu/tests/gpu_buddy_test.c
+F: include/linux/gpu_buddy.h
F: include/drm/drm_buddy.h
DRM AUTOMATED TESTING
diff --git a/drivers/gpu/Kconfig b/drivers/gpu/Kconfig
new file mode 100644
index 000000000000..ebb2ad4b7ea0
--- /dev/null
+++ b/drivers/gpu/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config GPU_BUDDY
+ bool
+ help
+ A page based buddy allocator for GPU memory.
+
+config GPU_BUDDY_KUNIT_TEST
+ tristate "KUnit tests for GPU buddy allocator" if !KUNIT_ALL_TESTS
+ depends on GPU_BUDDY && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ KUnit tests for the GPU buddy allocator.
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index c5292ee2c852..5cd54d06e262 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -6,3 +6,4 @@ obj-y += host1x/ drm/ vga/ tests/
obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
obj-$(CONFIG_TRACE_GPU_MEM) += trace/
obj-$(CONFIG_NOVA_CORE) += nova-core/
+obj-$(CONFIG_GPU_BUDDY) += buddy.o
diff --git a/drivers/gpu/buddy.c b/drivers/gpu/buddy.c
index 4cc63d961d26..603c59a2013a 100644
--- a/drivers/gpu/buddy.c
+++ b/drivers/gpu/buddy.c
@@ -11,27 +11,17 @@
#include <linux/sizes.h>
#include <linux/gpu_buddy.h>
-#include <drm/drm_print.h>
-
-enum drm_buddy_free_tree {
- DRM_BUDDY_CLEAR_TREE = 0,
- DRM_BUDDY_DIRTY_TREE,
- DRM_BUDDY_MAX_FREE_TREES,
-};
static struct kmem_cache *slab_blocks;
-#define for_each_free_tree(tree) \
- for ((tree) = 0; (tree) < DRM_BUDDY_MAX_FREE_TREES; (tree)++)
-
-static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
- struct drm_buddy_block *parent,
+static struct gpu_buddy_block *gpu_block_alloc(struct gpu_buddy *mm,
+ struct gpu_buddy_block *parent,
unsigned int order,
u64 offset)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
- BUG_ON(order > DRM_BUDDY_MAX_ORDER);
+ BUG_ON(order > GPU_BUDDY_MAX_ORDER);
block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL);
if (!block)
@@ -43,30 +33,30 @@ static struct drm_buddy_block *drm_block_alloc(struct drm_buddy *mm,
RB_CLEAR_NODE(&block->rb);
- BUG_ON(block->header & DRM_BUDDY_HEADER_UNUSED);
+ BUG_ON(block->header & GPU_BUDDY_HEADER_UNUSED);
return block;
}
-static void drm_block_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void gpu_block_free(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
kmem_cache_free(slab_blocks, block);
}
-static enum drm_buddy_free_tree
-get_block_tree(struct drm_buddy_block *block)
+static enum gpu_buddy_free_tree
+get_block_tree(struct gpu_buddy_block *block)
{
- return drm_buddy_block_is_clear(block) ?
- DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+ return gpu_buddy_block_is_clear(block) ?
+ GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
}
-static struct drm_buddy_block *
+static struct gpu_buddy_block *
rbtree_get_free_block(const struct rb_node *node)
{
- return node ? rb_entry(node, struct drm_buddy_block, rb) : NULL;
+ return node ? rb_entry(node, struct gpu_buddy_block, rb) : NULL;
}
-static struct drm_buddy_block *
+static struct gpu_buddy_block *
rbtree_last_free_block(struct rb_root *root)
{
return rbtree_get_free_block(rb_last(root));
@@ -77,33 +67,33 @@ static bool rbtree_is_empty(struct rb_root *root)
return RB_EMPTY_ROOT(root);
}
-static bool drm_buddy_block_offset_less(const struct drm_buddy_block *block,
- const struct drm_buddy_block *node)
+static bool gpu_buddy_block_offset_less(const struct gpu_buddy_block *block,
+ const struct gpu_buddy_block *node)
{
- return drm_buddy_block_offset(block) < drm_buddy_block_offset(node);
+ return gpu_buddy_block_offset(block) < gpu_buddy_block_offset(node);
}
static bool rbtree_block_offset_less(struct rb_node *block,
const struct rb_node *node)
{
- return drm_buddy_block_offset_less(rbtree_get_free_block(block),
+ return gpu_buddy_block_offset_less(rbtree_get_free_block(block),
rbtree_get_free_block(node));
}
-static void rbtree_insert(struct drm_buddy *mm,
- struct drm_buddy_block *block,
- enum drm_buddy_free_tree tree)
+static void rbtree_insert(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block,
+ enum gpu_buddy_free_tree tree)
{
rb_add(&block->rb,
- &mm->free_trees[tree][drm_buddy_block_order(block)],
+ &mm->free_trees[tree][gpu_buddy_block_order(block)],
rbtree_block_offset_less);
}
-static void rbtree_remove(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void rbtree_remove(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- unsigned int order = drm_buddy_block_order(block);
- enum drm_buddy_free_tree tree;
+ unsigned int order = gpu_buddy_block_order(block);
+ enum gpu_buddy_free_tree tree;
struct rb_root *root;
tree = get_block_tree(block);
@@ -113,42 +103,42 @@ static void rbtree_remove(struct drm_buddy *mm,
RB_CLEAR_NODE(&block->rb);
}
-static void clear_reset(struct drm_buddy_block *block)
+static void clear_reset(struct gpu_buddy_block *block)
{
- block->header &= ~DRM_BUDDY_HEADER_CLEAR;
+ block->header &= ~GPU_BUDDY_HEADER_CLEAR;
}
-static void mark_cleared(struct drm_buddy_block *block)
+static void mark_cleared(struct gpu_buddy_block *block)
{
- block->header |= DRM_BUDDY_HEADER_CLEAR;
+ block->header |= GPU_BUDDY_HEADER_CLEAR;
}
-static void mark_allocated(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void mark_allocated(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- block->header &= ~DRM_BUDDY_HEADER_STATE;
- block->header |= DRM_BUDDY_ALLOCATED;
+ block->header &= ~GPU_BUDDY_HEADER_STATE;
+ block->header |= GPU_BUDDY_ALLOCATED;
rbtree_remove(mm, block);
}
-static void mark_free(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void mark_free(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- enum drm_buddy_free_tree tree;
+ enum gpu_buddy_free_tree tree;
- block->header &= ~DRM_BUDDY_HEADER_STATE;
- block->header |= DRM_BUDDY_FREE;
+ block->header &= ~GPU_BUDDY_HEADER_STATE;
+ block->header |= GPU_BUDDY_FREE;
tree = get_block_tree(block);
rbtree_insert(mm, block, tree);
}
-static void mark_split(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static void mark_split(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- block->header &= ~DRM_BUDDY_HEADER_STATE;
- block->header |= DRM_BUDDY_SPLIT;
+ block->header &= ~GPU_BUDDY_HEADER_STATE;
+ block->header |= GPU_BUDDY_SPLIT;
rbtree_remove(mm, block);
}
@@ -163,10 +153,10 @@ static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
return s1 <= s2 && e1 >= e2;
}
-static struct drm_buddy_block *
-__get_buddy(struct drm_buddy_block *block)
+static struct gpu_buddy_block *
+__get_buddy(struct gpu_buddy_block *block)
{
- struct drm_buddy_block *parent;
+ struct gpu_buddy_block *parent;
parent = block->parent;
if (!parent)
@@ -178,19 +168,19 @@ __get_buddy(struct drm_buddy_block *block)
return parent->left;
}
-static unsigned int __drm_buddy_free(struct drm_buddy *mm,
- struct drm_buddy_block *block,
+static unsigned int __gpu_buddy_free(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block,
bool force_merge)
{
- struct drm_buddy_block *parent;
+ struct gpu_buddy_block *parent;
unsigned int order;
while ((parent = block->parent)) {
- struct drm_buddy_block *buddy;
+ struct gpu_buddy_block *buddy;
buddy = __get_buddy(block);
- if (!drm_buddy_block_is_free(buddy))
+ if (!gpu_buddy_block_is_free(buddy))
break;
if (!force_merge) {
@@ -198,31 +188,31 @@ static unsigned int __drm_buddy_free(struct drm_buddy *mm,
* Check the block and its buddy clear state and exit
* the loop if they both have the dissimilar state.
*/
- if (drm_buddy_block_is_clear(block) !=
- drm_buddy_block_is_clear(buddy))
+ if (gpu_buddy_block_is_clear(block) !=
+ gpu_buddy_block_is_clear(buddy))
break;
- if (drm_buddy_block_is_clear(block))
+ if (gpu_buddy_block_is_clear(block))
mark_cleared(parent);
}
rbtree_remove(mm, buddy);
- if (force_merge && drm_buddy_block_is_clear(buddy))
- mm->clear_avail -= drm_buddy_block_size(mm, buddy);
+ if (force_merge && gpu_buddy_block_is_clear(buddy))
+ mm->clear_avail -= gpu_buddy_block_size(mm, buddy);
- drm_block_free(mm, block);
- drm_block_free(mm, buddy);
+ gpu_block_free(mm, block);
+ gpu_block_free(mm, buddy);
block = parent;
}
- order = drm_buddy_block_order(block);
+ order = gpu_buddy_block_order(block);
mark_free(mm, block);
return order;
}
-static int __force_merge(struct drm_buddy *mm,
+static int __force_merge(struct gpu_buddy *mm,
u64 start,
u64 end,
unsigned int min_order)
@@ -241,7 +231,7 @@ static int __force_merge(struct drm_buddy *mm,
struct rb_node *iter = rb_last(&mm->free_trees[tree][i]);
while (iter) {
- struct drm_buddy_block *block, *buddy;
+ struct gpu_buddy_block *block, *buddy;
u64 block_start, block_end;
block = rbtree_get_free_block(iter);
@@ -250,18 +240,18 @@ static int __force_merge(struct drm_buddy *mm,
if (!block || !block->parent)
continue;
- block_start = drm_buddy_block_offset(block);
- block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+ block_start = gpu_buddy_block_offset(block);
+ block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!contains(start, end, block_start, block_end))
continue;
buddy = __get_buddy(block);
- if (!drm_buddy_block_is_free(buddy))
+ if (!gpu_buddy_block_is_free(buddy))
continue;
- WARN_ON(drm_buddy_block_is_clear(block) ==
- drm_buddy_block_is_clear(buddy));
+ WARN_ON(gpu_buddy_block_is_clear(block) ==
+ gpu_buddy_block_is_clear(buddy));
/*
* Advance to the next node when the current node is the buddy,
@@ -271,10 +261,10 @@ static int __force_merge(struct drm_buddy *mm,
iter = rb_prev(iter);
rbtree_remove(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail -= gpu_buddy_block_size(mm, block);
- order = __drm_buddy_free(mm, block, true);
+ order = __gpu_buddy_free(mm, block, true);
if (order >= min_order)
return 0;
}
@@ -285,9 +275,9 @@ static int __force_merge(struct drm_buddy *mm,
}
/**
- * drm_buddy_init - init memory manager
+ * gpu_buddy_init - init memory manager
*
- * @mm: DRM buddy manager to initialize
+ * @mm: GPU buddy manager to initialize
* @size: size in bytes to manage
* @chunk_size: minimum page size in bytes for our allocations
*
@@ -296,7 +286,7 @@ static int __force_merge(struct drm_buddy *mm,
* Returns:
* 0 on success, error code on failure.
*/
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size)
{
unsigned int i, j, root_count = 0;
u64 offset = 0;
@@ -318,9 +308,9 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
mm->chunk_size = chunk_size;
mm->max_order = ilog2(size) - ilog2(chunk_size);
- BUG_ON(mm->max_order > DRM_BUDDY_MAX_ORDER);
+ BUG_ON(mm->max_order > GPU_BUDDY_MAX_ORDER);
- mm->free_trees = kmalloc_array(DRM_BUDDY_MAX_FREE_TREES,
+ mm->free_trees = kmalloc_array(GPU_BUDDY_MAX_FREE_TREES,
sizeof(*mm->free_trees),
GFP_KERNEL);
if (!mm->free_trees)
@@ -340,7 +330,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
mm->n_roots = hweight64(size);
mm->roots = kmalloc_array(mm->n_roots,
- sizeof(struct drm_buddy_block *),
+ sizeof(struct gpu_buddy_block *),
GFP_KERNEL);
if (!mm->roots)
goto out_free_tree;
@@ -350,21 +340,21 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
* not itself a power-of-two.
*/
do {
- struct drm_buddy_block *root;
+ struct gpu_buddy_block *root;
unsigned int order;
u64 root_size;
order = ilog2(size) - ilog2(chunk_size);
root_size = chunk_size << order;
- root = drm_block_alloc(mm, NULL, order, offset);
+ root = gpu_block_alloc(mm, NULL, order, offset);
if (!root)
goto out_free_roots;
mark_free(mm, root);
BUG_ON(root_count > mm->max_order);
- BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
+ BUG_ON(gpu_buddy_block_size(mm, root) < chunk_size);
mm->roots[root_count] = root;
@@ -377,7 +367,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
out_free_roots:
while (root_count--)
- drm_block_free(mm, mm->roots[root_count]);
+ gpu_block_free(mm, mm->roots[root_count]);
kfree(mm->roots);
out_free_tree:
while (i--)
@@ -385,16 +375,16 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
kfree(mm->free_trees);
return -ENOMEM;
}
-EXPORT_SYMBOL(drm_buddy_init);
+EXPORT_SYMBOL(gpu_buddy_init);
/**
- * drm_buddy_fini - tear down the memory manager
+ * gpu_buddy_fini - tear down the memory manager
*
- * @mm: DRM buddy manager to free
+ * @mm: GPU buddy manager to free
*
* Cleanup memory manager resources and the freetree
*/
-void drm_buddy_fini(struct drm_buddy *mm)
+void gpu_buddy_fini(struct gpu_buddy *mm)
{
u64 root_size, size, start;
unsigned int order;
@@ -404,13 +394,13 @@ void drm_buddy_fini(struct drm_buddy *mm)
for (i = 0; i < mm->n_roots; ++i) {
order = ilog2(size) - ilog2(mm->chunk_size);
- start = drm_buddy_block_offset(mm->roots[i]);
+ start = gpu_buddy_block_offset(mm->roots[i]);
__force_merge(mm, start, start + size, order);
- if (WARN_ON(!drm_buddy_block_is_free(mm->roots[i])))
+ if (WARN_ON(!gpu_buddy_block_is_free(mm->roots[i])))
kunit_fail_current_test("buddy_fini() root");
- drm_block_free(mm, mm->roots[i]);
+ gpu_block_free(mm, mm->roots[i]);
root_size = mm->chunk_size << order;
size -= root_size;
@@ -423,31 +413,31 @@ void drm_buddy_fini(struct drm_buddy *mm)
kfree(mm->free_trees);
kfree(mm->roots);
}
-EXPORT_SYMBOL(drm_buddy_fini);
+EXPORT_SYMBOL(gpu_buddy_fini);
-static int split_block(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+static int split_block(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- unsigned int block_order = drm_buddy_block_order(block) - 1;
- u64 offset = drm_buddy_block_offset(block);
+ unsigned int block_order = gpu_buddy_block_order(block) - 1;
+ u64 offset = gpu_buddy_block_offset(block);
- BUG_ON(!drm_buddy_block_is_free(block));
- BUG_ON(!drm_buddy_block_order(block));
+ BUG_ON(!gpu_buddy_block_is_free(block));
+ BUG_ON(!gpu_buddy_block_order(block));
- block->left = drm_block_alloc(mm, block, block_order, offset);
+ block->left = gpu_block_alloc(mm, block, block_order, offset);
if (!block->left)
return -ENOMEM;
- block->right = drm_block_alloc(mm, block, block_order,
+ block->right = gpu_block_alloc(mm, block, block_order,
offset + (mm->chunk_size << block_order));
if (!block->right) {
- drm_block_free(mm, block->left);
+ gpu_block_free(mm, block->left);
return -ENOMEM;
}
mark_split(mm, block);
- if (drm_buddy_block_is_clear(block)) {
+ if (gpu_buddy_block_is_clear(block)) {
mark_cleared(block->left);
mark_cleared(block->right);
clear_reset(block);
@@ -460,34 +450,34 @@ static int split_block(struct drm_buddy *mm,
}
/**
- * drm_get_buddy - get buddy address
+ * gpu_get_buddy - get buddy address
*
- * @block: DRM buddy block
+ * @block: GPU buddy block
*
* Returns the corresponding buddy block for @block, or NULL
* if this is a root block and can't be merged further.
* Requires some kind of locking to protect against
* any concurrent allocate and free operations.
*/
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block)
+struct gpu_buddy_block *
+gpu_get_buddy(struct gpu_buddy_block *block)
{
return __get_buddy(block);
}
-EXPORT_SYMBOL(drm_get_buddy);
+EXPORT_SYMBOL(gpu_get_buddy);
/**
- * drm_buddy_reset_clear - reset blocks clear state
+ * gpu_buddy_reset_clear - reset blocks clear state
*
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
* @is_clear: blocks clear state
*
* Reset the clear state based on @is_clear value for each block
* in the freetree.
*/
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear)
{
- enum drm_buddy_free_tree src_tree, dst_tree;
+ enum gpu_buddy_free_tree src_tree, dst_tree;
u64 root_size, size, start;
unsigned int order;
int i;
@@ -495,60 +485,60 @@ void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear)
size = mm->size;
for (i = 0; i < mm->n_roots; ++i) {
order = ilog2(size) - ilog2(mm->chunk_size);
- start = drm_buddy_block_offset(mm->roots[i]);
+ start = gpu_buddy_block_offset(mm->roots[i]);
__force_merge(mm, start, start + size, order);
root_size = mm->chunk_size << order;
size -= root_size;
}
- src_tree = is_clear ? DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
- dst_tree = is_clear ? DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+ src_tree = is_clear ? GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
+ dst_tree = is_clear ? GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
for (i = 0; i <= mm->max_order; ++i) {
struct rb_root *root = &mm->free_trees[src_tree][i];
- struct drm_buddy_block *block, *tmp;
+ struct gpu_buddy_block *block, *tmp;
rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
rbtree_remove(mm, block);
if (is_clear) {
mark_cleared(block);
- mm->clear_avail += drm_buddy_block_size(mm, block);
+ mm->clear_avail += gpu_buddy_block_size(mm, block);
} else {
clear_reset(block);
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ mm->clear_avail -= gpu_buddy_block_size(mm, block);
}
rbtree_insert(mm, block, dst_tree);
}
}
}
-EXPORT_SYMBOL(drm_buddy_reset_clear);
+EXPORT_SYMBOL(gpu_buddy_reset_clear);
/**
- * drm_buddy_free_block - free a block
+ * gpu_buddy_free_block - free a block
*
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
* @block: block to be freed
*/
-void drm_buddy_free_block(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+void gpu_buddy_free_block(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- BUG_ON(!drm_buddy_block_is_allocated(block));
- mm->avail += drm_buddy_block_size(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail += drm_buddy_block_size(mm, block);
+ BUG_ON(!gpu_buddy_block_is_allocated(block));
+ mm->avail += gpu_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail += gpu_buddy_block_size(mm, block);
- __drm_buddy_free(mm, block, false);
+ __gpu_buddy_free(mm, block, false);
}
-EXPORT_SYMBOL(drm_buddy_free_block);
+EXPORT_SYMBOL(gpu_buddy_free_block);
-static void __drm_buddy_free_list(struct drm_buddy *mm,
+static void __gpu_buddy_free_list(struct gpu_buddy *mm,
struct list_head *objects,
bool mark_clear,
bool mark_dirty)
{
- struct drm_buddy_block *block, *on;
+ struct gpu_buddy_block *block, *on;
WARN_ON(mark_dirty && mark_clear);
@@ -557,13 +547,13 @@ static void __drm_buddy_free_list(struct drm_buddy *mm,
mark_cleared(block);
else if (mark_dirty)
clear_reset(block);
- drm_buddy_free_block(mm, block);
+ gpu_buddy_free_block(mm, block);
cond_resched();
}
INIT_LIST_HEAD(objects);
}
-static void drm_buddy_free_list_internal(struct drm_buddy *mm,
+static void gpu_buddy_free_list_internal(struct gpu_buddy *mm,
struct list_head *objects)
{
/*
@@ -571,43 +561,43 @@ static void drm_buddy_free_list_internal(struct drm_buddy *mm,
* at this point. For example we might have just failed part of the
* allocation.
*/
- __drm_buddy_free_list(mm, objects, false, false);
+ __gpu_buddy_free_list(mm, objects, false, false);
}
/**
- * drm_buddy_free_list - free blocks
+ * gpu_buddy_free_list - free blocks
*
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
* @objects: input list head to free blocks
- * @flags: optional flags like DRM_BUDDY_CLEARED
+ * @flags: optional flags like GPU_BUDDY_CLEARED
*/
-void drm_buddy_free_list(struct drm_buddy *mm,
+void gpu_buddy_free_list(struct gpu_buddy *mm,
struct list_head *objects,
unsigned int flags)
{
- bool mark_clear = flags & DRM_BUDDY_CLEARED;
+ bool mark_clear = flags & GPU_BUDDY_CLEARED;
- __drm_buddy_free_list(mm, objects, mark_clear, !mark_clear);
+ __gpu_buddy_free_list(mm, objects, mark_clear, !mark_clear);
}
-EXPORT_SYMBOL(drm_buddy_free_list);
+EXPORT_SYMBOL(gpu_buddy_free_list);
-static bool block_incompatible(struct drm_buddy_block *block, unsigned int flags)
+static bool block_incompatible(struct gpu_buddy_block *block, unsigned int flags)
{
- bool needs_clear = flags & DRM_BUDDY_CLEAR_ALLOCATION;
+ bool needs_clear = flags & GPU_BUDDY_CLEAR_ALLOCATION;
- return needs_clear != drm_buddy_block_is_clear(block);
+ return needs_clear != gpu_buddy_block_is_clear(block);
}
-static struct drm_buddy_block *
-__alloc_range_bias(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__alloc_range_bias(struct gpu_buddy *mm,
u64 start, u64 end,
unsigned int order,
unsigned long flags,
bool fallback)
{
u64 req_size = mm->chunk_size << order;
- struct drm_buddy_block *block;
- struct drm_buddy_block *buddy;
+ struct gpu_buddy_block *block;
+ struct gpu_buddy_block *buddy;
LIST_HEAD(dfs);
int err;
int i;
@@ -622,23 +612,23 @@ __alloc_range_bias(struct drm_buddy *mm,
u64 block_end;
block = list_first_entry_or_null(&dfs,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
tmp_link);
if (!block)
break;
list_del(&block->tmp_link);
- if (drm_buddy_block_order(block) < order)
+ if (gpu_buddy_block_order(block) < order)
continue;
- block_start = drm_buddy_block_offset(block);
- block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+ block_start = gpu_buddy_block_offset(block);
+ block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!overlaps(start, end, block_start, block_end))
continue;
- if (drm_buddy_block_is_allocated(block))
+ if (gpu_buddy_block_is_allocated(block))
continue;
if (block_start < start || block_end > end) {
@@ -654,17 +644,17 @@ __alloc_range_bias(struct drm_buddy *mm,
continue;
if (contains(start, end, block_start, block_end) &&
- order == drm_buddy_block_order(block)) {
+ order == gpu_buddy_block_order(block)) {
/*
* Find the free block within the range.
*/
- if (drm_buddy_block_is_free(block))
+ if (gpu_buddy_block_is_free(block))
return block;
continue;
}
- if (!drm_buddy_block_is_split(block)) {
+ if (!gpu_buddy_block_is_split(block)) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
@@ -684,19 +674,19 @@ __alloc_range_bias(struct drm_buddy *mm,
*/
buddy = __get_buddy(block);
if (buddy &&
- (drm_buddy_block_is_free(block) &&
- drm_buddy_block_is_free(buddy)))
- __drm_buddy_free(mm, block, false);
+ (gpu_buddy_block_is_free(block) &&
+ gpu_buddy_block_is_free(buddy)))
+ __gpu_buddy_free(mm, block, false);
return ERR_PTR(err);
}
-static struct drm_buddy_block *
-__drm_buddy_alloc_range_bias(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__gpu_buddy_alloc_range_bias(struct gpu_buddy *mm,
u64 start, u64 end,
unsigned int order,
unsigned long flags)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
bool fallback = false;
block = __alloc_range_bias(mm, start, end, order,
@@ -708,12 +698,12 @@ __drm_buddy_alloc_range_bias(struct drm_buddy *mm,
return block;
}
-static struct drm_buddy_block *
-get_maxblock(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+get_maxblock(struct gpu_buddy *mm,
unsigned int order,
- enum drm_buddy_free_tree tree)
+ enum gpu_buddy_free_tree tree)
{
- struct drm_buddy_block *max_block = NULL, *block = NULL;
+ struct gpu_buddy_block *max_block = NULL, *block = NULL;
struct rb_root *root;
unsigned int i;
@@ -728,8 +718,8 @@ get_maxblock(struct drm_buddy *mm,
continue;
}
- if (drm_buddy_block_offset(block) >
- drm_buddy_block_offset(max_block)) {
+ if (gpu_buddy_block_offset(block) >
+ gpu_buddy_block_offset(max_block)) {
max_block = block;
}
}
@@ -737,25 +727,25 @@ get_maxblock(struct drm_buddy *mm,
return max_block;
}
-static struct drm_buddy_block *
-alloc_from_freetree(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+alloc_from_freetree(struct gpu_buddy *mm,
unsigned int order,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct gpu_buddy_block *block = NULL;
struct rb_root *root;
- enum drm_buddy_free_tree tree;
+ enum gpu_buddy_free_tree tree;
unsigned int tmp;
int err;
- tree = (flags & DRM_BUDDY_CLEAR_ALLOCATION) ?
- DRM_BUDDY_CLEAR_TREE : DRM_BUDDY_DIRTY_TREE;
+ tree = (flags & GPU_BUDDY_CLEAR_ALLOCATION) ?
+ GPU_BUDDY_CLEAR_TREE : GPU_BUDDY_DIRTY_TREE;
- if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+ if (flags & GPU_BUDDY_TOPDOWN_ALLOCATION) {
block = get_maxblock(mm, order, tree);
if (block)
/* Store the obtained block order */
- tmp = drm_buddy_block_order(block);
+ tmp = gpu_buddy_block_order(block);
} else {
for (tmp = order; tmp <= mm->max_order; ++tmp) {
/* Get RB tree root for this order and tree */
@@ -768,8 +758,8 @@ alloc_from_freetree(struct drm_buddy *mm,
if (!block) {
/* Try allocating from the other tree */
- tree = (tree == DRM_BUDDY_CLEAR_TREE) ?
- DRM_BUDDY_DIRTY_TREE : DRM_BUDDY_CLEAR_TREE;
+ tree = (tree == GPU_BUDDY_CLEAR_TREE) ?
+ GPU_BUDDY_DIRTY_TREE : GPU_BUDDY_CLEAR_TREE;
for (tmp = order; tmp <= mm->max_order; ++tmp) {
root = &mm->free_trees[tree][tmp];
@@ -782,7 +772,7 @@ alloc_from_freetree(struct drm_buddy *mm,
return ERR_PTR(-ENOSPC);
}
- BUG_ON(!drm_buddy_block_is_free(block));
+ BUG_ON(!gpu_buddy_block_is_free(block));
while (tmp != order) {
err = split_block(mm, block);
@@ -796,18 +786,18 @@ alloc_from_freetree(struct drm_buddy *mm,
err_undo:
if (tmp != order)
- __drm_buddy_free(mm, block, false);
+ __gpu_buddy_free(mm, block, false);
return ERR_PTR(err);
}
-static int __alloc_range(struct drm_buddy *mm,
+static int __alloc_range(struct gpu_buddy *mm,
struct list_head *dfs,
u64 start, u64 size,
struct list_head *blocks,
u64 *total_allocated_on_err)
{
- struct drm_buddy_block *block;
- struct drm_buddy_block *buddy;
+ struct gpu_buddy_block *block;
+ struct gpu_buddy_block *buddy;
u64 total_allocated = 0;
LIST_HEAD(allocated);
u64 end;
@@ -820,31 +810,31 @@ static int __alloc_range(struct drm_buddy *mm,
u64 block_end;
block = list_first_entry_or_null(dfs,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
tmp_link);
if (!block)
break;
list_del(&block->tmp_link);
- block_start = drm_buddy_block_offset(block);
- block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+ block_start = gpu_buddy_block_offset(block);
+ block_end = block_start + gpu_buddy_block_size(mm, block) - 1;
if (!overlaps(start, end, block_start, block_end))
continue;
- if (drm_buddy_block_is_allocated(block)) {
+ if (gpu_buddy_block_is_allocated(block)) {
err = -ENOSPC;
goto err_free;
}
if (contains(start, end, block_start, block_end)) {
- if (drm_buddy_block_is_free(block)) {
+ if (gpu_buddy_block_is_free(block)) {
mark_allocated(mm, block);
- total_allocated += drm_buddy_block_size(mm, block);
- mm->avail -= drm_buddy_block_size(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ total_allocated += gpu_buddy_block_size(mm, block);
+ mm->avail -= gpu_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail -= gpu_buddy_block_size(mm, block);
list_add_tail(&block->link, &allocated);
continue;
} else if (!mm->clear_avail) {
@@ -853,7 +843,7 @@ static int __alloc_range(struct drm_buddy *mm,
}
}
- if (!drm_buddy_block_is_split(block)) {
+ if (!gpu_buddy_block_is_split(block)) {
err = split_block(mm, block);
if (unlikely(err))
goto err_undo;
@@ -880,22 +870,22 @@ static int __alloc_range(struct drm_buddy *mm,
*/
buddy = __get_buddy(block);
if (buddy &&
- (drm_buddy_block_is_free(block) &&
- drm_buddy_block_is_free(buddy)))
- __drm_buddy_free(mm, block, false);
+ (gpu_buddy_block_is_free(block) &&
+ gpu_buddy_block_is_free(buddy)))
+ __gpu_buddy_free(mm, block, false);
err_free:
if (err == -ENOSPC && total_allocated_on_err) {
list_splice_tail(&allocated, blocks);
*total_allocated_on_err = total_allocated;
} else {
- drm_buddy_free_list_internal(mm, &allocated);
+ gpu_buddy_free_list_internal(mm, &allocated);
}
return err;
}
-static int __drm_buddy_alloc_range(struct drm_buddy *mm,
+static int __gpu_buddy_alloc_range(struct gpu_buddy *mm,
u64 start,
u64 size,
u64 *total_allocated_on_err,
@@ -911,13 +901,13 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
blocks, total_allocated_on_err);
}
-static int __alloc_contig_try_harder(struct drm_buddy *mm,
+static int __alloc_contig_try_harder(struct gpu_buddy *mm,
u64 size,
u64 min_block_size,
struct list_head *blocks)
{
u64 rhs_offset, lhs_offset, lhs_size, filled;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
unsigned int tree, order;
LIST_HEAD(blocks_lhs);
unsigned long pages;
@@ -943,8 +933,8 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
block = rbtree_get_free_block(iter);
/* Allocate blocks traversing RHS */
- rhs_offset = drm_buddy_block_offset(block);
- err = __drm_buddy_alloc_range(mm, rhs_offset, size,
+ rhs_offset = gpu_buddy_block_offset(block);
+ err = __gpu_buddy_alloc_range(mm, rhs_offset, size,
&filled, blocks);
if (!err || err != -ENOSPC)
return err;
@@ -954,18 +944,18 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
lhs_size = round_up(lhs_size, min_block_size);
/* Allocate blocks traversing LHS */
- lhs_offset = drm_buddy_block_offset(block) - lhs_size;
- err = __drm_buddy_alloc_range(mm, lhs_offset, lhs_size,
+ lhs_offset = gpu_buddy_block_offset(block) - lhs_size;
+ err = __gpu_buddy_alloc_range(mm, lhs_offset, lhs_size,
NULL, &blocks_lhs);
if (!err) {
list_splice(&blocks_lhs, blocks);
return 0;
} else if (err != -ENOSPC) {
- drm_buddy_free_list_internal(mm, blocks);
+ gpu_buddy_free_list_internal(mm, blocks);
return err;
}
/* Free blocks for the next iteration */
- drm_buddy_free_list_internal(mm, blocks);
+ gpu_buddy_free_list_internal(mm, blocks);
iter = rb_prev(iter);
}
@@ -975,9 +965,9 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
}
/**
- * drm_buddy_block_trim - free unused pages
+ * gpu_buddy_block_trim - free unused pages
*
- * @mm: DRM buddy manager
+ * @mm: GPU buddy manager
* @start: start address to begin the trimming.
* @new_size: original size requested
* @blocks: Input and output list of allocated blocks.
@@ -993,13 +983,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm,
* Returns:
* 0 on success, error code on failure.
*/
-int drm_buddy_block_trim(struct drm_buddy *mm,
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
u64 *start,
u64 new_size,
struct list_head *blocks)
{
- struct drm_buddy_block *parent;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *parent;
+ struct gpu_buddy_block *block;
u64 block_start, block_end;
LIST_HEAD(dfs);
u64 new_start;
@@ -1009,22 +999,22 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
return -EINVAL;
block = list_first_entry(blocks,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
link);
- block_start = drm_buddy_block_offset(block);
- block_end = block_start + drm_buddy_block_size(mm, block);
+ block_start = gpu_buddy_block_offset(block);
+ block_end = block_start + gpu_buddy_block_size(mm, block);
- if (WARN_ON(!drm_buddy_block_is_allocated(block)))
+ if (WARN_ON(!gpu_buddy_block_is_allocated(block)))
return -EINVAL;
- if (new_size > drm_buddy_block_size(mm, block))
+ if (new_size > gpu_buddy_block_size(mm, block))
return -EINVAL;
if (!new_size || !IS_ALIGNED(new_size, mm->chunk_size))
return -EINVAL;
- if (new_size == drm_buddy_block_size(mm, block))
+ if (new_size == gpu_buddy_block_size(mm, block))
return 0;
new_start = block_start;
@@ -1043,9 +1033,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
list_del(&block->link);
mark_free(mm, block);
- mm->avail += drm_buddy_block_size(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail += drm_buddy_block_size(mm, block);
+ mm->avail += gpu_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail += gpu_buddy_block_size(mm, block);
/* Prevent recursively freeing this node */
parent = block->parent;
@@ -1055,26 +1045,26 @@ int drm_buddy_block_trim(struct drm_buddy *mm,
err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL);
if (err) {
mark_allocated(mm, block);
- mm->avail -= drm_buddy_block_size(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ mm->avail -= gpu_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail -= gpu_buddy_block_size(mm, block);
list_add(&block->link, blocks);
}
block->parent = parent;
return err;
}
-EXPORT_SYMBOL(drm_buddy_block_trim);
+EXPORT_SYMBOL(gpu_buddy_block_trim);
-static struct drm_buddy_block *
-__drm_buddy_alloc_blocks(struct drm_buddy *mm,
+static struct gpu_buddy_block *
+__gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
u64 start, u64 end,
unsigned int order,
unsigned long flags)
{
- if (flags & DRM_BUDDY_RANGE_ALLOCATION)
+ if (flags & GPU_BUDDY_RANGE_ALLOCATION)
/* Allocate traversing within the range */
- return __drm_buddy_alloc_range_bias(mm, start, end,
+ return __gpu_buddy_alloc_range_bias(mm, start, end,
order, flags);
else
/* Allocate from freetree */
@@ -1082,15 +1072,15 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
}
/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
+ * gpu_buddy_alloc_blocks - allocate power-of-two blocks
*
- * @mm: DRM buddy manager to allocate from
+ * @mm: GPU buddy manager to allocate from
* @start: start of the allowed range for this block
* @end: end of the allowed range for this block
* @size: size of the allocation in bytes
* @min_block_size: alignment of the allocation
* @blocks: output list head to add allocated blocks
- * @flags: DRM_BUDDY_*_ALLOCATION flags
+ * @flags: GPU_BUDDY_*_ALLOCATION flags
*
* alloc_range_bias() called on range limitations, which traverses
* the tree and returns the desired block.
@@ -1101,13 +1091,13 @@ __drm_buddy_alloc_blocks(struct drm_buddy *mm,
* Returns:
* 0 on success, error code on failure.
*/
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
u64 start, u64 end, u64 size,
u64 min_block_size,
struct list_head *blocks,
unsigned long flags)
{
- struct drm_buddy_block *block = NULL;
+ struct gpu_buddy_block *block = NULL;
u64 original_size, original_min_size;
unsigned int min_order, order;
LIST_HEAD(allocated);
@@ -1137,14 +1127,14 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
if (!IS_ALIGNED(start | end, min_block_size))
return -EINVAL;
- return __drm_buddy_alloc_range(mm, start, size, NULL, blocks);
+ return __gpu_buddy_alloc_range(mm, start, size, NULL, blocks);
}
original_size = size;
original_min_size = min_block_size;
/* Roundup the size to power of 2 */
- if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) {
+ if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) {
size = roundup_pow_of_two(size);
min_block_size = size;
/* Align size value to min_block_size */
@@ -1157,8 +1147,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
min_order = ilog2(min_block_size) - ilog2(mm->chunk_size);
if (order > mm->max_order || size > mm->size) {
- if ((flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) &&
- !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+ if ((flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION) &&
+ !(flags & GPU_BUDDY_RANGE_ALLOCATION))
return __alloc_contig_try_harder(mm, original_size,
original_min_size, blocks);
@@ -1171,7 +1161,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
BUG_ON(order < min_order);
do {
- block = __drm_buddy_alloc_blocks(mm, start,
+ block = __gpu_buddy_alloc_blocks(mm, start,
end,
order,
flags);
@@ -1182,7 +1172,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
/* Try allocation through force merge method */
if (mm->clear_avail &&
!__force_merge(mm, start, end, min_order)) {
- block = __drm_buddy_alloc_blocks(mm, start,
+ block = __gpu_buddy_alloc_blocks(mm, start,
end,
min_order,
flags);
@@ -1196,8 +1186,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
* Try contiguous block allocation through
* try harder method.
*/
- if (flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION &&
- !(flags & DRM_BUDDY_RANGE_ALLOCATION))
+ if (flags & GPU_BUDDY_CONTIGUOUS_ALLOCATION &&
+ !(flags & GPU_BUDDY_RANGE_ALLOCATION))
return __alloc_contig_try_harder(mm,
original_size,
original_min_size,
@@ -1208,9 +1198,9 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
} while (1);
mark_allocated(mm, block);
- mm->avail -= drm_buddy_block_size(mm, block);
- if (drm_buddy_block_is_clear(block))
- mm->clear_avail -= drm_buddy_block_size(mm, block);
+ mm->avail -= gpu_buddy_block_size(mm, block);
+ if (gpu_buddy_block_is_clear(block))
+ mm->clear_avail -= gpu_buddy_block_size(mm, block);
kmemleak_update_trace(block);
list_add_tail(&block->link, &allocated);
@@ -1221,7 +1211,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
} while (1);
/* Trim the allocated block to the required size */
- if (!(flags & DRM_BUDDY_TRIM_DISABLE) &&
+ if (!(flags & GPU_BUDDY_TRIM_DISABLE) &&
original_size != size) {
struct list_head *trim_list;
LIST_HEAD(temp);
@@ -1234,11 +1224,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
block = list_last_entry(&allocated, typeof(*block), link);
list_move(&block->link, &temp);
trim_list = &temp;
- trim_size = drm_buddy_block_size(mm, block) -
+ trim_size = gpu_buddy_block_size(mm, block) -
(size - original_size);
}
- drm_buddy_block_trim(mm,
+ gpu_buddy_block_trim(mm,
NULL,
trim_size,
trim_list);
@@ -1251,44 +1241,42 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
return 0;
err_free:
- drm_buddy_free_list_internal(mm, &allocated);
+ gpu_buddy_free_list_internal(mm, &allocated);
return err;
}
-EXPORT_SYMBOL(drm_buddy_alloc_blocks);
+EXPORT_SYMBOL(gpu_buddy_alloc_blocks);
/**
- * drm_buddy_block_print - print block information
+ * gpu_buddy_block_print - print block information
*
- * @mm: DRM buddy manager
- * @block: DRM buddy block
- * @p: DRM printer to use
+ * @mm: GPU buddy manager
+ * @block: GPU buddy block
*/
-void drm_buddy_block_print(struct drm_buddy *mm,
- struct drm_buddy_block *block,
- struct drm_printer *p)
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- u64 start = drm_buddy_block_offset(block);
- u64 size = drm_buddy_block_size(mm, block);
+ u64 start = gpu_buddy_block_offset(block);
+ u64 size = gpu_buddy_block_size(mm, block);
- drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
+ pr_info("%#018llx-%#018llx: %llu\n", start, start + size, size);
}
-EXPORT_SYMBOL(drm_buddy_block_print);
+EXPORT_SYMBOL(gpu_buddy_block_print);
/**
- * drm_buddy_print - print allocator state
+ * gpu_buddy_print - print allocator state
*
- * @mm: DRM buddy manager
- * @p: DRM printer to use
+ * @mm: GPU buddy manager
+ * @p: GPU printer to use
*/
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
+void gpu_buddy_print(struct gpu_buddy *mm)
{
int order;
- drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
- mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
+ pr_info("chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+ mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
for (order = mm->max_order; order >= 0; order--) {
- struct drm_buddy_block *block, *tmp;
+ struct gpu_buddy_block *block, *tmp;
struct rb_root *root;
u64 count = 0, free;
unsigned int tree;
@@ -1297,40 +1285,38 @@ void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p)
root = &mm->free_trees[tree][order];
rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
- BUG_ON(!drm_buddy_block_is_free(block));
+ BUG_ON(!gpu_buddy_block_is_free(block));
count++;
}
}
- drm_printf(p, "order-%2d ", order);
-
free = count * (mm->chunk_size << order);
if (free < SZ_1M)
- drm_printf(p, "free: %8llu KiB", free >> 10);
+ pr_info("order-%2d free: %8llu KiB, blocks: %llu\n",
+ order, free >> 10, count);
else
- drm_printf(p, "free: %8llu MiB", free >> 20);
-
- drm_printf(p, ", blocks: %llu\n", count);
+ pr_info("order-%2d free: %8llu MiB, blocks: %llu\n",
+ order, free >> 20, count);
}
}
-EXPORT_SYMBOL(drm_buddy_print);
+EXPORT_SYMBOL(gpu_buddy_print);
-static void drm_buddy_module_exit(void)
+static void gpu_buddy_module_exit(void)
{
kmem_cache_destroy(slab_blocks);
}
-static int __init drm_buddy_module_init(void)
+static int __init gpu_buddy_module_init(void)
{
- slab_blocks = KMEM_CACHE(drm_buddy_block, 0);
+ slab_blocks = KMEM_CACHE(gpu_buddy_block, 0);
if (!slab_blocks)
return -ENOMEM;
return 0;
}
-module_init(drm_buddy_module_init);
-module_exit(drm_buddy_module_exit);
+module_init(gpu_buddy_module_init);
+module_exit(gpu_buddy_module_exit);
-MODULE_DESCRIPTION("DRM Buddy Allocator");
+MODULE_DESCRIPTION("GPU Buddy Allocator");
MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index ca2a2801e77f..f48d00fe28cc 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -220,6 +220,7 @@ config DRM_GPUSVM
config DRM_BUDDY
tristate
depends on DRM
+ select GPU_BUDDY
help
A page based buddy allocator
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 5c86bc908955..6ff0f6f10b58 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -113,7 +113,7 @@ drm_gpusvm_helper-$(CONFIG_ZONE_DEVICE) += \
obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
-obj-$(CONFIG_DRM_BUDDY) += ../buddy.o
+obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
drm_dma_helper-y := drm_gem_dma_helper.o
drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f582113d78b7..149f8f942eae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5663,7 +5663,7 @@ int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct amdgpu_vram_mgr_resource *vres;
struct ras_critical_region *region;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
int ret = 0;
if (!bo || !bo->tbo.resource)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index be2e56ce1355..8908d9e08a30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -55,7 +55,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct list_head *head, *next;
struct drm_mm_node *node;
@@ -71,7 +71,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
head = &to_amdgpu_vram_mgr_resource(res)->blocks;
block = list_first_entry_or_null(head,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
link);
if (!block)
goto fallback;
@@ -81,7 +81,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
next = block->link.next;
if (next != head)
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
}
cur->start = amdgpu_vram_mgr_block_start(block) + start;
@@ -125,7 +125,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct drm_mm_node *node;
struct list_head *next;
@@ -146,7 +146,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
block = cur->node;
next = block->link.next;
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
cur->node = block;
cur->start = amdgpu_vram_mgr_block_start(block);
@@ -175,7 +175,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
*/
static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
switch (cur->mem_type) {
case TTM_PL_VRAM:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9d934c07fa6b..cd94f6efb7cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -25,6 +25,7 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
#include <drm/drm_drv.h>
+#include <drm/drm_buddy.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -52,15 +53,15 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
-static inline struct drm_buddy_block *
+static inline struct gpu_buddy_block *
amdgpu_vram_mgr_first_block(struct list_head *list)
{
- return list_first_entry_or_null(list, struct drm_buddy_block, link);
+ return list_first_entry_or_null(list, struct gpu_buddy_block, link);
}
static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 start, size;
block = amdgpu_vram_mgr_first_block(head);
@@ -71,7 +72,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
start = amdgpu_vram_mgr_block_start(block);
size = amdgpu_vram_mgr_block_size(block);
- block = list_entry(block->link.next, struct drm_buddy_block, link);
+ block = list_entry(block->link.next, struct gpu_buddy_block, link);
if (start + size != amdgpu_vram_mgr_block_start(block))
return false;
}
@@ -81,7 +82,7 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 size = 0;
list_for_each_entry(block, head, link)
@@ -254,7 +255,7 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
* Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_buddy_block *block)
+ struct gpu_buddy_block *block)
{
u64 start = amdgpu_vram_mgr_block_start(block);
u64 end = start + amdgpu_vram_mgr_block_size(block);
@@ -279,7 +280,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 usage = 0;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
@@ -299,15 +300,15 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
uint64_t vis_usage;
list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
- if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ if (gpu_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
rsv->size, mm->chunk_size, &rsv->allocated,
- DRM_BUDDY_RANGE_ALLOCATION))
+ GPU_BUDDY_RANGE_ALLOCATION))
continue;
block = amdgpu_vram_mgr_first_block(&rsv->allocated);
@@ -403,7 +404,7 @@ int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
uint64_t address, struct amdgpu_vram_block_info *info)
{
struct amdgpu_vram_mgr_resource *vres;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 start, size;
int ret = -ENOENT;
@@ -450,8 +451,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
unsigned int adjust_dcc_size = 0;
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
unsigned long pages_per_block;
int r;
@@ -493,17 +494,17 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&vres->blocks);
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
- vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ vres->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
- vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+ vres->flags |= GPU_BUDDY_CLEAR_ALLOCATION;
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
- vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ vres->flags |= GPU_BUDDY_RANGE_ALLOCATION;
if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
adev->gmc.gmc_funcs->get_dcc_alignment)
@@ -516,7 +517,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
remaining_size = (u64)dcc_size;
- vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ vres->flags |= GPU_BUDDY_TRIM_DISABLE;
}
mutex_lock(&mgr->lock);
@@ -536,7 +537,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
BUG_ON(min_block_size < mm->chunk_size);
- r = drm_buddy_alloc_blocks(mm, fpfn,
+ r = gpu_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
@@ -545,7 +546,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
!(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
- vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ vres->flags &= ~GPU_BUDDY_CONTIGUOUS_ALLOCATION;
pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
tbo->page_alignment);
@@ -566,7 +567,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
- struct drm_buddy_block *dcc_block;
+ struct gpu_buddy_block *dcc_block;
unsigned long dcc_start;
u64 trim_start;
@@ -576,7 +577,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
adjust_dcc_size);
trim_start = (u64)dcc_start;
- drm_buddy_block_trim(mm, &trim_start,
+ gpu_buddy_block_trim(mm, &trim_start,
(u64)vres->base.size,
&vres->blocks);
}
@@ -614,7 +615,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks, 0);
+ gpu_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -637,8 +638,8 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
uint64_t vis_usage = 0;
mutex_lock(&mgr->lock);
@@ -649,7 +650,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
list_for_each_entry(block, &vres->blocks, link)
vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
- drm_buddy_free_list(mm, &vres->blocks, vres->flags);
+ gpu_buddy_free_list(mm, &vres->blocks, vres->flags);
amdgpu_vram_mgr_do_reserve(man);
mutex_unlock(&mgr->lock);
@@ -688,7 +689,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_BUDDY blocks to export */
+ /* Determine the number of GPU_BUDDY blocks to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
@@ -704,10 +705,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg->length = 0;
/*
- * Walk down DRM_BUDDY blocks to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_BUDDY block
+ * Walk down GPU_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the GPU_BUDDY block
* and the number of bytes from it. Access the following
- * DRM_BUDDY block(s) if more buffer needs to exported
+ * GPU_BUDDY block(s) if more buffer needs to exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
@@ -792,10 +793,10 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
mutex_lock(&mgr->lock);
- drm_buddy_reset_clear(mm, false);
+ gpu_buddy_reset_clear(mm, false);
mutex_unlock(&mgr->lock);
}
@@ -815,7 +816,7 @@ static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
size_t size)
{
struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
/* Check each drm buddy block individually */
list_for_each_entry(block, &mgr->blocks, link) {
@@ -848,7 +849,7 @@ static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
size_t size)
{
struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
/* Check each drm buddy block individually */
list_for_each_entry(block, &mgr->blocks, link) {
@@ -877,7 +878,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv;
drm_printf(printer, " vis usage:%llu\n",
@@ -930,7 +931,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
mgr->default_page_size = PAGE_SIZE;
man->func = &amdgpu_vram_mgr_func;
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ err = gpu_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
if (err)
return err;
@@ -965,11 +966,11 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
+ gpu_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
- drm_buddy_fini(&mgr->mm);
+ gpu_buddy_fini(&mgr->mm);
mutex_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 874779618056..429a21a2e9b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -28,7 +28,7 @@
struct amdgpu_vram_mgr {
struct ttm_resource_manager manager;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
/* protects access to buffer objects */
struct mutex lock;
struct list_head reservations_pending;
@@ -57,19 +57,19 @@ struct amdgpu_vram_mgr_resource {
struct amdgpu_vres_task task;
};
-static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_start(struct gpu_buddy_block *block)
{
- return drm_buddy_block_offset(block);
+ return gpu_buddy_block_offset(block);
}
-static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+static inline u64 amdgpu_vram_mgr_block_size(struct gpu_buddy_block *block)
{
- return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+ return (u64)PAGE_SIZE << gpu_buddy_block_order(block);
}
-static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+static inline bool amdgpu_vram_mgr_is_cleared(struct gpu_buddy_block *block)
{
- return drm_buddy_block_is_clear(block);
+ return gpu_buddy_block_is_clear(block);
}
static inline struct amdgpu_vram_mgr_resource *
@@ -82,8 +82,8 @@ static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
{
struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
- WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
- ares->flags |= DRM_BUDDY_CLEARED;
+ WARN_ON(ares->flags & GPU_BUDDY_CLEARED);
+ ares->flags |= GPU_BUDDY_CLEARED;
}
int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
new file mode 100644
index 000000000000..841f3de5f307
--- /dev/null
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <kunit/test-bug.h>
+
+#include <linux/export.h>
+#include <linux/kmemleak.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+
+#include <linux/gpu_buddy.h>
+#include <drm/drm_buddy.h>
+#include <drm/drm_print.h>
+
+/**
+ * drm_buddy_block_print - print block information
+ *
+ * @mm: DRM buddy manager
+ * @block: DRM buddy block
+ * @p: DRM printer to use
+ */
+void drm_buddy_block_print(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block,
+ struct drm_printer *p)
+{
+ u64 start = gpu_buddy_block_offset(block);
+ u64 size = gpu_buddy_block_size(mm, block);
+
+ drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size);
+}
+EXPORT_SYMBOL(drm_buddy_block_print);
+
+/**
+ * drm_buddy_print - print allocator state
+ *
+ * @mm: DRM buddy manager
+ * @p: DRM printer to use
+ */
+void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p)
+{
+ int order;
+
+ drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB, clear_free: %lluMiB\n",
+ mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20, mm->clear_avail >> 20);
+
+ for (order = mm->max_order; order >= 0; order--) {
+ struct gpu_buddy_block *block, *tmp;
+ struct rb_root *root;
+ u64 count = 0, free;
+ unsigned int tree;
+
+ for_each_free_tree(tree) {
+ root = &mm->free_trees[tree][order];
+
+ rbtree_postorder_for_each_entry_safe(block, tmp, root, rb) {
+ BUG_ON(!gpu_buddy_block_is_free(block));
+ count++;
+ }
+ }
+
+ drm_printf(p, "order-%2d ", order);
+
+ free = count * (mm->chunk_size << order);
+ if (free < SZ_1M)
+ drm_printf(p, "free: %8llu KiB", free >> 10);
+ else
+ drm_printf(p, "free: %8llu MiB", free >> 20);
+
+ drm_printf(p, ", blocks: %llu\n", count);
+ }
+}
+EXPORT_SYMBOL(drm_buddy_print);
+
+MODULE_DESCRIPTION("DRM-specific GPU Buddy Allocator Print Helpers");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index 30246f02bcfe..6a34dae13769 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -167,9 +167,9 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
const u64 size = res->size;
const u32 max_segment = round_down(UINT_MAX, page_alignment);
- struct drm_buddy *mm = bman_res->mm;
+ struct gpu_buddy *mm = bman_res->mm;
struct list_head *blocks = &bman_res->blocks;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct i915_refct_sgt *rsgt;
struct scatterlist *sg;
struct sg_table *st;
@@ -202,8 +202,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
list_for_each_entry(block, blocks, link) {
u64 block_size, offset;
- block_size = min_t(u64, size, drm_buddy_block_size(mm, block));
- offset = drm_buddy_block_offset(block);
+ block_size = min_t(u64, size, gpu_buddy_block_size(mm, block));
+ offset = gpu_buddy_block_offset(block);
while (block_size) {
u64 len;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 6b256d95badd..c5ca90088705 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/gpu_buddy.h>
+#include <drm/drm_buddy.h>
#include <drm/drm_print.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_bo.h>
@@ -16,7 +17,7 @@
struct i915_ttm_buddy_manager {
struct ttm_resource_manager manager;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
struct list_head reserved;
struct mutex lock;
unsigned long visible_size;
@@ -38,7 +39,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
{
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
struct i915_ttm_buddy_resource *bman_res;
- struct drm_buddy *mm = &bman->mm;
+ struct gpu_buddy *mm = &bman->mm;
unsigned long n_pages, lpfn;
u64 min_page_size;
u64 size;
@@ -57,13 +58,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
bman_res->mm = mm;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ bman_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
- bman_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ bman_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
if (place->fpfn || lpfn != man->size)
- bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ bman_res->flags |= GPU_BUDDY_RANGE_ALLOCATION;
GEM_BUG_ON(!bman_res->base.size);
size = bman_res->base.size;
@@ -89,7 +90,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
goto err_free_res;
}
- err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+ err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
(u64)lpfn << PAGE_SHIFT,
(u64)n_pages << PAGE_SHIFT,
min_page_size,
@@ -101,15 +102,15 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
if (lpfn <= bman->visible_size) {
bman_res->used_visible_size = PFN_UP(bman_res->base.size);
} else {
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
list_for_each_entry(block, &bman_res->blocks, link) {
unsigned long start =
- drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ gpu_buddy_block_offset(block) >> PAGE_SHIFT;
if (start < bman->visible_size) {
unsigned long end = start +
- (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+ (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
bman_res->used_visible_size +=
min(end, bman->visible_size) - start;
@@ -126,7 +127,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
return 0;
err_free_blocks:
- drm_buddy_free_list(mm, &bman_res->blocks, 0);
+ gpu_buddy_free_list(mm, &bman_res->blocks, 0);
mutex_unlock(&bman->lock);
err_free_res:
ttm_resource_fini(man, &bman_res->base);
@@ -141,7 +142,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
mutex_lock(&bman->lock);
- drm_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
+ gpu_buddy_free_list(&bman->mm, &bman_res->blocks, 0);
bman->visible_avail += bman_res->used_visible_size;
mutex_unlock(&bman->lock);
@@ -156,8 +157,8 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
- struct drm_buddy *mm = &bman->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &bman->mm;
+ struct gpu_buddy_block *block;
if (!place->fpfn && !place->lpfn)
return true;
@@ -176,9 +177,9 @@ static bool i915_ttm_buddy_man_intersects(struct ttm_resource_manager *man,
/* Check each drm buddy block individually */
list_for_each_entry(block, &bman_res->blocks, link) {
unsigned long fpfn =
- drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ gpu_buddy_block_offset(block) >> PAGE_SHIFT;
unsigned long lpfn = fpfn +
- (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+ (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
if (place->fpfn < lpfn && place->lpfn > fpfn)
return true;
@@ -194,8 +195,8 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
- struct drm_buddy *mm = &bman->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &bman->mm;
+ struct gpu_buddy_block *block;
if (!place->fpfn && !place->lpfn)
return true;
@@ -209,9 +210,9 @@ static bool i915_ttm_buddy_man_compatible(struct ttm_resource_manager *man,
/* Check each drm buddy block individually */
list_for_each_entry(block, &bman_res->blocks, link) {
unsigned long fpfn =
- drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ gpu_buddy_block_offset(block) >> PAGE_SHIFT;
unsigned long lpfn = fpfn +
- (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+ (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
if (fpfn < place->fpfn || lpfn > place->lpfn)
return false;
@@ -224,7 +225,7 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
mutex_lock(&bman->lock);
drm_printf(printer, "default_page_size: %lluKiB\n",
@@ -293,7 +294,7 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev,
if (!bman)
return -ENOMEM;
- err = drm_buddy_init(&bman->mm, size, chunk_size);
+ err = gpu_buddy_init(&bman->mm, size, chunk_size);
if (err)
goto err_free_bman;
@@ -333,7 +334,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
{
struct ttm_resource_manager *man = ttm_manager_type(bdev, type);
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
- struct drm_buddy *mm = &bman->mm;
+ struct gpu_buddy *mm = &bman->mm;
int ret;
ttm_resource_manager_set_used(man, false);
@@ -345,8 +346,8 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
ttm_set_driver_manager(bdev, type, NULL);
mutex_lock(&bman->lock);
- drm_buddy_free_list(mm, &bman->reserved, 0);
- drm_buddy_fini(mm);
+ gpu_buddy_free_list(mm, &bman->reserved, 0);
+ gpu_buddy_fini(mm);
bman->visible_avail += bman->visible_reserved;
WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
mutex_unlock(&bman->lock);
@@ -371,15 +372,15 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
u64 start, u64 size)
{
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
- struct drm_buddy *mm = &bman->mm;
+ struct gpu_buddy *mm = &bman->mm;
unsigned long fpfn = start >> PAGE_SHIFT;
unsigned long flags = 0;
int ret;
- flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ flags |= GPU_BUDDY_RANGE_ALLOCATION;
mutex_lock(&bman->lock);
- ret = drm_buddy_alloc_blocks(mm, start,
+ ret = gpu_buddy_alloc_blocks(mm, start,
start + size,
size, mm->chunk_size,
&bman->reserved,
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index d64620712830..1cff018c1689 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -13,7 +13,7 @@
struct ttm_device;
struct ttm_resource_manager;
-struct drm_buddy;
+struct gpu_buddy;
/**
* struct i915_ttm_buddy_resource
@@ -33,7 +33,7 @@ struct i915_ttm_buddy_resource {
struct list_head blocks;
unsigned long flags;
unsigned long used_visible_size;
- struct drm_buddy *mm;
+ struct gpu_buddy *mm;
};
/**
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 7b856b5090f9..8307390943a2 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -6,7 +6,7 @@
#include <linux/prime_numbers.h>
#include <linux/sort.h>
-#include <drm/drm_buddy.h>
+#include <linux/gpu_buddy.h>
#include "../i915_selftest.h"
@@ -371,7 +371,7 @@ static int igt_mock_splintered_region(void *arg)
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
struct drm_i915_gem_object *obj;
- struct drm_buddy *mm;
+ struct gpu_buddy *mm;
unsigned int expected_order;
LIST_HEAD(objects);
u64 size;
@@ -447,8 +447,8 @@ static int igt_mock_max_segment(void *arg)
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
struct drm_i915_gem_object *obj;
- struct drm_buddy_block *block;
- struct drm_buddy *mm;
+ struct gpu_buddy_block *block;
+ struct gpu_buddy *mm;
struct list_head *blocks;
struct scatterlist *sg;
I915_RND_STATE(prng);
@@ -487,8 +487,8 @@ static int igt_mock_max_segment(void *arg)
mm = res->mm;
size = 0;
list_for_each_entry(block, blocks, link) {
- if (drm_buddy_block_size(mm, block) > size)
- size = drm_buddy_block_size(mm, block);
+ if (gpu_buddy_block_size(mm, block) > size)
+ size = gpu_buddy_block_size(mm, block);
}
if (size < max_segment) {
pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
@@ -527,14 +527,14 @@ static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
struct intel_memory_region *mr = obj->mm.region;
struct i915_ttm_buddy_resource *bman_res =
to_ttm_buddy_resource(obj->mm.res);
- struct drm_buddy *mm = bman_res->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = bman_res->mm;
+ struct gpu_buddy_block *block;
u64 total;
total = 0;
list_for_each_entry(block, &bman_res->blocks, link) {
- u64 start = drm_buddy_block_offset(block);
- u64 end = start + drm_buddy_block_size(mm, block);
+ u64 start = gpu_buddy_block_offset(block);
+ u64 end = start + gpu_buddy_block_size(mm, block);
if (start < resource_size(&mr->io))
total += min_t(u64, end, resource_size(&mr->io)) - start;
diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
index 6d95447a989d..e32f3c8d7b84 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c
@@ -251,7 +251,7 @@ static void ttm_bo_validate_basic(struct kunit *test)
NULL, &dummy_ttm_bo_destroy);
KUNIT_EXPECT_EQ(test, err, 0);
- snd_place = ttm_place_kunit_init(test, snd_mem, DRM_BUDDY_TOPDOWN_ALLOCATION);
+ snd_place = ttm_place_kunit_init(test, snd_mem, GPU_BUDDY_TOPDOWN_ALLOCATION);
snd_placement = ttm_placement_kunit_init(test, snd_place, 1);
err = ttm_bo_validate(bo, snd_placement, &ctx_val);
@@ -263,7 +263,7 @@ static void ttm_bo_validate_basic(struct kunit *test)
KUNIT_EXPECT_TRUE(test, ttm_tt_is_populated(bo->ttm));
KUNIT_EXPECT_EQ(test, bo->resource->mem_type, snd_mem);
KUNIT_EXPECT_EQ(test, bo->resource->placement,
- DRM_BUDDY_TOPDOWN_ALLOCATION);
+ GPU_BUDDY_TOPDOWN_ALLOCATION);
ttm_bo_fini(bo);
ttm_mock_manager_fini(priv->ttm_dev, snd_mem);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
index dd395229e388..294d56d9067e 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.c
@@ -31,7 +31,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
{
struct ttm_mock_manager *manager = to_mock_mgr(man);
struct ttm_mock_resource *mock_res;
- struct drm_buddy *mm = &manager->mm;
+ struct gpu_buddy *mm = &manager->mm;
u64 lpfn, fpfn, alloc_size;
int err;
@@ -47,14 +47,14 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&mock_res->blocks);
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- mock_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ mock_res->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
- mock_res->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ mock_res->flags |= GPU_BUDDY_CONTIGUOUS_ALLOCATION;
alloc_size = (uint64_t)mock_res->base.size;
mutex_lock(&manager->lock);
- err = drm_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size,
+ err = gpu_buddy_alloc_blocks(mm, fpfn, lpfn, alloc_size,
manager->default_page_size,
&mock_res->blocks,
mock_res->flags);
@@ -67,7 +67,7 @@ static int ttm_mock_manager_alloc(struct ttm_resource_manager *man,
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &mock_res->blocks, 0);
+ gpu_buddy_free_list(mm, &mock_res->blocks, 0);
ttm_resource_fini(man, &mock_res->base);
mutex_unlock(&manager->lock);
@@ -79,10 +79,10 @@ static void ttm_mock_manager_free(struct ttm_resource_manager *man,
{
struct ttm_mock_manager *manager = to_mock_mgr(man);
struct ttm_mock_resource *mock_res = to_mock_mgr_resource(res);
- struct drm_buddy *mm = &manager->mm;
+ struct gpu_buddy *mm = &manager->mm;
mutex_lock(&manager->lock);
- drm_buddy_free_list(mm, &mock_res->blocks, 0);
+ gpu_buddy_free_list(mm, &mock_res->blocks, 0);
mutex_unlock(&manager->lock);
ttm_resource_fini(man, res);
@@ -106,7 +106,7 @@ int ttm_mock_manager_init(struct ttm_device *bdev, u32 mem_type, u32 size)
mutex_init(&manager->lock);
- err = drm_buddy_init(&manager->mm, size, PAGE_SIZE);
+ err = gpu_buddy_init(&manager->mm, size, PAGE_SIZE);
if (err) {
kfree(manager);
@@ -142,7 +142,7 @@ void ttm_mock_manager_fini(struct ttm_device *bdev, u32 mem_type)
ttm_resource_manager_set_used(man, false);
mutex_lock(&mock_man->lock);
- drm_buddy_fini(&mock_man->mm);
+ gpu_buddy_fini(&mock_man->mm);
mutex_unlock(&mock_man->lock);
ttm_set_driver_manager(bdev, mem_type, NULL);
diff --git a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
index 96ea8c9aae34..08710756fd8e 100644
--- a/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
+++ b/drivers/gpu/drm/ttm/tests/ttm_mock_manager.h
@@ -9,7 +9,7 @@
struct ttm_mock_manager {
struct ttm_resource_manager man;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
u64 default_page_size;
/* protects allocations of mock buffer objects */
struct mutex lock;
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index 4e00008b7081..5f4ab08c0686 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -58,7 +58,7 @@ struct xe_res_cursor {
/** @dma_addr: Current element in a struct drm_pagemap_addr array */
const struct drm_pagemap_addr *dma_addr;
/** @mm: Buddy allocator for VRAM cursor */
- struct drm_buddy *mm;
+ struct gpu_buddy *mm;
/**
* @dma_start: DMA start address for the current segment.
* This may be different to @dma_addr.addr since elements in
@@ -69,7 +69,7 @@ struct xe_res_cursor {
u64 dma_seg_size;
};
-static struct drm_buddy *xe_res_get_buddy(struct ttm_resource *res)
+static struct gpu_buddy *xe_res_get_buddy(struct ttm_resource *res)
{
struct ttm_resource_manager *mgr;
@@ -104,30 +104,30 @@ static inline void xe_res_first(struct ttm_resource *res,
case XE_PL_STOLEN:
case XE_PL_VRAM0:
case XE_PL_VRAM1: {
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct list_head *head, *next;
- struct drm_buddy *mm = xe_res_get_buddy(res);
+ struct gpu_buddy *mm = xe_res_get_buddy(res);
head = &to_xe_ttm_vram_mgr_resource(res)->blocks;
block = list_first_entry_or_null(head,
- struct drm_buddy_block,
+ struct gpu_buddy_block,
link);
if (!block)
goto fallback;
- while (start >= drm_buddy_block_size(mm, block)) {
- start -= drm_buddy_block_size(mm, block);
+ while (start >= gpu_buddy_block_size(mm, block)) {
+ start -= gpu_buddy_block_size(mm, block);
next = block->link.next;
if (next != head)
- block = list_entry(next, struct drm_buddy_block,
+ block = list_entry(next, struct gpu_buddy_block,
link);
}
cur->mm = mm;
- cur->start = drm_buddy_block_offset(block) + start;
- cur->size = min(drm_buddy_block_size(mm, block) - start,
+ cur->start = gpu_buddy_block_offset(block) + start;
+ cur->size = min(gpu_buddy_block_size(mm, block) - start,
size);
cur->remaining = size;
cur->node = block;
@@ -259,7 +259,7 @@ static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr,
*/
static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct list_head *next;
u64 start;
@@ -295,18 +295,18 @@ static inline void xe_res_next(struct xe_res_cursor *cur, u64 size)
block = cur->node;
next = block->link.next;
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
- while (start >= drm_buddy_block_size(cur->mm, block)) {
- start -= drm_buddy_block_size(cur->mm, block);
+ while (start >= gpu_buddy_block_size(cur->mm, block)) {
+ start -= gpu_buddy_block_size(cur->mm, block);
next = block->link.next;
- block = list_entry(next, struct drm_buddy_block, link);
+ block = list_entry(next, struct gpu_buddy_block, link);
}
- cur->start = drm_buddy_block_offset(block) + start;
- cur->size = min(drm_buddy_block_size(cur->mm, block) - start,
+ cur->start = gpu_buddy_block_offset(block) + start;
+ cur->size = min(gpu_buddy_block_size(cur->mm, block) - start,
cur->remaining);
cur->node = block;
break;
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 213f0334518a..cda3bf7e2418 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -747,7 +747,7 @@ static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
return PHYS_PFN(offset + xpagemap->hpa_base);
}
-static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
+static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram)
{
return &vram->ttm.mm;
}
@@ -758,17 +758,17 @@ static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocati
struct xe_bo *bo = to_xe_bo(devmem_allocation);
struct ttm_resource *res = bo->ttm.resource;
struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
int j = 0;
list_for_each_entry(block, blocks, link) {
struct xe_vram_region *vr = block->private;
- struct drm_buddy *buddy = vram_to_buddy(vr);
+ struct gpu_buddy *buddy = vram_to_buddy(vr);
u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
- drm_buddy_block_offset(block));
+ gpu_buddy_block_offset(block));
int i;
- for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
+ for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
pfn[j++] = block_pfn + i;
}
@@ -1033,7 +1033,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
struct dma_fence *pre_migrate_fence = NULL;
struct xe_device *xe = vr->xe;
struct device *dev = xe->drm.dev;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
struct xe_validation_ctx vctx;
struct list_head *blocks;
struct drm_exec exec;
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 6553a19f7cf2..d119217d566a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -6,6 +6,7 @@
#include <drm/drm_managed.h>
#include <drm/drm_drv.h>
+#include <drm/drm_buddy.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
@@ -16,16 +17,16 @@
#include "xe_ttm_vram_mgr.h"
#include "xe_vram_types.h"
-static inline struct drm_buddy_block *
+static inline struct gpu_buddy_block *
xe_ttm_vram_mgr_first_block(struct list_head *list)
{
- return list_first_entry_or_null(list, struct drm_buddy_block, link);
+ return list_first_entry_or_null(list, struct gpu_buddy_block, link);
}
-static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
+static inline bool xe_is_vram_mgr_blocks_contiguous(struct gpu_buddy *mm,
struct list_head *head)
{
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
u64 start, size;
block = xe_ttm_vram_mgr_first_block(head);
@@ -33,12 +34,12 @@ static inline bool xe_is_vram_mgr_blocks_contiguous(struct drm_buddy *mm,
return false;
while (head != block->link.next) {
- start = drm_buddy_block_offset(block);
- size = drm_buddy_block_size(mm, block);
+ start = gpu_buddy_block_offset(block);
+ size = gpu_buddy_block_size(mm, block);
- block = list_entry(block->link.next, struct drm_buddy_block,
+ block = list_entry(block->link.next, struct gpu_buddy_block,
link);
- if (start + size != drm_buddy_block_offset(block))
+ if (start + size != gpu_buddy_block_offset(block))
return false;
}
@@ -52,7 +53,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
{
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
struct xe_ttm_vram_mgr_resource *vres;
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
u64 size, min_page_size;
unsigned long lpfn;
int err;
@@ -79,10 +80,10 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&vres->blocks);
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+ vres->flags |= GPU_BUDDY_TOPDOWN_ALLOCATION;
if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
- vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+ vres->flags |= GPU_BUDDY_RANGE_ALLOCATION;
if (WARN_ON(!vres->base.size)) {
err = -EINVAL;
@@ -118,27 +119,27 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
}
- err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+ err = gpu_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
(u64)lpfn << PAGE_SHIFT, size,
min_page_size, &vres->blocks, vres->flags);
if (err)
goto error_unlock;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
+ if (!gpu_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
size = vres->base.size;
}
if (lpfn <= mgr->visible_size >> PAGE_SHIFT) {
vres->used_visible_size = size;
} else {
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
list_for_each_entry(block, &vres->blocks, link) {
- u64 start = drm_buddy_block_offset(block);
+ u64 start = gpu_buddy_block_offset(block);
if (start < mgr->visible_size) {
- u64 end = start + drm_buddy_block_size(mm, block);
+ u64 end = start + gpu_buddy_block_size(mm, block);
vres->used_visible_size +=
min(end, mgr->visible_size) - start;
@@ -158,11 +159,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
* the object.
*/
if (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS) {
- struct drm_buddy_block *block = list_first_entry(&vres->blocks,
+ struct gpu_buddy_block *block = list_first_entry(&vres->blocks,
typeof(*block),
link);
- vres->base.start = drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ vres->base.start = gpu_buddy_block_offset(block) >> PAGE_SHIFT;
} else {
vres->base.start = XE_BO_INVALID_OFFSET;
}
@@ -184,10 +185,10 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
struct xe_ttm_vram_mgr_resource *vres =
to_xe_ttm_vram_mgr_resource(res);
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
mutex_lock(&mgr->lock);
- drm_buddy_free_list(mm, &vres->blocks, 0);
+ gpu_buddy_free_list(mm, &vres->blocks, 0);
mgr->visible_avail += vres->used_visible_size;
mutex_unlock(&mgr->lock);
@@ -200,7 +201,7 @@ static void xe_ttm_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
- struct drm_buddy *mm = &mgr->mm;
+ struct gpu_buddy *mm = &mgr->mm;
mutex_lock(&mgr->lock);
drm_printf(printer, "default_page_size: %lluKiB\n",
@@ -223,8 +224,8 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
struct xe_ttm_vram_mgr_resource *vres =
to_xe_ttm_vram_mgr_resource(res);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
if (!place->fpfn && !place->lpfn)
return true;
@@ -234,9 +235,9 @@ static bool xe_ttm_vram_mgr_intersects(struct ttm_resource_manager *man,
list_for_each_entry(block, &vres->blocks, link) {
unsigned long fpfn =
- drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ gpu_buddy_block_offset(block) >> PAGE_SHIFT;
unsigned long lpfn = fpfn +
- (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+ (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
if (place->fpfn < lpfn && place->lpfn > fpfn)
return true;
@@ -253,8 +254,8 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
struct xe_ttm_vram_mgr_resource *vres =
to_xe_ttm_vram_mgr_resource(res);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct gpu_buddy *mm = &mgr->mm;
+ struct gpu_buddy_block *block;
if (!place->fpfn && !place->lpfn)
return true;
@@ -264,9 +265,9 @@ static bool xe_ttm_vram_mgr_compatible(struct ttm_resource_manager *man,
list_for_each_entry(block, &vres->blocks, link) {
unsigned long fpfn =
- drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ gpu_buddy_block_offset(block) >> PAGE_SHIFT;
unsigned long lpfn = fpfn +
- (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+ (gpu_buddy_block_size(mm, block) >> PAGE_SHIFT);
if (fpfn < place->fpfn || lpfn > place->lpfn)
return false;
@@ -296,7 +297,7 @@ static void xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
WARN_ON_ONCE(mgr->visible_avail != mgr->visible_size);
- drm_buddy_fini(&mgr->mm);
+ gpu_buddy_fini(&mgr->mm);
ttm_resource_manager_cleanup(&mgr->manager);
@@ -327,7 +328,7 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
mgr->visible_avail = io_size;
ttm_resource_manager_init(man, &xe->ttm, size);
- err = drm_buddy_init(&mgr->mm, man->size, default_page_size);
+ err = gpu_buddy_init(&mgr->mm, man->size, default_page_size);
if (err)
return err;
@@ -375,7 +376,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_BUDDY blocks to export */
+ /* Determine the number of GPU_BUDDY blocks to export */
xe_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
@@ -392,10 +393,10 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
sg->length = 0;
/*
- * Walk down DRM_BUDDY blocks to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_BUDDY block
+ * Walk down GPU_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the GPU_BUDDY block
* and the number of bytes from it. Access the following
- * DRM_BUDDY block(s) if more buffer needs to exported
+ * GPU_BUDDY block(s) if more buffer needs to exported
*/
xe_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index babeec5511d9..9106da056b49 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -18,7 +18,7 @@ struct xe_ttm_vram_mgr {
/** @manager: Base TTM resource manager */
struct ttm_resource_manager manager;
/** @mm: DRM buddy allocator which manages the VRAM */
- struct drm_buddy mm;
+ struct gpu_buddy mm;
/** @visible_size: Proped size of the CPU visible portion */
u64 visible_size;
/** @visible_avail: CPU visible portion still unallocated */
diff --git a/drivers/gpu/tests/Makefile b/drivers/gpu/tests/Makefile
index 8e7654e87d82..4183e6e2de45 100644
--- a/drivers/gpu/tests/Makefile
+++ b/drivers/gpu/tests/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
gpu_buddy_tests-y = gpu_buddy_test.o gpu_random.o
-obj-$(CONFIG_DRM_KUNIT_TEST) += gpu_buddy_tests.o
+obj-$(CONFIG_GPU_BUDDY_KUNIT_TEST) += gpu_buddy_tests.o
diff --git a/drivers/gpu/tests/gpu_buddy_test.c b/drivers/gpu/tests/gpu_buddy_test.c
index b905932da990..450e71deed90 100644
--- a/drivers/gpu/tests/gpu_buddy_test.c
+++ b/drivers/gpu/tests/gpu_buddy_test.c
@@ -21,9 +21,9 @@ static inline u64 get_size(int order, u64 chunk_size)
return (1 << order) * chunk_size;
}
-static void drm_test_buddy_fragmentation_performance(struct kunit *test)
+static void gpu_test_buddy_fragmentation_performance(struct kunit *test)
{
- struct drm_buddy_block *block, *tmp;
+ struct gpu_buddy_block *block, *tmp;
int num_blocks, i, ret, count = 0;
LIST_HEAD(allocated_blocks);
unsigned long elapsed_ms;
@@ -32,7 +32,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
LIST_HEAD(clear_list);
LIST_HEAD(dirty_list);
LIST_HEAD(free_list);
- struct drm_buddy mm;
+ struct gpu_buddy mm;
u64 mm_size = SZ_4G;
ktime_t start, end;
@@ -47,7 +47,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
* quickly the allocator can satisfy larger, aligned requests from a pool of
* highly fragmented space.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
"buddy_init failed\n");
num_blocks = mm_size / SZ_64K;
@@ -55,7 +55,7 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
start = ktime_get();
/* Allocate with maximum fragmentation - 8K blocks with 64K alignment */
for (i = 0; i < num_blocks; i++)
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
&allocated_blocks, 0),
"buddy_alloc hit an error size=%u\n", SZ_8K);
@@ -68,21 +68,21 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
}
/* Free with different flags to ensure no coalescing */
- drm_buddy_free_list(&mm, &clear_list, DRM_BUDDY_CLEARED);
- drm_buddy_free_list(&mm, &dirty_list, 0);
+ gpu_buddy_free_list(&mm, &clear_list, GPU_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &dirty_list, 0);
for (i = 0; i < num_blocks; i++)
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_64K, SZ_64K,
&test_blocks, 0),
"buddy_alloc hit an error size=%u\n", SZ_64K);
- drm_buddy_free_list(&mm, &test_blocks, 0);
+ gpu_buddy_free_list(&mm, &test_blocks, 0);
end = ktime_get();
elapsed_ms = ktime_to_ms(ktime_sub(end, start));
kunit_info(test, "Fragmented allocation took %lu ms\n", elapsed_ms);
- drm_buddy_fini(&mm);
+ gpu_buddy_fini(&mm);
/*
* Reverse free order under fragmentation
@@ -96,13 +96,13 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
* deallocation occurs in the opposite order of allocation, exposing the
* cost difference between a linear freelist scan and an ordered tree lookup.
*/
- ret = drm_buddy_init(&mm, mm_size, SZ_4K);
+ ret = gpu_buddy_init(&mm, mm_size, SZ_4K);
KUNIT_ASSERT_EQ(test, ret, 0);
start = ktime_get();
/* Allocate maximum fragmentation */
for (i = 0; i < num_blocks; i++)
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, SZ_8K, SZ_64K,
&allocated_blocks, 0),
"buddy_alloc hit an error size=%u\n", SZ_8K);
@@ -111,28 +111,28 @@ static void drm_test_buddy_fragmentation_performance(struct kunit *test)
list_move_tail(&block->link, &free_list);
count++;
}
- drm_buddy_free_list(&mm, &free_list, DRM_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &free_list, GPU_BUDDY_CLEARED);
list_for_each_entry_safe_reverse(block, tmp, &allocated_blocks, link)
list_move(&block->link, &reverse_list);
- drm_buddy_free_list(&mm, &reverse_list, DRM_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &reverse_list, GPU_BUDDY_CLEARED);
end = ktime_get();
elapsed_ms = ktime_to_ms(ktime_sub(end, start));
kunit_info(test, "Reverse-ordered free took %lu ms\n", elapsed_ms);
- drm_buddy_fini(&mm);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_range_bias(struct kunit *test)
+static void gpu_test_buddy_alloc_range_bias(struct kunit *test)
{
u32 mm_size, size, ps, bias_size, bias_start, bias_end, bias_rem;
- DRM_RND_STATE(prng, random_seed);
+ GPU_RND_STATE(prng, random_seed);
unsigned int i, count, *order;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
unsigned long flags;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
LIST_HEAD(allocated);
bias_size = SZ_1M;
@@ -142,11 +142,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
"buddy_init failed\n");
count = mm_size / bias_size;
- order = drm_random_order(count, &prng);
+ order = gpu_random_order(count, &prng);
KUNIT_EXPECT_TRUE(test, order);
/*
@@ -166,79 +166,79 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
/* internal round_up too big */
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, bias_size + ps, bias_size,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, bias_size, bias_size);
/* size too big */
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, bias_size + ps, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, bias_size + ps, ps);
/* bias range too small for size */
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start + ps,
+ gpu_buddy_alloc_blocks(&mm, bias_start + ps,
bias_end, bias_size, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
bias_start + ps, bias_end, bias_size, ps);
/* bias misaligned */
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start + ps,
+ gpu_buddy_alloc_blocks(&mm, bias_start + ps,
bias_end - ps,
bias_size >> 1, bias_size >> 1,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n",
bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1);
/* single big page */
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, bias_size, bias_size,
&tmp,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, bias_size, bias_size);
- drm_buddy_free_list(&mm, &tmp, 0);
+ gpu_buddy_free_list(&mm, &tmp, 0);
/* single page with internal round_up */
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, ps, bias_size,
&tmp,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, ps, bias_size);
- drm_buddy_free_list(&mm, &tmp, 0);
+ gpu_buddy_free_list(&mm, &tmp, 0);
/* random size within */
size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
if (size)
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, size, ps,
&tmp,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, size, ps);
bias_rem -= size;
/* too big for current avail */
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, bias_rem + ps, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, bias_rem + ps, ps);
@@ -248,10 +248,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
size = max(size, ps);
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, size, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, size, ps);
/*
@@ -259,15 +259,15 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
* unallocated, and ideally not always on the bias
* boundaries.
*/
- drm_buddy_free_list(&mm, &tmp, 0);
+ gpu_buddy_free_list(&mm, &tmp, 0);
} else {
list_splice_tail(&tmp, &allocated);
}
}
kfree(order);
- drm_buddy_free_list(&mm, &allocated, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_fini(&mm);
/*
* Something more free-form. Idea is to pick a random starting bias
@@ -278,7 +278,7 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
* allocated nodes in the middle of the address space.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
"buddy_init failed\n");
bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
@@ -290,10 +290,10 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, size, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n",
bias_start, bias_end, size, ps);
bias_rem -= size;
@@ -319,24 +319,24 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
KUNIT_ASSERT_EQ(test, bias_start, 0);
KUNIT_ASSERT_EQ(test, bias_end, mm_size);
KUNIT_ASSERT_TRUE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start, bias_end,
+ gpu_buddy_alloc_blocks(&mm, bias_start, bias_end,
ps, ps,
&allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc passed with bias(%x-%x), size=%u\n",
bias_start, bias_end, ps);
- drm_buddy_free_list(&mm, &allocated, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_fini(&mm);
/*
- * Allocate cleared blocks in the bias range when the DRM buddy's clear avail is
+ * Allocate cleared blocks in the bias range when the GPU buddy's clear avail is
* zero. This will validate the bias range allocation in scenarios like system boot
* when no cleared blocks are available and exercise the fallback path too. The resulting
* blocks should always be dirty.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, ps),
"buddy_init failed\n");
bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps);
@@ -344,11 +344,11 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
bias_end = max(bias_end, bias_start + ps);
bias_rem = bias_end - bias_start;
- flags = DRM_BUDDY_CLEAR_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION;
+ flags = GPU_BUDDY_CLEAR_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION;
size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps);
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, bias_start,
+ gpu_buddy_alloc_blocks(&mm, bias_start,
bias_end, size, ps,
&allocated,
flags),
@@ -356,27 +356,27 @@ static void drm_test_buddy_alloc_range_bias(struct kunit *test)
bias_start, bias_end, size, ps);
list_for_each_entry(block, &allocated, link)
- KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+ KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
- drm_buddy_free_list(&mm, &allocated, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_clear(struct kunit *test)
+static void gpu_test_buddy_alloc_clear(struct kunit *test)
{
unsigned long n_pages, total, i = 0;
const unsigned long ps = SZ_4K;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
const int max_order = 12;
LIST_HEAD(allocated);
- struct drm_buddy mm;
+ struct gpu_buddy mm;
unsigned int order;
u32 mm_size, size;
LIST_HEAD(dirty);
LIST_HEAD(clean);
mm_size = SZ_4K << max_order;
- KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
@@ -389,11 +389,11 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
* is indeed all dirty pages and vice versa. Free it all again,
* keeping the dirty/clear status.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
5 * ps, ps, &allocated,
- DRM_BUDDY_TOPDOWN_ALLOCATION),
+ GPU_BUDDY_TOPDOWN_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 5 * ps);
- drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
n_pages = 10;
do {
@@ -406,37 +406,37 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
flags = 0;
} else {
list = &clean;
- flags = DRM_BUDDY_CLEAR_ALLOCATION;
+ flags = GPU_BUDDY_CLEAR_ALLOCATION;
}
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
ps, ps, list,
flags),
"buddy_alloc hit an error size=%lu\n", ps);
} while (++i < n_pages);
list_for_each_entry(block, &clean, link)
- KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), true);
+ KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), true);
list_for_each_entry(block, &dirty, link)
- KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
+ KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
- drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
/*
* Trying to go over the clear limit for some allocation.
* The allocation should never fail with reasonable page-size.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
10 * ps, ps, &clean,
- DRM_BUDDY_CLEAR_ALLOCATION),
+ GPU_BUDDY_CLEAR_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 10 * ps);
- drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
- drm_buddy_free_list(&mm, &dirty, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &dirty, 0);
+ gpu_buddy_fini(&mm);
- KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
/*
* Create a new mm. Intentionally fragment the address space by creating
@@ -458,34 +458,34 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
else
list = &clean;
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
ps, ps, list, 0),
"buddy_alloc hit an error size=%lu\n", ps);
} while (++i < n_pages);
- drm_buddy_free_list(&mm, &clean, DRM_BUDDY_CLEARED);
- drm_buddy_free_list(&mm, &dirty, 0);
+ gpu_buddy_free_list(&mm, &clean, GPU_BUDDY_CLEARED);
+ gpu_buddy_free_list(&mm, &dirty, 0);
order = 1;
do {
size = SZ_4K << order;
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
size, size, &allocated,
- DRM_BUDDY_CLEAR_ALLOCATION),
+ GPU_BUDDY_CLEAR_ALLOCATION),
"buddy_alloc hit an error size=%u\n", size);
total = 0;
list_for_each_entry(block, &allocated, link) {
if (size != mm_size)
- KUNIT_EXPECT_EQ(test, drm_buddy_block_is_clear(block), false);
- total += drm_buddy_block_size(&mm, block);
+ KUNIT_EXPECT_EQ(test, gpu_buddy_block_is_clear(block), false);
+ total += gpu_buddy_block_size(&mm, block);
}
KUNIT_EXPECT_EQ(test, total, size);
- drm_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_free_list(&mm, &allocated, 0);
} while (++order <= max_order);
- drm_buddy_fini(&mm);
+ gpu_buddy_fini(&mm);
/*
* Create a new mm with a non power-of-two size. Allocate a random size from each
@@ -494,44 +494,44 @@ static void drm_test_buddy_alloc_clear(struct kunit *test)
*/
mm_size = (SZ_4K << max_order) + (SZ_4K << (max_order - 2));
- KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
4 * ps, ps, &allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 4 * ps);
- drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
+ gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, SZ_4K << max_order,
2 * ps, ps, &allocated,
- DRM_BUDDY_CLEAR_ALLOCATION),
+ GPU_BUDDY_CLEAR_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 2 * ps);
- drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
+ gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, SZ_4K << max_order, mm_size,
ps, ps, &allocated,
- DRM_BUDDY_RANGE_ALLOCATION),
+ GPU_BUDDY_RANGE_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", ps);
- drm_buddy_free_list(&mm, &allocated, DRM_BUDDY_CLEARED);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, GPU_BUDDY_CLEARED);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_contiguous(struct kunit *test)
+static void gpu_test_buddy_alloc_contiguous(struct kunit *test)
{
const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K;
unsigned long i, n_pages, total;
- struct drm_buddy_block *block;
- struct drm_buddy mm;
+ struct gpu_buddy_block *block;
+ struct gpu_buddy mm;
LIST_HEAD(left);
LIST_HEAD(middle);
LIST_HEAD(right);
LIST_HEAD(allocated);
- KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps));
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, mm_size, ps));
/*
* Idea is to fragment the address space by alternating block
* allocations between three different lists; one for left, middle and
* right. We can then free a list to simulate fragmentation. In
- * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION,
+ * particular we want to exercise the GPU_BUDDY_CONTIGUOUS_ALLOCATION,
* including the try_harder path.
*/
@@ -548,66 +548,66 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test)
else
list = &right;
KUNIT_ASSERT_FALSE_MSG(test,
- drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ gpu_buddy_alloc_blocks(&mm, 0, mm_size,
ps, ps, list, 0),
"buddy_alloc hit an error size=%lu\n",
ps);
} while (++i < n_pages);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
3 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc didn't error size=%lu\n", 3 * ps);
- drm_buddy_free_list(&mm, &middle, 0);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ gpu_buddy_free_list(&mm, &middle, 0);
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
3 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc didn't error size=%lu\n", 3 * ps);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
2 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc didn't error size=%lu\n", 2 * ps);
- drm_buddy_free_list(&mm, &right, 0);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ gpu_buddy_free_list(&mm, &right, 0);
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
3 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc didn't error size=%lu\n", 3 * ps);
/*
* At this point we should have enough contiguous space for 2 blocks,
* however they are never buddies (since we freed middle and right) so
* will require the try_harder logic to find them.
*/
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
2 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 2 * ps);
- drm_buddy_free_list(&mm, &left, 0);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size,
+ gpu_buddy_free_list(&mm, &left, 0);
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size,
3 * ps, ps, &allocated,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc hit an error size=%lu\n", 3 * ps);
total = 0;
list_for_each_entry(block, &allocated, link)
- total += drm_buddy_block_size(&mm, block);
+ total += gpu_buddy_block_size(&mm, block);
KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3);
- drm_buddy_free_list(&mm, &allocated, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_pathological(struct kunit *test)
+static void gpu_test_buddy_alloc_pathological(struct kunit *test)
{
u64 mm_size, size, start = 0;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
const int max_order = 3;
unsigned long flags = 0;
int order, top;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
LIST_HEAD(blocks);
LIST_HEAD(holes);
LIST_HEAD(tmp);
@@ -620,7 +620,7 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
*/
mm_size = SZ_4K << max_order;
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
"buddy_init failed\n");
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
@@ -630,18 +630,18 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
block = list_first_entry_or_null(&blocks, typeof(*block), link);
if (block) {
list_del(&block->link);
- drm_buddy_free_block(&mm, block);
+ gpu_buddy_free_block(&mm, block);
}
for (order = top; order--;) {
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start,
mm_size, size, size,
&tmp, flags),
"buddy_alloc hit -ENOMEM with order=%d, top=%d\n",
order, top);
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_move_tail(&block->link, &blocks);
@@ -649,45 +649,45 @@ static void drm_test_buddy_alloc_pathological(struct kunit *test)
/* There should be one final page for this sub-allocation */
size = get_size(0, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc hit -ENOMEM for hole\n");
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_move_tail(&block->link, &holes);
size = get_size(top, mm.chunk_size);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!",
top, max_order);
}
- drm_buddy_free_list(&mm, &holes, 0);
+ gpu_buddy_free_list(&mm, &holes, 0);
/* Nothing larger than blocks of chunk_size now available */
for (order = 1; order <= max_order; order++) {
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc unexpectedly succeeded at order %d, it should be full!",
order);
}
list_splice_tail(&holes, &blocks);
- drm_buddy_free_list(&mm, &blocks, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &blocks, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
+static void gpu_test_buddy_alloc_pessimistic(struct kunit *test)
{
u64 mm_size, size, start = 0;
- struct drm_buddy_block *block, *bn;
+ struct gpu_buddy_block *block, *bn;
const unsigned int max_order = 16;
unsigned long flags = 0;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
unsigned int order;
LIST_HEAD(blocks);
LIST_HEAD(tmp);
@@ -699,19 +699,19 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
*/
mm_size = SZ_4K << max_order;
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
"buddy_init failed\n");
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
for (order = 0; order < max_order; order++) {
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc hit -ENOMEM with order=%d\n",
order);
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_move_tail(&block->link, &blocks);
@@ -719,11 +719,11 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
/* And now the last remaining block available */
size = get_size(0, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc hit -ENOMEM on final alloc\n");
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_move_tail(&block->link, &blocks);
@@ -731,58 +731,58 @@ static void drm_test_buddy_alloc_pessimistic(struct kunit *test)
/* Should be completely full! */
for (order = max_order; order--;) {
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc unexpectedly succeeded, it should be full!");
}
block = list_last_entry(&blocks, typeof(*block), link);
list_del(&block->link);
- drm_buddy_free_block(&mm, block);
+ gpu_buddy_free_block(&mm, block);
/* As we free in increasing size, we make available larger blocks */
order = 1;
list_for_each_entry_safe(block, bn, &blocks, link) {
list_del(&block->link);
- drm_buddy_free_block(&mm, block);
+ gpu_buddy_free_block(&mm, block);
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc hit -ENOMEM with order=%d\n",
order);
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_del(&block->link);
- drm_buddy_free_block(&mm, block);
+ gpu_buddy_free_block(&mm, block);
order++;
}
/* To confirm, now the whole mm should be available */
size = get_size(max_order, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc (realloc) hit -ENOMEM with order=%d\n",
max_order);
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_del(&block->link);
- drm_buddy_free_block(&mm, block);
- drm_buddy_free_list(&mm, &blocks, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_block(&mm, block);
+ gpu_buddy_free_list(&mm, &blocks, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_optimistic(struct kunit *test)
+static void gpu_test_buddy_alloc_optimistic(struct kunit *test)
{
u64 mm_size, size, start = 0;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
unsigned long flags = 0;
const int max_order = 16;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
LIST_HEAD(blocks);
LIST_HEAD(tmp);
int order;
@@ -794,19 +794,19 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
mm_size = SZ_4K * ((1 << (max_order + 1)) - 1);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
"buddy_init failed\n");
KUNIT_EXPECT_EQ(test, mm.max_order, max_order);
for (order = 0; order <= max_order; order++) {
size = get_size(order, mm.chunk_size);
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc hit -ENOMEM with order=%d\n",
order);
- block = list_first_entry_or_null(&tmp, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&tmp, struct gpu_buddy_block, link);
KUNIT_ASSERT_TRUE_MSG(test, block, "alloc_blocks has no blocks\n");
list_move_tail(&block->link, &blocks);
@@ -814,115 +814,115 @@ static void drm_test_buddy_alloc_optimistic(struct kunit *test)
/* Should be completely full! */
size = get_size(0, mm.chunk_size);
- KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, start, mm_size,
+ KUNIT_ASSERT_TRUE_MSG(test, gpu_buddy_alloc_blocks(&mm, start, mm_size,
size, size, &tmp, flags),
"buddy_alloc unexpectedly succeeded, it should be full!");
- drm_buddy_free_list(&mm, &blocks, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &blocks, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_limit(struct kunit *test)
+static void gpu_test_buddy_alloc_limit(struct kunit *test)
{
u64 size = U64_MAX, start = 0;
- struct drm_buddy_block *block;
+ struct gpu_buddy_block *block;
unsigned long flags = 0;
LIST_HEAD(allocated);
- struct drm_buddy mm;
+ struct gpu_buddy mm;
- KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, size, SZ_4K));
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_init(&mm, size, SZ_4K));
- KUNIT_EXPECT_EQ_MSG(test, mm.max_order, DRM_BUDDY_MAX_ORDER,
+ KUNIT_EXPECT_EQ_MSG(test, mm.max_order, GPU_BUDDY_MAX_ORDER,
"mm.max_order(%d) != %d\n", mm.max_order,
- DRM_BUDDY_MAX_ORDER);
+ GPU_BUDDY_MAX_ORDER);
size = mm.chunk_size << mm.max_order;
- KUNIT_EXPECT_FALSE(test, drm_buddy_alloc_blocks(&mm, start, size, size,
+ KUNIT_EXPECT_FALSE(test, gpu_buddy_alloc_blocks(&mm, start, size, size,
mm.chunk_size, &allocated, flags));
- block = list_first_entry_or_null(&allocated, struct drm_buddy_block, link);
+ block = list_first_entry_or_null(&allocated, struct gpu_buddy_block, link);
KUNIT_EXPECT_TRUE(test, block);
- KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_order(block), mm.max_order,
+ KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_order(block), mm.max_order,
"block order(%d) != %d\n",
- drm_buddy_block_order(block), mm.max_order);
+ gpu_buddy_block_order(block), mm.max_order);
- KUNIT_EXPECT_EQ_MSG(test, drm_buddy_block_size(&mm, block),
+ KUNIT_EXPECT_EQ_MSG(test, gpu_buddy_block_size(&mm, block),
BIT_ULL(mm.max_order) * mm.chunk_size,
"block size(%llu) != %llu\n",
- drm_buddy_block_size(&mm, block),
+ gpu_buddy_block_size(&mm, block),
BIT_ULL(mm.max_order) * mm.chunk_size);
- drm_buddy_free_list(&mm, &allocated, 0);
- drm_buddy_fini(&mm);
+ gpu_buddy_free_list(&mm, &allocated, 0);
+ gpu_buddy_fini(&mm);
}
-static void drm_test_buddy_alloc_exceeds_max_order(struct kunit *test)
+static void gpu_test_buddy_alloc_exceeds_max_order(struct kunit *test)
{
u64 mm_size = SZ_8G + SZ_2G, size = SZ_8G + SZ_1G, min_block_size = SZ_8G;
- struct drm_buddy mm;
+ struct gpu_buddy mm;
LIST_HEAD(blocks);
int err;
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, SZ_4K),
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_init(&mm, mm_size, SZ_4K),
"buddy_init failed\n");
/* CONTIGUOUS allocation should succeed via try_harder fallback */
- KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, size,
+ KUNIT_ASSERT_FALSE_MSG(test, gpu_buddy_alloc_blocks(&mm, 0, mm_size, size,
SZ_4K, &blocks,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION),
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION),
"buddy_alloc hit an error size=%llu\n", size);
- drm_buddy_free_list(&mm, &blocks, 0);
+ gpu_buddy_free_list(&mm, &blocks, 0);
/* Non-CONTIGUOUS with large min_block_size should return -EINVAL */
- err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
+ err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks, 0);
KUNIT_EXPECT_EQ(test, err, -EINVAL);
/* Non-CONTIGUOUS + RANGE with large min_block_size should return -EINVAL */
- err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
- DRM_BUDDY_RANGE_ALLOCATION);
+ err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, min_block_size, &blocks,
+ GPU_BUDDY_RANGE_ALLOCATION);
KUNIT_EXPECT_EQ(test, err, -EINVAL);
/* CONTIGUOUS + RANGE should return -EINVAL (no try_harder for RANGE) */
- err = drm_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
- DRM_BUDDY_CONTIGUOUS_ALLOCATION | DRM_BUDDY_RANGE_ALLOCATION);
+ err = gpu_buddy_alloc_blocks(&mm, 0, mm_size, size, SZ_4K, &blocks,
+ GPU_BUDDY_CONTIGUOUS_ALLOCATION | GPU_BUDDY_RANGE_ALLOCATION);
KUNIT_EXPECT_EQ(test, err, -EINVAL);
- drm_buddy_fini(&mm);
+ gpu_buddy_fini(&mm);
}
-static int drm_buddy_suite_init(struct kunit_suite *suite)
+static int gpu_buddy_suite_init(struct kunit_suite *suite)
{
while (!random_seed)
random_seed = get_random_u32();
- kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n",
+ kunit_info(suite, "Testing GPU buddy manager, with random_seed=0x%x\n",
random_seed);
return 0;
}
-static struct kunit_case drm_buddy_tests[] = {
- KUNIT_CASE(drm_test_buddy_alloc_limit),
- KUNIT_CASE(drm_test_buddy_alloc_optimistic),
- KUNIT_CASE(drm_test_buddy_alloc_pessimistic),
- KUNIT_CASE(drm_test_buddy_alloc_pathological),
- KUNIT_CASE(drm_test_buddy_alloc_contiguous),
- KUNIT_CASE(drm_test_buddy_alloc_clear),
- KUNIT_CASE(drm_test_buddy_alloc_range_bias),
- KUNIT_CASE(drm_test_buddy_fragmentation_performance),
- KUNIT_CASE(drm_test_buddy_alloc_exceeds_max_order),
+static struct kunit_case gpu_buddy_tests[] = {
+ KUNIT_CASE(gpu_test_buddy_alloc_limit),
+ KUNIT_CASE(gpu_test_buddy_alloc_optimistic),
+ KUNIT_CASE(gpu_test_buddy_alloc_pessimistic),
+ KUNIT_CASE(gpu_test_buddy_alloc_pathological),
+ KUNIT_CASE(gpu_test_buddy_alloc_contiguous),
+ KUNIT_CASE(gpu_test_buddy_alloc_clear),
+ KUNIT_CASE(gpu_test_buddy_alloc_range_bias),
+ KUNIT_CASE(gpu_test_buddy_fragmentation_performance),
+ KUNIT_CASE(gpu_test_buddy_alloc_exceeds_max_order),
{}
};
-static struct kunit_suite drm_buddy_test_suite = {
- .name = "drm_buddy",
- .suite_init = drm_buddy_suite_init,
- .test_cases = drm_buddy_tests,
+static struct kunit_suite gpu_buddy_test_suite = {
+ .name = "gpu_buddy",
+ .suite_init = gpu_buddy_suite_init,
+ .test_cases = gpu_buddy_tests,
};
-kunit_test_suite(drm_buddy_test_suite);
+kunit_test_suite(gpu_buddy_test_suite);
MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("Kunit test for drm_buddy functions");
+MODULE_DESCRIPTION("Kunit test for gpu_buddy functions");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/tests/gpu_random.c b/drivers/gpu/tests/gpu_random.c
index ddd1f594b5d5..6356372f7e52 100644
--- a/drivers/gpu/tests/gpu_random.c
+++ b/drivers/gpu/tests/gpu_random.c
@@ -8,26 +8,26 @@
#include "gpu_random.h"
-u32 drm_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
+u32 gpu_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state)
{
return upper_32_bits((u64)prandom_u32_state(state) * ep_ro);
}
-EXPORT_SYMBOL(drm_prandom_u32_max_state);
+EXPORT_SYMBOL(gpu_prandom_u32_max_state);
-void drm_random_reorder(unsigned int *order, unsigned int count,
+void gpu_random_reorder(unsigned int *order, unsigned int count,
struct rnd_state *state)
{
unsigned int i, j;
for (i = 0; i < count; ++i) {
BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32));
- j = drm_prandom_u32_max_state(count, state);
+ j = gpu_prandom_u32_max_state(count, state);
swap(order[i], order[j]);
}
}
-EXPORT_SYMBOL(drm_random_reorder);
+EXPORT_SYMBOL(gpu_random_reorder);
-unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
+unsigned int *gpu_random_order(unsigned int count, struct rnd_state *state)
{
unsigned int *order, i;
@@ -38,7 +38,7 @@ unsigned int *drm_random_order(unsigned int count, struct rnd_state *state)
for (i = 0; i < count; i++)
order[i] = i;
- drm_random_reorder(order, count, state);
+ gpu_random_reorder(order, count, state);
return order;
}
-EXPORT_SYMBOL(drm_random_order);
+EXPORT_SYMBOL(gpu_random_order);
diff --git a/drivers/gpu/tests/gpu_random.h b/drivers/gpu/tests/gpu_random.h
index 9f827260a89d..b68cf3448264 100644
--- a/drivers/gpu/tests/gpu_random.h
+++ b/drivers/gpu/tests/gpu_random.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __DRM_RANDOM_H__
-#define __DRM_RANDOM_H__
+#ifndef __GPU_RANDOM_H__
+#define __GPU_RANDOM_H__
/* This is a temporary home for a couple of utility functions that should
* be transposed to lib/ at the earliest convenience.
@@ -8,21 +8,21 @@
#include <linux/prandom.h>
-#define DRM_RND_STATE_INITIALIZER(seed__) ({ \
+#define GPU_RND_STATE_INITIALIZER(seed__) ({ \
struct rnd_state state__; \
prandom_seed_state(&state__, (seed__)); \
state__; \
})
-#define DRM_RND_STATE(name__, seed__) \
- struct rnd_state name__ = DRM_RND_STATE_INITIALIZER(seed__)
+#define GPU_RND_STATE(name__, seed__) \
+ struct rnd_state name__ = GPU_RND_STATE_INITIALIZER(seed__)
-unsigned int *drm_random_order(unsigned int count,
+unsigned int *gpu_random_order(unsigned int count,
struct rnd_state *state);
-void drm_random_reorder(unsigned int *order,
+void gpu_random_reorder(unsigned int *order,
unsigned int count,
struct rnd_state *state);
-u32 drm_prandom_u32_max_state(u32 ep_ro,
+u32 gpu_prandom_u32_max_state(u32 ep_ro,
struct rnd_state *state);
-#endif /* !__DRM_RANDOM_H__ */
+#endif /* !__GPU_RANDOM_H__ */
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 9884f003247d..a7144d275f54 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -37,6 +37,7 @@ source "drivers/char/agp/Kconfig"
source "drivers/gpu/vga/Kconfig"
+source "drivers/gpu/Kconfig"
source "drivers/gpu/host1x/Kconfig"
source "drivers/gpu/ipu-v3/Kconfig"
source "drivers/gpu/nova-core/Kconfig"
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
new file mode 100644
index 000000000000..3054369bebff
--- /dev/null
+++ b/include/drm/drm_buddy.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __DRM_BUDDY_H__
+#define __DRM_BUDDY_H__
+
+#include <linux/gpu_buddy.h>
+
+struct drm_printer;
+
+/* DRM-specific GPU Buddy Allocator print helpers */
+void drm_buddy_print(struct gpu_buddy *mm, struct drm_printer *p);
+void drm_buddy_block_print(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block,
+ struct drm_printer *p);
+#endif
diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index b909fa8f810a..07ac65db6d2e 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -3,8 +3,8 @@
* Copyright © 2021 Intel Corporation
*/
-#ifndef __DRM_BUDDY_H__
-#define __DRM_BUDDY_H__
+#ifndef __GPU_BUDDY_H__
+#define __GPU_BUDDY_H__
#include <linux/bitops.h>
#include <linux/list.h>
@@ -12,38 +12,45 @@
#include <linux/sched.h>
#include <linux/rbtree.h>
-struct drm_printer;
+#define GPU_BUDDY_RANGE_ALLOCATION BIT(0)
+#define GPU_BUDDY_TOPDOWN_ALLOCATION BIT(1)
+#define GPU_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
+#define GPU_BUDDY_CLEAR_ALLOCATION BIT(3)
+#define GPU_BUDDY_CLEARED BIT(4)
+#define GPU_BUDDY_TRIM_DISABLE BIT(5)
-#define DRM_BUDDY_RANGE_ALLOCATION BIT(0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION BIT(1)
-#define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2)
-#define DRM_BUDDY_CLEAR_ALLOCATION BIT(3)
-#define DRM_BUDDY_CLEARED BIT(4)
-#define DRM_BUDDY_TRIM_DISABLE BIT(5)
+enum gpu_buddy_free_tree {
+ GPU_BUDDY_CLEAR_TREE = 0,
+ GPU_BUDDY_DIRTY_TREE,
+ GPU_BUDDY_MAX_FREE_TREES,
+};
-struct drm_buddy_block {
-#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
-#define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
-#define DRM_BUDDY_ALLOCATED (1 << 10)
-#define DRM_BUDDY_FREE (2 << 10)
-#define DRM_BUDDY_SPLIT (3 << 10)
-#define DRM_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9)
+#define for_each_free_tree(tree) \
+ for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
+
+struct gpu_buddy_block {
+#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define GPU_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
+#define GPU_BUDDY_ALLOCATED (1 << 10)
+#define GPU_BUDDY_FREE (2 << 10)
+#define GPU_BUDDY_SPLIT (3 << 10)
+#define GPU_BUDDY_HEADER_CLEAR GENMASK_ULL(9, 9)
/* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
-#define DRM_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0)
+#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define GPU_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0)
u64 header;
- struct drm_buddy_block *left;
- struct drm_buddy_block *right;
- struct drm_buddy_block *parent;
+ struct gpu_buddy_block *left;
+ struct gpu_buddy_block *right;
+ struct gpu_buddy_block *parent;
void *private; /* owned by creator */
/*
- * While the block is allocated by the user through drm_buddy_alloc*,
+ * While the block is allocated by the user through gpu_buddy_alloc*,
* the user has ownership of the link, for example to maintain within
* a list, if so desired. As soon as the block is freed with
- * drm_buddy_free* ownership is given back to the mm.
+ * gpu_buddy_free* ownership is given back to the mm.
*/
union {
struct rb_node rb;
@@ -54,15 +61,15 @@ struct drm_buddy_block {
};
/* Order-zero must be at least SZ_4K */
-#define DRM_BUDDY_MAX_ORDER (63 - 12)
+#define GPU_BUDDY_MAX_ORDER (63 - 12)
/*
* Binary Buddy System.
*
* Locking should be handled by the user, a simple mutex around
- * drm_buddy_alloc* and drm_buddy_free* should suffice.
+ * gpu_buddy_alloc* and gpu_buddy_free* should suffice.
*/
-struct drm_buddy {
+struct gpu_buddy {
/* Maintain a free list for each order. */
struct rb_root **free_trees;
@@ -73,7 +80,7 @@ struct drm_buddy {
* block. Nodes are either allocated or free, in which case they will
* also exist on the respective free list.
*/
- struct drm_buddy_block **roots;
+ struct gpu_buddy_block **roots;
/*
* Anything from here is public, and remains static for the lifetime of
@@ -90,82 +97,81 @@ struct drm_buddy {
};
static inline u64
-drm_buddy_block_offset(const struct drm_buddy_block *block)
+gpu_buddy_block_offset(const struct gpu_buddy_block *block)
{
- return block->header & DRM_BUDDY_HEADER_OFFSET;
+ return block->header & GPU_BUDDY_HEADER_OFFSET;
}
static inline unsigned int
-drm_buddy_block_order(struct drm_buddy_block *block)
+gpu_buddy_block_order(struct gpu_buddy_block *block)
{
- return block->header & DRM_BUDDY_HEADER_ORDER;
+ return block->header & GPU_BUDDY_HEADER_ORDER;
}
static inline unsigned int
-drm_buddy_block_state(struct drm_buddy_block *block)
+gpu_buddy_block_state(struct gpu_buddy_block *block)
{
- return block->header & DRM_BUDDY_HEADER_STATE;
+ return block->header & GPU_BUDDY_HEADER_STATE;
}
static inline bool
-drm_buddy_block_is_allocated(struct drm_buddy_block *block)
+gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
{
- return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
+ return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
}
static inline bool
-drm_buddy_block_is_clear(struct drm_buddy_block *block)
+gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
{
- return block->header & DRM_BUDDY_HEADER_CLEAR;
+ return block->header & GPU_BUDDY_HEADER_CLEAR;
}
static inline bool
-drm_buddy_block_is_free(struct drm_buddy_block *block)
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
{
- return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
+ return gpu_buddy_block_state(block) == GPU_BUDDY_FREE;
}
static inline bool
-drm_buddy_block_is_split(struct drm_buddy_block *block)
+gpu_buddy_block_is_split(struct gpu_buddy_block *block)
{
- return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
+ return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
}
static inline u64
-drm_buddy_block_size(struct drm_buddy *mm,
- struct drm_buddy_block *block)
+gpu_buddy_block_size(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block)
{
- return mm->chunk_size << drm_buddy_block_order(block);
+ return mm->chunk_size << gpu_buddy_block_order(block);
}
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
-void drm_buddy_fini(struct drm_buddy *mm);
+void gpu_buddy_fini(struct gpu_buddy *mm);
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block);
+struct gpu_buddy_block *
+gpu_get_buddy(struct gpu_buddy_block *block);
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
u64 start, u64 end, u64 size,
u64 min_page_size,
struct list_head *blocks,
unsigned long flags);
-int drm_buddy_block_trim(struct drm_buddy *mm,
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
u64 *start,
u64 new_size,
struct list_head *blocks);
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
-void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
+void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
-void drm_buddy_free_list(struct drm_buddy *mm,
+void gpu_buddy_free_list(struct gpu_buddy *mm,
struct list_head *objects,
unsigned int flags);
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
-void drm_buddy_block_print(struct drm_buddy *mm,
- struct drm_buddy_block *block,
- struct drm_printer *p);
+void gpu_buddy_print(struct gpu_buddy *mm);
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+ struct gpu_buddy_block *block);
#endif
--
2.34.1
^ permalink raw reply related
* Re: [PATCH v10 0/8] Preparatory patches for nova-core memory management
From: Joel Fernandes @ 2026-02-18 20:59 UTC (permalink / raw)
To: linux-kernel, Miguel Ojeda, Boqun Feng, Gary Guo,
Björn Roy Baron, Benno Lossin, Andreas Hindborg, Alice Ryhl,
Trevor Gross, Danilo Krummrich
Cc: Dave Airlie, Daniel Almeida, Koen Koning, dri-devel, nouveau,
rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
Alex Deucher, Christian König, Jani Nikula, Joonas Lahtinen,
Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
Matthew Brost, Lucas De Marchi, Thomas Hellström,
Helge Deller, Danilo Krummrich, Alice Ryhl, Miguel Ojeda,
Alex Gaynor, Boqun Feng, Gary Guo, Björn Roy Baron,
Benno Lossin, Andreas Hindborg, Trevor Gross, John Hubbard,
Alistair Popple, Timur Tabi, Edwin Peer, Alexandre Courbot,
Andrea Righi, Andy Ritger, Zhi Wang, Balbir Singh,
Philipp Stanner, Elle Rhumsaa, Daniel Almeida, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes
In-Reply-To: <20260218205507.689429-1-joelagnelf@nvidia.com>
My CC list missed a lot of folks, sorry about that. Adding more CC's to this
email to make people aware of the posting. Thankfully it got posted to the
archives so for those on rust-for-linux, dri-devel and nouveau lore lists, they
would get it.
Thanks,
--
Joel Fernandes
On 2/18/2026 3:54 PM, Joel Fernandes wrote:
> These are initial preparatory patches needed for nova-core memory management
> support. The series moves the DRM buddy allocator one level up so it can be
> shared across GPU subsystems, adds Rust FFI and clist bindings, and creates
> Rust GPU buddy allocator bindings.
>
> The clist/ffi patches are ready, reviewed by Gary and Danilo. Miguel, can you
> pull those via the rust tree?
>
> The non-Rust DRM buddy related patches are already being pulled into upstream
> by Dave Airlie but I have included them here as they are needed for the rest of
> the patches (thanks to Dave for reworking them so they applied).
>
> I will post the nova-core memory management patches as a separate follow-up
> series just after this one.
>
> The git tree with all these patches can be found at:
> git://git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git (tag: nova/mm)
>
> Joel Fernandes (7):
> gpu: Move DRM buddy allocator one level up (part one)
> gpu: Move DRM buddy allocator one level up (part two)
> rust: ffi: Convert pub use to pub mod and create ffi module
> rust: clist: Add support to interface with C linked lists
> rust: gpu: Add GPU buddy allocator bindings
> nova-core: mm: Select GPU_BUDDY for VRAM allocation
> nova-core: Kconfig: Sort select statements alphabetically
>
> Koen Koning (1):
> gpu: Fix uninitialized buddy for built-in drivers
>
> Documentation/gpu/drm-mm.rst | 10 +-
> MAINTAINERS | 15 +-
> drivers/gpu/Kconfig | 13 +
> drivers/gpu/Makefile | 3 +-
> drivers/gpu/buddy.c | 1322 +++++++++++++++++
> drivers/gpu/drm/Kconfig | 5 +-
> drivers/gpu/drm/Kconfig.debug | 1 -
> drivers/gpu/drm/Makefile | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +-
> .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 12 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 79 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 20 +-
> drivers/gpu/drm/drm_buddy.c | 1277 +---------------
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +-
> drivers/gpu/drm/i915/i915_scatterlist.c | 10 +-
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 55 +-
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 4 +-
> .../drm/i915/selftests/intel_memory_region.c | 20 +-
> drivers/gpu/drm/tests/Makefile | 1 -
> drivers/gpu/drm/tests/drm_exec_test.c | 2 -
> drivers/gpu/drm/tests/drm_mm_test.c | 2 -
> .../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 4 +-
> drivers/gpu/drm/ttm/tests/ttm_mock_manager.c | 18 +-
> drivers/gpu/drm/ttm/tests/ttm_mock_manager.h | 4 +-
> drivers/gpu/drm/xe/xe_res_cursor.h | 34 +-
> drivers/gpu/drm/xe/xe_svm.c | 12 +-
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 71 +-
> drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h | 4 +-
> drivers/gpu/nova-core/Kconfig | 3 +-
> drivers/gpu/tests/Makefile | 4 +
> .../gpu_buddy_test.c} | 416 +++---
> .../lib/drm_random.c => tests/gpu_random.c} | 18 +-
> .../lib/drm_random.h => tests/gpu_random.h} | 18 +-
> drivers/video/Kconfig | 1 +
> include/drm/drm_buddy.h | 163 +-
> include/linux/gpu_buddy.h | 177 +++
> rust/bindings/bindings_helper.h | 11 +
> rust/helpers/gpu.c | 23 +
> rust/helpers/helpers.c | 2 +
> rust/helpers/list.c | 17 +
> rust/kernel/ffi/clist.rs | 327 ++++
> rust/kernel/ffi/mod.rs | 9 +
> rust/kernel/gpu/buddy.rs | 537 +++++++
> rust/kernel/gpu/mod.rs | 5 +
> rust/kernel/lib.rs | 5 +-
> 45 files changed, 2893 insertions(+), 1846 deletions(-)
> create mode 100644 drivers/gpu/Kconfig
> create mode 100644 drivers/gpu/buddy.c
> create mode 100644 drivers/gpu/tests/Makefile
> rename drivers/gpu/{drm/tests/drm_buddy_test.c => tests/gpu_buddy_test.c} (67%)
> rename drivers/gpu/{drm/lib/drm_random.c => tests/gpu_random.c} (59%)
> rename drivers/gpu/{drm/lib/drm_random.h => tests/gpu_random.h} (53%)
> create mode 100644 include/linux/gpu_buddy.h
> create mode 100644 rust/helpers/gpu.c
> create mode 100644 rust/helpers/list.c
> create mode 100644 rust/kernel/ffi/clist.rs
> create mode 100644 rust/kernel/ffi/mod.rs
> create mode 100644 rust/kernel/gpu/buddy.rs
> create mode 100644 rust/kernel/gpu/mod.rs
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> base-commit: 2961f841b025fb234860bac26dfb7fa7cb0fb122
--
Joel Fernandes
^ permalink raw reply
* [PATCH v7 00/23] nova-core: Add memory management support
From: Joel Fernandes @ 2026-02-18 21:19 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes
This series adds support for nova-core memory management, including VRAM
allocation, PRAMIN, VMM, page table walking, and BAR 1 read/writes.
These are critical for channel management, vGPU, and all memory
management uses.
These patches depend on the following preparatory patches:
https://lore.kernel.org/all/20260218205507.689429-1-joelagnelf@nvidia.com/T/#t
All patches (including the preparatory patches from the other series) can
be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git (branch nova/mm or tag: nova-mm-v7-20260218)
Earlier series of these patches:
https://lore.kernel.org/linux-fbdev/DG0MRL6T7ACW.25G3GLJMP7PN1@kernel.org/T/#t
https://lore.kernel.org/linux-fbdev/20260210233204.790524-1-joelagnelf@nvidia.com/
Joel Fernandes (23):
nova-core: mm: Add support to use PRAMIN windows to write to VRAM
docs: gpu: nova-core: Document the PRAMIN aperture mechanism
nova-core: Add BAR1 aperture type and size constant
nova-core: gsp: Add BAR1 PDE base accessors
nova-core: mm: Add common memory management types
nova-core: mm: Add common types for all page table formats
nova-core: mm: Add MMU v2 page table types
nova-core: mm: Add MMU v3 page table types
nova-core: mm: Add unified page table entry wrapper enums
nova-core: mm: Add TLB flush support
nova-core: mm: Add GpuMm centralized memory manager
nova-core: mm: Add page table walker for MMU v2/v3
nova-core: mm: Add Virtual Memory Manager
nova-core: mm: Add virtual address range tracking to VMM
nova-core: mm: Add multi-page mapping API to VMM
nova-core: mm: Add BAR1 user interface
nova-core: gsp: Return GspStaticInfo and FbLayout from boot()
nova-core: mm: Add BAR1 memory management self-tests
nova-core: mm: Add PRAMIN aperture self-tests
nova-core: gsp: Extract usable FB region from GSP
nova-core: fb: Add usable_vram field to FbLayout
nova-core: mm: Use usable VRAM region for buddy allocator
nova-core: mm: Add BarUser to struct Gpu and create at boot
Documentation/gpu/nova/core/pramin.rst | 125 ++++++
Documentation/gpu/nova/index.rst | 1 +
drivers/gpu/nova-core/Kconfig | 10 +
drivers/gpu/nova-core/driver.rs | 9 +-
drivers/gpu/nova-core/fb.rs | 23 +-
drivers/gpu/nova-core/gpu.rs | 135 +++++-
drivers/gpu/nova-core/gsp/boot.rs | 22 +-
drivers/gpu/nova-core/gsp/commands.rs | 18 +-
drivers/gpu/nova-core/gsp/fw/commands.rs | 38 ++
drivers/gpu/nova-core/mm/bar_user.rs | 318 ++++++++++++++
drivers/gpu/nova-core/mm/mod.rs | 241 +++++++++++
drivers/gpu/nova-core/mm/pagetable/mod.rs | 479 +++++++++++++++++++++
drivers/gpu/nova-core/mm/pagetable/ver2.rs | 199 +++++++++
drivers/gpu/nova-core/mm/pagetable/ver3.rs | 302 +++++++++++++
drivers/gpu/nova-core/mm/pagetable/walk.rs | 213 +++++++++
drivers/gpu/nova-core/mm/pramin.rs | 454 +++++++++++++++++++
drivers/gpu/nova-core/mm/tlb.rs | 92 ++++
drivers/gpu/nova-core/mm/vmm.rs | 473 ++++++++++++++++++++
drivers/gpu/nova-core/nova_core.rs | 1 +
drivers/gpu/nova-core/regs.rs | 38 ++
20 files changed, 3182 insertions(+), 9 deletions(-)
create mode 100644 Documentation/gpu/nova/core/pramin.rst
create mode 100644 drivers/gpu/nova-core/mm/bar_user.rs
create mode 100644 drivers/gpu/nova-core/mm/mod.rs
create mode 100644 drivers/gpu/nova-core/mm/pagetable/mod.rs
create mode 100644 drivers/gpu/nova-core/mm/pagetable/ver2.rs
create mode 100644 drivers/gpu/nova-core/mm/pagetable/ver3.rs
create mode 100644 drivers/gpu/nova-core/mm/pagetable/walk.rs
create mode 100644 drivers/gpu/nova-core/mm/pramin.rs
create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
create mode 100644 drivers/gpu/nova-core/mm/vmm.rs
--
2.34.1
^ permalink raw reply
* [PATCH v7 01/23] nova-core: mm: Add support to use PRAMIN windows to write to VRAM
From: Joel Fernandes @ 2026-02-18 21:19 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
PRAMIN apertures are a crucial mechanism to direct read/write to VRAM.
Add support for the same.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/mod.rs | 5 +
drivers/gpu/nova-core/mm/pramin.rs | 293 +++++++++++++++++++++++++++++
drivers/gpu/nova-core/nova_core.rs | 1 +
drivers/gpu/nova-core/regs.rs | 5 +
4 files changed, 304 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/mod.rs
create mode 100644 drivers/gpu/nova-core/mm/pramin.rs
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
new file mode 100644
index 000000000000..7a5dd4220c67
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory management subsystems for nova-core.
+
+pub(crate) mod pramin;
diff --git a/drivers/gpu/nova-core/mm/pramin.rs b/drivers/gpu/nova-core/mm/pramin.rs
new file mode 100644
index 000000000000..77916f5b231e
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/pramin.rs
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Direct VRAM access through the PRAMIN aperture.
+//!
+//! PRAMIN provides a 1MB sliding window into VRAM through BAR0, allowing the CPU to access
+//! video memory directly. Access is managed through a two-level API:
+//!
+//! - [`Pramin`]: The parent object that owns the BAR0 reference and synchronization lock.
+//! - [`PraminWindow`]: A guard object that holds exclusive PRAMIN access for its lifetime.
+//!
+//! The PRAMIN aperture is a 1MB region at BAR0 + 0x700000 for all GPUs. The window base is
+//! controlled by the `NV_PBUS_BAR0_WINDOW` register and must be 64KB aligned.
+//!
+//! # Examples
+//!
+//! ## Basic read/write
+//!
+//! ```no_run
+//! use crate::driver::Bar0;
+//! use crate::mm::pramin;
+//! use kernel::devres::Devres;
+//! use kernel::prelude::*;
+//! use kernel::sync::Arc;
+//!
+//! fn example(devres_bar: Arc<Devres<Bar0>>) -> Result<()> {
+//! let pramin = Arc::pin_init(pramin::Pramin::new(devres_bar)?, GFP_KERNEL)?;
+//! let mut window = pramin.window()?;
+//!
+//! // Write and read back.
+//! window.try_write32(0x100, 0xDEADBEEF)?;
+//! let val = window.try_read32(0x100)?;
+//! assert_eq!(val, 0xDEADBEEF);
+//!
+//! Ok(())
+//! // Original window position restored on drop.
+//! }
+//! ```
+//!
+//! ## Auto-repositioning across VRAM regions
+//!
+//! ```no_run
+//! use crate::driver::Bar0;
+//! use crate::mm::pramin;
+//! use kernel::devres::Devres;
+//! use kernel::prelude::*;
+//! use kernel::sync::Arc;
+//!
+//! fn example(devres_bar: Arc<Devres<Bar0>>) -> Result<()> {
+//! let pramin = Arc::pin_init(pramin::Pramin::new(devres_bar)?, GFP_KERNEL)?;
+//! let mut window = pramin.window()?;
+//!
+//! // Access first 1MB region.
+//! window.try_write32(0x100, 0x11111111)?;
+//!
+//! // Access at 2MB - window auto-repositions.
+//! window.try_write32(0x200000, 0x22222222)?;
+//!
+//! // Back to first region - window repositions again.
+//! let val = window.try_read32(0x100)?;
+//! assert_eq!(val, 0x11111111);
+//!
+//! Ok(())
+//! }
+//! ```
+
+#![allow(unused)]
+
+use crate::{
+ driver::Bar0,
+ regs, //
+};
+
+use kernel::bits::genmask_u64;
+use kernel::devres::Devres;
+use kernel::io::Io;
+use kernel::new_mutex;
+use kernel::prelude::*;
+use kernel::ptr::{
+ Alignable,
+ Alignment, //
+};
+use kernel::sizes::{
+ SZ_1M,
+ SZ_64K, //
+};
+use kernel::sync::{
+ lock::mutex::MutexGuard,
+ Arc,
+ Mutex, //
+};
+
+/// PRAMIN aperture base offset in BAR0.
+const PRAMIN_BASE: usize = 0x700000;
+
+/// PRAMIN aperture size (1MB).
+const PRAMIN_SIZE: usize = SZ_1M;
+
+/// 64KB alignment for window base.
+const WINDOW_ALIGN: Alignment = Alignment::new::<SZ_64K>();
+
+/// Maximum addressable VRAM offset (40-bit address space).
+///
+/// The `NV_PBUS_BAR0_WINDOW` register has a 24-bit `window_base` field (bits 23:0) that stores
+/// bits [39:16] of the target VRAM address. This limits the addressable space to 2^40 bytes.
+///
+/// CAST: On 64-bit systems, this fits in usize.
+const MAX_VRAM_OFFSET: usize = genmask_u64(0..=39) as usize;
+
+/// Generate a PRAMIN read accessor.
+macro_rules! define_pramin_read {
+ ($name:ident, $ty:ty) => {
+ #[doc = concat!("Read a `", stringify!($ty), "` from VRAM at the given offset.")]
+ pub(crate) fn $name(&mut self, vram_offset: usize) -> Result<$ty> {
+ // Compute window parameters without bar reference.
+ let (bar_offset, new_base) =
+ self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
+
+ // Update window base if needed and perform read.
+ let bar = self.bar.try_access().ok_or(ENODEV)?;
+ if let Some(base) = new_base {
+ Self::write_window_base(&bar, base);
+ self.state.current_base = base;
+ }
+ bar.$name(bar_offset)
+ }
+ };
+}
+
+/// Generate a PRAMIN write accessor.
+macro_rules! define_pramin_write {
+ ($name:ident, $ty:ty) => {
+ #[doc = concat!("Write a `", stringify!($ty), "` to VRAM at the given offset.")]
+ pub(crate) fn $name(&mut self, vram_offset: usize, value: $ty) -> Result {
+ // Compute window parameters without bar reference.
+ let (bar_offset, new_base) =
+ self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
+
+ // Update window base if needed and perform write.
+ let bar = self.bar.try_access().ok_or(ENODEV)?;
+ if let Some(base) = new_base {
+ Self::write_window_base(&bar, base);
+ self.state.current_base = base;
+ }
+ bar.$name(value, bar_offset)
+ }
+ };
+}
+
+/// PRAMIN state protected by mutex.
+struct PraminState {
+ current_base: usize,
+}
+
+/// PRAMIN aperture manager.
+///
+/// Call [`Pramin::window()`] to acquire exclusive PRAMIN access.
+#[pin_data]
+pub(crate) struct Pramin {
+ bar: Arc<Devres<Bar0>>,
+ /// PRAMIN aperture state, protected by a mutex.
+ ///
+ /// # Safety
+ ///
+ /// This lock is acquired during the DMA fence signalling critical path.
+ /// It must NEVER be held across any reclaimable CPU memory / allocations
+ /// (`GFP_KERNEL`), because the memory reclaim path can call
+ /// `dma_fence_wait()`, which would deadlock with this lock held.
+ #[pin]
+ state: Mutex<PraminState>,
+}
+
+impl Pramin {
+ /// Create a pin-initializer for PRAMIN.
+ pub(crate) fn new(bar: Arc<Devres<Bar0>>) -> Result<impl PinInit<Self>> {
+ let bar_access = bar.try_access().ok_or(ENODEV)?;
+ let current_base = Self::try_read_window_base(&bar_access)?;
+
+ Ok(pin_init!(Self {
+ bar,
+ state <- new_mutex!(PraminState { current_base }, "pramin_state"),
+ }))
+ }
+
+ /// Acquire exclusive PRAMIN access.
+ ///
+ /// Returns a [`PraminWindow`] guard that provides VRAM read/write accessors.
+ /// The [`PraminWindow`] is exclusive and only one can exist at a time.
+ pub(crate) fn window(&self) -> Result<PraminWindow<'_>> {
+ let state = self.state.lock();
+ let saved_base = state.current_base;
+ Ok(PraminWindow {
+ bar: self.bar.clone(),
+ state,
+ saved_base,
+ })
+ }
+
+ /// Read the current window base from the BAR0_WINDOW register.
+ fn try_read_window_base(bar: &Bar0) -> Result<usize> {
+ let reg = regs::NV_PBUS_BAR0_WINDOW::read(bar);
+ let base = u64::from(reg.window_base());
+ let shifted = base.checked_shl(16).ok_or(EOVERFLOW)?;
+ shifted.try_into().map_err(|_| EOVERFLOW)
+ }
+}
+
+/// PRAMIN window guard for direct VRAM access.
+///
+/// This guard holds exclusive access to the PRAMIN aperture. The window auto-repositions
+/// when accessing VRAM offsets outside the current 1MB range. Original window position
+/// is saved on creation and restored on drop.
+///
+/// Only one [`PraminWindow`] can exist at a time per [`Pramin`] instance (enforced by the
+/// internal `MutexGuard`).
+pub(crate) struct PraminWindow<'a> {
+ bar: Arc<Devres<Bar0>>,
+ state: MutexGuard<'a, PraminState>,
+ saved_base: usize,
+}
+
+impl PraminWindow<'_> {
+ /// Write a new window base to the BAR0_WINDOW register.
+ fn write_window_base(bar: &Bar0, base: usize) {
+ // CAST:
+ // - We have guaranteed that the base is within the addressable range (40-bits).
+ // - After >> 16, a 40-bit aligned base becomes 24 bits, which fits in u32.
+ regs::NV_PBUS_BAR0_WINDOW::default()
+ .set_window_base((base >> 16) as u32)
+ .write(bar);
+ }
+
+ /// Compute window parameters for a VRAM access.
+ ///
+ /// Returns (`bar_offset`, `new_base`) where:
+ /// - `bar_offset`: The BAR0 offset to use for the access.
+ /// - `new_base`: `Some(base)` if window needs repositioning, `None` otherwise.
+ fn compute_window(
+ &self,
+ vram_offset: usize,
+ access_size: usize,
+ ) -> Result<(usize, Option<usize>)> {
+ // Validate VRAM offset is within addressable range (40-bit address space).
+ let end_offset = vram_offset.checked_add(access_size).ok_or(EINVAL)?;
+ if end_offset > MAX_VRAM_OFFSET + 1 {
+ return Err(EINVAL);
+ }
+
+ // Calculate which 64KB-aligned base we need.
+ let needed_base = vram_offset.align_down(WINDOW_ALIGN);
+
+ // Calculate offset within the window.
+ let offset_in_window = vram_offset - needed_base;
+
+ // Check if access fits in 1MB window from this base.
+ if offset_in_window + access_size > PRAMIN_SIZE {
+ return Err(EINVAL);
+ }
+
+ // Return bar offset and whether window needs repositioning.
+ let new_base = if self.state.current_base != needed_base {
+ Some(needed_base)
+ } else {
+ None
+ };
+
+ Ok((PRAMIN_BASE + offset_in_window, new_base))
+ }
+
+ define_pramin_read!(try_read8, u8);
+ define_pramin_read!(try_read16, u16);
+ define_pramin_read!(try_read32, u32);
+ define_pramin_read!(try_read64, u64);
+
+ define_pramin_write!(try_write8, u8);
+ define_pramin_write!(try_write16, u16);
+ define_pramin_write!(try_write32, u32);
+ define_pramin_write!(try_write64, u64);
+}
+
+impl Drop for PraminWindow<'_> {
+ fn drop(&mut self) {
+ // Restore the original window base if it changed.
+ if self.state.current_base != self.saved_base {
+ if let Some(bar) = self.bar.try_access() {
+ Self::write_window_base(&bar, self.saved_base);
+
+ // Update state to reflect the restored base.
+ self.state.current_base = self.saved_base;
+ }
+ }
+ // MutexGuard drops automatically, releasing the lock.
+ }
+}
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
index c1121e7c64c5..3de00db3279e 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -13,6 +13,7 @@
mod gfw;
mod gpu;
mod gsp;
+mod mm;
mod num;
mod regs;
mod sbuffer;
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index ea0d32f5396c..d0982e346f74 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -102,6 +102,11 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
31:16 frts_err_code as u16;
});
+register!(NV_PBUS_BAR0_WINDOW @ 0x00001700, "BAR0 window control for PRAMIN access" {
+ 25:24 target as u8, "Target memory (0=VRAM, 1=SYS_MEM_COH, 2=SYS_MEM_NONCOH)";
+ 23:0 window_base as u32, "Window base address (bits 39:16 of FB addr)";
+});
+
// PFB
// The following two registers together hold the physical system memory address that is used by the
--
2.34.1
^ permalink raw reply related
* [PATCH v7 02/23] docs: gpu: nova-core: Document the PRAMIN aperture mechanism
From: Joel Fernandes @ 2026-02-18 21:19 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add documentation for the PRAMIN aperture mechanism used by nova-core
for direct VRAM access.
Nova only uses TARGET=VID_MEM for VRAM access. The SYS_MEM target values
are documented for completeness but not used by the driver.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
Documentation/gpu/nova/core/pramin.rst | 125 +++++++++++++++++++++++++
Documentation/gpu/nova/index.rst | 1 +
2 files changed, 126 insertions(+)
create mode 100644 Documentation/gpu/nova/core/pramin.rst
diff --git a/Documentation/gpu/nova/core/pramin.rst b/Documentation/gpu/nova/core/pramin.rst
new file mode 100644
index 000000000000..55ec9d920629
--- /dev/null
+++ b/Documentation/gpu/nova/core/pramin.rst
@@ -0,0 +1,125 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+PRAMIN aperture mechanism
+=========================
+
+.. note::
+ The following description is approximate and current as of the Ampere family.
+ It may change for future generations and is intended to assist in understanding
+ the driver code.
+
+Introduction
+============
+
+PRAMIN is a hardware aperture mechanism that provides CPU access to GPU Video RAM (VRAM) before
+the GPU's Memory Management Unit (MMU) and page tables are initialized. This 1MB sliding window,
+located at a fixed offset within BAR0, is essential for setting up page tables and other critical
+GPU data structures without relying on the GPU's MMU.
+
+Architecture Overview
+=====================
+
+The PRAMIN aperture mechanism is logically implemented by the GPU's PBUS (PCIe Bus Controller Unit)
+and provides a CPU-accessible window into VRAM through the PCIe interface::
+
+ +-----------------+ PCIe +------------------------------+
+ | CPU |<----------->| GPU |
+ +-----------------+ | |
+ | +----------------------+ |
+ | | PBUS | |
+ | | (Bus Controller) | |
+ | | | |
+ | | +--------------+<------------ (window starts at
+ | | | PRAMIN | | | BAR0 + 0x700000)
+ | | | Window | | |
+ | | | (1MB) | | |
+ | | +--------------+ | |
+ | | | | |
+ | +---------|------------+ |
+ | | |
+ | v |
+ | +----------------------+<------------ (Program PRAMIN to any
+ | | VRAM | | 64KB-aligned VRAM boundary)
+ | | (Several GBs) | |
+ | | | |
+ | | FB[0x000000000000] | |
+ | | ... | |
+ | | FB[0x7FFFFFFFFFF] | |
+ | +----------------------+ |
+ +------------------------------+
+
+PBUS (PCIe Bus Controller) is responsible for, among other things, handling MMIO
+accesses to the BAR registers.
+
+PRAMIN Window Operation
+=======================
+
+The PRAMIN window provides a 1MB sliding aperture that can be repositioned over
+the entire VRAM address space using the ``NV_PBUS_BAR0_WINDOW`` register.
+
+Window Control Mechanism
+-------------------------
+
+The window position is controlled via the PBUS ``BAR0_WINDOW`` register::
+
+ NV_PBUS_BAR0_WINDOW Register (0x1700):
+ +-------+--------+--------------------------------------+
+ | 31:26 | 25:24 | 23:0 |
+ | RSVD | TARGET | BASE_ADDR |
+ | | | (bits 39:16 of VRAM address) |
+ +-------+--------+--------------------------------------+
+
+ BASE_ADDR field (bits 23:0):
+ - Contains bits [39:16] of the target VRAM address
+ - Provides 40-bit (1TB) address space coverage
+ - Must be programmed with 64KB-aligned addresses
+
+ TARGET field (bits 25:24):
+ - 0x0: VRAM (Video Memory)
+ - 0x1: SYS_MEM_COH (Coherent System Memory)
+ - 0x2: SYS_MEM_NONCOH (Non-coherent System Memory)
+ - 0x3: Reserved
+
+ .. note::
+ Nova only uses TARGET=VRAM (0x0) for video memory access. The SYS_MEM
+ target values are documented here for hardware completeness but are
+ not used by the driver.
+
+64KB Alignment Requirement
+---------------------------
+
+The PRAMIN window must be aligned to 64KB boundaries in VRAM. This is enforced
+by the ``BASE_ADDR`` field representing bits [39:16] of the target address::
+
+ VRAM Address Calculation:
+ actual_vram_addr = (BASE_ADDR << 16) + pramin_offset
+ Where:
+ - BASE_ADDR: 24-bit value from NV_PBUS_BAR0_WINDOW[23:0]
+ - pramin_offset: 20-bit offset within the PRAMIN window [0x00000-0xFFFFF]
+
+ Example Window Positioning:
+ +---------------------------------------------------------+
+ | VRAM Space |
+ | |
+ | 0x000000000 +-----------------+ <-- 64KB aligned |
+ | | PRAMIN Window | |
+ | | (1MB) | |
+ | 0x0000FFFFF +-----------------+ |
+ | |
+ | | ^ |
+ | | | Window can slide |
+ | v | to any 64KB-aligned boundary |
+ | |
+ | 0x123400000 +-----------------+ <-- 64KB aligned |
+ | | PRAMIN Window | |
+ | | (1MB) | |
+ | 0x1234FFFFF +-----------------+ |
+ | |
+ | ... |
+ | |
+ | 0x7FFFF0000 +-----------------+ <-- 64KB aligned |
+ | | PRAMIN Window | |
+ | | (1MB) | |
+ | 0x7FFFFFFFF +-----------------+ |
+ +---------------------------------------------------------+
diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst
index e39cb3163581..b8254b1ffe2a 100644
--- a/Documentation/gpu/nova/index.rst
+++ b/Documentation/gpu/nova/index.rst
@@ -32,3 +32,4 @@ vGPU manager VFIO driver and the nova-drm driver.
core/devinit
core/fwsec
core/falcon
+ core/pramin
--
2.34.1
^ permalink raw reply related
* [PATCH v7 03/23] nova-core: Add BAR1 aperture type and size constant
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add BAR1_SIZE constant and Bar1 type alias for the 256MB BAR1 aperture.
These are prerequisites for BAR1 memory access functionality.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 5a4cc047bcfc..d8b2e967ba4c 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -13,7 +13,10 @@
Vendor, //
},
prelude::*,
- sizes::SZ_16M,
+ sizes::{
+ SZ_16M,
+ SZ_256M, //
+ },
sync::Arc, //
};
@@ -28,6 +31,7 @@ pub(crate) struct NovaCore {
}
const BAR0_SIZE: usize = SZ_16M;
+pub(crate) const BAR1_SIZE: usize = SZ_256M;
// For now we only support Ampere which can use up to 47-bit DMA addresses.
//
@@ -38,6 +42,7 @@ pub(crate) struct NovaCore {
const GPU_DMA_BITS: u32 = 47;
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
+pub(crate) type Bar1 = pci::Bar<BAR1_SIZE>;
kernel::pci_device_table!(
PCI_TABLE,
--
2.34.1
^ permalink raw reply related
* [PATCH v7 04/23] nova-core: gsp: Add BAR1 PDE base accessors
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add accessor methods to GspStaticConfigInfo for retrieving the BAR1 Page
Directory Entry base addresses from GSP-RM firmware.
These addresses point to the root page tables for BAR1 virtual memory spaces.
The page tables are set up by GSP-RM during GPU initialization.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 1 +
drivers/gpu/nova-core/gsp/commands.rs | 8 ++++++++
drivers/gpu/nova-core/gsp/fw/commands.rs | 8 ++++++++
3 files changed, 17 insertions(+)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index d8b2e967ba4c..f30ffa45cf13 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -42,6 +42,7 @@ pub(crate) struct NovaCore {
const GPU_DMA_BITS: u32 = 47;
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
+#[expect(dead_code)]
pub(crate) type Bar1 = pci::Bar<BAR1_SIZE>;
kernel::pci_device_table!(
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index 8f270eca33be..a7778d5d9e32 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -189,6 +189,7 @@ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
/// The reply from the GSP to the [`GetGspInfo`] command.
pub(crate) struct GetGspStaticInfoReply {
gpu_name: [u8; 64],
+ bar1_pde_base: u64,
}
impl MessageFromGsp for GetGspStaticInfoReply {
@@ -202,6 +203,7 @@ fn read(
) -> Result<Self, Self::InitError> {
Ok(GetGspStaticInfoReply {
gpu_name: msg.gpu_name_str(),
+ bar1_pde_base: msg.bar1_pde_base(),
})
}
}
@@ -228,6 +230,12 @@ pub(crate) fn gpu_name(&self) -> core::result::Result<&str, GpuNameError> {
.to_str()
.map_err(GpuNameError::InvalidUtf8)
}
+
+ /// Returns the BAR1 Page Directory Entry base address.
+ #[expect(dead_code)]
+ pub(crate) fn bar1_pde_base(&self) -> u64 {
+ self.bar1_pde_base
+ }
}
/// Send the [`GetGspInfo`] command and awaits for its reply.
diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-core/gsp/fw/commands.rs
index 21be44199693..f069f4092911 100644
--- a/drivers/gpu/nova-core/gsp/fw/commands.rs
+++ b/drivers/gpu/nova-core/gsp/fw/commands.rs
@@ -114,6 +114,14 @@ impl GspStaticConfigInfo {
pub(crate) fn gpu_name_str(&self) -> [u8; 64] {
self.0.gpuNameString
}
+
+ /// Returns the BAR1 Page Directory Entry base address.
+ ///
+ /// This is the root page table address for BAR1 virtual memory,
+ /// set up by GSP-RM firmware.
+ pub(crate) fn bar1_pde_base(&self) -> u64 {
+ self.0.bar1PdeBase
+ }
}
// SAFETY: Padding is explicit and will not contain uninitialized data.
--
2.34.1
^ permalink raw reply related
* [PATCH v7 05/23] nova-core: mm: Add common memory management types
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add foundational types for GPU memory management. These types are used
throughout the nova memory management subsystem for page table
operations, address translation, and memory allocation.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/mod.rs | 171 ++++++++++++++++++++++++++++++++
1 file changed, 171 insertions(+)
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index 7a5dd4220c67..04d9baa7220e 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -2,4 +2,175 @@
//! Memory management subsystems for nova-core.
+#![expect(dead_code)]
+
pub(crate) mod pramin;
+
+use kernel::sizes::SZ_4K;
+
+/// Page size in bytes (4 KiB).
+pub(crate) const PAGE_SIZE: usize = SZ_4K;
+
+bitfield! {
+ pub(crate) struct VramAddress(u64), "Physical VRAM address in GPU video memory" {
+ 11:0 offset as u64, "Offset within 4KB page";
+ 63:12 frame_number as u64 => Pfn, "Physical frame number";
+ }
+}
+
+impl VramAddress {
+ /// Create a new VRAM address from a raw value.
+ pub(crate) const fn new(addr: u64) -> Self {
+ Self(addr)
+ }
+
+ /// Get the raw address value as `usize` (useful for MMIO offsets).
+ pub(crate) const fn raw(&self) -> usize {
+ self.0 as usize
+ }
+
+ /// Get the raw address value as `u64`.
+ pub(crate) const fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+impl PartialEq for VramAddress {
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+}
+
+impl Eq for VramAddress {}
+
+impl PartialOrd for VramAddress {
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for VramAddress {
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ self.0.cmp(&other.0)
+ }
+}
+
+impl From<Pfn> for VramAddress {
+ fn from(pfn: Pfn) -> Self {
+ Self::default().set_frame_number(pfn)
+ }
+}
+
+bitfield! {
+ pub(crate) struct VirtualAddress(u64), "Virtual address in GPU address space" {
+ 11:0 offset as u64, "Offset within 4KB page";
+ 20:12 l4_index as u64, "Level 4 index (PTE)";
+ 29:21 l3_index as u64, "Level 3 index (Dual PDE)";
+ 38:30 l2_index as u64, "Level 2 index";
+ 47:39 l1_index as u64, "Level 1 index";
+ 56:48 l0_index as u64, "Level 0 index (PDB)";
+ 63:12 frame_number as u64 => Vfn, "Virtual frame number";
+ }
+}
+
+impl VirtualAddress {
+ /// Create a new virtual address from a raw value.
+ #[expect(dead_code)]
+ pub(crate) const fn new(addr: u64) -> Self {
+ Self(addr)
+ }
+
+ /// Get the page table index for a given level (0-5).
+ pub(crate) fn level_index(&self, level: u64) -> u64 {
+ match level {
+ 0 => self.l0_index(),
+ 1 => self.l1_index(),
+ 2 => self.l2_index(),
+ 3 => self.l3_index(),
+ 4 => self.l4_index(),
+ // L5 is only used by MMU v3 (PTE level).
+ 5 => self.l4_index(),
+ _ => 0,
+ }
+ }
+}
+
+impl From<Vfn> for VirtualAddress {
+ fn from(vfn: Vfn) -> Self {
+ Self::default().set_frame_number(vfn)
+ }
+}
+
+/// Physical Frame Number.
+///
+/// Represents a physical page in VRAM.
+#[repr(transparent)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub(crate) struct Pfn(u64);
+
+impl Pfn {
+ /// Create a new PFN from a frame number.
+ pub(crate) const fn new(frame_number: u64) -> Self {
+ Self(frame_number)
+ }
+
+ /// Get the raw frame number.
+ pub(crate) const fn raw(self) -> u64 {
+ self.0
+ }
+}
+
+impl From<VramAddress> for Pfn {
+ fn from(addr: VramAddress) -> Self {
+ addr.frame_number()
+ }
+}
+
+impl From<u64> for Pfn {
+ fn from(val: u64) -> Self {
+ Self(val)
+ }
+}
+
+impl From<Pfn> for u64 {
+ fn from(pfn: Pfn) -> Self {
+ pfn.0
+ }
+}
+
+/// Virtual Frame Number.
+///
+/// Represents a virtual page in GPU address space.
+#[repr(transparent)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub(crate) struct Vfn(u64);
+
+impl Vfn {
+ /// Create a new VFN from a frame number.
+ pub(crate) const fn new(frame_number: u64) -> Self {
+ Self(frame_number)
+ }
+
+ /// Get the raw frame number.
+ pub(crate) const fn raw(self) -> u64 {
+ self.0
+ }
+}
+
+impl From<VirtualAddress> for Vfn {
+ fn from(addr: VirtualAddress) -> Self {
+ addr.frame_number()
+ }
+}
+
+impl From<u64> for Vfn {
+ fn from(val: u64) -> Self {
+ Self(val)
+ }
+}
+
+impl From<Vfn> for u64 {
+ fn from(vfn: Vfn) -> Self {
+ vfn.0
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 06/23] nova-core: mm: Add common types for all page table formats
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add common page table types shared between MMU v2 and v3. These types
are hardware-agnostic and used by both MMU versions.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/mod.rs | 1 +
drivers/gpu/nova-core/mm/pagetable/mod.rs | 149 ++++++++++++++++++++++
2 files changed, 150 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/pagetable/mod.rs
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index 04d9baa7220e..c88b7b7b6975 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -4,6 +4,7 @@
#![expect(dead_code)]
+pub(crate) mod pagetable;
pub(crate) mod pramin;
use kernel::sizes::SZ_4K;
diff --git a/drivers/gpu/nova-core/mm/pagetable/mod.rs b/drivers/gpu/nova-core/mm/pagetable/mod.rs
new file mode 100644
index 000000000000..aea06e5da4ff
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/pagetable/mod.rs
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Common page table types shared between MMU v2 and v3.
+//!
+//! This module provides foundational types used by both MMU versions:
+//! - Page table level hierarchy
+//! - Memory aperture types for PDEs and PTEs
+
+#![expect(dead_code)]
+
+use crate::gpu::Architecture;
+
+/// MMU version enumeration.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum MmuVersion {
+ /// MMU v2 for Turing/Ampere/Ada.
+ V2,
+ /// MMU v3 for Hopper and later.
+ V3,
+}
+
+impl From<Architecture> for MmuVersion {
+ fn from(arch: Architecture) -> Self {
+ match arch {
+ Architecture::Turing | Architecture::Ampere | Architecture::Ada => Self::V2,
+ // In the future, uncomment:
+ // _ => Self::V3,
+ }
+ }
+}
+
+/// Page Table Level hierarchy for MMU v2/v3.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum PageTableLevel {
+ /// Level 0 - Page Directory Base (root).
+ Pdb,
+ /// Level 1 - Intermediate page directory.
+ L1,
+ /// Level 2 - Intermediate page directory.
+ L2,
+ /// Level 3 - Intermediate page directory or dual PDE (version-dependent).
+ L3,
+ /// Level 4 - PTE level for v2, intermediate page directory for v3.
+ L4,
+ /// Level 5 - PTE level used for MMU v3 only.
+ L5,
+}
+
+impl PageTableLevel {
+ /// Number of entries per page table (512 for 4KB pages).
+ pub(crate) const ENTRIES_PER_TABLE: usize = 512;
+
+ /// Get the next level in the hierarchy.
+ pub(crate) const fn next(&self) -> Option<PageTableLevel> {
+ match self {
+ Self::Pdb => Some(Self::L1),
+ Self::L1 => Some(Self::L2),
+ Self::L2 => Some(Self::L3),
+ Self::L3 => Some(Self::L4),
+ Self::L4 => Some(Self::L5),
+ Self::L5 => None,
+ }
+ }
+
+ /// Convert level to index.
+ pub(crate) const fn as_index(&self) -> u64 {
+ match self {
+ Self::Pdb => 0,
+ Self::L1 => 1,
+ Self::L2 => 2,
+ Self::L3 => 3,
+ Self::L4 => 4,
+ Self::L5 => 5,
+ }
+ }
+}
+
+/// Memory aperture for Page Table Entries (`PTE`s).
+///
+/// Determines which memory region the `PTE` points to.
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub(crate) enum AperturePte {
+ /// Local video memory (VRAM).
+ #[default]
+ VideoMemory = 0,
+ /// Peer GPU's video memory.
+ PeerMemory = 1,
+ /// System memory with cache coherence.
+ SystemCoherent = 2,
+ /// System memory without cache coherence.
+ SystemNonCoherent = 3,
+}
+
+// TODO[FPRI]: Replace with `#[derive(FromPrimitive)]` when available.
+impl From<u8> for AperturePte {
+ fn from(val: u8) -> Self {
+ match val {
+ 0 => Self::VideoMemory,
+ 1 => Self::PeerMemory,
+ 2 => Self::SystemCoherent,
+ 3 => Self::SystemNonCoherent,
+ _ => Self::VideoMemory,
+ }
+ }
+}
+
+// TODO[FPRI]: Replace with `#[derive(ToPrimitive)]` when available.
+impl From<AperturePte> for u8 {
+ fn from(val: AperturePte) -> Self {
+ val as u8
+ }
+}
+
+/// Memory aperture for Page Directory Entries (`PDE`s).
+///
+/// Note: For `PDE`s, `Invalid` (0) means the entry is not valid.
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub(crate) enum AperturePde {
+ /// Invalid/unused entry.
+ #[default]
+ Invalid = 0,
+ /// Page table is in video memory.
+ VideoMemory = 1,
+ /// Page table is in system memory with coherence.
+ SystemCoherent = 2,
+ /// Page table is in system memory without coherence.
+ SystemNonCoherent = 3,
+}
+
+// TODO[FPRI]: Replace with `#[derive(FromPrimitive)]` when available.
+impl From<u8> for AperturePde {
+ fn from(val: u8) -> Self {
+ match val {
+ 1 => Self::VideoMemory,
+ 2 => Self::SystemCoherent,
+ 3 => Self::SystemNonCoherent,
+ _ => Self::Invalid,
+ }
+ }
+}
+
+// TODO[FPRI]: Replace with `#[derive(ToPrimitive)]` when available.
+impl From<AperturePde> for u8 {
+ fn from(val: AperturePde) -> Self {
+ val as u8
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 07/23] nova-core: mm: Add MMU v2 page table types
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add page table entry and directory structures for MMU version 2
used by Turing/Ampere/Ada GPUs.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/pagetable/mod.rs | 1 +
drivers/gpu/nova-core/mm/pagetable/ver2.rs | 199 +++++++++++++++++++++
2 files changed, 200 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/pagetable/ver2.rs
diff --git a/drivers/gpu/nova-core/mm/pagetable/mod.rs b/drivers/gpu/nova-core/mm/pagetable/mod.rs
index aea06e5da4ff..925063fde45d 100644
--- a/drivers/gpu/nova-core/mm/pagetable/mod.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/mod.rs
@@ -7,6 +7,7 @@
//! - Memory aperture types for PDEs and PTEs
#![expect(dead_code)]
+pub(crate) mod ver2;
use crate::gpu::Architecture;
diff --git a/drivers/gpu/nova-core/mm/pagetable/ver2.rs b/drivers/gpu/nova-core/mm/pagetable/ver2.rs
new file mode 100644
index 000000000000..d30a9a8bddbd
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/pagetable/ver2.rs
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! MMU v2 page table types for Turing and Ampere GPUs.
+//!
+//! This module defines MMU version 2 specific types (Turing, Ampere and Ada GPUs).
+//!
+//! Bit field layouts derived from the NVIDIA OpenRM documentation:
+//! `open-gpu-kernel-modules/src/common/inc/swref/published/turing/tu102/dev_mmu.h`
+
+#![expect(dead_code)]
+
+use super::{
+ AperturePde,
+ AperturePte,
+ PageTableLevel, //
+};
+use crate::mm::{
+ Pfn,
+ VramAddress, //
+};
+
+/// PDE levels for MMU v2 (5-level hierarchy: PDB -> L1 -> L2 -> L3 -> L4).
+pub(crate) const PDE_LEVELS: &[PageTableLevel] = &[
+ PageTableLevel::Pdb,
+ PageTableLevel::L1,
+ PageTableLevel::L2,
+ PageTableLevel::L3,
+];
+
+/// PTE level for MMU v2.
+pub(crate) const PTE_LEVEL: PageTableLevel = PageTableLevel::L4;
+
+/// Dual PDE level for MMU v2 (128-bit entries).
+pub(crate) const DUAL_PDE_LEVEL: PageTableLevel = PageTableLevel::L3;
+
+// Page Table Entry (PTE) for MMU v2 - 64-bit entry at level 4.
+bitfield! {
+ pub(crate) struct Pte(u64), "Page Table Entry for MMU v2" {
+ 0:0 valid as bool, "Entry is valid";
+ 2:1 aperture as u8 => AperturePte, "Memory aperture type";
+ 3:3 volatile as bool, "Volatile (bypass L2 cache)";
+ 4:4 encrypted as bool, "Encryption enabled (Confidential Computing)";
+ 5:5 privilege as bool, "Privileged access only";
+ 6:6 read_only as bool, "Write protection";
+ 7:7 atomic_disable as bool, "Atomic operations disabled";
+ 53:8 frame_number_sys as u64 => Pfn, "Frame number for system memory";
+ 32:8 frame_number_vid as u64 => Pfn, "Frame number for video memory";
+ 35:33 peer_id as u8, "Peer GPU ID for peer memory (0-7)";
+ 53:36 comptagline as u32, "Compression tag line bits";
+ 63:56 kind as u8, "Surface kind/format";
+ }
+}
+
+impl Pte {
+ /// Create a PTE from a `u64` value.
+ pub(crate) fn new(val: u64) -> Self {
+ Self(val)
+ }
+
+ /// Create a valid PTE for video memory.
+ pub(crate) fn new_vram(pfn: Pfn, writable: bool) -> Self {
+ Self::default()
+ .set_valid(true)
+ .set_aperture(AperturePte::VideoMemory)
+ .set_frame_number_vid(pfn)
+ .set_read_only(!writable)
+ }
+
+ /// Create an invalid PTE.
+ pub(crate) fn invalid() -> Self {
+ Self::default()
+ }
+
+ /// Get the frame number based on aperture type.
+ pub(crate) fn frame_number(&self) -> Pfn {
+ match self.aperture() {
+ AperturePte::VideoMemory => self.frame_number_vid(),
+ _ => self.frame_number_sys(),
+ }
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+// Page Directory Entry (PDE) for MMU v2 - 64-bit entry at levels 0-2.
+bitfield! {
+ pub(crate) struct Pde(u64), "Page Directory Entry for MMU v2" {
+ 0:0 valid_inverted as bool, "Valid bit (inverted logic)";
+ 2:1 aperture as u8 => AperturePde, "Memory aperture type";
+ 3:3 volatile as bool, "Volatile (bypass L2 cache)";
+ 5:5 no_ats as bool, "Disable Address Translation Services";
+ 53:8 table_frame_sys as u64 => Pfn, "Table frame number for system memory";
+ 32:8 table_frame_vid as u64 => Pfn, "Table frame number for video memory";
+ 35:33 peer_id as u8, "Peer GPU ID (0-7)";
+ }
+}
+
+impl Pde {
+ /// Create a PDE from a `u64` value.
+ pub(crate) fn new(val: u64) -> Self {
+ Self(val)
+ }
+
+ /// Create a valid PDE pointing to a page table in video memory.
+ pub(crate) fn new_vram(table_pfn: Pfn) -> Self {
+ Self::default()
+ .set_valid_inverted(false) // 0 = valid
+ .set_aperture(AperturePde::VideoMemory)
+ .set_table_frame_vid(table_pfn)
+ }
+
+ /// Create an invalid PDE.
+ pub(crate) fn invalid() -> Self {
+ Self::default()
+ .set_valid_inverted(true)
+ .set_aperture(AperturePde::Invalid)
+ }
+
+ /// Check if this PDE is valid.
+ pub(crate) fn is_valid(&self) -> bool {
+ !self.valid_inverted() && self.aperture() != AperturePde::Invalid
+ }
+
+ /// Get the table frame number based on aperture type.
+ pub(crate) fn table_frame(&self) -> Pfn {
+ match self.aperture() {
+ AperturePde::VideoMemory => self.table_frame_vid(),
+ _ => self.table_frame_sys(),
+ }
+ }
+
+ /// Get the VRAM address of the page table.
+ pub(crate) fn table_vram_address(&self) -> VramAddress {
+ debug_assert!(
+ self.aperture() == AperturePde::VideoMemory,
+ "table_vram_address called on non-VRAM PDE (aperture: {:?})",
+ self.aperture()
+ );
+ VramAddress::from(self.table_frame_vid())
+ }
+
+ /// Get the raw `u64` value of the PDE.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+/// Dual PDE at Level 3 - 128-bit entry of Large/Small Page Table pointers.
+///
+/// The dual PDE supports both large (64KB) and small (4KB) page tables.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct DualPde {
+ /// Large/Big Page Table pointer (lower 64 bits).
+ pub big: Pde,
+ /// Small Page Table pointer (upper 64 bits).
+ pub small: Pde,
+}
+
+impl DualPde {
+ /// Create a dual PDE from raw 128-bit value (two `u64`s).
+ pub(crate) fn new(big: u64, small: u64) -> Self {
+ Self {
+ big: Pde::new(big),
+ small: Pde::new(small),
+ }
+ }
+
+ /// Create a dual PDE with only the small page table pointer set.
+ ///
+ /// Note: The big (LPT) portion is set to 0, not `Pde::invalid()`.
+ /// According to hardware documentation, clearing bit 0 of the 128-bit
+ /// entry makes the PDE behave as a "normal" PDE. Using `Pde::invalid()`
+ /// would set bit 0 (valid_inverted), which breaks page table walking.
+ pub(crate) fn new_small(table_pfn: Pfn) -> Self {
+ Self {
+ big: Pde::new(0),
+ small: Pde::new_vram(table_pfn),
+ }
+ }
+
+ /// Check if the small page table pointer is valid.
+ pub(crate) fn has_small(&self) -> bool {
+ self.small.is_valid()
+ }
+
+ /// Check if the big page table pointer is valid.
+ pub(crate) fn has_big(&self) -> bool {
+ self.big.is_valid()
+ }
+
+ /// Get the small page table PFN.
+ pub(crate) fn small_pfn(&self) -> Pfn {
+ self.small.table_frame()
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 08/23] nova-core: mm: Add MMU v3 page table types
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add page table entry and directory structures for MMU version 3
used by Hopper and later GPUs.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/pagetable/mod.rs | 1 +
drivers/gpu/nova-core/mm/pagetable/ver3.rs | 302 +++++++++++++++++++++
2 files changed, 303 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/pagetable/ver3.rs
diff --git a/drivers/gpu/nova-core/mm/pagetable/mod.rs b/drivers/gpu/nova-core/mm/pagetable/mod.rs
index 925063fde45d..5263a8f56529 100644
--- a/drivers/gpu/nova-core/mm/pagetable/mod.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/mod.rs
@@ -8,6 +8,7 @@
#![expect(dead_code)]
pub(crate) mod ver2;
+pub(crate) mod ver3;
use crate::gpu::Architecture;
diff --git a/drivers/gpu/nova-core/mm/pagetable/ver3.rs b/drivers/gpu/nova-core/mm/pagetable/ver3.rs
new file mode 100644
index 000000000000..e6cab2fe7d33
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/pagetable/ver3.rs
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! MMU v3 page table types for Hopper and later GPUs.
+//!
+//! This module defines MMU version 3 specific types (Hopper and later GPUs).
+//!
+//! Key differences from MMU v2:
+//! - Unified 40-bit address field for all apertures (v2 had separate sys/vid fields).
+//! - PCF (Page Classification Field) replaces separate privilege/RO/atomic/cache bits.
+//! - KIND field is 4 bits (not 8).
+//! - IS_PTE bit in PDE to support large pages directly.
+//! - No COMPTAGLINE field (compression handled differently in v3).
+//! - No separate ENCRYPTED bit.
+//!
+//! Bit field layouts derived from the NVIDIA OpenRM documentation:
+//! `open-gpu-kernel-modules/src/common/inc/swref/published/hopper/gh100/dev_mmu.h`
+
+#![expect(dead_code)]
+
+use super::{
+ AperturePde,
+ AperturePte,
+ PageTableLevel, //
+};
+use crate::mm::{
+ Pfn,
+ VramAddress, //
+};
+use kernel::prelude::*;
+
+/// PDE levels for MMU v3 (6-level hierarchy).
+pub(crate) const PDE_LEVELS: &[PageTableLevel] = &[
+ PageTableLevel::Pdb,
+ PageTableLevel::L1,
+ PageTableLevel::L2,
+ PageTableLevel::L3,
+ PageTableLevel::L4,
+];
+
+/// PTE level for MMU v3.
+pub(crate) const PTE_LEVEL: PageTableLevel = PageTableLevel::L5;
+
+/// Dual PDE level for MMU v3 (128-bit entries).
+pub(crate) const DUAL_PDE_LEVEL: PageTableLevel = PageTableLevel::L4;
+
+// Page Classification Field (PCF) - 5 bits for PTEs in MMU v3.
+bitfield! {
+ pub(crate) struct PtePcf(u8), "Page Classification Field for PTEs" {
+ 0:0 uncached as bool, "Bypass L2 cache (0=cached, 1=bypass)";
+ 1:1 acd as bool, "Access counting disabled (0=enabled, 1=disabled)";
+ 2:2 read_only as bool, "Read-only access (0=read-write, 1=read-only)";
+ 3:3 no_atomic as bool, "Atomics disabled (0=enabled, 1=disabled)";
+ 4:4 privileged as bool, "Privileged access only (0=regular, 1=privileged)";
+ }
+}
+
+impl PtePcf {
+ /// Create PCF for read-write mapping (cached, no atomics, regular mode).
+ pub(crate) fn rw() -> Self {
+ Self::default().set_no_atomic(true)
+ }
+
+ /// Create PCF for read-only mapping (cached, no atomics, regular mode).
+ pub(crate) fn ro() -> Self {
+ Self::default().set_read_only(true).set_no_atomic(true)
+ }
+
+ /// Get the raw `u8` value.
+ pub(crate) fn raw_u8(&self) -> u8 {
+ self.0
+ }
+}
+
+impl From<u8> for PtePcf {
+ fn from(val: u8) -> Self {
+ Self(val)
+ }
+}
+
+// Page Classification Field (PCF) - 3 bits for PDEs in MMU v3.
+// Controls Address Translation Services (ATS) and caching.
+bitfield! {
+ pub(crate) struct PdePcf(u8), "Page Classification Field for PDEs" {
+ 0:0 uncached as bool, "Bypass L2 cache (0=cached, 1=bypass)";
+ 1:1 no_ats as bool, "Address Translation Services disabled (0=enabled, 1=disabled)";
+ }
+}
+
+impl PdePcf {
+ /// Create PCF for cached mapping with ATS enabled (default).
+ pub(crate) fn cached() -> Self {
+ Self::default()
+ }
+
+ /// Get the raw `u8` value.
+ pub(crate) fn raw_u8(&self) -> u8 {
+ self.0
+ }
+}
+
+impl From<u8> for PdePcf {
+ fn from(val: u8) -> Self {
+ Self(val)
+ }
+}
+
+// Page Table Entry (PTE) for MMU v3.
+bitfield! {
+ pub(crate) struct Pte(u64), "Page Table Entry for MMU v3" {
+ 0:0 valid as bool, "Entry is valid";
+ 2:1 aperture as u8 => AperturePte, "Memory aperture type";
+ 7:3 pcf as u8 => PtePcf, "Page Classification Field";
+ 11:8 kind as u8, "Surface kind (4 bits, 0x0=pitch, 0xF=invalid)";
+ 51:12 frame_number as u64 => Pfn, "Physical frame number (for all apertures)";
+ 63:61 peer_id as u8, "Peer GPU ID for peer memory (0-7)";
+ }
+}
+
+impl Pte {
+ /// Create a PTE from a `u64` value.
+ pub(crate) fn new(val: u64) -> Self {
+ Self(val)
+ }
+
+ /// Create a valid PTE for video memory.
+ pub(crate) fn new_vram(frame: Pfn, writable: bool) -> Self {
+ let pcf = if writable { PtePcf::rw() } else { PtePcf::ro() };
+ Self::default()
+ .set_valid(true)
+ .set_aperture(AperturePte::VideoMemory)
+ .set_pcf(pcf)
+ .set_frame_number(frame)
+ }
+
+ /// Create an invalid PTE.
+ pub(crate) fn invalid() -> Self {
+ Self::default()
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+// Page Directory Entry (PDE) for MMU v3.
+//
+// Note: v3 uses a unified 40-bit address field (v2 had separate sys/vid address fields).
+bitfield! {
+ pub(crate) struct Pde(u64), "Page Directory Entry for MMU v3 (Hopper+)" {
+ 0:0 is_pte as bool, "Entry is a PTE (0=PDE, 1=large page PTE)";
+ 2:1 aperture as u8 => AperturePde, "Memory aperture (0=invalid, 1=vidmem, 2=coherent, 3=non-coherent)";
+ 5:3 pcf as u8 => PdePcf, "Page Classification Field (3 bits for PDE)";
+ 51:12 table_frame as u64 => Pfn, "Table frame number (40-bit unified address)";
+ }
+}
+
+impl Pde {
+ /// Create a PDE from a `u64` value.
+ pub(crate) fn new(val: u64) -> Self {
+ Self(val)
+ }
+
+ /// Create a valid PDE pointing to a page table in video memory.
+ pub(crate) fn new_vram(table_pfn: Pfn) -> Self {
+ Self::default()
+ .set_is_pte(false)
+ .set_aperture(AperturePde::VideoMemory)
+ .set_table_frame(table_pfn)
+ }
+
+ /// Create an invalid PDE.
+ pub(crate) fn invalid() -> Self {
+ Self::default().set_aperture(AperturePde::Invalid)
+ }
+
+ /// Check if this PDE is valid.
+ pub(crate) fn is_valid(&self) -> bool {
+ self.aperture() != AperturePde::Invalid
+ }
+
+ /// Get the VRAM address of the page table.
+ pub(crate) fn table_vram_address(&self) -> VramAddress {
+ debug_assert!(
+ self.aperture() == AperturePde::VideoMemory,
+ "table_vram_address called on non-VRAM PDE (aperture: {:?})",
+ self.aperture()
+ );
+ VramAddress::from(self.table_frame())
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+// Big Page Table pointer for Dual PDE - 64-bit lower word of the 128-bit Dual PDE.
+bitfield! {
+ pub(crate) struct DualPdeBig(u64), "Big Page Table pointer in Dual PDE (MMU v3)" {
+ 0:0 is_pte as bool, "Entry is a PTE (for large pages)";
+ 2:1 aperture as u8 => AperturePde, "Memory aperture type";
+ 5:3 pcf as u8 => PdePcf, "Page Classification Field";
+ 51:8 table_frame as u64, "Table frame (table address 256-byte aligned)";
+ }
+}
+
+impl DualPdeBig {
+ /// Create a big page table pointer from a `u64` value.
+ pub(crate) fn new(val: u64) -> Self {
+ Self(val)
+ }
+
+ /// Create an invalid big page table pointer.
+ pub(crate) fn invalid() -> Self {
+ Self::default().set_aperture(AperturePde::Invalid)
+ }
+
+ /// Create a valid big PDE pointing to a page table in video memory.
+ pub(crate) fn new_vram(table_addr: VramAddress) -> Result<Self> {
+ // Big page table addresses must be 256-byte aligned (shift 8).
+ if table_addr.raw_u64() & 0xFF != 0 {
+ return Err(EINVAL);
+ }
+
+ let table_frame = table_addr.raw_u64() >> 8;
+ Ok(Self::default()
+ .set_is_pte(false)
+ .set_aperture(AperturePde::VideoMemory)
+ .set_table_frame(table_frame))
+ }
+
+ /// Check if this big PDE is valid.
+ pub(crate) fn is_valid(&self) -> bool {
+ self.aperture() != AperturePde::Invalid
+ }
+
+ /// Get the VRAM address of the big page table.
+ pub(crate) fn table_vram_address(&self) -> VramAddress {
+ debug_assert!(
+ self.aperture() == AperturePde::VideoMemory,
+ "table_vram_address called on non-VRAM DualPdeBig (aperture: {:?})",
+ self.aperture()
+ );
+ VramAddress::new(self.table_frame() << 8)
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ self.0
+ }
+}
+
+/// Dual PDE at Level 4 for MMU v3 - 128-bit entry.
+///
+/// Contains both big (64KB) and small (4KB) page table pointers:
+/// - Lower 64 bits: Big Page Table pointer.
+/// - Upper 64 bits: Small Page Table pointer.
+///
+/// ## Note
+///
+/// The big and small page table pointers have different address layouts:
+/// - Big address = field value << 8 (256-byte alignment).
+/// - Small address = field value << 12 (4KB alignment).
+///
+/// This is why `DualPdeBig` is a separate type from `Pde`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+pub(crate) struct DualPde {
+ /// Big Page Table pointer.
+ pub big: DualPdeBig,
+ /// Small Page Table pointer.
+ pub small: Pde,
+}
+
+impl DualPde {
+ /// Create a dual PDE from raw 128-bit value (two `u64`s).
+ pub(crate) fn new(big: u64, small: u64) -> Self {
+ Self {
+ big: DualPdeBig::new(big),
+ small: Pde::new(small),
+ }
+ }
+
+ /// Create a dual PDE with only the small page table pointer set.
+ pub(crate) fn new_small(table_pfn: Pfn) -> Self {
+ Self {
+ big: DualPdeBig::invalid(),
+ small: Pde::new_vram(table_pfn),
+ }
+ }
+
+ /// Check if the small page table pointer is valid.
+ pub(crate) fn has_small(&self) -> bool {
+ self.small.is_valid()
+ }
+
+ /// Check if the big page table pointer is valid.
+ pub(crate) fn has_big(&self) -> bool {
+ self.big.is_valid()
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 09/23] nova-core: mm: Add unified page table entry wrapper enums
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add unified Pte, Pde, and DualPde wrapper enums that abstract over
MMU v2 and v3 page table entry formats. These enums allow the page
table walker and VMM to work with both MMU versions.
Each unified type:
- Takes MmuVersion parameter in constructors
- Wraps both ver2 and ver3 variants
- Delegates method calls to the appropriate variant
This enables version-agnostic page table operations while keeping
version-specific implementation details encapsulated in the ver2
and ver3 modules.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/pagetable/mod.rs | 327 ++++++++++++++++++++++
1 file changed, 327 insertions(+)
diff --git a/drivers/gpu/nova-core/mm/pagetable/mod.rs b/drivers/gpu/nova-core/mm/pagetable/mod.rs
index 5263a8f56529..1f783261a30f 100644
--- a/drivers/gpu/nova-core/mm/pagetable/mod.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/mod.rs
@@ -10,6 +10,14 @@
pub(crate) mod ver2;
pub(crate) mod ver3;
+use kernel::prelude::*;
+
+use super::{
+ pramin,
+ Pfn,
+ VramAddress,
+ PAGE_SIZE, //
+};
use crate::gpu::Architecture;
/// MMU version enumeration.
@@ -77,6 +85,76 @@ pub(crate) const fn as_index(&self) -> u64 {
}
}
+impl MmuVersion {
+ /// Get the `PDE` levels (excluding PTE level) for page table walking.
+ pub(crate) fn pde_levels(&self) -> &'static [PageTableLevel] {
+ match self {
+ Self::V2 => ver2::PDE_LEVELS,
+ Self::V3 => ver3::PDE_LEVELS,
+ }
+ }
+
+ /// Get the PTE level for this MMU version.
+ pub(crate) fn pte_level(&self) -> PageTableLevel {
+ match self {
+ Self::V2 => ver2::PTE_LEVEL,
+ Self::V3 => ver3::PTE_LEVEL,
+ }
+ }
+
+ /// Get the dual PDE level (128-bit entries) for this MMU version.
+ pub(crate) fn dual_pde_level(&self) -> PageTableLevel {
+ match self {
+ Self::V2 => ver2::DUAL_PDE_LEVEL,
+ Self::V3 => ver3::DUAL_PDE_LEVEL,
+ }
+ }
+
+ /// Get the number of PDE levels for this MMU version.
+ pub(crate) fn pde_level_count(&self) -> usize {
+ self.pde_levels().len()
+ }
+
+ /// Get the entry size in bytes for a given level.
+ pub(crate) fn entry_size(&self, level: PageTableLevel) -> usize {
+ if level == self.dual_pde_level() {
+ 16 // 128-bit dual PDE
+ } else {
+ 8 // 64-bit PDE/PTE
+ }
+ }
+
+ /// Get the number of entries per page table page for a given level.
+ pub(crate) fn entries_per_page(&self, level: PageTableLevel) -> usize {
+ PAGE_SIZE / self.entry_size(level)
+ }
+
+ /// Compute upper bound on page table pages needed for `num_virt_pages`.
+ ///
+ /// Walks from PTE level up through PDE levels, accumulating the tree.
+ pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
+ let mut total = 0;
+
+ // PTE pages at the leaf level.
+ let pte_epp = self.entries_per_page(self.pte_level());
+ // Ceiling of (num_virt_pages / entries_per_page).
+ let mut pages_at_level = (num_virt_pages + pte_epp - 1) / pte_epp;
+ total += pages_at_level;
+
+ // Walk PDE levels bottom-up (reverse of pde_levels()).
+ for &level in self.pde_levels().iter().rev() {
+ let epp = self.entries_per_page(level);
+ // How many pages at this level do we need to point to
+ // the previous pages_at_level?
+ // Calculated as ceiling of (pages_at_level / entries_per_page).
+ pages_at_level = (pages_at_level + epp - 1) / epp;
+ total += pages_at_level;
+ }
+
+ total
+ }
+}
+
/// Memory aperture for Page Table Entries (`PTE`s).
///
/// Determines which memory region the `PTE` points to.
@@ -149,3 +227,252 @@ fn from(val: AperturePde) -> Self {
val as u8
}
}
+
+/// Unified Page Table Entry wrapper for both MMU v2 and v3 `PTE`
+/// types, allowing the walker to work with either format.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum Pte {
+ /// MMU v2 `PTE` (Turing/Ampere/Ada).
+ V2(ver2::Pte),
+ /// MMU v3 `PTE` (Hopper+).
+ V3(ver3::Pte),
+}
+
+impl Pte {
+ /// Create a `PTE` from a raw `u64` value for the given MMU version.
+ pub(crate) fn new(version: MmuVersion, val: u64) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pte::new(val)),
+ MmuVersion::V3 => Self::V3(ver3::Pte::new(val)),
+ }
+ }
+
+ /// Create an invalid `PTE` for the given MMU version.
+ pub(crate) fn invalid(version: MmuVersion) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pte::invalid()),
+ MmuVersion::V3 => Self::V3(ver3::Pte::invalid()),
+ }
+ }
+
+ /// Create a valid `PTE` for video memory.
+ pub(crate) fn new_vram(version: MmuVersion, pfn: Pfn, writable: bool) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pte::new_vram(pfn, writable)),
+ MmuVersion::V3 => Self::V3(ver3::Pte::new_vram(pfn, writable)),
+ }
+ }
+
+ /// Check if this `PTE` is valid.
+ pub(crate) fn is_valid(&self) -> bool {
+ match self {
+ Self::V2(p) => p.valid(),
+ Self::V3(p) => p.valid(),
+ }
+ }
+
+ /// Get the physical frame number.
+ pub(crate) fn frame_number(&self) -> Pfn {
+ match self {
+ Self::V2(p) => p.frame_number(),
+ Self::V3(p) => p.frame_number(),
+ }
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ match self {
+ Self::V2(p) => p.raw_u64(),
+ Self::V3(p) => p.raw_u64(),
+ }
+ }
+
+ /// Read a `PTE` from VRAM.
+ pub(crate) fn read(
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ mmu_version: MmuVersion,
+ ) -> Result<Self> {
+ let val = window.try_read64(addr.raw())?;
+ Ok(Self::new(mmu_version, val))
+ }
+
+ /// Write this `PTE` to VRAM.
+ pub(crate) fn write(
+ &self,
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ ) -> Result {
+ window.try_write64(addr.raw(), self.raw_u64())
+ }
+}
+
+impl Default for Pte {
+ fn default() -> Self {
+ Self::V2(ver2::Pte::default())
+ }
+}
+
+/// Unified Page Directory Entry wrapper for both MMU v2 and v3 `PDE`.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum Pde {
+ /// MMU v2 `PDE` (Turing/Ampere/Ada).
+ V2(ver2::Pde),
+ /// MMU v3 `PDE` (Hopper+).
+ V3(ver3::Pde),
+}
+
+impl Pde {
+ /// Create a `PDE` from a raw `u64` value for the given MMU version.
+ pub(crate) fn new(version: MmuVersion, val: u64) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pde::new(val)),
+ MmuVersion::V3 => Self::V3(ver3::Pde::new(val)),
+ }
+ }
+
+ /// Create a valid `PDE` pointing to a page table in video memory.
+ pub(crate) fn new_vram(version: MmuVersion, table_pfn: Pfn) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pde::new_vram(table_pfn)),
+ MmuVersion::V3 => Self::V3(ver3::Pde::new_vram(table_pfn)),
+ }
+ }
+
+ /// Create an invalid `PDE` for the given MMU version.
+ pub(crate) fn invalid(version: MmuVersion) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::Pde::invalid()),
+ MmuVersion::V3 => Self::V3(ver3::Pde::invalid()),
+ }
+ }
+
+ /// Check if this `PDE` is valid.
+ pub(crate) fn is_valid(&self) -> bool {
+ match self {
+ Self::V2(p) => p.is_valid(),
+ Self::V3(p) => p.is_valid(),
+ }
+ }
+
+ /// Get the VRAM address of the page table.
+ pub(crate) fn table_vram_address(&self) -> VramAddress {
+ match self {
+ Self::V2(p) => p.table_vram_address(),
+ Self::V3(p) => p.table_vram_address(),
+ }
+ }
+
+ /// Get the raw `u64` value.
+ pub(crate) fn raw_u64(&self) -> u64 {
+ match self {
+ Self::V2(p) => p.raw_u64(),
+ Self::V3(p) => p.raw_u64(),
+ }
+ }
+
+ /// Read a `PDE` from VRAM.
+ pub(crate) fn read(
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ mmu_version: MmuVersion,
+ ) -> Result<Self> {
+ let val = window.try_read64(addr.raw())?;
+ Ok(Self::new(mmu_version, val))
+ }
+
+ /// Write this `PDE` to VRAM.
+ pub(crate) fn write(
+ &self,
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ ) -> Result {
+ window.try_write64(addr.raw(), self.raw_u64())
+ }
+}
+
+impl Default for Pde {
+ fn default() -> Self {
+ Self::V2(ver2::Pde::default())
+ }
+}
+
+/// Unified Dual Page Directory Entry wrapper for both MMU v2 and v3 [`DualPde`].
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum DualPde {
+ /// MMU v2 [`DualPde`] (Turing/Ampere/Ada).
+ V2(ver2::DualPde),
+ /// MMU v3 [`DualPde`] (Hopper+).
+ V3(ver3::DualPde),
+}
+
+impl DualPde {
+ /// Create a [`DualPde`] from raw 128-bit value (two `u64`s) for the given MMU version.
+ pub(crate) fn new(version: MmuVersion, big: u64, small: u64) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::DualPde::new(big, small)),
+ MmuVersion::V3 => Self::V3(ver3::DualPde::new(big, small)),
+ }
+ }
+
+ /// Create a [`DualPde`] with only the small page table pointer set.
+ pub(crate) fn new_small(version: MmuVersion, table_pfn: Pfn) -> Self {
+ match version {
+ MmuVersion::V2 => Self::V2(ver2::DualPde::new_small(table_pfn)),
+ MmuVersion::V3 => Self::V3(ver3::DualPde::new_small(table_pfn)),
+ }
+ }
+
+ /// Check if the small page table pointer is valid.
+ pub(crate) fn has_small(&self) -> bool {
+ match self {
+ Self::V2(d) => d.has_small(),
+ Self::V3(d) => d.has_small(),
+ }
+ }
+
+ /// Get the small page table VRAM address.
+ pub(crate) fn small_vram_address(&self) -> VramAddress {
+ match self {
+ Self::V2(d) => d.small.table_vram_address(),
+ Self::V3(d) => d.small.table_vram_address(),
+ }
+ }
+
+ /// Get the raw `u64` value of the big PDE.
+ pub(crate) fn big_raw_u64(&self) -> u64 {
+ match self {
+ Self::V2(d) => d.big.raw_u64(),
+ Self::V3(d) => d.big.raw_u64(),
+ }
+ }
+
+ /// Get the raw `u64` value of the small PDE.
+ pub(crate) fn small_raw_u64(&self) -> u64 {
+ match self {
+ Self::V2(d) => d.small.raw_u64(),
+ Self::V3(d) => d.small.raw_u64(),
+ }
+ }
+
+ /// Read a dual PDE (128-bit) from VRAM.
+ pub(crate) fn read(
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ mmu_version: MmuVersion,
+ ) -> Result<Self> {
+ let lo = window.try_read64(addr.raw())?;
+ let hi = window.try_read64(addr.raw() + 8)?;
+ Ok(Self::new(mmu_version, lo, hi))
+ }
+
+ /// Write this dual PDE (128-bit) to VRAM.
+ pub(crate) fn write(
+ &self,
+ window: &mut pramin::PraminWindow<'_>,
+ addr: VramAddress,
+ ) -> Result {
+ window.try_write64(addr.raw(), self.big_raw_u64())?;
+ window.try_write64(addr.raw() + 8, self.small_raw_u64())
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 10/23] nova-core: mm: Add TLB flush support
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add TLB (Translation Lookaside Buffer) flush support for GPU MMU.
After modifying page table entries, the GPU's TLB must be invalidated
to ensure the new mappings take effect. The Tlb struct provides flush
functionality through BAR0 registers.
The flush operation writes the page directory base address and triggers
an invalidation, polling for completion with a 2 second timeout matching
the Nouveau driver.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/mod.rs | 1 +
drivers/gpu/nova-core/mm/tlb.rs | 92 +++++++++++++++++++++++++++++++++
drivers/gpu/nova-core/regs.rs | 33 ++++++++++++
3 files changed, 126 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/tlb.rs
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index c88b7b7b6975..eafd56964b1b 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -6,6 +6,7 @@
pub(crate) mod pagetable;
pub(crate) mod pramin;
+pub(crate) mod tlb;
use kernel::sizes::SZ_4K;
diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
new file mode 100644
index 000000000000..466c7731323e
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/tlb.rs
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! TLB (Translation Lookaside Buffer) flush support for GPU MMU.
+//!
+//! After modifying page table entries, the GPU's TLB must be flushed to
+//! ensure the new mappings take effect. This module provides TLB flush
+//! functionality for virtual memory managers.
+//!
+//! # Example
+//!
+//! ```ignore
+//! use crate::mm::tlb::Tlb;
+//!
+//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
+//! // ... modify page tables ...
+//!
+//! // Flush TLB to make changes visible (polls for completion).
+//! tlb.flush(pdb_addr)?;
+//!
+//! Ok(())
+//! }
+//! ```
+
+#![allow(dead_code)]
+
+use kernel::{
+ devres::Devres,
+ io::poll::read_poll_timeout,
+ new_mutex,
+ prelude::*,
+ sync::{Arc, Mutex},
+ time::Delta, //
+};
+
+use crate::{
+ driver::Bar0,
+ mm::VramAddress,
+ regs, //
+};
+
+/// TLB manager for GPU translation buffer operations.
+#[pin_data]
+pub(crate) struct Tlb {
+ bar: Arc<Devres<Bar0>>,
+ /// TLB flush serialization lock: This lock is acquired during the
+ /// DMA fence signalling critical path. It must NEVER be held across any
+ /// reclaimable CPU memory allocations because the memory reclaim path can
+ /// call `dma_fence_wait()`, which would deadlock with this lock held.
+ #[pin]
+ lock: Mutex<()>,
+}
+
+impl Tlb {
+ /// Create a new TLB manager.
+ pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
+ pin_init!(Self {
+ bar,
+ lock <- new_mutex!((), "tlb_flush"),
+ })
+ }
+
+ /// Flush the GPU TLB for a specific page directory base.
+ ///
+ /// This invalidates all TLB entries associated with the given PDB address.
+ /// Must be called after modifying page table entries to ensure the GPU sees
+ /// the updated mappings.
+ pub(crate) fn flush(&self, pdb_addr: VramAddress) -> Result {
+ let _guard = self.lock.lock();
+
+ let bar = self.bar.try_access().ok_or(ENODEV)?;
+
+ // Write PDB address.
+ regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()).write(&*bar);
+ regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()).write(&*bar);
+
+ // Trigger flush: invalidate all pages and enable.
+ regs::NV_TLB_FLUSH_CTRL::default()
+ .set_page_all(true)
+ .set_enable(true)
+ .write(&*bar);
+
+ // Poll for completion - enable bit clears when flush is done.
+ read_poll_timeout(
+ || Ok(regs::NV_TLB_FLUSH_CTRL::read(&*bar)),
+ |ctrl| !ctrl.enable(),
+ Delta::ZERO,
+ Delta::from_secs(2),
+ )?;
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index d0982e346f74..c948f961f307 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -454,3 +454,36 @@ pub(crate) mod ga100 {
0:0 display_disabled as bool;
});
}
+
+// MMU TLB
+
+register!(NV_TLB_FLUSH_PDB_LO @ 0x00b830a0, "TLB flush register: PDB address bits [39:8]" {
+ 31:0 pdb_lo as u32, "PDB address bits [39:8]";
+});
+
+impl NV_TLB_FLUSH_PDB_LO {
+ /// Create a register value from a PDB address.
+ ///
+ /// Extracts bits [39:8] of the address and shifts it right by 8 bits.
+ pub(crate) fn from_pdb_addr(addr: u64) -> Self {
+ Self::default().set_pdb_lo(((addr >> 8) & 0xFFFF_FFFF) as u32)
+ }
+}
+
+register!(NV_TLB_FLUSH_PDB_HI @ 0x00b830a4, "TLB flush register: PDB address bits [47:40]" {
+ 7:0 pdb_hi as u8, "PDB address bits [47:40]";
+});
+
+impl NV_TLB_FLUSH_PDB_HI {
+ /// Create a register value from a PDB address.
+ ///
+ /// Extracts bits [47:40] of the address and shifts it right by 40 bits.
+ pub(crate) fn from_pdb_addr(addr: u64) -> Self {
+ Self::default().set_pdb_hi(((addr >> 40) & 0xFF) as u8)
+ }
+}
+
+register!(NV_TLB_FLUSH_CTRL @ 0x00b830b0, "TLB flush control register" {
+ 0:0 page_all as bool, "Invalidate all pages";
+ 31:31 enable as bool, "Enable/trigger flush (clears when flush completes)";
+});
--
2.34.1
^ permalink raw reply related
* [PATCH v7 11/23] nova-core: mm: Add GpuMm centralized memory manager
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Introduce GpuMm as the centralized GPU memory manager that owns:
- Buddy allocator for VRAM allocation.
- PRAMIN window for direct VRAM access.
- TLB manager for translation buffer operations.
This provides clean ownership model where GpuMm provides accessor
methods for its components that can be used for memory management
operations.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 15 ++++++++
drivers/gpu/nova-core/mm/mod.rs | 63 ++++++++++++++++++++++++++++++++-
2 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 9b042ef1a308..396d5bf57167 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -4,8 +4,10 @@
device,
devres::Devres,
fmt,
+ gpu::buddy::GpuBuddyParams,
pci,
prelude::*,
+ sizes::{SZ_1M, SZ_4K},
sync::Arc, //
};
@@ -19,6 +21,7 @@
fb::SysmemFlush,
gfw,
gsp::Gsp,
+ mm::GpuMm,
regs,
};
@@ -249,6 +252,9 @@ pub(crate) struct Gpu {
gsp_falcon: Falcon<GspFalcon>,
/// SEC2 falcon instance, used for GSP boot up and cleanup.
sec2_falcon: Falcon<Sec2Falcon>,
+ /// GPU memory manager owning memory management resources.
+ #[pin]
+ mm: GpuMm,
/// GSP runtime data. Temporarily an empty placeholder.
#[pin]
gsp: Gsp,
@@ -281,6 +287,15 @@ pub(crate) fn new<'a>(
sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
+ // Create GPU memory manager owning memory management resources.
+ // This will be initialized with the usable VRAM region from GSP in a later
+ // patch. For now, we use a placeholder of 1MB.
+ mm <- GpuMm::new(devres_bar.clone(), &GpuBuddyParams {
+ base_offset_bytes: 0,
+ physical_memory_size_bytes: SZ_1M as u64,
+ chunk_size_bytes: SZ_4K as u64,
+ })?,
+
gsp <- Gsp::new(pdev),
_: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index eafd56964b1b..471f8eb0635c 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -8,7 +8,68 @@
pub(crate) mod pramin;
pub(crate) mod tlb;
-use kernel::sizes::SZ_4K;
+use kernel::{
+ devres::Devres,
+ gpu::buddy::{
+ GpuBuddy,
+ GpuBuddyParams, //
+ },
+ prelude::*,
+ sizes::SZ_4K,
+ sync::Arc, //
+};
+
+use crate::driver::Bar0;
+
+pub(crate) use tlb::Tlb;
+
+/// GPU Memory Manager - owns all core MM components.
+///
+/// Provides centralized ownership of memory management resources:
+/// - [`GpuBuddy`] allocator for VRAM page table allocation.
+/// - [`pramin::Pramin`] for direct VRAM access.
+/// - [`Tlb`] manager for translation buffer flush operations.
+#[pin_data]
+pub(crate) struct GpuMm {
+ buddy: GpuBuddy,
+ #[pin]
+ pramin: pramin::Pramin,
+ #[pin]
+ tlb: Tlb,
+}
+
+impl GpuMm {
+ /// Create a pin-initializer for `GpuMm`.
+ pub(crate) fn new(
+ bar: Arc<Devres<Bar0>>,
+ buddy_params: GpuBuddyParams,
+ ) -> Result<impl PinInit<Self>> {
+ let buddy = GpuBuddy::new(buddy_params)?;
+ let tlb_init = Tlb::new(bar.clone());
+ let pramin_init = pramin::Pramin::new(bar)?;
+
+ Ok(pin_init!(Self {
+ buddy,
+ pramin <- pramin_init,
+ tlb <- tlb_init,
+ }))
+ }
+
+ /// Access the [`GpuBuddy`] allocator.
+ pub(crate) fn buddy(&self) -> &GpuBuddy {
+ &self.buddy
+ }
+
+ /// Access the [`pramin::Pramin`].
+ pub(crate) fn pramin(&self) -> &pramin::Pramin {
+ &self.pramin
+ }
+
+ /// Access the [`Tlb`] manager.
+ pub(crate) fn tlb(&self) -> &Tlb {
+ &self.tlb
+ }
+}
/// Page size in bytes (4 KiB).
pub(crate) const PAGE_SIZE: usize = SZ_4K;
--
2.34.1
^ permalink raw reply related
* [PATCH v7 12/23] nova-core: mm: Add page table walker for MMU v2/v3
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add the page table walker implementation that traverses the page table
hierarchy for both MMU v2 (5-level) and MMU v3 (6-level) to resolve
virtual addresses to physical addresses or find PTE locations.
Currently only v2 has been tested (nova-core currently boots pre-hopper)
with some initial prepatory work done for v3.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/pagetable/mod.rs | 1 +
drivers/gpu/nova-core/mm/pagetable/walk.rs | 213 +++++++++++++++++++++
2 files changed, 214 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/pagetable/walk.rs
diff --git a/drivers/gpu/nova-core/mm/pagetable/mod.rs b/drivers/gpu/nova-core/mm/pagetable/mod.rs
index 1f783261a30f..2b9fc60fb167 100644
--- a/drivers/gpu/nova-core/mm/pagetable/mod.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/mod.rs
@@ -9,6 +9,7 @@
#![expect(dead_code)]
pub(crate) mod ver2;
pub(crate) mod ver3;
+pub(crate) mod walk;
use kernel::prelude::*;
diff --git a/drivers/gpu/nova-core/mm/pagetable/walk.rs b/drivers/gpu/nova-core/mm/pagetable/walk.rs
new file mode 100644
index 000000000000..4ee343734d72
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/pagetable/walk.rs
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Page table walker implementation for NVIDIA GPUs.
+//!
+//! This module provides page table walking functionality for MMU v2 and v3.
+//! The walker traverses the page table hierarchy to resolve virtual addresses
+//! to physical addresses or to find PTE locations.
+//!
+//! # Page Table Hierarchy
+//!
+//! ## MMU v2 (Turing/Ampere/Ada) - 5 levels
+//!
+//! ```text
+//! +-------+ +-------+ +-------+ +---------+ +-------+
+//! | PDB |---->| L1 |---->| L2 |---->| L3 Dual |---->| L4 |
+//! | (L0) | | | | | | PDE | | (PTE) |
+//! +-------+ +-------+ +-------+ +---------+ +-------+
+//! 64-bit 64-bit 64-bit 128-bit 64-bit
+//! PDE PDE PDE (big+small) PTE
+//! ```
+//!
+//! ## MMU v3 (Hopper+) - 6 levels
+//!
+//! ```text
+//! +-------+ +-------+ +-------+ +-------+ +---------+ +-------+
+//! | PDB |---->| L1 |---->| L2 |---->| L3 |---->| L4 Dual |---->| L5 |
+//! | (L0) | | | | | | | | PDE | | (PTE) |
+//! +-------+ +-------+ +-------+ +-------+ +---------+ +-------+
+//! 64-bit 64-bit 64-bit 64-bit 128-bit 64-bit
+//! PDE PDE PDE PDE (big+small) PTE
+//! ```
+//!
+//! # Result of a page table walk
+//!
+//! The walker returns a [`WalkResult`] indicating the outcome.
+
+use kernel::prelude::*;
+
+use super::{
+ DualPde,
+ MmuVersion,
+ PageTableLevel,
+ Pde,
+ Pte, //
+};
+use crate::mm::{
+ pramin,
+ GpuMm,
+ Pfn,
+ Vfn,
+ VirtualAddress,
+ VramAddress, //
+};
+
+/// Result of walking to a PTE.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum WalkResult {
+ /// Intermediate page tables are missing (only returned in lookup mode).
+ PageTableMissing,
+ /// PTE exists but is invalid (page not mapped).
+ Unmapped { pte_addr: VramAddress },
+ /// PTE exists and is valid (page is mapped).
+ Mapped { pte_addr: VramAddress, pfn: Pfn },
+}
+
+/// Result of walking PDE levels only.
+///
+/// Returned by [`PtWalk::walk_pde_levels()`] to indicate whether all PDE levels
+/// resolved or a PDE is missing.
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum WalkPdeResult {
+ /// All PDE levels resolved -- returns PTE page table address.
+ Complete {
+ /// VRAM address of the PTE-level page table.
+ pte_table: VramAddress,
+ },
+ /// A PDE is missing and no prepared page was provided by the closure.
+ Missing {
+ /// PDE slot address in the parent page table (where to install).
+ install_addr: VramAddress,
+ /// The page table level that is missing.
+ level: PageTableLevel,
+ },
+}
+
+/// Page table walker for NVIDIA GPUs.
+///
+/// Walks the page table hierarchy (5 levels for v2, 6 for v3) to find PTE
+/// locations or resolve virtual addresses.
+pub(crate) struct PtWalk {
+ pdb_addr: VramAddress,
+ mmu_version: MmuVersion,
+}
+
+impl PtWalk {
+ /// Calculate the VRAM address of an entry within a page table.
+ fn entry_addr(
+ table: VramAddress,
+ mmu_version: MmuVersion,
+ level: PageTableLevel,
+ index: u64,
+ ) -> VramAddress {
+ let entry_size = mmu_version.entry_size(level) as u64;
+ VramAddress::new(table.raw_u64() + index * entry_size)
+ }
+
+ /// Create a new page table walker.
+ pub(crate) fn new(pdb_addr: VramAddress, mmu_version: MmuVersion) -> Self {
+ Self {
+ pdb_addr,
+ mmu_version,
+ }
+ }
+
+ /// Walk PDE levels with closure-based resolution for missing PDEs.
+ ///
+ /// Traverses all PDE levels for the MMU version. At each level, reads the PDE.
+ /// If valid, extracts the child table address and continues. If missing, calls
+ /// `resolve_prepared(install_addr)` to resolve the missing PDE.
+ pub(crate) fn walk_pde_levels(
+ &self,
+ window: &mut pramin::PraminWindow<'_>,
+ vfn: Vfn,
+ resolve_prepared: impl Fn(VramAddress) -> Option<VramAddress>,
+ ) -> Result<WalkPdeResult> {
+ let va = VirtualAddress::from(vfn);
+ let mut cur_table = self.pdb_addr;
+
+ for &level in self.mmu_version.pde_levels() {
+ let idx = va.level_index(level.as_index());
+ let install_addr = Self::entry_addr(cur_table, self.mmu_version, level, idx);
+
+ if level == self.mmu_version.dual_pde_level() {
+ // 128-bit dual PDE with big+small page table pointers.
+ let dpde = DualPde::read(window, install_addr, self.mmu_version)?;
+ if dpde.has_small() {
+ cur_table = dpde.small_vram_address();
+ continue;
+ }
+ } else {
+ // Regular 64-bit PDE.
+ let pde = Pde::read(window, install_addr, self.mmu_version)?;
+ if pde.is_valid() {
+ cur_table = pde.table_vram_address();
+ continue;
+ }
+ }
+
+ // PDE missing in HW. Ask caller for resolution.
+ if let Some(prepared_addr) = resolve_prepared(install_addr) {
+ cur_table = prepared_addr;
+ continue;
+ }
+
+ return Ok(WalkPdeResult::Missing {
+ install_addr,
+ level,
+ });
+ }
+
+ Ok(WalkPdeResult::Complete {
+ pte_table: cur_table,
+ })
+ }
+
+ /// Walk to PTE for lookup only (no allocation).
+ ///
+ /// Returns [`WalkResult::PageTableMissing`] if intermediate tables don't exist.
+ pub(crate) fn walk_to_pte_lookup(&self, mm: &GpuMm, vfn: Vfn) -> Result<WalkResult> {
+ let mut window = mm.pramin().window()?;
+ self.walk_to_pte_lookup_with_window(&mut window, vfn)
+ }
+
+ /// Walk to PTE using a caller-provided PRAMIN window (lookup only).
+ ///
+ /// Uses [`PtWalk::walk_pde_levels()`] for the PDE traversal, then reads the PTE at
+ /// the leaf level. Useful when called for multiple VFNs with single PRAMIN window
+ /// acquisition. Used by [`Vmm::execute_map()`] and [`Vmm::unmap_pages()`].
+ pub(crate) fn walk_to_pte_lookup_with_window(
+ &self,
+ window: &mut pramin::PraminWindow<'_>,
+ vfn: Vfn,
+ ) -> Result<WalkResult> {
+ match self.walk_pde_levels(window, vfn, |_| None)? {
+ WalkPdeResult::Complete { pte_table } => {
+ Self::read_pte_at_level(window, vfn, pte_table, self.mmu_version)
+ }
+ WalkPdeResult::Missing { .. } => Ok(WalkResult::PageTableMissing),
+ }
+ }
+
+ /// Read the PTE at the PTE level given the PTE table address.
+ fn read_pte_at_level(
+ window: &mut pramin::PraminWindow<'_>,
+ vfn: Vfn,
+ pte_table: VramAddress,
+ mmu_version: MmuVersion,
+ ) -> Result<WalkResult> {
+ let va = VirtualAddress::from(vfn);
+ let pte_level = mmu_version.pte_level();
+ let pte_idx = va.level_index(pte_level.as_index());
+ let pte_addr = Self::entry_addr(pte_table, mmu_version, pte_level, pte_idx);
+ let pte = Pte::read(window, pte_addr, mmu_version)?;
+
+ if pte.is_valid() {
+ return Ok(WalkResult::Mapped {
+ pte_addr,
+ pfn: pte.frame_number(),
+ });
+ }
+ Ok(WalkResult::Unmapped { pte_addr })
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 13/23] nova-core: mm: Add Virtual Memory Manager
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add the Virtual Memory Manager (VMM) infrastructure for GPU address
space management. Each Vmm instance manages a single address space
identified by its Page Directory Base (PDB) address, used for Channel,
BAR1 and BAR2 mappings.
Mapping APIs and virtual address range tracking are added in later
commits.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/mod.rs | 1 +
drivers/gpu/nova-core/mm/vmm.rs | 63 +++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 drivers/gpu/nova-core/mm/vmm.rs
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index 471f8eb0635c..922479accba6 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -7,6 +7,7 @@
pub(crate) mod pagetable;
pub(crate) mod pramin;
pub(crate) mod tlb;
+pub(crate) mod vmm;
use kernel::{
devres::Devres,
diff --git a/drivers/gpu/nova-core/mm/vmm.rs b/drivers/gpu/nova-core/mm/vmm.rs
new file mode 100644
index 000000000000..eaee707181b5
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/vmm.rs
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Virtual Memory Manager for NVIDIA GPU page table management.
+//!
+//! The [`Vmm`] provides high-level page mapping and unmapping operations for GPU
+//! virtual address spaces (Channels, BAR1, BAR2). It wraps the page table walker
+//! and handles TLB flushing after modifications.
+
+#![allow(dead_code)]
+
+use kernel::{
+ gpu::buddy::AllocatedBlocks,
+ prelude::*, //
+};
+
+use crate::mm::{
+ pagetable::{
+ walk::{PtWalk, WalkResult},
+ MmuVersion, //
+ },
+ GpuMm,
+ Pfn,
+ Vfn,
+ VramAddress, //
+};
+
+/// Virtual Memory Manager for a GPU address space.
+///
+/// Each [`Vmm`] instance manages a single address space identified by its Page
+/// Directory Base (`PDB`) address. The [`Vmm`] is used for Channel, BAR1 and
+/// BAR2 mappings.
+pub(crate) struct Vmm {
+ pub(crate) pdb_addr: VramAddress,
+ pub(crate) mmu_version: MmuVersion,
+ /// Page table allocations required for mappings.
+ page_table_allocs: KVec<Pin<KBox<AllocatedBlocks>>>,
+}
+
+impl Vmm {
+ /// Create a new [`Vmm`] for the given Page Directory Base address.
+ pub(crate) fn new(pdb_addr: VramAddress, mmu_version: MmuVersion) -> Result<Self> {
+ // Only MMU v2 is supported for now.
+ if mmu_version != MmuVersion::V2 {
+ return Err(ENOTSUPP);
+ }
+
+ Ok(Self {
+ pdb_addr,
+ mmu_version,
+ page_table_allocs: KVec::new(),
+ })
+ }
+
+ /// Read the PFN for a mapped VFN if one is mapped.
+ pub(crate) fn read_mapping(&self, mm: &GpuMm, vfn: Vfn) -> Result<Option<Pfn>> {
+ let walker = PtWalk::new(self.pdb_addr, self.mmu_version);
+
+ match walker.walk_to_pte_lookup(mm, vfn)? {
+ WalkResult::Mapped { pfn, .. } => Ok(Some(pfn)),
+ WalkResult::Unmapped { .. } | WalkResult::PageTableMissing => Ok(None),
+ }
+ }
+}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 14/23] nova-core: mm: Add virtual address range tracking to VMM
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add virtual address range tracking to the VMM using a buddy allocator.
This enables contiguous virtual address range allocation for mappings.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/vmm.rs | 75 +++++++++++++++++++++++++++++++--
1 file changed, 71 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nova-core/mm/vmm.rs b/drivers/gpu/nova-core/mm/vmm.rs
index eaee707181b5..9e57916017ed 100644
--- a/drivers/gpu/nova-core/mm/vmm.rs
+++ b/drivers/gpu/nova-core/mm/vmm.rs
@@ -9,10 +9,19 @@
#![allow(dead_code)]
use kernel::{
- gpu::buddy::AllocatedBlocks,
- prelude::*, //
+ gpu::buddy::{
+ AllocatedBlocks,
+ BuddyFlags,
+ GpuBuddy,
+ GpuBuddyAllocParams,
+ GpuBuddyParams, //
+ },
+ prelude::*,
+ sizes::SZ_4K, //
};
+use core::ops::Range;
+
use crate::mm::{
pagetable::{
walk::{PtWalk, WalkResult},
@@ -21,7 +30,8 @@
GpuMm,
Pfn,
Vfn,
- VramAddress, //
+ VramAddress,
+ PAGE_SIZE, //
};
/// Virtual Memory Manager for a GPU address space.
@@ -34,23 +44,80 @@ pub(crate) struct Vmm {
pub(crate) mmu_version: MmuVersion,
/// Page table allocations required for mappings.
page_table_allocs: KVec<Pin<KBox<AllocatedBlocks>>>,
+ /// Buddy allocator for virtual address range tracking.
+ virt_buddy: GpuBuddy,
}
impl Vmm {
/// Create a new [`Vmm`] for the given Page Directory Base address.
- pub(crate) fn new(pdb_addr: VramAddress, mmu_version: MmuVersion) -> Result<Self> {
+ /// The [`Vmm`] will manage a virtual address space of `va_size` bytes.
+ pub(crate) fn new(
+ pdb_addr: VramAddress,
+ mmu_version: MmuVersion,
+ va_size: u64,
+ ) -> Result<Self> {
// Only MMU v2 is supported for now.
if mmu_version != MmuVersion::V2 {
return Err(ENOTSUPP);
}
+ let virt_buddy = GpuBuddy::new(GpuBuddyParams {
+ base_offset_bytes: 0,
+ physical_memory_size_bytes: va_size,
+ chunk_size_bytes: SZ_4K as u64,
+ })?;
+
Ok(Self {
pdb_addr,
mmu_version,
page_table_allocs: KVec::new(),
+ virt_buddy,
})
}
+ /// Allocate a contiguous virtual frame number range.
+ ///
+ /// # Arguments
+ /// - `num_pages`: Number of pages to allocate.
+ /// - `va_range`: `None` = allocate anywhere,
+ /// `Some(range)` = constrain allocation to the given range.
+ pub(crate) fn alloc_vfn_range(
+ &self,
+ num_pages: usize,
+ va_range: Option<Range<u64>>,
+ ) -> Result<(Vfn, Pin<KBox<AllocatedBlocks>>)> {
+ let size_bytes = (num_pages as u64)
+ .checked_mul(PAGE_SIZE as u64)
+ .ok_or(EOVERFLOW)?;
+
+ let (start, end) = match va_range {
+ Some(r) => {
+ let range_size = r.end.checked_sub(r.start).ok_or(EOVERFLOW)?;
+ if range_size != size_bytes {
+ return Err(EINVAL);
+ }
+ (r.start, r.end)
+ }
+ None => (0, 0),
+ };
+
+ let params = GpuBuddyAllocParams {
+ start_range_address: start,
+ end_range_address: end,
+ size_bytes,
+ min_block_size_bytes: SZ_4K as u64,
+ buddy_flags: BuddyFlags::try_new(BuddyFlags::CONTIGUOUS_ALLOCATION)?,
+ };
+
+ let alloc = KBox::pin_init(self.virt_buddy.alloc_blocks(¶ms), GFP_KERNEL)?;
+
+ // Get the starting offset of the first block (only block as range is contiguous).
+ let offset = alloc.iter().next().ok_or(ENOMEM)?.offset();
+ let vfn = Vfn::new(offset / PAGE_SIZE as u64);
+
+ Ok((vfn, alloc))
+ }
+
/// Read the PFN for a mapped VFN if one is mapped.
pub(crate) fn read_mapping(&self, mm: &GpuMm, vfn: Vfn) -> Result<Option<Pfn>> {
let walker = PtWalk::new(self.pdb_addr, self.mmu_version);
--
2.34.1
^ permalink raw reply related
* [PATCH v7 15/23] nova-core: mm: Add multi-page mapping API to VMM
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add the page table mapping and unmapping API to the Virtual Memory
Manager, implementing a two-phase prepare/execute model suitable for
use both inside and outside the DMA fence signalling critical path.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/mm/vmm.rs | 347 +++++++++++++++++++++++++++++++-
1 file changed, 345 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/nova-core/mm/vmm.rs b/drivers/gpu/nova-core/mm/vmm.rs
index 9e57916017ed..af3daccbf958 100644
--- a/drivers/gpu/nova-core/mm/vmm.rs
+++ b/drivers/gpu/nova-core/mm/vmm.rs
@@ -17,15 +17,25 @@
GpuBuddyParams, //
},
prelude::*,
+ rbtree::{RBTree, RBTreeNode},
sizes::SZ_4K, //
};
+use core::cell::Cell;
use core::ops::Range;
use crate::mm::{
pagetable::{
- walk::{PtWalk, WalkResult},
- MmuVersion, //
+ walk::{
+ PtWalk,
+ WalkPdeResult,
+ WalkResult, //
+ },
+ DualPde,
+ MmuVersion,
+ PageTableLevel,
+ Pde,
+ Pte, //
},
GpuMm,
Pfn,
@@ -46,6 +56,74 @@ pub(crate) struct Vmm {
page_table_allocs: KVec<Pin<KBox<AllocatedBlocks>>>,
/// Buddy allocator for virtual address range tracking.
virt_buddy: GpuBuddy,
+ /// Prepared PT pages pending PDE installation, keyed by `install_addr`.
+ ///
+ /// Populated by `Vmm` mapping prepare phase and drained in the execute phase.
+ /// Shared by all pending maps in the `Vmm`, thus preventing races where 2
+ /// maps might be trying to install the same page table/directory entry pointer.
+ pt_pages: RBTree<VramAddress, PreparedPtPage>,
+}
+
+/// A pre-allocated and zeroed page table page.
+///
+/// Created during the mapping prepare phase and consumed during the mapping execute phase.
+/// Stored in an [`RBTree`] keyed by the PDE slot address (`install_addr`).
+struct PreparedPtPage {
+ /// The allocated and zeroed page table page.
+ alloc: Pin<KBox<AllocatedBlocks>>,
+ /// Page table level -- needed to determine if this PT page is for a dual PDE.
+ level: PageTableLevel,
+}
+
+/// Multi-page prepared mapping -- VA range allocated, ready for execute.
+///
+/// Produced by [`Vmm::prepare_map()`], consumed by [`Vmm::execute_map()`].
+/// The struct owns the VA space allocation between prepare and execute phases.
+pub(crate) struct PreparedMapping {
+ vfn_start: Vfn,
+ num_pages: usize,
+ vfn_alloc: Pin<KBox<AllocatedBlocks>>,
+}
+
+/// Result of a mapping operation -- tracks the active mapped range.
+///
+/// Returned by [`Vmm::execute_map()`] and [`Vmm::map_pages()`].
+/// Owns the VA allocation; the VA range is freed when this is dropped.
+/// Callers must call [`Vmm::unmap_pages()`] before dropping to invalidate
+/// PTEs (dropping only frees the VA range, not the PTE entries).
+pub(crate) struct MappedRange {
+ pub(crate) vfn_start: Vfn,
+ pub(crate) num_pages: usize,
+ /// VA allocation -- freed when [`MappedRange`] is dropped.
+ _vfn_alloc: Pin<KBox<AllocatedBlocks>>,
+ /// Logs a warning if dropped without unmapping.
+ _drop_guard: MustUnmapGuard,
+}
+
+/// Guard that logs a warning once if a [`MappedRange`] is dropped without
+/// calling [`Vmm::unmap_pages()`].
+struct MustUnmapGuard {
+ armed: Cell<bool>,
+}
+
+impl MustUnmapGuard {
+ const fn new() -> Self {
+ Self {
+ armed: Cell::new(true),
+ }
+ }
+
+ fn disarm(&self) {
+ self.armed.set(false);
+ }
+}
+
+impl Drop for MustUnmapGuard {
+ fn drop(&mut self) {
+ if self.armed.get() {
+ kernel::pr_warn_once!("MappedRange dropped without calling unmap_pages()\n");
+ }
+ }
}
impl Vmm {
@@ -72,6 +150,7 @@ pub(crate) fn new(
mmu_version,
page_table_allocs: KVec::new(),
virt_buddy,
+ pt_pages: RBTree::new(),
})
}
@@ -127,4 +206,268 @@ pub(crate) fn read_mapping(&self, mm: &GpuMm, vfn: Vfn) -> Result<Option<Pfn>> {
WalkResult::Unmapped { .. } | WalkResult::PageTableMissing => Ok(None),
}
}
+
+ /// Allocate and zero a physical page table page for a specific PDE slot.
+ /// Called during the map prepare phase.
+ fn alloc_and_zero_page_table(
+ &mut self,
+ mm: &GpuMm,
+ level: PageTableLevel,
+ ) -> Result<PreparedPtPage> {
+ let params = GpuBuddyAllocParams {
+ start_range_address: 0,
+ end_range_address: 0,
+ size_bytes: SZ_4K as u64,
+ min_block_size_bytes: SZ_4K as u64,
+ buddy_flags: BuddyFlags::try_new(0)?,
+ };
+ let blocks = KBox::pin_init(mm.buddy().alloc_blocks(¶ms), GFP_KERNEL)?;
+
+ // Get page's VRAM address from the allocation.
+ let page_vram = VramAddress::new(blocks.iter().next().ok_or(ENOMEM)?.offset());
+
+ // Zero via PRAMIN.
+ let mut window = mm.pramin().window()?;
+ let base = page_vram.raw();
+ for off in (0..PAGE_SIZE).step_by(8) {
+ window.try_write64(base + off, 0)?;
+ }
+
+ Ok(PreparedPtPage {
+ alloc: blocks,
+ level,
+ })
+ }
+
+ /// Ensure all intermediate page table pages are prepared for a [`Vfn`]. Just
+ /// finds out which PDE pages are missing, allocates pages for them, and defers
+ /// installation to the execute phase.
+ ///
+ /// PRAMIN is released before each allocation and re-acquired after. Memory
+ /// allocations outside of holding this lock to prevent deadlocks with fence signalling
+ /// critical path.
+ fn ensure_pte_path(&mut self, mm: &GpuMm, vfn: Vfn) -> Result {
+ let walker = PtWalk::new(self.pdb_addr, self.mmu_version);
+ let max_iter = 2 * self.mmu_version.pde_level_count();
+
+ // Keep looping until all PDE levels are resolved.
+ for _ in 0..max_iter {
+ let mut window = mm.pramin().window()?;
+
+ // Walk PDE levels. The closure checks self.pt_pages for prepared-but-uninstalled
+ // pages, letting the walker continue through them as if they were installed in HW.
+ // The walker keeps calling the closure to get these "prepared but not installed" pages.
+ let result = walker.walk_pde_levels(&mut window, vfn, |install_addr| {
+ self.pt_pages
+ .get(&install_addr)
+ .and_then(|p| Some(VramAddress::new(p.alloc.iter().next()?.offset())))
+ })?;
+
+ match result {
+ WalkPdeResult::Complete { .. } => {
+ // All PDE levels resolved.
+ return Ok(());
+ }
+ WalkPdeResult::Missing {
+ install_addr,
+ level,
+ } => {
+ // Drop PRAMIN before allocation.
+ drop(window);
+ let page = self.alloc_and_zero_page_table(mm, level)?;
+ let node = RBTreeNode::new(install_addr, page, GFP_KERNEL)?;
+ let old = self.pt_pages.insert(node);
+ if old.is_some() {
+ kernel::pr_warn_once!(
+ "VMM: duplicate install_addr in pt_pages (internal consistency error)\n"
+ );
+ return Err(EIO);
+ }
+ // Loop: re-acquire PRAMIN and re-walk from root.
+ }
+ }
+ }
+
+ Err(EIO)
+ }
+
+ /// Prepare resources for mapping `num_pages` pages.
+ ///
+ /// Allocates a contiguous VA range, then walks the hierarchy per-VFN to prepare pages
+ /// for all missing PDEs. Returns a [`PreparedMapping`] with the VA allocation.
+ ///
+ /// If `va_range` is not `None`, the VA range is constrained to the given range. Safe
+ /// to call outside the fence signalling critical path.
+ pub(crate) fn prepare_map(
+ &mut self,
+ mm: &GpuMm,
+ num_pages: usize,
+ va_range: Option<Range<u64>>,
+ ) -> Result<PreparedMapping> {
+ if num_pages == 0 {
+ return Err(EINVAL);
+ }
+
+ // Pre-reserve so execute_map() can use push_within_capacity (no alloc in
+ // fence signalling critical path).
+ // Upper bound on page table pages needed for the full tree (PTE pages + PDE
+ // pages at all levels).
+ let pt_upper_bound = self.mmu_version.pt_pages_upper_bound(num_pages);
+ self.page_table_allocs.reserve(pt_upper_bound, GFP_KERNEL)?;
+
+ // Allocate contiguous VA range.
+ let (vfn_start, vfn_alloc) = self.alloc_vfn_range(num_pages, va_range)?;
+
+ // Walk the hierarchy per-VFN to prepare pages for all missing PDEs.
+ for i in 0..num_pages {
+ let vfn = Vfn::new(vfn_start.raw() + i as u64);
+ self.ensure_pte_path(mm, vfn)?;
+ }
+
+ Ok(PreparedMapping {
+ vfn_start,
+ num_pages,
+ vfn_alloc,
+ })
+ }
+
+ /// Execute a prepared multi-page mapping.
+ ///
+ /// Drain prepared PT pages and install PDEs followed by single TLB flush.
+ pub(crate) fn execute_map(
+ &mut self,
+ mm: &GpuMm,
+ prepared: PreparedMapping,
+ pfns: &[Pfn],
+ writable: bool,
+ ) -> Result<MappedRange> {
+ if pfns.len() != prepared.num_pages {
+ return Err(EINVAL);
+ }
+
+ let PreparedMapping {
+ vfn_start,
+ num_pages,
+ vfn_alloc,
+ } = prepared;
+
+ let walker = PtWalk::new(self.pdb_addr, self.mmu_version);
+ let mut window = mm.pramin().window()?;
+
+ // First, drain self.pt_pages, install all pending PDEs.
+ let mut cursor = self.pt_pages.cursor_front_mut();
+ while let Some(c) = cursor {
+ let (next, node) = c.remove_current();
+ let (install_addr, page) = node.to_key_value();
+ let page_vram = VramAddress::new(page.alloc.iter().next().ok_or(ENOMEM)?.offset());
+
+ if page.level == self.mmu_version.dual_pde_level() {
+ let new_dpde = DualPde::new_small(self.mmu_version, Pfn::from(page_vram));
+ new_dpde.write(&mut window, install_addr)?;
+ } else {
+ let new_pde = Pde::new_vram(self.mmu_version, Pfn::from(page_vram));
+ new_pde.write(&mut window, install_addr)?;
+ }
+
+ // Track the allocated pages in the `Vmm`.
+ self.page_table_allocs
+ .push_within_capacity(page.alloc)
+ .map_err(|_| ENOMEM)?;
+
+ cursor = next;
+ }
+
+ // Next, write PTEs (all PDEs now installed in HW).
+ for (i, &pfn) in pfns.iter().enumerate() {
+ let vfn = Vfn::new(vfn_start.raw() + i as u64);
+ let result = walker.walk_to_pte_lookup_with_window(&mut window, vfn)?;
+
+ match result {
+ WalkResult::Unmapped { pte_addr } | WalkResult::Mapped { pte_addr, .. } => {
+ let pte = Pte::new_vram(self.mmu_version, pfn, writable);
+ pte.write(&mut window, pte_addr)?;
+ }
+ WalkResult::PageTableMissing => {
+ kernel::pr_warn_once!("VMM: page table missing for VFN {vfn:?}\n");
+ return Err(EIO);
+ }
+ }
+ }
+
+ drop(window);
+
+ // Finally, flush the TLB.
+ mm.tlb().flush(self.pdb_addr)?;
+
+ Ok(MappedRange {
+ vfn_start,
+ num_pages,
+ _vfn_alloc: vfn_alloc,
+ _drop_guard: MustUnmapGuard::new(),
+ })
+ }
+
+ /// Map pages doing prepare and execute in the same call.
+ ///
+ /// This is a convenience wrapper for callers outside the fence signalling critical
+ /// path (e.g., BAR mappings). For DRM usecases, [`Vmm::prepare_map()`] and
+ /// [`Vmm::execute_map()`] will be called separately.
+ pub(crate) fn map_pages(
+ &mut self,
+ mm: &GpuMm,
+ pfns: &[Pfn],
+ va_range: Option<Range<u64>>,
+ writable: bool,
+ ) -> Result<MappedRange> {
+ if pfns.is_empty() {
+ return Err(EINVAL);
+ }
+
+ // Check if provided VA range is sufficient (if provided).
+ if let Some(ref range) = va_range {
+ let required = pfns.len().checked_mul(PAGE_SIZE).ok_or(EOVERFLOW)? as u64;
+ let available = range.end.checked_sub(range.start).ok_or(EINVAL)?;
+ if available < required {
+ return Err(EINVAL);
+ }
+ }
+
+ let prepared = self.prepare_map(mm, pfns.len(), va_range)?;
+ self.execute_map(mm, prepared, pfns, writable)
+ }
+
+ /// Unmap all pages in a [`MappedRange`] with a single TLB flush.
+ ///
+ /// Takes the range by value (consumes it), then invalidates PTEs for the range,
+ /// flushes the TLB, then drops the range (freeing the VA). PRAMIN lock is held.
+ pub(crate) fn unmap_pages(&mut self, mm: &GpuMm, range: MappedRange) -> Result {
+ let walker = PtWalk::new(self.pdb_addr, self.mmu_version);
+ let invalid_pte = Pte::invalid(self.mmu_version);
+
+ let mut window = mm.pramin().window()?;
+ for i in 0..range.num_pages {
+ let vfn = Vfn::new(range.vfn_start.raw() + i as u64);
+ let result = walker.walk_to_pte_lookup_with_window(&mut window, vfn)?;
+
+ match result {
+ WalkResult::Mapped { pte_addr, .. } | WalkResult::Unmapped { pte_addr } => {
+ invalid_pte.write(&mut window, pte_addr)?;
+ }
+ WalkResult::PageTableMissing => {
+ continue;
+ }
+ }
+ }
+ drop(window);
+
+ mm.tlb().flush(self.pdb_addr)?;
+
+ // TODO: Internal page table pages (PDE, PTE pages) are still kept around.
+ // This is by design as repeated maps/unmaps will be fast. As a future TODO,
+ // we can add a reclaimer here to reclaim if VRAM is short. For now, the PT
+ // pages are dropped once the `Vmm` is dropped.
+
+ range._drop_guard.disarm(); // Unmap complete, Ok to drop MappedRange.
+ Ok(())
+ }
}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 16/23] nova-core: mm: Add BAR1 user interface
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add the BAR1 user interface for CPU access to GPU video memory through
the BAR1 aperture.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 1 -
drivers/gpu/nova-core/mm/bar_user.rs | 154 +++++++++++++++++++++++++++
drivers/gpu/nova-core/mm/mod.rs | 1 +
3 files changed, 155 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/nova-core/mm/bar_user.rs
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index f30ffa45cf13..d8b2e967ba4c 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -42,7 +42,6 @@ pub(crate) struct NovaCore {
const GPU_DMA_BITS: u32 = 47;
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
-#[expect(dead_code)]
pub(crate) type Bar1 = pci::Bar<BAR1_SIZE>;
kernel::pci_device_table!(
diff --git a/drivers/gpu/nova-core/mm/bar_user.rs b/drivers/gpu/nova-core/mm/bar_user.rs
new file mode 100644
index 000000000000..74119c4d365e
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/bar_user.rs
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! BAR1 user interface for CPU access to GPU virtual memory. Used for USERD
+//! for GPU work submission, and applications to access GPU buffers via mmap().
+
+use kernel::io::Io;
+use kernel::prelude::*;
+
+use crate::{
+ driver::Bar1,
+ mm::{
+ pagetable::MmuVersion,
+ vmm::{
+ MappedRange,
+ Vmm, //
+ },
+ GpuMm,
+ Pfn,
+ Vfn,
+ VirtualAddress,
+ VramAddress,
+ PAGE_SIZE, //
+ },
+};
+
+/// BAR1 user interface for virtual memory mappings.
+///
+/// Owns a VMM instance with virtual address tracking and provides
+/// BAR1-specific mapping and cleanup operations.
+pub(crate) struct BarUser {
+ vmm: Vmm,
+}
+
+impl BarUser {
+ /// Create a new [`BarUser`] with virtual address tracking.
+ pub(crate) fn new(
+ pdb_addr: VramAddress,
+ mmu_version: MmuVersion,
+ va_size: u64,
+ ) -> Result<Self> {
+ Ok(Self {
+ vmm: Vmm::new(pdb_addr, mmu_version, va_size)?,
+ })
+ }
+
+ /// Map physical pages to a contiguous BAR1 virtual range.
+ pub(crate) fn map<'a>(
+ &'a mut self,
+ mm: &'a GpuMm,
+ bar: &'a Bar1,
+ pfns: &[Pfn],
+ writable: bool,
+ ) -> Result<BarAccess<'a>> {
+ if pfns.is_empty() {
+ return Err(EINVAL);
+ }
+
+ let mapped = self.vmm.map_pages(mm, pfns, None, writable)?;
+
+ Ok(BarAccess {
+ vmm: &mut self.vmm,
+ mm,
+ bar,
+ mapped: Some(mapped),
+ })
+ }
+}
+
+/// Access object for a mapped BAR1 region.
+///
+/// Wraps a [`MappedRange`] and provides BAR1 access. When dropped,
+/// unmaps pages and releases the VA range (by passing the range to
+/// [`Vmm::unmap_pages()`], which consumes it).
+pub(crate) struct BarAccess<'a> {
+ vmm: &'a mut Vmm,
+ mm: &'a GpuMm,
+ bar: &'a Bar1,
+ /// Needs to be an `Option` so that we can `take()` it and call `Drop`
+ /// on it in [`Vmm::unmap_pages()`].
+ mapped: Option<MappedRange>,
+}
+
+impl<'a> BarAccess<'a> {
+ /// Returns the active mapping.
+ fn mapped(&self) -> &MappedRange {
+ // SAFETY: unwrap() will never panic here because `mapped` is only
+ // `None` after `take()` in `Drop`, accessors are never called in `Drop`.
+ self.mapped.as_ref().unwrap()
+ }
+
+ /// Get the base virtual address of this mapping.
+ pub(crate) fn base(&self) -> VirtualAddress {
+ VirtualAddress::from(self.mapped().vfn_start)
+ }
+
+ /// Get the total size of the mapped region in bytes.
+ pub(crate) fn size(&self) -> usize {
+ self.mapped().num_pages * PAGE_SIZE
+ }
+
+ /// Get the starting virtual frame number.
+ pub(crate) fn vfn_start(&self) -> Vfn {
+ self.mapped().vfn_start
+ }
+
+ /// Get the number of pages in this mapping.
+ pub(crate) fn num_pages(&self) -> usize {
+ self.mapped().num_pages
+ }
+
+ /// Translate an offset within this mapping to a BAR1 aperture offset.
+ fn bar_offset(&self, offset: usize) -> Result<usize> {
+ if offset >= self.size() {
+ return Err(EINVAL);
+ }
+
+ let base = (self.mapped().vfn_start.raw() as usize)
+ .checked_mul(PAGE_SIZE)
+ .ok_or(EOVERFLOW)?;
+ base.checked_add(offset).ok_or(EOVERFLOW)
+ }
+
+ // Fallible accessors with runtime bounds checking.
+
+ /// Read a 32-bit value at the given offset.
+ pub(crate) fn try_read32(&self, offset: usize) -> Result<u32> {
+ self.bar.try_read32(self.bar_offset(offset)?)
+ }
+
+ /// Write a 32-bit value at the given offset.
+ pub(crate) fn try_write32(&self, value: u32, offset: usize) -> Result {
+ self.bar.try_write32(value, self.bar_offset(offset)?)
+ }
+
+ /// Read a 64-bit value at the given offset.
+ pub(crate) fn try_read64(&self, offset: usize) -> Result<u64> {
+ self.bar.try_read64(self.bar_offset(offset)?)
+ }
+
+ /// Write a 64-bit value at the given offset.
+ pub(crate) fn try_write64(&self, value: u64, offset: usize) -> Result {
+ self.bar.try_write64(value, self.bar_offset(offset)?)
+ }
+}
+
+impl Drop for BarAccess<'_> {
+ fn drop(&mut self) {
+ if let Some(mapped) = self.mapped.take() {
+ if self.vmm.unmap_pages(self.mm, mapped).is_err() {
+ kernel::pr_warn_once!("BarAccess: unmap_pages failed.\n");
+ }
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/mm/mod.rs b/drivers/gpu/nova-core/mm/mod.rs
index 922479accba6..b00f30641ab5 100644
--- a/drivers/gpu/nova-core/mm/mod.rs
+++ b/drivers/gpu/nova-core/mm/mod.rs
@@ -4,6 +4,7 @@
#![expect(dead_code)]
+pub(crate) mod bar_user;
pub(crate) mod pagetable;
pub(crate) mod pramin;
pub(crate) mod tlb;
--
2.34.1
^ permalink raw reply related
* [PATCH v7 17/23] nova-core: gsp: Return GspStaticInfo and FbLayout from boot()
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Refactor the GSP boot function to return the GspStaticInfo and FbLayout.
This enables access required for memory management initialization to:
- bar1_pde_base: BAR1 page directory base.
- bar2_pde_base: BAR2 page directory base.
- usable memory regions in vidmem.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/gpu.rs | 9 +++++++--
drivers/gpu/nova-core/gsp/boot.rs | 15 ++++++++++++---
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 396d5bf57167..e217e5c7cb32 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -20,7 +20,10 @@
},
fb::SysmemFlush,
gfw,
- gsp::Gsp,
+ gsp::{
+ commands::GetGspStaticInfoReply,
+ Gsp, //
+ },
mm::GpuMm,
regs,
};
@@ -258,6 +261,8 @@ pub(crate) struct Gpu {
/// GSP runtime data. Temporarily an empty placeholder.
#[pin]
gsp: Gsp,
+ /// Static GPU information from GSP.
+ gsp_static_info: GetGspStaticInfoReply,
}
impl Gpu {
@@ -298,7 +303,7 @@ pub(crate) fn new<'a>(
gsp <- Gsp::new(pdev),
- _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
+ gsp_static_info: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?.0 },
bar: devres_bar,
})
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index be427fe26a58..7a4a0c759267 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -32,7 +32,10 @@
},
gpu::Chipset,
gsp::{
- commands,
+ commands::{
+ self,
+ GetGspStaticInfoReply, //
+ },
sequencer::{
GspSequencer,
GspSequencerParams, //
@@ -127,6 +130,12 @@ fn run_fwsec_frts(
/// structures that the GSP will use at runtime.
///
/// Upon return, the GSP is up and running, and its runtime object given as return value.
+ ///
+ /// Returns a tuple containing:
+ /// - [`GetGspStaticInfoReply`]: Static GPU information from GSP, including the BAR1 page
+ /// directory base address needed for memory management.
+ /// - [`FbLayout`]: Frame buffer layout computed during boot, containing memory regions
+ /// required for [`GpuMm`] initialization.
pub(crate) fn boot(
mut self: Pin<&mut Self>,
pdev: &pci::Device<device::Bound>,
@@ -134,7 +143,7 @@ pub(crate) fn boot(
chipset: Chipset,
gsp_falcon: &Falcon<Gsp>,
sec2_falcon: &Falcon<Sec2>,
- ) -> Result {
+ ) -> Result<(GetGspStaticInfoReply, FbLayout)> {
let dev = pdev.as_ref();
let bios = Vbios::new(dev, bar)?;
@@ -243,6 +252,6 @@ pub(crate) fn boot(
Err(e) => dev_warn!(pdev.as_ref(), "GPU name unavailable: {:?}\n", e),
}
- Ok(())
+ Ok((info, fb_layout))
}
}
--
2.34.1
^ permalink raw reply related
* [PATCH v7 18/23] nova-core: mm: Add BAR1 memory management self-tests
From: Joel Fernandes @ 2026-02-18 21:20 UTC (permalink / raw)
To: linux-kernel
Cc: Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, David Airlie,
Simona Vetter, Jonathan Corbet, Alex Deucher,
Christian König, Jani Nikula, Joonas Lahtinen, Rodrigo Vivi,
Tvrtko Ursulin, Huang Rui, Matthew Auld, Matthew Brost,
Lucas De Marchi, Thomas Hellström, Helge Deller,
Danilo Krummrich, Alice Ryhl, Miguel Ojeda, Alex Gaynor,
Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
Andreas Hindborg, Trevor Gross, John Hubbard, Alistair Popple,
Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
Elle Rhumsaa, Daniel Almeida, Eliot Courtney, joel, nouveau,
dri-devel, rust-for-linux, linux-doc, amd-gfx, intel-gfx,
intel-xe, linux-fbdev, Joel Fernandes, Nikola Djukic
In-Reply-To: <20260218212020.800836-1-joelagnelf@nvidia.com>
Add self-tests for BAR1 access during driver probe when
CONFIG_NOVA_MM_SELFTESTS is enabled (default disabled). This results in
testing the Vmm, GPU buddy allocator and BAR1 region all of which should
function correctly for the tests to pass.
Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
drivers/gpu/nova-core/Kconfig | 10 ++
drivers/gpu/nova-core/driver.rs | 2 +
drivers/gpu/nova-core/gpu.rs | 48 ++++++++
drivers/gpu/nova-core/gsp/commands.rs | 1 -
drivers/gpu/nova-core/mm/bar_user.rs | 164 ++++++++++++++++++++++++++
5 files changed, 224 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
index 6513007bf66f..d0d4177adb1b 100644
--- a/drivers/gpu/nova-core/Kconfig
+++ b/drivers/gpu/nova-core/Kconfig
@@ -15,3 +15,13 @@ config NOVA_CORE
This driver is work in progress and may not be functional.
If M is selected, the module will be called nova_core.
+
+config NOVA_MM_SELFTESTS
+ bool "Memory management self-tests"
+ depends on NOVA_CORE
+ help
+ Enable self-tests for the memory management subsystem. When enabled,
+ tests are run during GPU probe to verify page table walking, and BAR1
+ virtual memory mapping functionality.
+
+ This is a testing option and is default-disabled.
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index d8b2e967ba4c..7d0d09939835 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -92,6 +92,8 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
Ok(try_pin_init!(Self {
gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
+ // Run optional GPU selftests.
+ _: { gpu.run_selftests(pdev)? },
_reg <- auxiliary::Registration::new(
pdev.as_ref(),
c"nova-drm",
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index e217e5c7cb32..f17bf1bf0b12 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -319,4 +319,52 @@ pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) {
.inspect(|bar| self.sysmem_flush.unregister(bar))
.is_err());
}
+
+ /// Run selftests on the constructed [`Gpu`].
+ pub(crate) fn run_selftests(
+ mut self: Pin<&mut Self>,
+ pdev: &pci::Device<device::Bound>,
+ ) -> Result {
+ self.as_mut().run_mm_selftests(pdev)?;
+ Ok(())
+ }
+
+ #[cfg(CONFIG_NOVA_MM_SELFTESTS)]
+ fn run_mm_selftests(
+ mut self: Pin<&mut Self>,
+ pdev: &pci::Device<device::Bound>,
+ ) -> Result {
+ use crate::driver::BAR1_SIZE;
+
+ let mmu_version = MmuVersion::from(self.spec.chipset.arch());
+
+ // BAR1 self-tests.
+ let bar1 = Arc::pin_init(
+ pdev.iomap_region_sized::<BAR1_SIZE>(1, c"nova-core/bar1"),
+ GFP_KERNEL,
+ )?;
+ let bar1_access = bar1.access(pdev.as_ref())?;
+
+ let proj = self.as_mut().project();
+ let bar1_pde_base = proj.gsp_static_info.bar1_pde_base();
+ let mm = proj.mm.as_ref().get_ref();
+
+ crate::mm::bar_user::run_self_test(
+ pdev.as_ref(),
+ mm,
+ bar1_access,
+ bar1_pde_base,
+ mmu_version,
+ )?;
+
+ Ok(())
+ }
+
+ #[cfg(not(CONFIG_NOVA_MM_SELFTESTS))]
+ fn run_mm_selftests(
+ self: Pin<&mut Self>,
+ _pdev: &pci::Device<device::Bound>,
+ ) -> Result {
+ Ok(())
+ }
}
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index a7778d5d9e32..bf9e3086565f 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -232,7 +232,6 @@ pub(crate) fn gpu_name(&self) -> core::result::Result<&str, GpuNameError> {
}
/// Returns the BAR1 Page Directory Entry base address.
- #[expect(dead_code)]
pub(crate) fn bar1_pde_base(&self) -> u64 {
self.bar1_pde_base
}
diff --git a/drivers/gpu/nova-core/mm/bar_user.rs b/drivers/gpu/nova-core/mm/bar_user.rs
index 74119c4d365e..5edaacf4c9d0 100644
--- a/drivers/gpu/nova-core/mm/bar_user.rs
+++ b/drivers/gpu/nova-core/mm/bar_user.rs
@@ -152,3 +152,167 @@ fn drop(&mut self) {
}
}
}
+
+/// Check if the PDB has valid, VRAM-backed page tables.
+///
+/// Returns `Err(ENOENT)` if page tables are missing or not in VRAM.
+#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
+fn check_valid_page_tables(mm: &GpuMm, pdb_addr: VramAddress) -> Result {
+ use crate::mm::pagetable::ver2::Pde;
+ use crate::mm::pagetable::AperturePde;
+
+ let mut window = mm.pramin().window()?;
+ let pdb_entry_raw = window.try_read64(pdb_addr.raw())?;
+ let pdb_entry = Pde::new(pdb_entry_raw);
+
+ if !pdb_entry.is_valid() {
+ return Err(ENOENT);
+ }
+
+ if pdb_entry.aperture() != AperturePde::VideoMemory {
+ return Err(ENOENT);
+ }
+
+ Ok(())
+}
+
+/// Run MM subsystem self-tests during probe.
+///
+/// Tests page table infrastructure and `BAR1` MMIO access using the `BAR1`
+/// address space. Uses the `GpuMm`'s buddy allocator / to allocate page tables
+/// and test pages as needed.
+#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
+pub(crate) fn run_self_test(
+ dev: &kernel::device::Device,
+ mm: &GpuMm,
+ bar1: &crate::driver::Bar1,
+ bar1_pdb: u64,
+ mmu_version: MmuVersion,
+) -> Result {
+ use crate::mm::vmm::Vmm;
+ use crate::mm::PAGE_SIZE;
+ use kernel::gpu::buddy::BuddyFlags;
+ use kernel::gpu::buddy::GpuBuddyAllocParams;
+ use kernel::sizes::{
+ SZ_4K,
+ SZ_64K, //
+ };
+
+ // Self-tests only support MMU v2 for now.
+ if mmu_version != MmuVersion::V2 {
+ dev_info!(
+ dev,
+ "MM: Skipping self-tests for MMU {:?} (only V2 supported)\n",
+ mmu_version
+ );
+ return Ok(());
+ }
+
+ // Test patterns.
+ const PATTERN_PRAMIN: u32 = 0xDEAD_BEEF;
+ const PATTERN_BAR1: u32 = 0xCAFE_BABE;
+
+ dev_info!(dev, "MM: Starting self-test...\n");
+
+ let pdb_addr = VramAddress::new(bar1_pdb);
+
+ // Check if initial page tables are in VRAM.
+ if check_valid_page_tables(mm, pdb_addr).is_err() {
+ dev_info!(dev, "MM: Self-test SKIPPED - no valid VRAM page tables\n");
+ return Ok(());
+ }
+
+ // Setup a test page from the buddy allocator.
+ let alloc_params = GpuBuddyAllocParams {
+ start_range_address: 0,
+ end_range_address: 0,
+ size_bytes: SZ_4K as u64,
+ min_block_size_bytes: SZ_4K as u64,
+ buddy_flags: BuddyFlags::try_new(0)?,
+ };
+
+ let test_page_blocks = KBox::pin_init(mm.buddy().alloc_blocks(&alloc_params), GFP_KERNEL)?;
+ let test_vram_offset = test_page_blocks.iter().next().ok_or(ENOMEM)?.offset();
+ let test_vram = VramAddress::new(test_vram_offset);
+ let test_pfn = Pfn::from(test_vram);
+
+ // Create a VMM of size 64K to track virtual memory mappings.
+ let mut vmm = Vmm::new(pdb_addr, MmuVersion::V2, SZ_64K as u64)?;
+
+ // Create a test mapping.
+ let mapped = vmm.map_pages(mm, &[test_pfn], None, true)?;
+ let test_vfn = mapped.vfn_start;
+
+ // Pre-compute test addresses for each access path.
+ // Use distinct offsets within the page for read (0x100) and write (0x200) tests.
+ let bar1_base_offset = test_vfn.raw() as usize * PAGE_SIZE;
+ let bar1_read_offset: usize = bar1_base_offset + 0x100;
+ let bar1_write_offset: usize = bar1_base_offset + 0x200;
+ let vram_read_addr: usize = test_vram.raw() + 0x100;
+ let vram_write_addr: usize = test_vram.raw() + 0x200;
+
+ // Test 1: Write via PRAMIN, read via BAR1.
+ {
+ let mut window = mm.pramin().window()?;
+ window.try_write32(vram_read_addr, PATTERN_PRAMIN)?;
+ }
+
+ // Read back via BAR1 aperture.
+ let bar1_value = bar1.try_read32(bar1_read_offset)?;
+
+ let test1_passed = if bar1_value == PATTERN_PRAMIN {
+ true
+ } else {
+ dev_err!(
+ dev,
+ "MM: Test 1 FAILED - Expected {:#010x}, got {:#010x}\n",
+ PATTERN_PRAMIN,
+ bar1_value
+ );
+ false
+ };
+
+ // Test 2: Write via BAR1, read via PRAMIN.
+ bar1.try_write32(PATTERN_BAR1, bar1_write_offset)?;
+
+ // Read back via PRAMIN.
+ let pramin_value = {
+ let mut window = mm.pramin().window()?;
+ window.try_read32(vram_write_addr)?
+ };
+
+ let test2_passed = if pramin_value == PATTERN_BAR1 {
+ true
+ } else {
+ dev_err!(
+ dev,
+ "MM: Test 2 FAILED - Expected {:#010x}, got {:#010x}\n",
+ PATTERN_BAR1,
+ pramin_value
+ );
+ false
+ };
+
+ // Cleanup - invalidate PTE.
+ vmm.unmap_pages(mm, mapped)?;
+
+ // Test 3: Two-phase prepare/execute API.
+ let prepared = vmm.prepare_map(mm, 1, None)?;
+ let mapped2 = vmm.execute_map(mm, prepared, &[test_pfn], true)?;
+ let readback = vmm.read_mapping(mm, mapped2.vfn_start)?;
+ let test3_passed = if readback == Some(test_pfn) {
+ true
+ } else {
+ dev_err!(dev, "MM: Test 3 FAILED - Two-phase map readback mismatch\n");
+ false
+ };
+ vmm.unmap_pages(mm, mapped2)?;
+
+ if test1_passed && test2_passed && test3_passed {
+ dev_info!(dev, "MM: All self-tests PASSED\n");
+ Ok(())
+ } else {
+ dev_err!(dev, "MM: Self-tests FAILED\n");
+ Err(EIO)
+ }
+}
--
2.34.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox