From: Dave Airlie <airlied@gmail.com>
To: dri-devel@lists.freedesktop.org
Cc: nouveau@lists.freedesktop.org
Subject: [PATCH 1/3] nouveau/vmm: rewrite pte tracker using a struct and bitfields.
Date: Wed, 4 Feb 2026 13:00:05 +1000 [thread overview]
Message-ID: <20260204030208.2313241-2-airlied@gmail.com> (raw)
In-Reply-To: <20260204030208.2313241-1-airlied@gmail.com>
From: Dave Airlie <airlied@redhat.com>
I want to increase the counters here and start tracking LPTs as well,
because there are certain situations where userspace with mixed page sizes
can cause ref/unrefs to live longer, so better reference counting is needed.
This should introduce no functional change.
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 41 ++++++++++---------
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++++--
2 files changed, 31 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index f95c58b67633..efc334f6104c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
}
}
- if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
+ if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
return NULL;
pgt->page = page ? page->shift : 0;
pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
*/
for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
const u32 pten = min(sptn - spti, ptes);
- pgt->pte[lpti] -= pten;
+ pgt->pte[lpti].s.sptes -= pten;
ptes -= pten;
}
@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
/* Skip over any LPTEs that still have valid SPTEs. */
- if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+ if (pgt->pte[pteb].s.sptes) {
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
- if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+ if (!(pgt->pte[ptei].s.sptes))
break;
}
continue;
@@ -232,14 +232,14 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
*
* Determine how many LPTEs need to transition state.
*/
- pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ pgt->pte[ptei].s.spte_valid = false;
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
- if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+ if (pgt->pte[ptei].s.sptes)
break;
- pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+ pgt->pte[ptei].s.spte_valid = false;
}
- if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+ if (pgt->pte[pteb].s.sparse) {
TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
} else
@@ -307,7 +307,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
*/
for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
const u32 pten = min(sptn - spti, ptes);
- pgt->pte[lpti] += pten;
+ pgt->pte[lpti].s.sptes += pten;
ptes -= pten;
}
@@ -317,9 +317,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
/* Skip over any LPTEs that already have valid SPTEs. */
- if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+ if (pgt->pte[pteb].s.spte_valid) {
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
- if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+ if (!pgt->pte[ptei].s.spte_valid)
break;
}
continue;
@@ -331,14 +331,14 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
*
* Determine how many LPTEs need to transition state.
*/
- pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+ pgt->pte[ptei].s.spte_valid = true;
for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
- if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+ if (pgt->pte[ptei].s.spte_valid)
break;
- pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+ pgt->pte[ptei].s.spte_valid = true;
}
- if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+ if (pgt->pte[pteb].s.sparse) {
const u32 spti = pteb * sptn;
const u32 sptc = ptes * sptn;
/* The entire LPTE is marked as sparse, we need
@@ -386,7 +386,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
} else
if (desc->type == LPT) {
- memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+ union nvkm_pte_tracker sparse = { .s.sparse = 1 };
+ memset(&pgt->pte[ptei].u, sparse.u, ptes);
}
}
@@ -398,7 +399,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
else
if (it->desc->type == LPT)
- memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
+ memset(&pt->pte[ptei].u, 0x00, sizeof(pt->pte[0]) * ptes);
return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
}
@@ -445,9 +446,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
* the SPTEs on some GPUs.
*/
for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
- bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+ bool spte = !!pgt->pte[ptei].s.sptes;
for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
- bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+ bool next = !!pgt->pte[ptei].s.sptes;
if (spte != next)
break;
}
@@ -461,7 +462,7 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
} else {
desc->func->unmap(vmm, pt, pteb, ptes);
while (ptes--)
- pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+ pgt->pte[pteb++].s.spte_valid = true;
}
}
} else {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 4586a425dbe4..a6312a0e6b84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -4,6 +4,15 @@
#include <core/memory.h>
enum nvkm_memory_target;
+union nvkm_pte_tracker {
+ u8 u;
+ struct {
+ u8 sparse:1;
+ u8 spte_valid:1;
+ u8 sptes:6;
+ } s;
+};
+
struct nvkm_vmm_pt {
/* Some GPUs have a mapping level with a dual page tables to
* support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
*
* This information is used to manage LPTE state transitions.
*/
-#define NVKM_VMM_PTE_SPARSE 0x80
-#define NVKM_VMM_PTE_VALID 0x40
-#define NVKM_VMM_PTE_SPTES 0x3f
- u8 pte[];
+ union nvkm_pte_tracker pte[];
};
typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
--
2.52.0
next prev parent reply other threads:[~2026-02-04 3:02 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-04 3:00 [PATCH 0/3] nouveau/vmm: fix switching between small and large PTEs (series v2) Dave Airlie
2026-02-04 3:00 ` Dave Airlie [this message]
2026-02-04 3:00 ` [PATCH 2/3] nouveau/vmm: increase size of vmm pte tracker struct to u32 (v2) Dave Airlie
2026-02-04 3:00 ` [PATCH 3/3] nouveau/vmm: start tracking if the LPT PTE is valid. (v6) Dave Airlie
2026-02-04 12:43 ` [PATCH 0/3] nouveau/vmm: fix switching between small and large PTEs (series v2) Mary Guillemard
2026-02-04 16:40 ` M Henning
2026-02-04 16:40 ` M Henning
-- strict thread matches above, loose matches on Subject: below --
2026-02-03 5:25 nouveau/vmm: fix switching between small and large PTEs Dave Airlie
2026-02-03 5:25 ` [PATCH 1/3] nouveau/vmm: rewrite pte tracker using a struct and bitfields Dave Airlie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260204030208.2313241-2-airlied@gmail.com \
--to=airlied@gmail.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=nouveau@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.