public inbox for nouveau@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Dave Airlie <airlied@gmail.com>
To: dri-devel@lists.freedesktop.org
Cc: nouveau@lists.freedesktop.org
Subject: [PATCH] nouveau/vmm: start tracking if the LPT PTE is valid. (v4)
Date: Tue,  3 Feb 2026 08:04:02 +1000	[thread overview]
Message-ID: <20260202220402.2217813-1-airlied@gmail.com> (raw)

From: Dave Airlie <airlied@redhat.com>

When NVK enabled large pages, userspace tests were seeing fault
reports at a valid address.

There was a case where an address moving from a 64k page to 4k pages
could expose a race between unmapping the 4k page, mapping the 64k
page, and unreffing the 4k pages.

Unreffing 4k pages would cause the dual-page table handling to always
set the LPTE entry to SPARSE or INVALID, but if we'd mapped a valid
LPTE in the meantime, it would get trashed. Keep track of when
a valid LPTE has been referenced, and don't reset it in that case.

This increases the tracking to 32 bits, because it turns out that if
unref can get delayed, you can get a lot of these outstanding,
and this can cause strange behaviours.

Cc: stable@vger.kernel.org
Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14610
Signed-off-by: Dave Airlie <airlied@redhat.com>

--
v2: move to 32-bit from 8-bit tracker
fix some more flag changes.
v3: missed one BIG_PTE unset
v4: start reference counting LPTEs
---
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 80 ++++++++++++-------
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 14 +++-
 2 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index f95c58b67633..c2dfaa4b89cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -53,7 +53,7 @@ nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
 		}
 	}
 
-	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
+	if (!(pgt = kzalloc(sizeof(*pgt) + (sizeof(pgt->pte[0]) * lpte), GFP_KERNEL)))
 		return NULL;
 	pgt->page = page ? page->shift : 0;
 	pgt->sparse = sparse;
@@ -208,7 +208,7 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] -= pten;
+		pgt->pte[lpti].spte_count -= pten;
 		ptes -= pten;
 	}
 
@@ -218,9 +218,9 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that still have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
+		if (pgt->pte[pteb].spte_count) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
+				if (!pgt->pte[ptei].spte_count)
 					break;
 			}
 			continue;
@@ -232,24 +232,27 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].spte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
+			if (pgt->pte[ptei].spte_count)
 				break;
-			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].spte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].sparse) {
 			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
 			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
-		} else
-		if (pair->func->invalid) {
-			/* If the MMU supports it, restore the LPTE to the
-			 * INVALID state to tell the MMU there is no point
-			 * trying to fetch the corresponding SPTEs.
-			 */
-			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
-			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+		} else if (!pgt->pte[pteb].lpte_valid) {
+			if (pair->func->invalid) {
+				/* If the MMU supports it, restore the LPTE to the
+				 * INVALID state to tell the MMU there is no point
+				 * trying to fetch the corresponding SPTEs.
+				 */
+				TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
+				pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
+			}
+		} else {
+			TRA(it, "LPTE %05x: V %d PTEs", pteb, ptes);
 		}
 	}
 }
@@ -280,6 +283,13 @@ nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
 		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT && (pgt->refs[0] || pgt->refs[1])) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].lpte_count--;
+			ptes--;
+		}
+	}
+
 	/* PT no longer needed? Destroy it. */
 	if (!pgt->refs[type]) {
 		it->lvl++;
@@ -307,7 +317,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 	 */
 	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
 		const u32 pten = min(sptn - spti, ptes);
-		pgt->pte[lpti] += pten;
+		pgt->pte[lpti].spte_count += pten;
 		ptes -= pten;
 	}
 
@@ -317,9 +327,9 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 
 	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
 		/* Skip over any LPTEs that already have valid SPTEs. */
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
+		if (pgt->pte[pteb].spte_valid) {
 			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
+				if (!pgt->pte[ptei].spte_valid)
 					break;
 			}
 			continue;
@@ -331,14 +341,16 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
 		 *
 		 * Determine how many LPTEs need to transition state.
 		 */
-		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+		pgt->pte[ptei].spte_valid = true;
+		pgt->pte[ptei].lpte_valid = false;
 		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
-			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
+			if (pgt->pte[ptei].spte_valid)
 				break;
-			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
+			pgt->pte[ptei].spte_valid = true;
+			pgt->pte[ptei].lpte_valid = false;
 		}
 
-		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
+		if (pgt->pte[pteb].sparse) {
 			const u32 spti = pteb * sptn;
 			const u32 sptc = ptes * sptn;
 			/* The entire LPTE is marked as sparse, we need
@@ -374,6 +386,15 @@ nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes)
 	if (desc->type == SPT)
 		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);
 
+	if (desc->type == LPT) {
+		for (u32 lpti = ptei; ptes; lpti++) {
+			pgt->pte[lpti].spte_valid = false;
+			pgt->pte[lpti].lpte_valid = true;
+			pgt->pte[lpti].lpte_count++;
+			ptes--;
+		}
+	}
+
 	return true;
 }
 
@@ -386,7 +407,8 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
 			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
 	} else
 	if (desc->type == LPT) {
-		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
+		struct pt_tracker sparse = { .sparse = 1 };
+		memset32((u32 *)&pgt->pte[ptei], *(u32 *)&sparse, ptes);
 	}
 }
 
@@ -398,7 +420,7 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 pte
 		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
 	else
 	if (it->desc->type == LPT)
-		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
+		memset32((u32 *)&pt->pte[ptei], 0x00, ptes);
 	return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes);
 }
 
@@ -445,9 +467,9 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 		 * the SPTEs on some GPUs.
 		 */
 		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
-			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+			bool spte = !!pgt->pte[ptei].spte_count;
 			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
-				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
+				bool next = !!pgt->pte[ptei].spte_count;
 				if (spte != next)
 					break;
 			}
@@ -457,11 +479,11 @@ nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
 					desc->func->sparse(vmm, pt, pteb, ptes);
 				else
 					desc->func->invalid(vmm, pt, pteb, ptes);
-				memset(&pgt->pte[pteb], 0x00, ptes);
+				memset32((u32 *)&pgt->pte[pteb], 0x00, ptes);
 			} else {
 				desc->func->unmap(vmm, pt, pteb, ptes);
 				while (ptes--)
-					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
+					pgt->pte[pteb++].spte_valid = true;
 			}
 		}
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 4586a425dbe4..8c4531a70a3a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -4,6 +4,15 @@
 #include <core/memory.h>
 enum nvkm_memory_target;
 
+struct pt_tracker {
+	u32 sparse:1;
+	u32 spte_valid:1;
+	u32 lpte_valid:1;
+	u32 lpte_count:13;
+	u32 spte_count:16;
+};
+
+
 struct nvkm_vmm_pt {
 	/* Some GPUs have a mapping level with a dual page tables to
 	 * support large and small pages in the same address-range.
@@ -44,10 +53,7 @@ struct nvkm_vmm_pt {
 	 *
 	 * This information is used to manage LPTE state transitions.
 	 */
-#define NVKM_VMM_PTE_SPARSE 0x80
-#define NVKM_VMM_PTE_VALID  0x40
-#define NVKM_VMM_PTE_SPTES  0x3f
-	u8 pte[];
+	struct pt_tracker pte[];
 };
 
 typedef void (*nvkm_vmm_pxe_func)(struct nvkm_vmm *,
-- 
2.52.0


             reply	other threads:[~2026-02-02 22:04 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-02 22:04 Dave Airlie [this message]
2026-02-02 23:39 ` [PATCH] nouveau/vmm: start tracking if the LPT PTE is valid. (v4) M Henning
2026-02-03 17:16   ` M Henning

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260202220402.2217813-1-airlied@gmail.com \
    --to=airlied@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=nouveau@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox