* [PATCH 1/2] iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires
2025-11-27 23:54 [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
@ 2025-11-27 23:54 ` Jason Gunthorpe
2025-11-28 6:54 ` Baolu Lu
2025-11-27 23:54 ` [PATCH 2/2] iommupt/vtd: Support mgaw's less than a 4 level walk for first stage Jason Gunthorpe
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Jason Gunthorpe @ 2025-11-27 23:54 UTC (permalink / raw)
To: David Woodhouse, iommu, Joerg Roedel, Robin Murphy,
Suravee Suthikulpanit, Will Deacon
Cc: Lu Baolu, Calvin Owens, Chaitanya Kumar Borah, Joerg Roedel,
Kevin Tian, patches, Tina Zhang
VT-d second stage HW specifies both the maximum IOVA and the supported
table walk starting points. Weirdly there is HW that only supports a 4
level walk but has a maximum IOVA that only needs 3.
The current code miscalculates this and creates a wrongly sized page table
which ultimately fails the compatibility check for number of levels.
This is fixed by allowing the page table to be created with both a vasz
and top_level input. The vasz will set the aperture for the domain while
the top_level will set the page table geometry.
Add top_level to vtdss and correct the logic in VT-d to generate the right
top_level and vasz from mgaw and sagaw.
Fixes: d373449d8e97 ("iommu/vt-d: Use the generic iommu page table")
Reported-by: Calvin Owens <calvin@wbinvd.org>
Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/generic_pt/fmt/vtdss.h | 19 ++++++-------------
drivers/iommu/generic_pt/iommu_pt.h | 14 ++++++++++++++
drivers/iommu/intel/iommu.c | 20 +++++++++++++-------
include/linux/generic_pt/iommu.h | 2 ++
4 files changed, 35 insertions(+), 20 deletions(-)
diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h b/drivers/iommu/generic_pt/fmt/vtdss.h
index 50ffed9d0e508f..f5f8981edde72e 100644
--- a/drivers/iommu/generic_pt/fmt/vtdss.h
+++ b/drivers/iommu/generic_pt/fmt/vtdss.h
@@ -248,18 +248,11 @@ static inline int vtdss_pt_iommu_fmt_init(struct pt_iommu_vtdss *iommu_table,
const struct pt_iommu_vtdss_cfg *cfg)
{
struct pt_vtdss *table = &iommu_table->vtdss_pt;
- unsigned int vasz_lg2 = cfg->common.hw_max_vasz_lg2;
- if (vasz_lg2 > PT_MAX_VA_ADDRESS_LG2)
- return -EOPNOTSUPP;
- else if (vasz_lg2 > 48)
- pt_top_set_level(&table->common, 4);
- else if (vasz_lg2 > 39)
- pt_top_set_level(&table->common, 3);
- else if (vasz_lg2 > 30)
- pt_top_set_level(&table->common, 2);
- else
+ if (cfg->top_level > 4 || cfg->top_level < 2)
return -EOPNOTSUPP;
+
+ pt_top_set_level(&table->common, cfg->top_level);
return 0;
}
#define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init
@@ -282,9 +275,9 @@ vtdss_pt_iommu_fmt_hw_info(struct pt_iommu_vtdss *table,
#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_vtdss_cfg vtdss_kunit_fmt_cfgs[] = {
- [0] = { .common.hw_max_vasz_lg2 = 39 },
- [1] = { .common.hw_max_vasz_lg2 = 48 },
- [2] = { .common.hw_max_vasz_lg2 = 57 },
+ [0] = { .common.hw_max_vasz_lg2 = 39, .top_level = 2},
+ [1] = { .common.hw_max_vasz_lg2 = 48, .top_level = 3},
+ [2] = { .common.hw_max_vasz_lg2 = 57, .top_level = 4},
};
#define kunit_fmt_cfgs vtdss_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_VTDSS_FORCE_WRITEABLE) };
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 032d04ec7b568d..97aeda1ad01cca 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -1128,6 +1128,20 @@ static int pt_init_common(struct pt_common *common)
PT_FORCE_ENABLED_FEATURES))
return -EOPNOTSUPP;
+ /*
+ * Check if the top level of the page table is too small to hold the
+ * specified maxvasz.
+ */
+ if (!pt_feature(common, PT_FEAT_DYNAMIC_TOP) &&
+ top_range.top_level != PT_MAX_TOP_LEVEL) {
+ struct pt_state pts = { .range = &top_range,
+ .level = top_range.top_level };
+
+ if (common->max_vasz_lg2 >
+ pt_num_items_lg2(&pts) + pt_table_item_lg2sz(&pts))
+ return -EOPNOTSUPP;
+ }
+
if (common->max_oasz_lg2 == 0)
common->max_oasz_lg2 = pt_max_oa_lg2(common);
else
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7b3016491ca586..f117349d67dbf8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2858,22 +2858,28 @@ intel_iommu_domain_alloc_first_stage(struct device *dev,
return &dmar_domain->domain;
}
-static int compute_vasz_lg2_ss(struct intel_iommu *iommu)
+static unsigned int compute_vasz_lg2_ss(struct intel_iommu *iommu,
+ unsigned int *top_level)
{
unsigned int sagaw = cap_sagaw(iommu->cap);
unsigned int mgaw = cap_mgaw(iommu->cap);
/*
* Find the largest table size that both the mgaw and sagaw support.
- * This sets both the number of table levels and the valid range of
- * IOVA.
+ * This sets the valid range of IOVA and the top starting level.
+ * Some HW may only support a 4 or 5 level walk but must limit IOVA to
+ * 3 levels.
*/
- if (mgaw >= 48 && (sagaw & BIT(3)))
+ if (mgaw > 48 && sagaw >= BIT(3)) {
+ *top_level = 4;
return min(57, mgaw);
- else if (mgaw >= 39 && (sagaw & BIT(2)))
+ } else if (mgaw > 39 && sagaw >= BIT(2)) {
+ *top_level = 3 + ffs(sagaw >> 3);
return min(48, mgaw);
- else if (mgaw >= 30 && (sagaw & BIT(1)))
+ } else if (mgaw > 30 && sagaw >= BIT(1)) {
+ *top_level = 2 + ffs(sagaw >> 2);
return min(39, mgaw);
+ }
return 0;
}
@@ -2910,7 +2916,7 @@ intel_iommu_domain_alloc_second_stage(struct device *dev,
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
- cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu);
+ cfg.common.hw_max_vasz_lg2 = compute_vasz_lg2_ss(iommu, &cfg.top_level);
cfg.common.hw_max_oasz_lg2 = 52;
cfg.common.features = BIT(PT_FEAT_FLUSH_RANGE);
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index cfe05a77f86b05..c134132ed10f89 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -264,6 +264,8 @@ IOMMU_PROTOTYPES(amdv1_mock);
struct pt_iommu_vtdss_cfg {
struct pt_iommu_cfg common;
+ /* 4 is a 57 bit 5 level table */
+ unsigned int top_level;
};
struct pt_iommu_vtdss_hw_info {
--
2.43.0
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH 1/2] iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires
2025-11-27 23:54 ` [PATCH 1/2] iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires Jason Gunthorpe
@ 2025-11-28 6:54 ` Baolu Lu
0 siblings, 0 replies; 7+ messages in thread
From: Baolu Lu @ 2025-11-28 6:54 UTC (permalink / raw)
To: Jason Gunthorpe, David Woodhouse, iommu, Joerg Roedel,
Robin Murphy, Suravee Suthikulpanit, Will Deacon
Cc: Calvin Owens, Chaitanya Kumar Borah, Joerg Roedel, Kevin Tian,
patches, Tina Zhang
On 11/28/25 07:54, Jason Gunthorpe wrote:
> VT-d second stage HW specifies both the maximum IOVA and the supported
> table walk starting points. Weirdly there is HW that only supports a 4
> level walk but has a maximum IOVA that only needs 3.
>
> The current code miscalculates this and creates a wrongly sized page table
> which ultimately fails the compatability check for number of levels.
s/compatability/compatibility/
>
> This is fixed by allowing the page table to be created with both a vasz
> and top_level input. The vasz will set the aperture for the domain while
> the top_level will set the page table geometry.
>
> Add top_level to vtdss and correct the logic in VT-d to generate the right
> top_level and vasz from mgaw and sagaw.
>
> Fixes: d373449d8e97 ("iommu/vt-d: Use the generic iommu page table")
> Reported-by: Calvin Owens <calvin@wbinvd.org>
> Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/2] iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
2025-11-27 23:54 [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
2025-11-27 23:54 ` [PATCH 1/2] iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires Jason Gunthorpe
@ 2025-11-27 23:54 ` Jason Gunthorpe
2025-11-28 7:03 ` Baolu Lu
2025-11-28 0:04 ` [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
2025-11-28 7:48 ` Joerg Roedel
3 siblings, 1 reply; 7+ messages in thread
From: Jason Gunthorpe @ 2025-11-27 23:54 UTC (permalink / raw)
To: David Woodhouse, iommu, Joerg Roedel, Robin Murphy,
Suravee Suthikulpanit, Will Deacon
Cc: Lu Baolu, Calvin Owens, Chaitanya Kumar Borah, Joerg Roedel,
Kevin Tian, patches, Tina Zhang
If the IOVA is limited to less than 48 the page table will be constructed
with a 3 level configuration which is unsupported by hardware.
Like the second stage the caller needs to pass in both the top_level and
the vasz to specify a table that has more levels than required to hold the
IOVA range.
Fixes: 6cbc09b7719e ("iommu/vt-d: Restore previous domain::aperture_end calculation")
Reported-by: Calvin Owens <calvin@wbinvd.org>
Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/iommu/amd/iommu.c | 7 +++--
drivers/iommu/generic_pt/fmt/x86_64.h | 17 +++++-------
drivers/iommu/intel/iommu.c | 38 +++++++++++++++++----------
include/linux/generic_pt/iommu.h | 2 ++
4 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 48bca4dc8eb61f..273951b4501cd5 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2708,10 +2708,13 @@ static struct iommu_domain *amd_iommu_domain_alloc_paging_v2(struct device *dev,
* in both modes the top bit is removed and PT_FEAT_SIGN_EXTEND is not
* set which creates a table that is compatible in both modes.
*/
- if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
+ if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
cfg.common.hw_max_vasz_lg2 = 56;
- else
+ cfg.top_level = 4;
+ } else {
cfg.common.hw_max_vasz_lg2 = 47;
+ cfg.top_level = 3;
+ }
cfg.common.hw_max_oasz_lg2 = 52;
domain->domain.ops = &amdv2_ops;
diff --git a/drivers/iommu/generic_pt/fmt/x86_64.h b/drivers/iommu/generic_pt/fmt/x86_64.h
index 507abf2c934ccb..210748d9d6e8aa 100644
--- a/drivers/iommu/generic_pt/fmt/x86_64.h
+++ b/drivers/iommu/generic_pt/fmt/x86_64.h
@@ -241,13 +241,10 @@ x86_64_pt_iommu_fmt_init(struct pt_iommu_x86_64 *iommu_table,
{
struct pt_x86_64 *table = &iommu_table->x86_64_pt;
- if (cfg->common.hw_max_vasz_lg2 < 31 ||
- cfg->common.hw_max_vasz_lg2 > 57)
- return -EINVAL;
+ if (cfg->top_level < 3 || cfg->top_level > 4)
+ return -EOPNOTSUPP;
- /* Top of 2, 3, 4 */
- pt_top_set_level(&table->common,
- (cfg->common.hw_max_vasz_lg2 - 31) / 9 + 2);
+ pt_top_set_level(&table->common, cfg->top_level);
table->common.max_oasz_lg2 =
min(PT_MAX_OUTPUT_ADDRESS_LG2, cfg->common.hw_max_oasz_lg2);
@@ -269,12 +266,12 @@ x86_64_pt_iommu_fmt_hw_info(struct pt_iommu_x86_64 *table,
#if defined(GENERIC_PT_KUNIT)
static const struct pt_iommu_x86_64_cfg x86_64_kunit_fmt_cfgs[] = {
[0] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
- .common.hw_max_vasz_lg2 = 48 },
+ .common.hw_max_vasz_lg2 = 48, .top_level = 3 },
[1] = { .common.features = BIT(PT_FEAT_SIGN_EXTEND),
- .common.hw_max_vasz_lg2 = 57 },
+ .common.hw_max_vasz_lg2 = 57, .top_level = 4 },
/* AMD IOMMU PASID 0 formats with no SIGN_EXTEND */
- [2] = { .common.hw_max_vasz_lg2 = 47 },
- [3] = { .common.hw_max_vasz_lg2 = 56 },
+ [2] = { .common.hw_max_vasz_lg2 = 47, .top_level = 3 },
+ [3] = { .common.hw_max_vasz_lg2 = 56, .top_level = 4},
};
#define kunit_fmt_cfgs x86_64_kunit_fmt_cfgs
enum { KUNIT_FMT_FEATURES = BIT(PT_FEAT_SIGN_EXTEND)};
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index f117349d67dbf8..4e888867e85c03 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2794,6 +2794,28 @@ static struct dmar_domain *paging_domain_alloc(void)
return domain;
}
+static unsigned int compute_vasz_lg2_fs(struct intel_iommu *iommu,
+ unsigned int *top_level)
+{
+ unsigned int mgaw = cap_mgaw(iommu->cap);
+
+ /*
+ * Spec 3.6 First-Stage Translation:
+ *
+ * Software must limit addresses to less than the minimum of MGAW
+ * and the lower canonical address width implied by FSPM (i.e.,
+ * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
+ */
+ if (mgaw > 48 && cap_fl5lp_support(iommu->cap)) {
+ *top_level = 4;
+ return min(57, mgaw);
+ }
+
+ /* Four level is always supported */
+ *top_level = 3;
+ return min(48, mgaw);
+}
+
static struct iommu_domain *
intel_iommu_domain_alloc_first_stage(struct device *dev,
struct intel_iommu *iommu, u32 flags)
@@ -2813,20 +2835,8 @@ intel_iommu_domain_alloc_first_stage(struct device *dev,
if (IS_ERR(dmar_domain))
return ERR_CAST(dmar_domain);
- if (cap_fl5lp_support(iommu->cap))
- cfg.common.hw_max_vasz_lg2 = 57;
- else
- cfg.common.hw_max_vasz_lg2 = 48;
-
- /*
- * Spec 3.6 First-Stage Translation:
- *
- * Software must limit addresses to less than the minimum of MGAW
- * and the lower canonical address width implied by FSPM (i.e.,
- * 47-bit when FSPM is 4-level and 56-bit when FSPM is 5-level).
- */
- cfg.common.hw_max_vasz_lg2 = min(cap_mgaw(iommu->cap),
- cfg.common.hw_max_vasz_lg2);
+ cfg.common.hw_max_vasz_lg2 =
+ compute_vasz_lg2_fs(iommu, &cfg.top_level);
cfg.common.hw_max_oasz_lg2 = 52;
cfg.common.features = BIT(PT_FEAT_SIGN_EXTEND) |
BIT(PT_FEAT_FLUSH_RANGE);
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index c134132ed10f89..9eefbb74efd087 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -277,6 +277,8 @@ IOMMU_FORMAT(vtdss, vtdss_pt);
struct pt_iommu_x86_64_cfg {
struct pt_iommu_cfg common;
+ /* 4 is a 57 bit 5 level table */
+ unsigned int top_level;
};
struct pt_iommu_x86_64_hw_info {
--
2.43.0
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [PATCH 2/2] iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
2025-11-27 23:54 ` [PATCH 2/2] iommupt/vtd: Support mgaw's less than a 4 level walk for first stage Jason Gunthorpe
@ 2025-11-28 7:03 ` Baolu Lu
0 siblings, 0 replies; 7+ messages in thread
From: Baolu Lu @ 2025-11-28 7:03 UTC (permalink / raw)
To: Jason Gunthorpe, David Woodhouse, iommu, Joerg Roedel,
Robin Murphy, Suravee Suthikulpanit, Will Deacon
Cc: Calvin Owens, Chaitanya Kumar Borah, Joerg Roedel, Kevin Tian,
patches, Tina Zhang
On 11/28/25 07:54, Jason Gunthorpe wrote:
> If the IOVA is limited to less than 48 the page table will be constructed
> with a 3 level configuration which is unsupported by hardware.
>
> Like the second stage the caller needs to pass in both the top_level an
> the vasz to specify a table that has more levels than required to hold the
> IOVA range.
>
> Fixes: 6cbc09b7719e ("iommu/vt-d: Restore previous domain::aperture_end calculation")
> Reported-by: Calvin Owens <calvin@wbinvd.org>
> Closes: https://lore.kernel.org/r/8f257d2651eb8a4358fcbd47b0145002e5f1d638.1764237717.git.calvin@wbinvd.org
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 0/2] Fix VT-d when the IOVA limit is small
2025-11-27 23:54 [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
2025-11-27 23:54 ` [PATCH 1/2] iommupt/vtd: Allow VT-d to have a larger table top than the vasz requires Jason Gunthorpe
2025-11-27 23:54 ` [PATCH 2/2] iommupt/vtd: Support mgaw's less than a 4 level walk for first stage Jason Gunthorpe
@ 2025-11-28 0:04 ` Jason Gunthorpe
2025-11-28 7:48 ` Joerg Roedel
3 siblings, 0 replies; 7+ messages in thread
From: Jason Gunthorpe @ 2025-11-28 0:04 UTC (permalink / raw)
To: David Woodhouse, iommu, Joerg Roedel, Robin Murphy,
Suravee Suthikulpanit, Will Deacon
Cc: Lu Baolu, Calvin Owens, Chaitanya Kumar Borah, Joerg Roedel,
Kevin Tian, patches, Tina Zhang
On Thu, Nov 27, 2025 at 07:54:06PM -0400, Jason Gunthorpe wrote:
> Calvin notes:
>
> =======================
> A Skylake machine has problems with strict translation on next-20251124:
>
> pci 0000:06:00.0: Adding to iommu group 18
> ------------[ cut here ]------------
> WARNING: drivers/iommu/iommu.c:3055 at iommu_setup_default_domain+0x268/0x2f0, CPU#2: swapper/0/1
> CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.0-rc6-next-20251124 #1 PREEMPTLAZY
> Hardware name: ASUSTeK COMPUTER INC. WS C246M PRO Series/WS C246M PRO Series, BIOS 6101 06/26/2024
> RIP: 0010:iommu_setup_default_domain+0x268/0x2f0
> <snip>
> Call Trace:
> <TASK>
> iommu_device_register+0x126/0x200
> intel_iommu_init+0x2bf/0x580
> pci_iommu_init+0xb/0x30
> do_one_initcall+0xad/0x1c0
> kernel_init_freeable+0x238/0x290
> kernel_init+0x16/0x120
> ret_from_fork+0x1ba/0x1f0
> ret_from_fork_asm+0x11/0x20
> </TASK>
> Kernel panic - not syncing: kernel: panic_on_warn set ...
> <snip>
> Dumping ftrace buffer:
> ---------------------------------
> 2) | __iommu_group_set_domain_internal() { /* <-iommu_setup_default_domain+0x25e/0x2f0 */
> 2) | __iommu_device_set_domain() { /* <-__iommu_group_set_domain_internal+0x6d/0x140 */
> 2) | __iommu_attach_device() { /* <-__iommu_device_set_domain+0x6d/0xb0 */
> 2) | intel_iommu_attach_device() { /* <-__iommu_attach_device+0x1f/0xe0 */
> 2) 0.140 us | device_block_translation(); /* <-intel_iommu_attach_device+0x19/0x80 ret=0xffffffff81b5e980 */
> 2) | paging_domain_compatible() { /* <-intel_iommu_attach_device+0x24/0x80 */
> 2) | paging_domain_compatible_second_stage() { /* <-paging_domain_compatible+0x47/0x170 */
> 2) 0.137 us | pt_iommu_vtdss_hw_info(); /* <-paging_domain_compatible_second_stage+0x29/0x1a0 ret=0x1 */
> 2) 0.530 us | } /* paging_domain_compatible_second_stage ret=-22 */
> 2) 0.907 us | } /* paging_domain_compatible ret=-22 */
> 2) 1.653 us | } /* intel_iommu_attach_device ret=-22 */
> 2) 2.157 us | } /* __iommu_attach_device ret=-22 */
> 2) 2.528 us | } /* __iommu_device_set_domain ret=-22 */
> 2) 2.954 us | } /* __iommu_group_set_domain_internal ret=-22 */
> ---------------------------------
> Rebooting in 10 seconds..
>
> The failing condition in paging_domain_compatible_second_stage() is:
>
> /* Page table level is supported. */
> if (!(cap_sagaw(iommu->cap) & BIT(pt_info.aw)))
> return -EINVAL;
>
> This happens because, for many domains on this machine, MGAW=39 but
> SAGAW=0x04: that claims a 39-bit maximum address width, but also claims
> to only support 48-bit/4-level paging, which seems odd.
>
> Before the GENERIC_PT rewrite, the kernel only looked at SAGAW, so this
> machine has been happily running for years using 4-level paging.
>
> Now, the kernel refuses to use 4-level paging because MGAW=39. But SAGAW
> claims not to support anything else, so we hit the -EINVAL case above
> and fail to initialize.
>
> If I force 4-level paging, everything works. If I force 39-bit/3-level
> paging, nothing works (lots of bad context faults). So it seems like the
> machine really only supports 4-level paging despite the 3-level MGAW.
> =======================
>
> Which is not a possible condition that was considered when this was
> made. Allow VT-d to pass in the top level of the page table as well as the
> max vasz as separate things. This lets it set up something compatible with
> the HW.
>
> This is happening because VT-d doesn't quite fit into the architecture we
> expect on Linux where the IOMMU driver should be reporting its full page
> table capability as an aperture and bus width or addressing limitations
> should be attached to the end point devices as a DMA mask. Instead VT-d is
> putting the device limitations in the iommu as well.
>
> Jason Gunthorpe (2):
> iommupt/vtd: Allow VT-d to have a larger table top than the vasz
> requires
> iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
>
> drivers/iommu/amd/iommu.c | 7 +++-
> drivers/iommu/generic_pt/fmt/vtdss.h | 19 +++------
> drivers/iommu/generic_pt/fmt/x86_64.h | 17 ++++----
> drivers/iommu/generic_pt/iommu_pt.h | 14 +++++++
> drivers/iommu/intel/iommu.c | 58 +++++++++++++++++----------
> include/linux/generic_pt/iommu.h | 4 ++
> 6 files changed, 73 insertions(+), 46 deletions(-)
I forgot:
Tested-by: Calvin Owens <calvin@wbinvd.org>
Jason
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH 0/2] Fix VT-d when the IOVA limit is small
2025-11-27 23:54 [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
` (2 preceding siblings ...)
2025-11-28 0:04 ` [PATCH 0/2] Fix VT-d when the IOVA limit is small Jason Gunthorpe
@ 2025-11-28 7:48 ` Joerg Roedel
3 siblings, 0 replies; 7+ messages in thread
From: Joerg Roedel @ 2025-11-28 7:48 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: David Woodhouse, iommu, Robin Murphy, Suravee Suthikulpanit,
Will Deacon, Lu Baolu, Calvin Owens, Chaitanya Kumar Borah,
Joerg Roedel, Kevin Tian, patches, Tina Zhang
On Thu, Nov 27, 2025 at 07:54:06PM -0400, Jason Gunthorpe wrote:
> Jason Gunthorpe (2):
> iommupt/vtd: Allow VT-d to have a larger table top than the vasz
> requires
> iommupt/vtd: Support mgaw's less than a 4 level walk for first stage
>
> drivers/iommu/amd/iommu.c | 7 +++-
> drivers/iommu/generic_pt/fmt/vtdss.h | 19 +++------
> drivers/iommu/generic_pt/fmt/x86_64.h | 17 ++++----
> drivers/iommu/generic_pt/iommu_pt.h | 14 +++++++
> drivers/iommu/intel/iommu.c | 58 +++++++++++++++++----------
> include/linux/generic_pt/iommu.h | 4 ++
> 6 files changed, 73 insertions(+), 46 deletions(-)
Applied, thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread