* [PATCH v5 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 2/8] x86/sev: Add support for the RMPREAD instruction Tom Lendacky
` (6 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
The RMPREAD instruction returns an architecture defined format of an
RMP entry. This is the preferred method for examining RMP entries.
In preparation for using the RMPREAD instruction, convert the existing
code that directly accesses the RMP to map the raw RMP information into
the architecture defined format.
RMPREAD output returns a status bit for the 2MB region status. If the
input page address is 2MB aligned and any other pages within the 2MB
region are assigned, then 2MB region status will be set to 1. Otherwise,
the 2MB region status will be set to 0. For systems that do not support
RMPREAD, calculating this value would require looping over all of the RMP
table entries within that range until one is found with the assigned bit
set. Since this bit is not defined in the current format, and so not used
today, do not incur the overhead associated with calculating it.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/virt/svm/sev.c | 144 ++++++++++++++++++++++++++++------------
1 file changed, 100 insertions(+), 44 deletions(-)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 9a6a943d8e41..b705c726e913 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -31,10 +31,29 @@
#include <asm/iommu.h>
/*
- * The RMP entry format is not architectural. The format is defined in PPR
- * Family 19h Model 01h, Rev B1 processor.
+ * The RMP entry information as returned by the RMPREAD instruction.
*/
struct rmpentry {
+ u64 gpa;
+ u8 assigned :1,
+ rsvd1 :7;
+ u8 pagesize :1,
+ hpage_region_status :1,
+ rsvd2 :6;
+ u8 immutable :1,
+ rsvd3 :7;
+ u8 rsvd4;
+ u32 asid;
+} __packed;
+
+/*
+ * The raw RMP entry format is not architectural. The format is defined in PPR
+ * Family 19h Model 01h, Rev B1 processor. This format represents the actual
+ * entry in the RMP table memory. The bitfield definitions are used for machines
+ * without the RMPREAD instruction (Zen3 and Zen4), otherwise the "hi" and "lo"
+ * fields are only used for dumping the raw data.
+ */
+struct rmpentry_raw {
union {
struct {
u64 assigned : 1,
@@ -62,7 +81,7 @@ struct rmpentry {
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
static u64 probed_rmp_base, probed_rmp_size;
-static struct rmpentry *rmptable __ro_after_init;
+static struct rmpentry_raw *rmptable __ro_after_init;
static u64 rmptable_max_pfn __ro_after_init;
static LIST_HEAD(snp_leaked_pages_list);
@@ -249,8 +268,8 @@ static int __init snp_rmptable_init(void)
rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
- rmptable = (struct rmpentry *)rmptable_start;
- rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
+ rmptable = (struct rmpentry_raw *)rmptable_start;
+ rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
@@ -272,48 +291,77 @@ static int __init snp_rmptable_init(void)
*/
device_initcall(snp_rmptable_init);
-static struct rmpentry *get_rmpentry(u64 pfn)
+static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
- if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
- return ERR_PTR(-EFAULT);
-
- return &rmptable[pfn];
-}
-
-static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
-{
- struct rmpentry *large_entry, *entry;
-
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ if (!rmptable)
return ERR_PTR(-ENODEV);
- entry = get_rmpentry(pfn);
- if (IS_ERR(entry))
- return entry;
+ if (unlikely(pfn > rmptable_max_pfn))
+ return ERR_PTR(-EFAULT);
+
+ return rmptable + pfn;
+}
+
+static int get_rmpentry(u64 pfn, struct rmpentry *e)
+{
+ struct rmpentry_raw *e_raw;
+
+ e_raw = get_raw_rmpentry(pfn);
+ if (IS_ERR(e_raw))
+ return PTR_ERR(e_raw);
+
+ /*
+ * Map the raw RMP table entry onto the RMPREAD output format.
+ * The 2MB region status indicator (hpage_region_status field) is not
+ * calculated, since the overhead could be significant and the field
+ * is not used.
+ */
+ memset(e, 0, sizeof(*e));
+ e->gpa = e_raw->gpa << PAGE_SHIFT;
+ e->asid = e_raw->asid;
+ e->assigned = e_raw->assigned;
+ e->pagesize = e_raw->pagesize;
+ e->immutable = e_raw->immutable;
+
+ return 0;
+}
+
+static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *e, int *level)
+{
+ struct rmpentry e_large;
+ int ret;
+
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return -ENODEV;
+
+ ret = get_rmpentry(pfn, e);
+ if (ret)
+ return ret;
/*
* Find the authoritative RMP entry for a PFN. This can be either a 4K
* RMP entry or a special large RMP entry that is authoritative for a
* whole 2M area.
*/
- large_entry = get_rmpentry(pfn & PFN_PMD_MASK);
- if (IS_ERR(large_entry))
- return large_entry;
+ ret = get_rmpentry(pfn & PFN_PMD_MASK, &e_large);
+ if (ret)
+ return ret;
- *level = RMP_TO_PG_LEVEL(large_entry->pagesize);
+ *level = RMP_TO_PG_LEVEL(e_large.pagesize);
- return entry;
+ return 0;
}
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
{
- struct rmpentry *e;
+ struct rmpentry e;
+ int ret;
- e = __snp_lookup_rmpentry(pfn, level);
- if (IS_ERR(e))
- return PTR_ERR(e);
+ ret = __snp_lookup_rmpentry(pfn, &e, level);
+ if (ret)
+ return ret;
- *assigned = !!e->assigned;
+ *assigned = !!e.assigned;
return 0;
}
EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
@@ -326,20 +374,28 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
*/
static void dump_rmpentry(u64 pfn)
{
+ struct rmpentry e;
+ struct rmpentry_raw *e_raw;
u64 pfn_i, pfn_end;
- struct rmpentry *e;
- int level;
+ int level, ret;
- e = __snp_lookup_rmpentry(pfn, &level);
- if (IS_ERR(e)) {
- pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
- pfn, PTR_ERR(e));
+ ret = __snp_lookup_rmpentry(pfn, &e, &level);
+ if (ret) {
+ pr_err("Failed to read RMP entry for PFN 0x%llx, error %d\n",
+ pfn, ret);
return;
}
- if (e->assigned) {
+ if (e.assigned) {
+ e_raw = get_raw_rmpentry(pfn);
+ if (IS_ERR(e_raw)) {
+ pr_err("Failed to read RMP contents for PFN 0x%llx, error %ld\n",
+ pfn, PTR_ERR(e_raw));
+ return;
+ }
+
pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
- pfn, e->lo, e->hi);
+ pfn, e_raw->lo, e_raw->hi);
return;
}
@@ -358,16 +414,16 @@ static void dump_rmpentry(u64 pfn)
pfn, pfn_i, pfn_end);
while (pfn_i < pfn_end) {
- e = __snp_lookup_rmpentry(pfn_i, &level);
- if (IS_ERR(e)) {
- pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
- PTR_ERR(e), pfn_i);
+ e_raw = get_raw_rmpentry(pfn_i);
+ if (IS_ERR(e_raw)) {
+ pr_err("Error %ld reading RMP contents for PFN 0x%llx\n",
+ PTR_ERR(e_raw), pfn_i);
pfn_i++;
continue;
}
- if (e->lo || e->hi)
- pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+ if (e_raw->lo || e_raw->hi)
+ pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e_raw->lo, e_raw->hi);
pfn_i++;
}
}
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 2/8] x86/sev: Add support for the RMPREAD instruction
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-11-12 18:40 ` Borislav Petkov
2024-10-28 19:32 ` [PATCH v5 3/8] x86/sev: Require the RMPREAD instruction after Zen4 Tom Lendacky
` (5 subsequent siblings)
7 siblings, 1 reply; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
The RMPREAD instruction returns an architecture defined format of an
RMP table entry. This is the preferred method for examining RMP entries.
The instruction is advertised in CPUID 0x8000001f_EAX[21]. Use this
instruction when available.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/virt/svm/sev.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ea33439a5d00..d7395a55c04f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -448,6 +448,7 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index b705c726e913..7cca3d07e678 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -306,6 +306,18 @@ static int get_rmpentry(u64 pfn, struct rmpentry *e)
{
struct rmpentry_raw *e_raw;
+ if (cpu_feature_enabled(X86_FEATURE_RMPREAD)) {
+ int ret;
+
+ /* RMPREAD */
+ asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd"
+ : "=a" (ret)
+ : "a" (pfn << PAGE_SHIFT), "c" (e)
+ : "memory", "cc");
+
+ return ret;
+ }
+
e_raw = get_raw_rmpentry(pfn);
if (IS_ERR(e_raw))
return PTR_ERR(e_raw);
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 2/8] x86/sev: Add support for the RMPREAD instruction
2024-10-28 19:32 ` [PATCH v5 2/8] x86/sev: Add support for the RMPREAD instruction Tom Lendacky
@ 2024-11-12 18:40 ` Borislav Petkov
0 siblings, 0 replies; 17+ messages in thread
From: Borislav Petkov @ 2024-11-12 18:40 UTC (permalink / raw)
To: Tom Lendacky
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On Mon, Oct 28, 2024 at 02:32:37PM -0500, Tom Lendacky wrote:
> + if (cpu_feature_enabled(X86_FEATURE_RMPREAD)) {
> + int ret;
> +
> + /* RMPREAD */
Now that we know:
/* Supported in binutils version 2.44 */
> + asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd"
> + : "=a" (ret)
> + : "a" (pfn << PAGE_SHIFT), "c" (e)
> + : "memory", "cc");
> +
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 3/8] x86/sev: Require the RMPREAD instruction after Zen4
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 2/8] x86/sev: Add support for the RMPREAD instruction Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 4/8] x86/sev: Move the SNP probe routine out of the way Tom Lendacky
` (4 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
Limit usage of the non-architectural RMP format to Zen3/Zen4 processors.
The RMPREAD instruction, with architectural defined output, is available
and should be used for RMP access beyond Zen4.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/kernel/cpu/amd.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fab5caec0b72..547bcdf50f1b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -355,10 +355,15 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c)
/*
* RMP table entry format is not architectural and is defined by the
* per-processor PPR. Restrict SNP support on the known CPU models
- * for which the RMP table entry format is currently defined for.
+ * for which the RMP table entry format is currently defined or for
+ * processors which support the architecturally defined RMPREAD
+ * instruction.
*/
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
- c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ (cpu_feature_enabled(X86_FEATURE_ZEN3) ||
+ cpu_feature_enabled(X86_FEATURE_ZEN4) ||
+ cpu_feature_enabled(X86_FEATURE_RMPREAD)) &&
+ snp_probe_rmptable_info()) {
cc_platform_set(CC_ATTR_HOST_SEV_SNP);
} else {
setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 4/8] x86/sev: Move the SNP probe routine out of the way
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
` (2 preceding siblings ...)
2024-10-28 19:32 ` [PATCH v5 3/8] x86/sev: Require the RMPREAD instruction after Zen4 Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range Tom Lendacky
` (3 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
To make patch review easier for the segmented RMP support, move the SNP
probe function out from in between the initialization-related routines.
No functional change.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/virt/svm/sev.c | 60 ++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 7cca3d07e678..0b778cf7fa3d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -135,36 +135,6 @@ static __init void snp_enable(void *arg)
__snp_enable(smp_processor_id());
}
-#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
-
-bool snp_probe_rmptable_info(void)
-{
- u64 rmp_sz, rmp_base, rmp_end;
-
- rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
- rdmsrl(MSR_AMD64_RMP_END, rmp_end);
-
- if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
- pr_err("Memory for the RMP table has not been reserved by BIOS\n");
- return false;
- }
-
- if (rmp_base > rmp_end) {
- pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
- return false;
- }
-
- rmp_sz = rmp_end - rmp_base + 1;
-
- probed_rmp_base = rmp_base;
- probed_rmp_size = rmp_sz;
-
- pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
- rmp_base, rmp_end);
-
- return true;
-}
-
static void __init __snp_fixup_e820_tables(u64 pa)
{
if (IS_ALIGNED(pa, PMD_SIZE))
@@ -291,6 +261,36 @@ static int __init snp_rmptable_init(void)
*/
device_initcall(snp_rmptable_init);
+#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
+
+bool snp_probe_rmptable_info(void)
+{
+ u64 rmp_sz, rmp_base, rmp_end;
+
+ rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ if (rmp_base > rmp_end) {
+ pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end);
+ return false;
+ }
+
+ rmp_sz = rmp_end - rmp_base + 1;
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = rmp_sz;
+
+ pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n",
+ rmp_base, rmp_end);
+
+ return true;
+}
+
static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
if (!rmptable)
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
` (3 preceding siblings ...)
2024-10-28 19:32 ` [PATCH v5 4/8] x86/sev: Move the SNP probe routine out of the way Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-11-04 6:28 ` Borislav Petkov
2024-10-28 19:32 ` [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment Tom Lendacky
` (2 subsequent siblings)
7 siblings, 1 reply; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
In preparation for support of a segmented RMP table, map only the RMP
table entries. The RMP bookkeeping area is only ever accessed when
first enabling SNP and does not need to remain mapped. To accomplish
this, split the initialization of the RMP bookkeeping area and the
initialization of the RMP entry area. The RMP bookkeeping area will be
mapped only while it is being initialized.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/virt/svm/sev.c | 36 +++++++++++++++++++++++++++++++-----
1 file changed, 31 insertions(+), 5 deletions(-)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 0b778cf7fa3d..5871c924c0b2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -173,6 +173,23 @@ void __init snp_fixup_e820_tables(void)
__snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
}
+static bool __init init_rmptable_bookkeeping(void)
+{
+ void *bk;
+
+ bk = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB);
+ if (!bk) {
+ pr_err("Failed to map RMP bookkeeping area\n");
+ return false;
+ }
+
+ memset(bk, 0, RMPTABLE_CPU_BOOKKEEPING_SZ);
+
+ memunmap(bk);
+
+ return true;
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
@@ -210,12 +227,17 @@ static int __init snp_rmptable_init(void)
goto nosnp;
}
- rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB);
+ /* Map only the RMP entries */
+ rmptable_start = memremap(probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ,
+ probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ,
+ MEMREMAP_WB);
if (!rmptable_start) {
pr_err("Failed to map RMP table\n");
goto nosnp;
}
+ rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
/*
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
@@ -224,7 +246,14 @@ static int __init snp_rmptable_init(void)
if (val & MSR_AMD64_SYSCFG_SNP_EN)
goto skip_enable;
- memset(rmptable_start, 0, probed_rmp_size);
+ /* Zero out the RMP bookkeeping area */
+ if (!init_rmptable_bookkeeping()) {
+ memunmap(rmptable_start);
+ goto nosnp;
+ }
+
+ /* Zero out the RMP entries */
+ memset(rmptable_start, 0, rmptable_size);
/* Flush the caches to ensure that data is written before SNP is enabled. */
wbinvd_on_all_cpus();
@@ -235,9 +264,6 @@ static int __init snp_rmptable_init(void)
on_each_cpu(snp_enable, NULL, 1);
skip_enable:
- rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
- rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
-
rmptable = (struct rmpentry_raw *)rmptable_start;
rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range
2024-10-28 19:32 ` [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range Tom Lendacky
@ 2024-11-04 6:28 ` Borislav Petkov
2024-11-04 15:03 ` Tom Lendacky
0 siblings, 1 reply; 17+ messages in thread
From: Borislav Petkov @ 2024-11-04 6:28 UTC (permalink / raw)
To: Tom Lendacky
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On Mon, Oct 28, 2024 at 02:32:40PM -0500, Tom Lendacky wrote:
> @@ -224,7 +246,14 @@ static int __init snp_rmptable_init(void)
> if (val & MSR_AMD64_SYSCFG_SNP_EN)
> goto skip_enable;
>
> - memset(rmptable_start, 0, probed_rmp_size);
> + /* Zero out the RMP bookkeeping area */
> + if (!init_rmptable_bookkeeping()) {
So let's call it what it is then:
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 5871c924c0b2..8145a7b14fa2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -173,7 +173,7 @@ void __init snp_fixup_e820_tables(void)
__snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
}
-static bool __init init_rmptable_bookkeeping(void)
+static bool __init clear_rmptable_bookkeeping(void)
{
void *bk;
@@ -246,8 +246,7 @@ static int __init snp_rmptable_init(void)
if (val & MSR_AMD64_SYSCFG_SNP_EN)
goto skip_enable;
- /* Zero out the RMP bookkeeping area */
- if (!init_rmptable_bookkeeping()) {
+ if (!clear_rmptable_bookkeeping()) {
memunmap(rmptable_start);
goto nosnp;
}
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range
2024-11-04 6:28 ` Borislav Petkov
@ 2024-11-04 15:03 ` Tom Lendacky
0 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-11-04 15:03 UTC (permalink / raw)
To: Borislav Petkov
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On 11/4/24 00:28, Borislav Petkov wrote:
> On Mon, Oct 28, 2024 at 02:32:40PM -0500, Tom Lendacky wrote:
>> @@ -224,7 +246,14 @@ static int __init snp_rmptable_init(void)
>> if (val & MSR_AMD64_SYSCFG_SNP_EN)
>> goto skip_enable;
>>
>> - memset(rmptable_start, 0, probed_rmp_size);
>> + /* Zero out the RMP bookkeeping area */
>> + if (!init_rmptable_bookkeeping()) {
>
> So let's call it what it is then:
>
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 5871c924c0b2..8145a7b14fa2 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -173,7 +173,7 @@ void __init snp_fixup_e820_tables(void)
> __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
> }
>
> -static bool __init init_rmptable_bookkeeping(void)
> +static bool __init clear_rmptable_bookkeeping(void)
Sure.
Thanks,
Tom
> {
> void *bk;
>
> @@ -246,8 +246,7 @@ static int __init snp_rmptable_init(void)
> if (val & MSR_AMD64_SYSCFG_SNP_EN)
> goto skip_enable;
>
> - /* Zero out the RMP bookkeeping area */
> - if (!init_rmptable_bookkeeping()) {
> + if (!clear_rmptable_bookkeeping()) {
> memunmap(rmptable_start);
> goto nosnp;
> }
>
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
` (4 preceding siblings ...)
2024-10-28 19:32 ` [PATCH v5 5/8] x86/sev: Map only the RMP table entries instead of the full RMP range Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-11-04 10:33 ` Borislav Petkov
2024-10-28 19:32 ` [PATCH v5 7/8] x86/sev: Add full support for a segmented RMP table Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 8/8] x86/sev/docs: Document the SNP Reverse Map Table (RMP) Tom Lendacky
7 siblings, 1 reply; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
In preparation for support of a segmented RMP table, treat the contiguous
RMP table as a segmented RMP table with a single segment covering all
of memory. By treating a contiguous RMP table as a single segment, much
of the code that initializes and accesses the RMP can be re-used.
Segmented RMP tables can have up to 512 segment entries. Each segment
will have metadata associated with it to identify the segment location,
the segment size, etc. The segment data and the physical address are used
to determine the index of the segment within the table and then the RMP
entry within the segment. For an actual segmented RMP table environment,
much of the segment information will come from a configuration MSR. For
the contiguous RMP, though, much of the information will be statically
defined.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/virt/svm/sev.c | 193 ++++++++++++++++++++++++++++++++++++----
1 file changed, 174 insertions(+), 19 deletions(-)
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 5871c924c0b2..37ff4f98e8d1 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -18,6 +18,7 @@
#include <linux/cpumask.h>
#include <linux/iommu.h>
#include <linux/amd-iommu.h>
+#include <linux/nospec.h>
#include <asm/sev.h>
#include <asm/processor.h>
@@ -77,12 +78,42 @@ struct rmpentry_raw {
*/
#define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
+/*
+ * For a non-segmented RMP table, use the maximum physical addressing as the
+ * segment size in order to always arrive at index 0 in the table.
+ */
+#define RMPTABLE_NON_SEGMENTED_SHIFT 52
+
+struct rmp_segment_desc {
+ struct rmpentry_raw *rmp_entry;
+ u64 max_index;
+ u64 size;
+};
+
+/*
+ * Segmented RMP Table support.
+ * - The segment size is used for two purposes:
+ * - Identify the amount of memory covered by an RMP segment
+ * - Quickly locate an RMP segment table entry for a physical address
+ *
+ * - The RMP segment table contains pointers to an RMP table that covers
+ * a specific portion of memory. There can be up to 512 8-byte entries,
+ * one pages worth.
+ */
+static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
+static unsigned int rst_max_index __ro_after_init = 512;
+
+static u64 rmp_segment_size_max;
+static unsigned int rmp_segment_coverage_shift;
+static u64 rmp_segment_coverage_size;
+static u64 rmp_segment_coverage_mask;
+#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
+#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
+
/* Mask to apply to a PFN to get the first PFN of a 2MB page */
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
static u64 probed_rmp_base, probed_rmp_size;
-static struct rmpentry_raw *rmptable __ro_after_init;
-static u64 rmptable_max_pfn __ro_after_init;
static LIST_HEAD(snp_leaked_pages_list);
static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
@@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
return true;
}
+static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
+{
+ struct rmp_segment_desc *desc;
+ void *rmp_segment;
+ u64 rst_index;
+
+ /* Validate the RMP segment size */
+ if (segment_size > rmp_segment_size_max) {
+ pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
+ segment_size, rmp_segment_size_max);
+ return false;
+ }
+
+ /* Validate the RMP segment table index */
+ rst_index = RST_ENTRY_INDEX(pa);
+ if (rst_index >= rst_max_index) {
+ pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
+ pa, rmp_segment_coverage_size);
+ return false;
+ }
+ rst_index = array_index_nospec(rst_index, rst_max_index);
+
+ if (rmp_segment_table[rst_index]) {
+ pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
+ return false;
+ }
+
+ /* Map the RMP entries */
+ rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
+ if (!rmp_segment) {
+ pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
+ segment_pa, segment_size);
+ return false;
+ }
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ memunmap(rmp_segment);
+ return false;
+ }
+
+ desc->rmp_entry = rmp_segment;
+ desc->max_index = segment_size / sizeof(*desc->rmp_entry);
+ desc->size = segment_size;
+
+ /* Add the segment descriptor to the table */
+ rmp_segment_table[rst_index] = desc;
+
+ return true;
+}
+
+static void __init free_rmp_segment_table(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < rst_max_index; i++) {
+ struct rmp_segment_desc *desc;
+
+ desc = rmp_segment_table[i];
+ if (!desc)
+ continue;
+
+ memunmap(desc->rmp_entry);
+
+ kfree(desc);
+ }
+
+ free_page((unsigned long)rmp_segment_table);
+
+ rmp_segment_table = NULL;
+}
+
+static bool __init alloc_rmp_segment_table(void)
+{
+ struct page *page;
+
+ /* Allocate the table used to index into the RMP segments */
+ page = alloc_page(__GFP_ZERO);
+ if (!page)
+ return false;
+
+ rmp_segment_table = page_address(page);
+
+ return true;
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
@@ -197,8 +314,8 @@ static bool __init init_rmptable_bookkeeping(void)
*/
static int __init snp_rmptable_init(void)
{
- u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val;
- void *rmptable_start;
+ u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
+ unsigned int i;
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
@@ -227,17 +344,18 @@ static int __init snp_rmptable_init(void)
goto nosnp;
}
+ if (!alloc_rmp_segment_table())
+ goto nosnp;
+
/* Map only the RMP entries */
- rmptable_start = memremap(probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ,
- probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ,
- MEMREMAP_WB);
- if (!rmptable_start) {
- pr_err("Failed to map RMP table\n");
+ rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+ if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
+ free_rmp_segment_table();
goto nosnp;
}
- rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
-
/*
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
@@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
/* Zero out the RMP bookkeeping area */
if (!init_rmptable_bookkeeping()) {
- memunmap(rmptable_start);
+ free_rmp_segment_table();
goto nosnp;
}
/* Zero out the RMP entries */
- memset(rmptable_start, 0, rmptable_size);
+ for (i = 0; i < rst_max_index; i++) {
+ struct rmp_segment_desc *desc;
+
+ desc = rmp_segment_table[i];
+ if (!desc)
+ continue;
+
+ memset(desc->rmp_entry, 0, desc->size);
+ }
/* Flush the caches to ensure that data is written before SNP is enabled. */
wbinvd_on_all_cpus();
@@ -264,9 +390,6 @@ static int __init snp_rmptable_init(void)
on_each_cpu(snp_enable, NULL, 1);
skip_enable:
- rmptable = (struct rmpentry_raw *)rmptable_start;
- rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
-
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
/*
@@ -287,6 +410,17 @@ static int __init snp_rmptable_init(void)
*/
device_initcall(snp_rmptable_init);
+static void set_rmp_segment_info(unsigned int segment_shift)
+{
+ rmp_segment_coverage_shift = segment_shift;
+ rmp_segment_coverage_size = 1ULL << rmp_segment_coverage_shift;
+ rmp_segment_coverage_mask = rmp_segment_coverage_size - 1;
+
+ /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
+ rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
+ rmp_segment_size_max <<= 4;
+}
+
#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
bool snp_probe_rmptable_info(void)
@@ -308,6 +442,11 @@ bool snp_probe_rmptable_info(void)
rmp_sz = rmp_end - rmp_base + 1;
+ /* Treat the contiguous RMP table as a single segment */
+ rst_max_index = 1;
+
+ set_rmp_segment_info(RMPTABLE_NON_SEGMENTED_SHIFT);
+
probed_rmp_base = rmp_base;
probed_rmp_size = rmp_sz;
@@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
- if (!rmptable)
+ struct rmp_segment_desc *desc;
+ u64 paddr, rst_index, segment_index;
+
+ if (!rmp_segment_table)
return ERR_PTR(-ENODEV);
- if (unlikely(pfn > rmptable_max_pfn))
+ paddr = pfn << PAGE_SHIFT;
+
+ rst_index = RST_ENTRY_INDEX(paddr);
+ if (unlikely(rst_index >= rst_max_index))
+ return ERR_PTR(-EFAULT);
+ rst_index = array_index_nospec(rst_index, rst_max_index);
+
+ desc = rmp_segment_table[rst_index];
+ if (unlikely(!desc))
return ERR_PTR(-EFAULT);
- return rmptable + pfn;
+ segment_index = RMP_ENTRY_INDEX(paddr);
+ if (unlikely(segment_index >= desc->max_index))
+ return ERR_PTR(-EFAULT);
+ segment_index = array_index_nospec(segment_index, desc->max_index);
+
+ return desc->rmp_entry + segment_index;
}
static int get_rmpentry(u64 pfn, struct rmpentry *e)
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-10-28 19:32 ` [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment Tom Lendacky
@ 2024-11-04 10:33 ` Borislav Petkov
2024-11-04 16:03 ` Tom Lendacky
0 siblings, 1 reply; 17+ messages in thread
From: Borislav Petkov @ 2024-11-04 10:33 UTC (permalink / raw)
To: Tom Lendacky
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On Mon, Oct 28, 2024 at 02:32:41PM -0500, Tom Lendacky wrote:
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 5871c924c0b2..37ff4f98e8d1 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -18,6 +18,7 @@
> #include <linux/cpumask.h>
> #include <linux/iommu.h>
> #include <linux/amd-iommu.h>
> +#include <linux/nospec.h>
What's that include for?
For array_index_nospec() below?
It builds without it or did you trigger a build error with some funky .config?
> #include <asm/sev.h>
> #include <asm/processor.h>
> @@ -77,12 +78,42 @@ struct rmpentry_raw {
> */
> #define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
>
> +/*
> + * For a non-segmented RMP table, use the maximum physical addressing as the
> + * segment size in order to always arrive at index 0 in the table.
> + */
> +#define RMPTABLE_NON_SEGMENTED_SHIFT 52
> +
> +struct rmp_segment_desc {
> + struct rmpentry_raw *rmp_entry;
> + u64 max_index;
> + u64 size;
> +};
> +
> +/*
> + * Segmented RMP Table support.
> + * - The segment size is used for two purposes:
> + * - Identify the amount of memory covered by an RMP segment
> + * - Quickly locate an RMP segment table entry for a physical address
> + *
> + * - The RMP segment table contains pointers to an RMP table that covers
> + * a specific portion of memory. There can be up to 512 8-byte entries,
> + * one pages worth.
> + */
> +static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
> +static unsigned int rst_max_index __ro_after_init = 512;
> +
> +static u64 rmp_segment_size_max;
This one is used in a single function. Generate it there pls.
> +static unsigned int rmp_segment_coverage_shift;
> +static u64 rmp_segment_coverage_size;
> +static u64 rmp_segment_coverage_mask;
That "_coverage_" in there looks redundant to me and could go and make those
variables shorter.
> +#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
> +#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
> +
> /* Mask to apply to a PFN to get the first PFN of a 2MB page */
> #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>
> static u64 probed_rmp_base, probed_rmp_size;
> -static struct rmpentry_raw *rmptable __ro_after_init;
> -static u64 rmptable_max_pfn __ro_after_init;
>
> static LIST_HEAD(snp_leaked_pages_list);
> static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
> @@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
> return true;
> }
>
> +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
> +{
> + struct rmp_segment_desc *desc;
> + void *rmp_segment;
> + u64 rst_index;
> +
> + /* Validate the RMP segment size */
> + if (segment_size > rmp_segment_size_max) {
> + pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
> + segment_size, rmp_segment_size_max);
> + return false;
> + }
> +
> + /* Validate the RMP segment table index */
> + rst_index = RST_ENTRY_INDEX(pa);
> + if (rst_index >= rst_max_index) {
> + pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
> + pa, rmp_segment_coverage_size);
> + return false;
> + }
> + rst_index = array_index_nospec(rst_index, rst_max_index);
Why are we doing this here? Are you expecting some out-of-bounds
user-controlled values here?
AFAICT, this is all read from the hw/fw so why are speculative accesses
a problem?
> + if (rmp_segment_table[rst_index]) {
> + pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
> + return false;
> + }
> +
> + /* Map the RMP entries */
Kinda obvious...
> + rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
> + if (!rmp_segment) {
> + pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
> + segment_pa, segment_size);
> + return false;
> + }
> +
> + desc = kzalloc(sizeof(*desc), GFP_KERNEL);
sizeof(struct rmp_segment_desc)
> + if (!desc) {
> + memunmap(rmp_segment);
> + return false;
> + }
> +
> + desc->rmp_entry = rmp_segment;
> + desc->max_index = segment_size / sizeof(*desc->rmp_entry);
> + desc->size = segment_size;
> +
> + /* Add the segment descriptor to the table */
> + rmp_segment_table[rst_index] = desc;
> +
> + return true;
> +}
...
> @@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
>
> /* Zero out the RMP bookkeeping area */
> if (!init_rmptable_bookkeeping()) {
> - memunmap(rmptable_start);
> + free_rmp_segment_table();
> goto nosnp;
> }
>
> /* Zero out the RMP entries */
> - memset(rmptable_start, 0, rmptable_size);
> + for (i = 0; i < rst_max_index; i++) {
> + struct rmp_segment_desc *desc;
> +
> + desc = rmp_segment_table[i];
> + if (!desc)
> + continue;
> +
> + memset(desc->rmp_entry, 0, desc->size);
> + }
Why isn't this zeroing out part of alloc_rmp_segment_table()?
> @@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
>
> static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
> {
> - if (!rmptable)
> + struct rmp_segment_desc *desc;
> + u64 paddr, rst_index, segment_index;
Reverse xmas tree order pls.
> + if (!rmp_segment_table)
> return ERR_PTR(-ENODEV);
>
> - if (unlikely(pfn > rmptable_max_pfn))
> + paddr = pfn << PAGE_SHIFT;
> +
> + rst_index = RST_ENTRY_INDEX(paddr);
> + if (unlikely(rst_index >= rst_max_index))
> + return ERR_PTR(-EFAULT);
> + rst_index = array_index_nospec(rst_index, rst_max_index);
Same question as above.
> +
> + desc = rmp_segment_table[rst_index];
> + if (unlikely(!desc))
> return ERR_PTR(-EFAULT);
>
> - return rmptable + pfn;
> + segment_index = RMP_ENTRY_INDEX(paddr);
> + if (unlikely(segment_index >= desc->max_index))
> + return ERR_PTR(-EFAULT);
> + segment_index = array_index_nospec(segment_index, desc->max_index);
> +
> + return desc->rmp_entry + segment_index;
> }
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-11-04 10:33 ` Borislav Petkov
@ 2024-11-04 16:03 ` Tom Lendacky
2024-11-04 17:05 ` Borislav Petkov
2024-11-08 17:23 ` Borislav Petkov
0 siblings, 2 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-11-04 16:03 UTC (permalink / raw)
To: Borislav Petkov
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On 11/4/24 04:33, Borislav Petkov wrote:
> On Mon, Oct 28, 2024 at 02:32:41PM -0500, Tom Lendacky wrote:
>> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
>> index 5871c924c0b2..37ff4f98e8d1 100644
>> --- a/arch/x86/virt/svm/sev.c
>> +++ b/arch/x86/virt/svm/sev.c
>> @@ -18,6 +18,7 @@
>> #include <linux/cpumask.h>
>> #include <linux/iommu.h>
>> #include <linux/amd-iommu.h>
>> +#include <linux/nospec.h>
>
> What's that include for?
>
> For array_index_nospec() below?
>
> It builds without it or did you trigger a build error with some funky .config?
But the define is in linux/nospec.h, so I might only be a side effect of
another include.
>
>> #include <asm/sev.h>
>> #include <asm/processor.h>
>> @@ -77,12 +78,42 @@ struct rmpentry_raw {
>> */
>> #define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000
>>
>> +/*
>> + * For a non-segmented RMP table, use the maximum physical addressing as the
>> + * segment size in order to always arrive at index 0 in the table.
>> + */
>> +#define RMPTABLE_NON_SEGMENTED_SHIFT 52
>> +
>> +struct rmp_segment_desc {
>> + struct rmpentry_raw *rmp_entry;
>> + u64 max_index;
>> + u64 size;
>> +};
>> +
>> +/*
>> + * Segmented RMP Table support.
>> + * - The segment size is used for two purposes:
>> + * - Identify the amount of memory covered by an RMP segment
>> + * - Quickly locate an RMP segment table entry for a physical address
>> + *
>> + * - The RMP segment table contains pointers to an RMP table that covers
>> + * a specific portion of memory. There can be up to 512 8-byte entries,
>> + * one pages worth.
>> + */
>> +static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
>> +static unsigned int rst_max_index __ro_after_init = 512;
>> +
>> +static u64 rmp_segment_size_max;
>
> This one is used in a single function. Generate it there pls.
It's used in two functions, alloc_rmp_segment_desc() and
set_rmp_segment_info().
>
>> +static unsigned int rmp_segment_coverage_shift;
>> +static u64 rmp_segment_coverage_size;
>> +static u64 rmp_segment_coverage_mask;
>
> That "_coverage_" in there looks redundant to me and could go and make those
> variables shorter.
Sure, I can remove the _coverage_ portion.
>
>> +#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
>> +#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
>> +
>> /* Mask to apply to a PFN to get the first PFN of a 2MB page */
>> #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>>
>> static u64 probed_rmp_base, probed_rmp_size;
>> -static struct rmpentry_raw *rmptable __ro_after_init;
>> -static u64 rmptable_max_pfn __ro_after_init;
>>
>> static LIST_HEAD(snp_leaked_pages_list);
>> static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
>> @@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
>> return true;
>> }
>>
>> +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
>> +{
>> + struct rmp_segment_desc *desc;
>> + void *rmp_segment;
>> + u64 rst_index;
>> +
>> + /* Validate the RMP segment size */
>> + if (segment_size > rmp_segment_size_max) {
>> + pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
>> + segment_size, rmp_segment_size_max);
>> + return false;
>> + }
>> +
>> + /* Validate the RMP segment table index */
>> + rst_index = RST_ENTRY_INDEX(pa);
>> + if (rst_index >= rst_max_index) {
>> + pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
>> + pa, rmp_segment_coverage_size);
>> + return false;
>> + }
>> + rst_index = array_index_nospec(rst_index, rst_max_index);
>
> Why are we doing this here? Are you expecting some out-of-bounds
> user-controlled values here?
>
> AFAICT, this is all read from the hw/fw so why are speculative accesses
> a problem?
Yeah, not needed here since this is before userspace has even started.
>
>> + if (rmp_segment_table[rst_index]) {
>> + pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
>> + return false;
>> + }
>> +
>> + /* Map the RMP entries */
>
> Kinda obvious...
>
>> + rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
>> + if (!rmp_segment) {
>> + pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
>> + segment_pa, segment_size);
>> + return false;
>> + }
>> +
>> + desc = kzalloc(sizeof(*desc), GFP_KERNEL);
>
> sizeof(struct rmp_segment_desc)
Hmmm... I prefer to keep this as sizeof(*desc). If any changes are made
to the structure name, then this line doesn't need to be changed.
>
>
>> + if (!desc) {
>> + memunmap(rmp_segment);
>> + return false;
>> + }
>> +
>> + desc->rmp_entry = rmp_segment;
>> + desc->max_index = segment_size / sizeof(*desc->rmp_entry);
>> + desc->size = segment_size;
>> +
>> + /* Add the segment descriptor to the table */
>> + rmp_segment_table[rst_index] = desc;
>> +
>> + return true;
>> +}
>
> ...
>
>> @@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
>>
>> /* Zero out the RMP bookkeeping area */
>> if (!init_rmptable_bookkeeping()) {
>> - memunmap(rmptable_start);
>> + free_rmp_segment_table();
>> goto nosnp;
>> }
>>
>> /* Zero out the RMP entries */
>> - memset(rmptable_start, 0, rmptable_size);
>> + for (i = 0; i < rst_max_index; i++) {
>> + struct rmp_segment_desc *desc;
>> +
>> + desc = rmp_segment_table[i];
>> + if (!desc)
>> + continue;
>> +
>> + memset(desc->rmp_entry, 0, desc->size);
>> + }
>
> Why isn't this zeroing out part of alloc_rmp_segment_table()?
If the SNP_EN bit is set in the SYSCFG MSR, the code needs to skip the
zeroing of the RMP table since it no longer has write access to the
table (this happens with kexec).
This keeps the code consistent with the prior code as to where the
zeroing occurs. I can add another argument to alloc_rmp_segment_desc()
that tells it whether to clear it or not, if you prefer.
>
>> @@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
>>
>> static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
>> {
>> - if (!rmptable)
>> + struct rmp_segment_desc *desc;
>> + u64 paddr, rst_index, segment_index;
>
> Reverse xmas tree order pls.
Right.
>
>> + if (!rmp_segment_table)
>> return ERR_PTR(-ENODEV);
>>
>> - if (unlikely(pfn > rmptable_max_pfn))
>> + paddr = pfn << PAGE_SHIFT;
>> +
>> + rst_index = RST_ENTRY_INDEX(paddr);
>> + if (unlikely(rst_index >= rst_max_index))
>> + return ERR_PTR(-EFAULT);
>> + rst_index = array_index_nospec(rst_index, rst_max_index);
>
> Same question as above.
This is where I was worried about the VMM/guest being able to get into
this routine with a bad PFN value.
This function is invoked from dump_rmpentry(), which can be invoked from:
rmpupdate() - I think this is safe because the adjust_direct_map() will
fail if the PFN isn't valid, before the RMP is accessed.
snp_leak_pages() - I think this is safe because the PFN is based on an
actual allocation.
snp_dump_hva_rmpentry() - This is called from the page fault handler. I
think this invocation is safe for now because because it is only called
if the fault type is an RMP fault type, which implies that the RMP is
involved. But as an external function, there's no guarantee as to the
situation it can be called from in the future.
I can remove it for now if you think it will be safe going forward.
Thanks,
Tom
>
>> +
>> + desc = rmp_segment_table[rst_index];
>> + if (unlikely(!desc))
>> return ERR_PTR(-EFAULT);
>>
>> - return rmptable + pfn;
>> + segment_index = RMP_ENTRY_INDEX(paddr);
>> + if (unlikely(segment_index >= desc->max_index))
>> + return ERR_PTR(-EFAULT);
>> + segment_index = array_index_nospec(segment_index, desc->max_index);
>> +
>> + return desc->rmp_entry + segment_index;
>> }
>
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-11-04 16:03 ` Tom Lendacky
@ 2024-11-04 17:05 ` Borislav Petkov
2024-11-04 17:15 ` Tom Lendacky
2024-11-08 17:23 ` Borislav Petkov
1 sibling, 1 reply; 17+ messages in thread
From: Borislav Petkov @ 2024-11-04 17:05 UTC (permalink / raw)
To: Tom Lendacky
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
> >> +static u64 rmp_segment_size_max;
> >
> > This one is used in a single function. Generate it there pls.
>
> It's used in two functions, alloc_rmp_segment_desc() and
> set_rmp_segment_info().
You can always lift it up later if it is needed more than now:
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 37ff4f98e8d1..77bdf4d6a8f4 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -103,7 +103,6 @@ struct rmp_segment_desc {
static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
static unsigned int rst_max_index __ro_after_init = 512;
-static u64 rmp_segment_size_max;
static unsigned int rmp_segment_coverage_shift;
static u64 rmp_segment_coverage_size;
static u64 rmp_segment_coverage_mask;
@@ -223,9 +222,13 @@ static bool __init init_rmptable_bookkeeping(void)
static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
{
+ u64 rst_index, rmp_segment_size_max;
struct rmp_segment_desc *desc;
void *rmp_segment;
- u64 rst_index;
+
+ /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
+ rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
+ rmp_segment_size_max <<= 4;
/* Validate the RMP segment size */
if (segment_size > rmp_segment_size_max) {
@@ -415,10 +418,6 @@ static void set_rmp_segment_info(unsigned int segment_shift)
rmp_segment_coverage_shift = segment_shift;
rmp_segment_coverage_size = 1ULL << rmp_segment_coverage_shift;
rmp_segment_coverage_mask = rmp_segment_coverage_size - 1;
-
- /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
- rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
- rmp_segment_size_max <<= 4;
}
#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-11-04 17:05 ` Borislav Petkov
@ 2024-11-04 17:15 ` Tom Lendacky
0 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-11-04 17:15 UTC (permalink / raw)
To: Borislav Petkov
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On 11/4/24 11:05, Borislav Petkov wrote:
> On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
>>>> +static u64 rmp_segment_size_max;
>>>
>>> This one is used in a single function. Generate it there pls.
>>
>> It's used in two functions, alloc_rmp_segment_desc() and
>> set_rmp_segment_info().
>
> You can always lift it up later if it is needed more than now:
Will do.
Thanks,
Tom
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
2024-11-04 16:03 ` Tom Lendacky
2024-11-04 17:05 ` Borislav Petkov
@ 2024-11-08 17:23 ` Borislav Petkov
1 sibling, 0 replies; 17+ messages in thread
From: Borislav Petkov @ 2024-11-08 17:23 UTC (permalink / raw)
To: Tom Lendacky
Cc: linux-kernel, x86, Thomas Gleixner, Ingo Molnar, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
> >> + desc = kzalloc(sizeof(*desc), GFP_KERNEL);
> >
> > sizeof(struct rmp_segment_desc)
>
> Hmmm... I prefer to keep this as sizeof(*desc). If any changes are made
> to the structure name, then this line doesn't need to be changed.
Oh boy, we really do that:
from Documentation/process/coding-style.rst
"The preferred form for passing a size of a struct is the following:
p = kmalloc(sizeof(*p), ...);
The alternative form where struct name is spelled out hurts readability and
introduces an opportunity for a bug when the pointer variable type is changed
but the corresponding sizeof that is passed to a memory allocator is not."
Well, my problem with using the variable name as sizeof() argument is that you
need to go and look at what type it is to know what you're sizing. And it
doesn't really hurt readability - on the contrary - it makes it more readable.
/facepalm.
> > Why isn't this zeroing out part of alloc_rmp_segment_table()?
First of all:
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 37ff4f98e8d1..1ae782194626 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -293,18 +293,16 @@ static void __init free_rmp_segment_table(void)
rmp_segment_table = NULL;
}
-static bool __init alloc_rmp_segment_table(void)
+static struct __init rmp_segment_desc **alloc_rmp_segment_table(void)
{
struct page *page;
/* Allocate the table used to index into the RMP segments */
page = alloc_page(__GFP_ZERO);
if (!page)
- return false;
-
- rmp_segment_table = page_address(page);
+ return NULL;
- return true;
+ return page_address(page);
}
/*
@@ -344,7 +342,8 @@ static int __init snp_rmptable_init(void)
goto nosnp;
}
- if (!alloc_rmp_segment_table())
+ rmp_segment_table = alloc_rmp_segment_table();
+ if (!rmp_segment_table)
goto nosnp;
/* Map only the RMP entries */
---
which makes the code a lot more readable and natural.
> If the SNP_EN bit is set in the SYSCFG MSR, the code needs to skip the
> zeroing of the RMP table since it no longer has write access to the
> table (this happens with kexec).
So we allocate a rmp_segment_table page when we kexec but we can't write the
entries? Why?
This sounds like some weird limitation. Or maybe I'm missing something.
> This keeps the code consistent with the prior code as to where the
> zeroing occurs. I can add another argument to alloc_rmp_segment_desc()
> that tells it whether to clear it or not, if you prefer.
You can also merge that init_rmptable_bookkeeping() and the zeroing of the RMP
entries into one function because they happen back-to-back.
In any case the placement of this whole dance still doesn't look completely
optimal to me.
> This is where I was worried about the VMM/guest being able to get into
> this routine with a bad PFN value.
>
> This function is invoked from dump_rmpentry(), which can be invoked from:
>
> rmpupdate() - I think this is safe because the adjust_direct_map() will
> fail if the PFN isn't valid, before the RMP is accessed.
>
> snp_leak_pages() - I think this is safe because the PFN is based on an
> actual allocation.
>
> snp_dump_hva_rmpentry() - This is called from the page fault handler. I
> think this invocation is safe for now because because it is only called
> if the fault type is an RMP fault type, which implies that the RMP is
> involved. But as an external function, there's no guarantee as to the
> situation it can be called from in the future.
>
> I can remove it for now if you think it will be safe going forward.
I like being cautious and you probably should put this deliberation above the
array_index_nospec() so that it is clear to future generations reading this
code.
And yeah, it might be safe but it is not like it costs compared to the
remaining operations happening here so maybe, out of abundance of caution, we
should keep it there and explain why we do so.
Hmmm?
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH v5 7/8] x86/sev: Add full support for a segmented RMP table
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
` (5 preceding siblings ...)
2024-10-28 19:32 ` [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
2024-10-28 19:32 ` [PATCH v5 8/8] x86/sev/docs: Document the SNP Reverse Map Table (RMP) Tom Lendacky
7 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
A segmented RMP table allows for improved locality of reference between
the memory protected by the RMP and the RMP entries themselves.
Add support to detect and initialize a segmented RMP table with multiple
segments as configured by the system BIOS. While the RMPREAD instruction
will be used to read an RMP entry in a segmented RMP, initialization and
debugging capabilities will require the mapping of the segments.
The RMP_CFG MSR indicates if segmented RMP support is enabled and, if
enabled, the amount of memory that an RMP segment covers. When segmented
RMP support is enabled, the RMP_BASE MSR points to the start of the RMP
bookkeeping area, which is 16K in size. The RMP Segment Table (RST) is
located immediately after the bookkeeping area and is 4K in size. The RST
contains up to 512 8-byte entries that identify the location of the RMP
segment and amount of memory mapped by the segment (which must be less
than or equal to the configured segment size). The physical address that
is covered by a segment is based on the segment size and the index of the
segment in the RST. The RMP entry for a physical address is based on the
offset within the segment.
For example, if the segment size is 64GB (0x1000000000 or 1 << 36), then
physical address 0x9000800000 is RST entry 9 (0x9000800000 >> 36) and
RST entry 9 covers physical memory 0x9000000000 to 0x9FFFFFFFFF.
The RMP entry index within the RMP segment is the physical address
AND-ed with the segment mask, 64GB - 1 (0xFFFFFFFFF), and then
right-shifted 12 bits or PHYS_PFN(0x9000800000 & 0xFFFFFFFFF), which
is 0x800.
CPUID 0x80000025_EBX[9:0] describes the number of RMP segments that can
be cached by the hardware. Additionally, if CPUID 0x80000025_EBX[10] is
set, then the number of actual RMP segments defined cannot exceed the
number of RMP segments that can be cached and can be used as a maximum
RST index.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/msr-index.h | 8 +-
arch/x86/virt/svm/sev.c | 251 ++++++++++++++++++++++++++---
3 files changed, 236 insertions(+), 24 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index d7395a55c04f..dd5481ee9c7e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -449,6 +449,7 @@
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..3f3e2bc99162 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -644,6 +644,7 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
@@ -682,11 +683,12 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_RESV_BIT 18
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
-
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
#define MSR_SVSM_CAA 0xc001f000
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 37ff4f98e8d1..2051c527d1e5 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -100,6 +100,10 @@ struct rmp_segment_desc {
* a specific portion of memory. There can be up to 512 8-byte entries,
* one pages worth.
*/
+#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0))
+#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20))
+
+#define RMP_SEGMENT_TABLE_SIZE SZ_4K
static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
static unsigned int rst_max_index __ro_after_init = 512;
@@ -110,6 +114,9 @@ static u64 rmp_segment_coverage_mask;
#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_coverage_shift)
#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
+static u64 rmp_cfg;
+#define RMP_IS_SEGMENTED(x) ((x) & MSR_AMD64_SEG_RMP_ENABLED)
+
/* Mask to apply to a PFN to get the first PFN of a 2MB page */
#define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
@@ -201,7 +208,49 @@ static void __init __snp_fixup_e820_tables(u64 pa)
void __init snp_fixup_e820_tables(void)
{
__snp_fixup_e820_tables(probed_rmp_base);
- __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+
+ if (RMP_IS_SEGMENTED(rmp_cfg)) {
+ u64 pa, *rst, size, mapped_size;
+ unsigned int i;
+
+ pa = probed_rmp_base;
+ pa += RMPTABLE_CPU_BOOKKEEPING_SZ;
+ pa += RMP_SEGMENT_TABLE_SIZE;
+ __snp_fixup_e820_tables(pa);
+
+ pa -= RMP_SEGMENT_TABLE_SIZE;
+ rst = early_memremap(pa, RMP_SEGMENT_TABLE_SIZE);
+ if (!rst)
+ return;
+
+ for (i = 0; i < rst_max_index; i++) {
+ pa = RST_ENTRY_SEGMENT_BASE(rst[i]);
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ __snp_fixup_e820_tables(pa);
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed
+ * the segment coverage size, but gets reduced to the
+ * segment coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_coverage_size)
+ mapped_size = rmp_segment_coverage_size;
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ size = PHYS_PFN(mapped_size);
+ size <<= 4;
+
+ __snp_fixup_e820_tables(pa + size);
+ }
+
+ early_memunmap(rst, RMP_SEGMENT_TABLE_SIZE);
+ } else {
+ __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+ }
}
static bool __init init_rmptable_bookkeeping(void)
@@ -307,29 +356,17 @@ static bool __init alloc_rmp_segment_table(void)
return true;
}
-/*
- * Do the necessary preparations which are verified by the firmware as
- * described in the SNP_INIT_EX firmware command description in the SNP
- * firmware ABI spec.
- */
-static int __init snp_rmptable_init(void)
+static bool __init contiguous_rmptable_setup(void)
{
- u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
- unsigned int i;
-
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
-
- if (!amd_iommu_snp_en)
- goto nosnp;
+ u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end;
if (!probed_rmp_size)
- goto nosnp;
+ return false;
rmp_end = probed_rmp_base + probed_rmp_size - 1;
/*
- * Calculate the amount the memory that must be reserved by the BIOS to
+ * Calculate the amount of memory that must be reserved by the BIOS to
* address the whole RAM, including the bookkeeping area. The RMP itself
* must also be covered.
*/
@@ -341,11 +378,11 @@ static int __init snp_rmptable_init(void)
if (calc_rmp_sz > probed_rmp_size) {
pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
calc_rmp_sz, probed_rmp_size);
- goto nosnp;
+ return false;
}
if (!alloc_rmp_segment_table())
- goto nosnp;
+ return false;
/* Map only the RMP entries */
rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
@@ -353,9 +390,127 @@ static int __init snp_rmptable_init(void)
if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
free_rmp_segment_table();
- goto nosnp;
+ return false;
}
+ return true;
+}
+
+static bool __init segmented_rmptable_setup(void)
+{
+ u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max;
+ unsigned int i, max_index;
+
+ if (!probed_rmp_base)
+ return false;
+
+ if (!alloc_rmp_segment_table())
+ return false;
+
+ /* Map the RMP Segment Table */
+ rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+ rst = memremap(rst_pa, RMP_SEGMENT_TABLE_SIZE, MEMREMAP_WB);
+ if (!rst) {
+ pr_err("Failed to map RMP segment table addr %#llx\n", rst_pa);
+ goto e_free;
+ }
+
+ /* Get the address for the end of system RAM */
+ ram_pa_max = max_pfn << PAGE_SHIFT;
+
+ /* Process each RMP segment */
+ max_index = 0;
+ ram_pa_end = 0;
+ for (i = 0; i < rst_max_index; i++) {
+ u64 rmp_segment, rmp_size, mapped_size;
+
+ mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]);
+ if (!mapped_size)
+ continue;
+
+ max_index = i;
+
+ /*
+ * Mapped size in GB. Mapped size is allowed to exceed the
+ * segment coverage size, but gets reduced to the segment
+ * coverage size.
+ */
+ mapped_size <<= 30;
+ if (mapped_size > rmp_segment_coverage_size) {
+ pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n",
+ i, mapped_size, rmp_segment_coverage_size);
+ mapped_size = rmp_segment_coverage_size;
+ }
+
+ rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]);
+
+ /* Calculate the RMP segment size (16 bytes/page mapped) */
+ rmp_size = PHYS_PFN(mapped_size);
+ rmp_size <<= 4;
+
+ pa = (u64)i << rmp_segment_coverage_shift;
+
+ /*
+ * Some segments may be for MMIO mapped above system RAM. These
+ * segments are used for Trusted I/O.
+ */
+ if (pa < ram_pa_max)
+ ram_pa_end = pa + mapped_size;
+
+ if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa))
+ goto e_unmap;
+
+ pr_info("RMP segment %u physical address [%#llx - %#llx] covering [%#llx - %#llx]\n",
+ i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1);
+ }
+
+ if (ram_pa_max > ram_pa_end) {
+ pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n",
+ ram_pa_max, ram_pa_end);
+ goto e_unmap;
+ }
+
+ /* Adjust the maximum index based on the found segments */
+ rst_max_index = max_index + 1;
+
+ memunmap(rst);
+
+ return true;
+
+e_unmap:
+ memunmap(rst);
+
+e_free:
+ free_rmp_segment_table();
+
+ return false;
+}
+
+static bool __init rmptable_setup(void)
+{
+ return RMP_IS_SEGMENTED(rmp_cfg) ? segmented_rmptable_setup()
+ : contiguous_rmptable_setup();
+}
+
+/*
+ * Do the necessary preparations which are verified by the firmware as
+ * described in the SNP_INIT_EX firmware command description in the SNP
+ * firmware ABI spec.
+ */
+static int __init snp_rmptable_init(void)
+{
+ unsigned int i;
+ u64 val;
+
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ return 0;
+
+ if (!amd_iommu_snp_en)
+ goto nosnp;
+
+ if (!rmptable_setup())
+ goto nosnp;
+
/*
* Check if SEV-SNP is already enabled, this can happen in case of
* kexec boot.
@@ -423,7 +578,7 @@ static void set_rmp_segment_info(unsigned int segment_shift)
#define RMP_ADDR_MASK GENMASK_ULL(51, 13)
-bool snp_probe_rmptable_info(void)
+static bool probe_contiguous_rmptable_info(void)
{
u64 rmp_sz, rmp_base, rmp_end;
@@ -456,6 +611,60 @@ bool snp_probe_rmptable_info(void)
return true;
}
+static bool probe_segmented_rmptable_info(void)
+{
+ unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max;
+ u64 rmp_base, rmp_end;
+
+ rdmsrl(MSR_AMD64_RMP_BASE, rmp_base);
+ rdmsrl(MSR_AMD64_RMP_END, rmp_end);
+
+ if (!(rmp_base & RMP_ADDR_MASK)) {
+ pr_err("Memory for the RMP table has not been reserved by BIOS\n");
+ return false;
+ }
+
+ WARN_ONCE(rmp_end & RMP_ADDR_MASK,
+ "Segmented RMP enabled but RMP_END MSR is non-zero\n");
+
+ /* Obtain the min and max supported RMP segment size */
+ eax = cpuid_eax(0x80000025);
+ segment_shift_min = eax & GENMASK(5, 0);
+ segment_shift_max = (eax & GENMASK(11, 6)) >> 6;
+
+ /* Verify the segment size is within the supported limits */
+ segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg);
+ if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) {
+ pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n",
+ segment_shift, segment_shift_min, segment_shift_max);
+ return false;
+ }
+
+ /* Override the max supported RST index if a hardware limit exists */
+ ebx = cpuid_ebx(0x80000025);
+ if (ebx & BIT(10))
+ rst_max_index = ebx & GENMASK(9, 0);
+
+ set_rmp_segment_info(segment_shift);
+
+ probed_rmp_base = rmp_base;
+ probed_rmp_size = 0;
+
+ pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n",
+ rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RMP_SEGMENT_TABLE_SIZE);
+
+ return true;
+}
+
+bool snp_probe_rmptable_info(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP))
+ rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg);
+
+ return RMP_IS_SEGMENTED(rmp_cfg) ? probe_segmented_rmptable_info()
+ : probe_contiguous_rmptable_info();
+}
+
static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
{
struct rmp_segment_desc *desc;
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 8/8] x86/sev/docs: Document the SNP Reverse Map Table (RMP)
2024-10-28 19:32 [PATCH v5 0/8] Provide support for RMPREAD and a segmented RMP Tom Lendacky
` (6 preceding siblings ...)
2024-10-28 19:32 ` [PATCH v5 7/8] x86/sev: Add full support for a segmented RMP table Tom Lendacky
@ 2024-10-28 19:32 ` Tom Lendacky
7 siblings, 0 replies; 17+ messages in thread
From: Tom Lendacky @ 2024-10-28 19:32 UTC (permalink / raw)
To: linux-kernel, x86
Cc: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen,
Michael Roth, Ashish Kalra, Nikunj A Dadhania, Neeraj Upadhyay
Update the AMD memory encryption documentation to include information on
the Reverse Map Table (RMP) and the two table formats.
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
.../arch/x86/amd-memory-encryption.rst | 118 ++++++++++++++++++
1 file changed, 118 insertions(+)
diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst
index 6df3264f23b9..bd840df708ea 100644
--- a/Documentation/arch/x86/amd-memory-encryption.rst
+++ b/Documentation/arch/x86/amd-memory-encryption.rst
@@ -130,8 +130,126 @@ SNP feature support.
More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR
+Reverse Map Table (RMP)
+=======================
+
+The RMP is a structure in system memory that is used to ensure a one-to-one
+mapping between system physical addresses and guest physical addresses. Each
+page of memory that is potentially assignable to guests has one entry within
+the RMP.
+
+The RMP table can be either contiguous in memory or a collection of segments
+in memory.
+
+Contiguous RMP
+--------------
+
+Support for this form of the RMP is present when support for SEV-SNP is
+present, which can be determined using the CPUID instruction::
+
+ 0x8000001f[eax]:
+ Bit[4] indicates support for SEV-SNP
+
+The location of the RMP is identified to the hardware through two MSRs::
+
+ 0xc0010132 (RMP_BASE):
+ System physical address of the first byte of the RMP
+
+ 0xc0010133 (RMP_END):
+ System physical address of the last byte of the RMP
+
+Hardware requires that RMP_BASE and (RPM_END + 1) be 8KB aligned, but SEV
+firmware increases the alignment requirement to require a 1MB alignment.
+
+The RMP consists of a 16KB region used for processor bookkeeping followed
+by the RMP entries, which are 16 bytes in size. The size of the RMP
+determines the range of physical memory that the hypervisor can assign to
+SEV-SNP guests. The RMP covers the system physical address from::
+
+ 0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB.
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR
+values to locate the RMP and determine the size of the RMP. The RMP must
+cover all of system memory in order for Linux to enable SEV-SNP.
+
+Segmented RMP
+-------------
+
+Segmented RMP support is a new way of representing the layout of an RMP.
+Initial RMP support required the RMP table to be contiguous in memory.
+RMP accesses from a NUMA node on which the RMP doesn't reside
+can take longer than accesses from a NUMA node on which the RMP resides.
+Segmented RMP support allows the RMP entries to be located on the same
+node as the memory the RMP is covering, potentially reducing latency
+associated with accessing an RMP entry associated with the memory. Each
+RMP segment covers a specific range of system physical addresses.
+
+Support for this form of the RMP can be determined using the CPUID
+instruction::
+
+ 0x8000001f[eax]:
+ Bit[23] indicates support for segmented RMP
+
+If supported, segmented RMP attributes can be found using the CPUID
+instruction::
+
+ 0x80000025[eax]:
+ Bits[5:0] minimum supported RMP segment size
+ Bits[11:6] maximum supported RMP segment size
+
+ 0x80000025[ebx]:
+ Bits[9:0] number of cacheable RMP segment definitions
+ Bit[10] indicates if the number of cacheable RMP segments
+ is a hard limit
+
+To enable a segmented RMP, a new MSR is available::
+
+ 0xc0010136 (RMP_CFG):
+ Bit[0] indicates if segmented RMP is enabled
+ Bits[13:8] contains the size of memory covered by an RMP
+ segment (expressed as a power of 2)
+
+The RMP segment size defined in the RMP_CFG MSR applies to all segments
+of the RMP. Therefore each RMP segment covers a specific range of system
+physical addresses. For example, if the RMP_CFG MSR value is 0x2401, then
+the RMP segment coverage value is 0x24 => 36, meaning the size of memory
+covered by an RMP segment is 64GB (1 << 36). So the first RMP segment
+covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment
+covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc.
+
+When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping
+area as it does today (16K in size). However, instead of RMP entries
+beginning immediately after the bookkeeping area, there is a 4K RMP
+segment table (RST). Each entry in the RST is 8-bytes in size and represents
+an RMP segment::
+
+ Bits[19:0] mapped size (in GB)
+ The mapped size can be less than the defined segment size.
+ A value of zero, indicates that no RMP exists for the range
+ of system physical addresses associated with this segment.
+ Bits[51:20] segment physical address
+ This address is left shift 20-bits (or just masked when
+ read) to form the physical address of the segment (1MB
+ alignment).
+
+The RST can hold 512 segment entries but can be limited in size to the number
+of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable
+RMP segments is a hard limit (CPUID 0x80000025_EBX[10]).
+
+The current Linux support relies on BIOS to allocate/reserve the memory for
+the segmented RMP (the bookkeeping area, RST, and all segments), build the RST
+and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR
+values to locate the RMP and determine the size and location of the RMP
+segments. The RMP must cover all of system memory in order for Linux to enable
+SEV-SNP.
+
+More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table",
+docID: 24593.
+
Secure VM Service Module (SVSM)
===============================
+
SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which
defines four privilege levels at which guest software can run. The most
privileged level is 0 and numerically higher numbers have lesser privileges.
--
2.46.2
^ permalink raw reply related [flat|nested] 17+ messages in thread