* [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() [not found] <20220222185740.26228-1-kirill.shutemov@linux.intel.com> @ 2022-02-23 4:35 ` Brijesh Singh 2022-02-23 11:31 ` Borislav Petkov 0 siblings, 1 reply; 8+ messages in thread From: Brijesh Singh @ 2022-02-23 4:35 UTC (permalink / raw) To: x86, linux-kernel, kvm, linux-coco Cc: Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Borislav Petkov, Michael Roth, Kirill A . Shutemov, Andi Kleen, Brijesh Singh The kernel provides infrastructure to set or clear the encryption mask from the pages for AMD SEV, but TDX requires few tweaks. - TDX and SEV have different requirements to the cache and tlb flushing. - TDX has own routine to notify VMM about page encryption status change. Modify __set_memory_enc_pgtable() and make it flexible enough to cover both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in callback under x86_platform.cc. TDX will provide own version of the callbacks. Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> --- Depends on Krill's CC cleanup https://lore.kernel.org/all/20220222185740.26228-1-kirill.shutemov@linux.intel.com/ arch/x86/include/asm/set_memory.h | 1 - arch/x86/include/asm/x86_init.h | 21 +++++++++ arch/x86/mm/mem_encrypt_amd.c | 75 ++++++++++++++++++++++--------- arch/x86/mm/pat/set_memory.c | 20 +++++---- 4 files changed, 85 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a..ce8dd215f5b3 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f6..dce92e2cb9e1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -141,6 +141,26 @@ struct x86_init_acpi { void (*reduced_hw_early_init)(void); }; +/** + * struct x86_cc_runtime - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range + * is changed. + * + * @enc_status_change_finish Notify HV after the encryption status of a range + * is changed. + * + * @enc_tlb_flush_required Flush the TLB before changing the encryption status. + * + * @enc_cache_flush_required Flush the caches before changing the encryption status. + */ +struct x86_cc_runtime { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + void (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -287,6 +307,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + const struct x86_cc_runtime *cc; }; struct x86_apic_ops { diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea345..22b86af5edf1 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !this_cpu_has(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,18 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +static void amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return; + + enc_dec_hypercall(vaddr, npages, enc); +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +395,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +413,35 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +static const struct x86_cc_runtime amd_cc_runtime = { + .enc_status_change_prepare = amd_enc_status_change_prepare, + .enc_status_change_finish = amd_enc_status_change_finish, + .enc_tlb_flush_required = amd_enc_tlb_flush_required, + .enc_cache_flush_required = amd_enc_cache_flush_required, +}; + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.cc = &amd_cc_runtime; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index af77dbfd143c..4de2a7509039 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1997,6 +1997,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr)) addr &= PAGE_MASK; + BUG_ON(!x86_platform.cc); + memset(&cpa, 0, sizeof(cpa)); cpa.vaddr = &addr; cpa.numpages = numpages; @@ -2008,10 +2010,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.cc->enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.cc->enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.cc->enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2024,11 +2028,9 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) + x86_platform.cc->enc_status_change_finish(addr, numpages, enc); return ret; } -- 2.25.1 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 4:35 ` [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() Brijesh Singh @ 2022-02-23 11:31 ` Borislav Petkov 2022-02-23 11:55 ` Kirill A. Shutemov 0 siblings, 1 reply; 8+ messages in thread From: Borislav Petkov @ 2022-02-23 11:31 UTC (permalink / raw) To: Brijesh Singh Cc: x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Kirill A . Shutemov, Andi Kleen On Tue, Feb 22, 2022 at 10:35:28PM -0600, Brijesh Singh wrote: > Depends on Krill's CC cleanup > https://lore.kernel.org/all/20220222185740.26228-1-kirill.shutemov@linux.intel.com/ I've massaged it into what it should be, see below. The BUG_ON() is gone because we don't do BUG_ONs - if you had used checkpatch, it would've told you. So I've added noops like that x86_init stuff is usually done. Anyway, Kirill, your turn. Is the below enough for TDX? --- From daa6fb150e495511c699820bce925d89e55e96d4 Mon Sep 17 00:00:00 2001 From: Brijesh Singh <brijesh.singh@amd.com> Date: Tue, 22 Feb 2022 22:35:28 -0600 Subject: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() The kernel provides infrastructure to set or clear the encryption mask from the pages for AMD SEV, but TDX requires few tweaks. - TDX and SEV have different requirements to the cache and TLB flushing. - TDX has own routine to notify VMM about page encryption status change. Modify __set_memory_enc_pgtable() and make it flexible enough to cover both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in callback under x86_platform_cc. TDX will provide own version of the callbacks. [ bp: Beat into submission. ] Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> Signed-off-by: Borislav Petkov <bp@suse.de> Link: https://lore.kernel.org/r/20220223043528.2093214-1-brijesh.singh@amd.com --- arch/x86/include/asm/set_memory.h | 1 - arch/x86/include/asm/x86_init.h | 16 +++++++ arch/x86/kernel/x86_init.c | 16 ++++++- arch/x86/mm/mem_encrypt_amd.c | 71 +++++++++++++++++++++---------- arch/x86/mm/pat/set_memory.c | 18 ++++---- 5 files changed, 88 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a..ce8dd215f5b3 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f6..dfdcd3152f8d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -141,6 +141,21 @@ struct x86_init_acpi { void (*reduced_hw_early_init)(void); }; +/** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + void (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -287,6 +302,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + struct x86_guest guest; }; struct x86_apic_ops { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7d20c1d34a3c..bb57410dde2d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; +static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static void enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_tlb_flush_required_noop(bool enc) { return false; } +static bool enc_cache_flush_required_noop(void) { return false; } + struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, @@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = tsc_restore_sched_clock_state, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, .hyper.pin_vcpu = x86_op_int_noop, + + .guest = { + .enc_status_change_prepare = enc_status_change_prepare_noop, + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, + }, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea345..4c57c8988f37 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,18 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +static void amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return; + + enc_dec_hypercall(vaddr, npages, enc); +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +395,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +413,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; + x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; + x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index af77dbfd143c..92c26828265c 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2008,10 +2008,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.guest.enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2024,11 +2026,9 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) + x86_platform.guest.enc_status_change_finish(addr, numpages, enc); return ret; } -- 2.29.2 -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 11:31 ` Borislav Petkov @ 2022-02-23 11:55 ` Kirill A. Shutemov 2022-02-23 12:13 ` Borislav Petkov 0 siblings, 1 reply; 8+ messages in thread From: Kirill A. Shutemov @ 2022-02-23 11:55 UTC (permalink / raw) To: Borislav Petkov Cc: Brijesh Singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On Wed, Feb 23, 2022 at 12:31:56PM +0100, Borislav Petkov wrote: > @@ -2024,11 +2026,9 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) > */ > cpa_flush(&cpa, 0); > > - /* > - * Notify hypervisor that a given memory range is mapped encrypted > - * or decrypted. > - */ > - notify_range_enc_status_changed(addr, numpages, enc); > + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ > + if (!ret) > + x86_platform.guest.enc_status_change_finish(addr, numpages, enc); > > return ret; > } This operation can fail for TDX. We need to be able to return error code here: /* Notify hypervisor that we have successfully set/clr encryption attribute. */ if (!ret) ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc); -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 11:55 ` Kirill A. Shutemov @ 2022-02-23 12:13 ` Borislav Petkov 2022-02-23 12:25 ` Kirill A. Shutemov 0 siblings, 1 reply; 8+ messages in thread From: Borislav Petkov @ 2022-02-23 12:13 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Brijesh Singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On Wed, Feb 23, 2022 at 02:55:39PM +0300, Kirill A. Shutemov wrote: > This operation can fail for TDX. We need to be able to return error code > here: > /* Notify hypervisor that we have successfully set/clr encryption attribute. */ > if (!ret) > ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc); bool to state failure/success or you need to return a specific value? -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 12:13 ` Borislav Petkov @ 2022-02-23 12:25 ` Kirill A. Shutemov 2022-02-23 12:38 ` Borislav Petkov 0 siblings, 1 reply; 8+ messages in thread From: Kirill A. Shutemov @ 2022-02-23 12:25 UTC (permalink / raw) To: Borislav Petkov Cc: Brijesh Singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On Wed, Feb 23, 2022 at 01:13:03PM +0100, Borislav Petkov wrote: > On Wed, Feb 23, 2022 at 02:55:39PM +0300, Kirill A. Shutemov wrote: > > This operation can fail for TDX. We need to be able to return error code > > here: > > /* Notify hypervisor that we have successfully set/clr encryption attribute. */ > > if (!ret) > > ret = x86_platform.guest.enc_status_change_finish(addr, numpages, enc); > > bool to state failure/success or you need to return a specific value? So far it is only success or failure. I used int and -EIO as failure. bool is enough, but I don't see a reason not to use int. -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 12:25 ` Kirill A. Shutemov @ 2022-02-23 12:38 ` Borislav Petkov 2022-02-23 12:54 ` Kirill A. Shutemov 2022-02-23 14:33 ` Brijesh Singh 0 siblings, 2 replies; 8+ messages in thread From: Borislav Petkov @ 2022-02-23 12:38 UTC (permalink / raw) To: Kirill A. Shutemov Cc: Brijesh Singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On Wed, Feb 23, 2022 at 03:25:08PM +0300, Kirill A. Shutemov wrote: > So far it is only success or failure. I used int and -EIO as failure. > bool is enough, but I don't see a reason not to use int. bool it is. --- From 8855bca859d8768ac04bfcf5b4aeb9cf3c69295a Mon Sep 17 00:00:00 2001 From: Brijesh Singh <brijesh.singh@amd.com> Date: Tue, 22 Feb 2022 22:35:28 -0600 Subject: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() The kernel provides infrastructure to set or clear the encryption mask from the pages for AMD SEV, but TDX requires few tweaks. - TDX and SEV have different requirements to the cache and TLB flushing. - TDX has own routine to notify VMM about page encryption status change. Modify __set_memory_enc_pgtable() and make it flexible enough to cover both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in callback under x86_platform_cc. TDX will provide own version of the callbacks. [ bp: Beat into submission. ] Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> Signed-off-by: Borislav Petkov <bp@suse.de> Link: https://lore.kernel.org/r/20220223043528.2093214-1-brijesh.singh@amd.com --- arch/x86/include/asm/set_memory.h | 1 - arch/x86/include/asm/x86_init.h | 16 +++++++ arch/x86/kernel/x86_init.c | 16 ++++++- arch/x86/mm/mem_encrypt_amd.c | 72 +++++++++++++++++++++---------- arch/x86/mm/pat/set_memory.c | 22 +++++----- 5 files changed, 92 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a..ce8dd215f5b3 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -84,7 +84,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f6..e9170457697e 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -141,6 +141,21 @@ struct x86_init_acpi { void (*reduced_hw_early_init)(void); }; +/** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + /** * struct x86_init_ops - functions for platform specific setup * @@ -287,6 +302,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + struct x86_guest guest; }; struct x86_apic_ops { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7d20c1d34a3c..e84ee5cdbd8c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; +static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_tlb_flush_required_noop(bool enc) { return false; } +static bool enc_cache_flush_required_noop(void) { return false; } + struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, @@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = tsc_restore_sched_clock_state, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, .hyper.pin_vcpu = x86_op_int_noop, + + .guest = { + .enc_status_change_prepare = enc_status_change_prepare_noop, + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, + }, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea345..9619a5833811 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + return false; + + enc_dec_hypercall(vaddr, npages, enc); + return true; +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; + x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; + x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index af77dbfd143c..3b75262cfb27 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1989,8 +1989,8 @@ int set_memory_global(unsigned long addr, int numpages) */ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) { - struct cpa_data cpa; pgprot_t empty = __pgprot(0); + struct cpa_data cpa; int ret; /* Should not be working on unaligned addresses */ @@ -2008,10 +2008,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.guest.enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2024,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) { + if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) + ret = -EIO; + } return ret; } -- 2.29.2 -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 12:38 ` Borislav Petkov @ 2022-02-23 12:54 ` Kirill A. Shutemov 2022-02-23 14:33 ` Brijesh Singh 1 sibling, 0 replies; 8+ messages in thread From: Kirill A. Shutemov @ 2022-02-23 12:54 UTC (permalink / raw) To: Borislav Petkov Cc: Brijesh Singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On Wed, Feb 23, 2022 at 01:38:03PM +0100, Borislav Petkov wrote: > On Wed, Feb 23, 2022 at 03:25:08PM +0300, Kirill A. Shutemov wrote: > > So far it is only success or failure. I used int and -EIO as failure. > > bool is enough, but I don't see a reason not to use int. > > bool it is. > > --- > From 8855bca859d8768ac04bfcf5b4aeb9cf3c69295a Mon Sep 17 00:00:00 2001 > From: Brijesh Singh <brijesh.singh@amd.com> > Date: Tue, 22 Feb 2022 22:35:28 -0600 > Subject: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() > > The kernel provides infrastructure to set or clear the encryption mask > from the pages for AMD SEV, but TDX requires few tweaks. > > - TDX and SEV have different requirements to the cache and TLB > flushing. > > - TDX has own routine to notify VMM about page encryption status change. > > Modify __set_memory_enc_pgtable() and make it flexible enough to cover > both AMD SEV and Intel TDX. The AMD-specific behavior is isolated in > callback under x86_platform_cc. TDX will provide own version of the "under x86_platform.guest" > callbacks. > > [ bp: Beat into submission. ] > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Borislav Petkov <bp@suse.de> > Link: https://lore.kernel.org/r/20220223043528.2093214-1-brijesh.singh@amd.com Otherwise, LGTM: Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> -- Kirill A. Shutemov ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() 2022-02-23 12:38 ` Borislav Petkov 2022-02-23 12:54 ` Kirill A. Shutemov @ 2022-02-23 14:33 ` Brijesh Singh 1 sibling, 0 replies; 8+ messages in thread From: Brijesh Singh @ 2022-02-23 14:33 UTC (permalink / raw) To: Borislav Petkov, Kirill A. Shutemov Cc: brijesh.singh, x86, linux-kernel, kvm, linux-coco, Thomas Gleixner, Ingo Molnar, Joerg Roedel, Tom Lendacky, H. Peter Anvin, Paolo Bonzini, Sean Christopherson, Andy Lutomirski, Dave Hansen, Peter Gonda, Peter Zijlstra, David Rientjes, Michael Roth, Andi Kleen On 2/23/22 06:38, Borislav Petkov wrote: ... > +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) > +{ > +} > + > +static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) > +{ > + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) > + return false; ^^^^^ We should return true otherwise set_memory_{decrypt,encrypt} will fail for the SME case. thanks ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-02-23 14:33 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20220222185740.26228-1-kirill.shutemov@linux.intel.com>
2022-02-23 4:35 ` [PATCH] x86/mm/cpa: Generalize __set_memory_enc_pgtable() Brijesh Singh
2022-02-23 11:31 ` Borislav Petkov
2022-02-23 11:55 ` Kirill A. Shutemov
2022-02-23 12:13 ` Borislav Petkov
2022-02-23 12:25 ` Kirill A. Shutemov
2022-02-23 12:38 ` Borislav Petkov
2022-02-23 12:54 ` Kirill A. Shutemov
2022-02-23 14:33 ` Brijesh Singh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox