* [PATCH RFC v2 1/3] mm: Add f_op->populate() for populating memory outside of core mm
2022-03-08 11:10 [PATCH RFC v2 0/3] MAP_POPULATE for device memory Jarkko Sakkinen
@ 2022-03-08 11:10 ` Jarkko Sakkinen
2022-03-08 11:16 ` Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 2/3] x86/sgx: Export sgx_encl_page_alloc() Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 3/3] x86/sgx: Implement EAUG population with MAP_POPULATE Jarkko Sakkinen
2 siblings, 1 reply; 5+ messages in thread
From: Jarkko Sakkinen @ 2022-03-08 11:10 UTC (permalink / raw)
To: linux-mm, linux-fsdevel
Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, Alexander Viro,
linux-sgx, linux-kernel, Andrew Morton, Jarkko Sakkinen
SGX memory is managed outside the core mm. It doesn't have a 'struct
page' and get_user_pages() doesn't work on it. Its VMAs are marked with
VM_IO. So, none of the existing methods for avoiding page faults work
on SGX memory.
Add f_op->populate() to overcome this issue:
int (*populate)(struct file *, unsigned long start, unsigned long end);
Then in populate_vma_page_range(), allow it to be used in place of
get_user_pages() for memory that falls outside of its scope.
Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
v4:
* Reimplement based on Dave's suggestion:
https://lore.kernel.org/linux-sgx/c3083144-bfc1-3260-164c-e59b2d110df8@intel.com/
* Copy the text from the suggestion as part of the commit message (and
cover letter).
v3:
- if (!ret && do_populate && file->f_op->populate)
+ if (!ret && do_populate && file->f_op->populate &&
+ !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
(reported by Matthew Wilcox)
v2:
- if (!ret && do_populate)
+ if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
include/linux/fs.h | 1 +
mm/gup.c | 9 +++++----
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..54151af88ee0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+ int (*populate)(struct file *, unsigned long start, unsigned long end);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
diff --git a/mm/gup.c b/mm/gup.c
index a9d4d724aef7..66736a188a9c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1519,8 +1519,11 @@ long populate_vma_page_range(struct vm_area_struct *vma,
* We made sure addr is within a VMA, so the following will
* not result in a stack expansion that recurses back here.
*/
- return __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, NULL, locked);
+ if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) && vma->vm_file->f_op->populate)
+ return vma->vm_file->f_op->populate(vma->vm_file, start, end);
+ else
+ return __get_user_pages(mm, start, nr_pages, gup_flags,
+ NULL, NULL, locked);
}
/*
@@ -1619,8 +1622,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
* range with the first VMA. Also, skip undesirable VMA types.
*/
nend = min(end, vma->vm_end);
- if (vma->vm_flags & (VM_IO | VM_PFNMAP))
- continue;
if (nstart < vma->vm_start)
nstart = vma->vm_start;
/*
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH RFC v2 1/3] mm: Add f_op->populate() for populating memory outside of core mm
2022-03-08 11:10 ` [PATCH RFC v2 1/3] mm: Add f_op->populate() for populating memory outside of core mm Jarkko Sakkinen
@ 2022-03-08 11:16 ` Jarkko Sakkinen
0 siblings, 0 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2022-03-08 11:16 UTC (permalink / raw)
To: linux-mm, linux-fsdevel
Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, Alexander Viro,
linux-sgx, linux-kernel, Andrew Morton
On Tue, Mar 08, 2022 at 01:10:01PM +0200, Jarkko Sakkinen wrote:
> @@ -1619,8 +1622,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
> * range with the first VMA. Also, skip undesirable VMA types.
> */
> nend = min(end, vma->vm_end);
> - if (vma->vm_flags & (VM_IO | VM_PFNMAP))
> - continue;
> if (nstart < vma->vm_start)
> nstart = vma->vm_start;
> /*
This was unintended, i.e. it leaked into the patch by mistake. I'll send an
update. There was one diff missing from the staging area.
BR, Jarkko
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH RFC v2 2/3] x86/sgx: Export sgx_encl_page_alloc()
2022-03-08 11:10 [PATCH RFC v2 0/3] MAP_POPULATE for device memory Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 1/3] mm: Add f_op->populate() for populating memory outside of core mm Jarkko Sakkinen
@ 2022-03-08 11:10 ` Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 3/3] x86/sgx: Implement EAUG population with MAP_POPULATE Jarkko Sakkinen
2 siblings, 0 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2022-03-08 11:10 UTC (permalink / raw)
To: linux-mm, linux-fsdevel
Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, Alexander Viro,
linux-sgx, linux-kernel, Andrew Morton, Jarkko Sakkinen
Move sgx_encl_page_alloc() to encl.c and export it so that it can be
used in the implementation for MAP_POPULATE, which requires allocating
new enclave pages.
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
arch/x86/kernel/cpu/sgx/encl.c | 38 +++++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/sgx/encl.h | 3 +++
arch/x86/kernel/cpu/sgx/ioctl.c | 38 ---------------------------------
3 files changed, 41 insertions(+), 38 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index f24a41d3ec70..0256918b2c2f 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -913,6 +913,44 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
return ret;
}
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags)
+{
+ struct sgx_encl_page *encl_page;
+ unsigned long prot;
+
+ encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+ if (!encl_page)
+ return ERR_PTR(-ENOMEM);
+
+ encl_page->desc = encl->base + offset;
+ encl_page->encl = encl;
+
+ prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
+ _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+ _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+ /*
+ * TCS pages must always RW set for CPU access while the SECINFO
+ * permissions are *always* zero - the CPU ignores the user provided
+ * values and silently overwrites them with zero permissions.
+ */
+ if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+ prot |= PROT_READ | PROT_WRITE;
+
+ /* Calculate maximum of the VM flags for the page. */
+ encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+ /*
+ * At time of allocation, the runtime protection bits are the same
+ * as the maximum protection bits.
+ */
+ encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
+
+ return encl_page;
+}
+
/**
* sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
* @encl: the enclave
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 1b6ce1da7c92..3df0d3faf3a1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -113,6 +113,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
int sgx_encl_test_and_clear_young(struct mm_struct *mm,
struct sgx_encl_page *page);
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags);
void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
struct sgx_epc_page *sgx_alloc_va_page(void);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index d8c3c07badb3..3e3ca27a6f72 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -169,44 +169,6 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
return ret;
}
-static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
- unsigned long offset,
- u64 secinfo_flags)
-{
- struct sgx_encl_page *encl_page;
- unsigned long prot;
-
- encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
- if (!encl_page)
- return ERR_PTR(-ENOMEM);
-
- encl_page->desc = encl->base + offset;
- encl_page->encl = encl;
-
- prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) |
- _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
- _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
-
- /*
- * TCS pages must always RW set for CPU access while the SECINFO
- * permissions are *always* zero - the CPU ignores the user provided
- * values and silently overwrites them with zero permissions.
- */
- if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
- prot |= PROT_READ | PROT_WRITE;
-
- /* Calculate maximum of the VM flags for the page. */
- encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
-
- /*
- * At time of allocation, the runtime protection bits are the same
- * as the maximum protection bits.
- */
- encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
-
- return encl_page;
-}
-
static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
{
u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH RFC v2 3/3] x86/sgx: Implement EAUG population with MAP_POPULATE
2022-03-08 11:10 [PATCH RFC v2 0/3] MAP_POPULATE for device memory Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 1/3] mm: Add f_op->populate() for populating memory outside of core mm Jarkko Sakkinen
2022-03-08 11:10 ` [PATCH RFC v2 2/3] x86/sgx: Export sgx_encl_page_alloc() Jarkko Sakkinen
@ 2022-03-08 11:10 ` Jarkko Sakkinen
2 siblings, 0 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2022-03-08 11:10 UTC (permalink / raw)
To: linux-mm, linux-fsdevel
Cc: Dave Hansen, Nathaniel McCallum, Reinette Chatre, Alexander Viro,
linux-sgx, linux-kernel, Andrew Morton, Jarkko Sakkinen
With SGX1 an enclave needs to be created with its maximum memory demands
pre-allocated. Pages cannot be added to an enclave after it is initialized.
SGX2 introduces a new function, ENCLS[EAUG] for adding pages to an
initialized enclave.
Add support for dynamically adding pages to an initialized enclave with
mmap() by populating pages with EAUG. Use the f_op->populate() callback
to achieve this behaviour.
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
arch/x86/kernel/cpu/sgx/driver.c | 128 +++++++++++++++++++++++++++++++
1 file changed, 128 insertions(+)
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..848938334e8a 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -9,6 +9,7 @@
#include <asm/traps.h>
#include "driver.h"
#include "encl.h"
+#include "encls.h"
u64 sgx_attributes_reserved_mask;
u64 sgx_xfrm_reserved_mask = ~0x3;
@@ -101,6 +102,132 @@ static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+static int sgx_encl_augment_page(struct sgx_encl *encl, unsigned long offset)
+{
+ struct sgx_pageinfo pginfo = {0};
+ struct sgx_encl_page *encl_page;
+ struct sgx_epc_page *epc_page;
+ struct sgx_va_page *va_page;
+ u64 secinfo_flags;
+ int ret;
+
+ /*
+ * Ignore internal permission checking for dynamically added pages.
+ * They matter only for data added during the pre-initialization phase.
+ * The enclave decides the permissions by the means of EACCEPT,
+ * EACCEPTCOPY and EMODPE.
+ */
+ secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+ encl_page = sgx_encl_page_alloc(encl, offset, secinfo_flags);
+ if (IS_ERR(encl_page))
+ return PTR_ERR(encl_page);
+
+ epc_page = sgx_alloc_epc_page(encl_page, true);
+ if (IS_ERR(epc_page)) {
+ ret = PTR_ERR(epc_page);
+ goto err_alloc_epc_page;
+ }
+
+ va_page = sgx_encl_grow(encl);
+ if (IS_ERR(va_page)) {
+ ret = PTR_ERR(va_page);
+ goto err_grow;
+ }
+
+ mutex_lock(&encl->lock);
+
+ /*
+ * Adding to encl->va_pages must be done under encl->lock. Ditto for
+ * deleting (via sgx_encl_shrink()) in the error path.
+ */
+ if (va_page)
+ list_add(&va_page->list, &encl->va_pages);
+
+ /*
+ * Insert prior to EADD in case of OOM. EADD modifies MRENCLAVE, i.e.
+ * can't be gracefully unwound, while failure on EADD/EXTEND is limited
+ * to userspace errors (or kernel/hardware bugs).
+ */
+ ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
+ encl_page, GFP_KERNEL);
+
+ /*
+ * If ret == -EBUSY then page was created in another flow while
+ * running without encl->lock
+ */
+ if (ret)
+ goto err_xa_insert;
+
+ pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+ pginfo.addr = encl_page->desc & PAGE_MASK;
+ pginfo.metadata = 0;
+
+ ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
+ if (ret)
+ goto err_eaug;
+
+ encl_page->encl = encl;
+ encl_page->epc_page = epc_page;
+ encl_page->type = SGX_PAGE_TYPE_REG;
+ encl->secs_child_cnt++;
+
+ sgx_mark_page_reclaimable(encl_page->epc_page);
+
+ mutex_unlock(&encl->lock);
+
+ return 0;
+
+err_eaug:
+ xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
+
+err_xa_insert:
+ sgx_encl_shrink(encl, va_page);
+ mutex_unlock(&encl->lock);
+
+err_grow:
+ sgx_encl_free_epc_page(epc_page);
+
+err_alloc_epc_page:
+ kfree(encl_page);
+
+ return VM_FAULT_SIGBUS;
+}
+
+/*
+ * Add new pages to the enclave sequentially with ENCLS[EAUG]. Note that
+ * sgx_mmap() validates that the given VMA is within the enclave range. Calling
+ * here sgx_encl_may_map() second time would too time consuming.
+ */
+static int sgx_populate(struct file *file, unsigned long start, unsigned long end)
+{
+ struct sgx_encl *encl = file->private_data;
+ unsigned long length = end - start;
+ unsigned long pos;
+ int ret;
+
+ /* EAUG works only for initialized enclaves. */
+ if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+ return -EINVAL;
+
+ for (pos = 0 ; pos < length; pos += PAGE_SIZE) {
+ if (signal_pending(current)) {
+ if (!pos)
+ ret = -ERESTARTSYS;
+
+ break;
+ }
+
+ if (need_resched())
+ cond_resched();
+
+ ret = sgx_encl_augment_page(encl, start + pos);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
static unsigned long sgx_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len,
@@ -133,6 +260,7 @@ static const struct file_operations sgx_encl_fops = {
.compat_ioctl = sgx_compat_ioctl,
#endif
.mmap = sgx_mmap,
+ .populate = sgx_populate,
.get_unmapped_area = sgx_get_unmapped_area,
};
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread