From: Sean Christopherson <sean.j.christopherson@intel.com>
To: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Cc: linux-sgx@vger.kernel.org,
Shay Katz-zamir <shay.katz-zamir@intel.com>,
Serge Ayoun <serge.ayoun@intel.com>
Subject: Re: [PATCH v3 17/17] x86/sgx: Fix pages in the BLOCKED state ending up to the free pool
Date: Tue, 17 Sep 2019 16:34:35 -0700 [thread overview]
Message-ID: <20190917233435.GI10319@linux.intel.com> (raw)
In-Reply-To: <20190916101803.30726-18-jarkko.sakkinen@linux.intel.com>
On Mon, Sep 16, 2019 at 01:18:03PM +0300, Jarkko Sakkinen wrote:
> A blocked page can end up legitimately in the free pool if pinning fails because
> we interpret that as an EWB failure and simply put it to the free pool.
> This corrupts the EPC page allocator.
>
> Fix the bug by pinning the backing storage when picking the victim pages. A
> clean rollback can still be done when the memory allocation fails as pages
> can still be returned to the enclave.
>
> This in effect removes any other failure cases from sgx_encl_ewb() other
> than EPCM conflict when the host has gone through a sleep cycle. In that
> case putting a page back to the free pool is perfectly fine because it is
> uninitialized.
>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Shay Katz-zamir <shay.katz-zamir@intel.com>
> Cc: Serge Ayoun <serge.ayoun@intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
> arch/x86/kernel/cpu/sgx/reclaim.c | 95 ++++++++++++++++++-------------
> 1 file changed, 57 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> index 7d628a1388e2..d6e580e55456 100644
> --- a/arch/x86/kernel/cpu/sgx/reclaim.c
> +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> @@ -206,32 +206,24 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
>
> static int __sgx_encl_ewb(struct sgx_encl *encl, struct sgx_epc_page *epc_page,
> struct sgx_va_page *va_page, unsigned int va_offset,
> - unsigned int page_index)
> + struct sgx_backing *backing)
> {
> struct sgx_pageinfo pginfo;
> - struct sgx_backing b;
> int ret;
>
> - ret = sgx_encl_get_backing(encl, page_index, &b);
> - if (ret)
> - return ret;
> -
> pginfo.addr = 0;
> - pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> - pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + b.pcmd_offset;
> pginfo.secs = 0;
> +
> + pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> + pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> + backing->pcmd_offset;
> +
> ret = __ewb(&pginfo, sgx_epc_addr(epc_page),
> sgx_epc_addr(va_page->epc_page) + va_offset);
> - kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
> - kunmap_atomic((void *)(unsigned long)pginfo.contents);
>
> - if (!ret) {
> - set_page_dirty(b.pcmd);
> - set_page_dirty(b.contents);
> - }
> -
> - put_page(b.pcmd);
> - put_page(b.contents);
> + kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> + backing->pcmd_offset));
> + kunmap_atomic((void *)(unsigned long)pginfo.contents);
>
> return ret;
> }
> @@ -265,7 +257,7 @@ static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
> }
>
> static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
> - unsigned int page_index)
> + struct sgx_backing *backing)
> {
> struct sgx_encl_page *encl_page = epc_page->owner;
> struct sgx_encl *encl = encl_page->encl;
> @@ -281,8 +273,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
> if (sgx_va_page_full(va_page))
> list_move_tail(&va_page->list, &encl->va_pages);
>
> - ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset,
> - page_index);
> + ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset, backing);
> if (ret == SGX_NOT_TRACKED) {
> ret = __etrack(sgx_epc_addr(encl->secs.epc_page));
> if (ret) {
> @@ -292,7 +283,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
> }
>
> ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset,
> - page_index);
> + backing);
> if (ret == SGX_NOT_TRACKED) {
> /*
> * Slow path, send IPIs to kick cpus out of the
> @@ -304,7 +295,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
> on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
> sgx_ipi_cb, NULL, 1);
> ret = __sgx_encl_ewb(encl, epc_page, va_page,
> - va_offset, page_index);
> + va_offset, backing);
> }
> }
>
> @@ -314,15 +305,20 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
>
> sgx_encl_destroy(encl);
> } else {
> + set_page_dirty(backing->pcmd);
> + set_page_dirty(backing->contents);
> +
> encl_page->desc |= va_offset;
> encl_page->va_page = va_page;
> }
> }
>
> -static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
> +static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
> + struct sgx_backing *backing)
> {
> struct sgx_encl_page *encl_page = epc_page->owner;
> struct sgx_encl *encl = encl_page->encl;
> + struct sgx_backing secs_backing;
> int ret;
>
> mutex_lock(&encl->lock);
> @@ -331,7 +327,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
> ret = __eremove(sgx_epc_addr(epc_page));
> WARN(ret, "EREMOVE returned %d\n", ret);
> } else {
> - sgx_encl_ewb(epc_page, SGX_ENCL_PAGE_INDEX(encl_page));
> + sgx_encl_ewb(epc_page, backing);
> }
>
> encl_page->epc_page = NULL;
> @@ -340,10 +336,17 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
> if (!encl->secs_child_cnt &&
> (atomic_read(&encl->flags) &
> (SGX_ENCL_DEAD | SGX_ENCL_INITIALIZED))) {
> - sgx_encl_ewb(encl->secs.epc_page, PFN_DOWN(encl->size));
> - sgx_free_page(encl->secs.epc_page);
> + ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
> + &secs_backing);
> + if (!ret) {
> + sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
> + sgx_free_page(encl->secs.epc_page);
> +
> + encl->secs.epc_page = NULL;
>
> - encl->secs.epc_page = NULL;
> + put_page(secs_backing.pcmd);
> + put_page(secs_backing.contents);
> + }
> }
>
> mutex_unlock(&encl->lock);
> @@ -351,17 +354,21 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
>
> /**
> * sgx_reclaim_pages() - Reclaim EPC pages from the consumers
> - * Takes a fixed chunk of pages from the global list of consumed EPC pages and
> - * tries to swap them. Only the pages that are either being freed by the
> - * consumer or actively used are skipped.
> + *
> + * Take a fixed number of pages from the head of the active page pool and
> + * reclaim them to the enclave's private shmem files. Skip the pages, which
> + * have been accessed since the last scan. Move those pages to the tail of
> + * active page pool so that the pages get scanned in LRU like fashion.
> */
> void sgx_reclaim_pages(void)
> {
> - struct sgx_epc_page *chunk[SGX_NR_TO_SCAN + 1];
> + struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> + struct sgx_backing backing[SGX_NR_TO_SCAN];
> struct sgx_epc_section *section;
> struct sgx_encl_page *encl_page;
> struct sgx_epc_page *epc_page;
> int cnt = 0;
> + int ret;
> int i;
>
> spin_lock(&sgx_active_page_list_lock);
> @@ -388,13 +395,21 @@ void sgx_reclaim_pages(void)
> epc_page = chunk[i];
> encl_page = epc_page->owner;
>
> - if (sgx_can_reclaim(epc_page)) {
> - mutex_lock(&encl_page->encl->lock);
> - encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
> - mutex_unlock(&encl_page->encl->lock);
> - continue;
> - }
> + if (!sgx_can_reclaim(epc_page))
Would it make sense to use a more explicit name for sgx_can_reclaim(),
e.g. sgx_age_epc_page() or something? "can reclaim" makes it sound like
there are scenarios where reclaim is impossible, but really it's just that
we don't want to reclaim a recently accessed page.
> + goto skip;
>
> + ret = sgx_encl_get_backing(encl_page->encl,
> + SGX_ENCL_PAGE_INDEX(encl_page),
> + &backing[i]);
> + if (ret)
> + goto skip;
> +
> + mutex_lock(&encl_page->encl->lock);
> + encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
> + mutex_unlock(&encl_page->encl->lock);
> + continue;
> +
> +skip:
Eww. The call to sgx_encl_get_backing() makes it rather ugly no matter
what, but this seems slightly less ugly:
for (i = 0; i < cnt; i++) {
epc_page = chunk[i];
encl_page = epc_page->owner;
if (!sgx_can_reclaim(chunk[i]) ||
sgx_encl_get_backing(encl_page->encl,
SGX_ENCL_PAGE_INDEX(encl_page),
				 &backing[i])) {
kref_put(&encl_page->encl->refcount, sgx_encl_release);
spin_lock(&sgx_active_page_list_lock);
list_add_tail(&epc_page->list, &sgx_active_page_list);
spin_unlock(&sgx_active_page_list_lock);
chunk[i] = NULL;
continue;
}
mutex_lock(&encl_page->encl->lock);
encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
mutex_unlock(&encl_page->encl->lock);
}
> kref_put(&encl_page->encl->refcount, sgx_encl_release);
>
> spin_lock(&sgx_active_page_list_lock);
> @@ -416,7 +431,11 @@ void sgx_reclaim_pages(void)
> continue;
>
> encl_page = epc_page->owner;
> - sgx_reclaimer_write(epc_page);
> + sgx_reclaimer_write(epc_page, &backing[i]);
> +
> + put_page(backing->pcmd);
> + put_page(backing->contents);
These should be backing[i].pcmd and backing[i].contents — backing is an
array of structs, so use '.' (member access), not '->'.
> +
> kref_put(&encl_page->encl->refcount, sgx_encl_release);
> epc_page->desc &= ~SGX_EPC_PAGE_RECLAIMABLE;
>
> --
> 2.20.1
>
next prev parent reply other threads:[~2019-09-17 23:34 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-16 10:17 [PATCH v3 00/17] Fixes and updates for v23 Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 01/17] selftest/x86/sgx: Remove encl_piggy.h Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 02/17] x86/sgx: Clean up internal includes Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 03/17] x86/sgx: Write backing storage only if EWB is successful Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 04/17] x86/sgx: Rename 'j' as 'cnt' in sgx_reclaim_pages() Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 05/17] x86/sgx: Turn encls_failed() as inline function Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 06/17] x86/sgx: Move sgx_einit() to encls.c Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 07/17] x86/sgx: Remove pages in sgx_reclaimer_write() Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 08/17] x86/sgx: Calculate page index " Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 09/17] x86/sgx: Move SGX_ENCL_DEAD check to sgx_reclaimer_write() Jarkko Sakkinen
2019-09-17 23:13 ` Sean Christopherson
2019-09-18 4:15 ` Jarkko Sakkinen
2019-09-17 23:21 ` Sean Christopherson
2019-09-18 4:16 ` Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 10/17] x86/sgx: Free VA slot when the EWB flow fails Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 11/17] x86/sgx: Call sgx_encl_destroy() " Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 12/17] x86/sgx: Open code sgx_reclaimer_get() and sgx_reclaimer_put() Jarkko Sakkinen
2019-09-17 23:07 ` Sean Christopherson
2019-09-18 4:12 ` Jarkko Sakkinen
2019-09-20 13:38 ` Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 13/17] x86/sgx: Introduce sgx_can_reclaim() Jarkko Sakkinen
2019-09-17 23:25 ` Sean Christopherson
2019-09-25 18:28 ` Sean Christopherson
2019-09-27 15:33 ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 14/17] x86/sgx: Replace section->free_cnt with a global sgx_nr_free_pages Jarkko Sakkinen
2019-09-17 22:50 ` Sean Christopherson
2019-09-18 4:07 ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 15/17] x86/sgx: sgx_vma_access(): Do not return -ECANCELED on invalid TCS pages Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 16/17] x86/sgx: Introduce sgx_encl_get_backing() Jarkko Sakkinen
2019-09-17 23:05 ` Sean Christopherson
2019-09-18 4:10 ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 17/17] x86/sgx: Fix pages in the BLOCKED state ending up to the free pool Jarkko Sakkinen
2019-09-17 23:34 ` Sean Christopherson [this message]
2019-09-18 4:21 ` Jarkko Sakkinen
2019-09-25 0:27 ` Jarkko Sakkinen
2019-09-25 18:33 ` Sean Christopherson
2019-09-27 15:39 ` Jarkko Sakkinen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190917233435.GI10319@linux.intel.com \
--to=sean.j.christopherson@intel.com \
--cc=jarkko.sakkinen@linux.intel.com \
--cc=linux-sgx@vger.kernel.org \
--cc=serge.ayoun@intel.com \
--cc=shay.katz-zamir@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).