All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <sean.j.christopherson@intel.com>
To: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Cc: linux-sgx@vger.kernel.org,
	Shay Katz-zamir <shay.katz-zamir@intel.com>,
	Serge Ayoun <serge.ayoun@intel.com>
Subject: Re: [PATCH v3 17/17] x86/sgx: Fix pages in the BLOCKED state ending up to the free pool
Date: Tue, 17 Sep 2019 16:34:35 -0700	[thread overview]
Message-ID: <20190917233435.GI10319@linux.intel.com> (raw)
In-Reply-To: <20190916101803.30726-18-jarkko.sakkinen@linux.intel.com>

On Mon, Sep 16, 2019 at 01:18:03PM +0300, Jarkko Sakkinen wrote:
> A blocked page can legitimately end up in the free pool if pinning fails,
> because we interpret that as an EWB failure and simply put the page into the
> free pool. This corrupts the EPC page allocator.
> 
> Fix the bug by pinning the backing storage when picking the victim pages. A
> clean rollback can still be done when the memory allocation fails as pages
> can be still returned back to the enclave.
> 
> This in effect removes all failure cases from sgx_encl_ewb() other than an
> EPCM conflict after the host has gone through a sleep cycle. In that
> case putting a page back to the free pool is perfectly fine because it is
> uninitialized.
> 
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Shay Katz-zamir <shay.katz-zamir@intel.com>
> Cc: Serge Ayoun <serge.ayoun@intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/reclaim.c | 95 ++++++++++++++++++-------------
>  1 file changed, 57 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> index 7d628a1388e2..d6e580e55456 100644
> --- a/arch/x86/kernel/cpu/sgx/reclaim.c
> +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> @@ -206,32 +206,24 @@ static void sgx_reclaimer_block(struct sgx_epc_page *epc_page)
>  
>  static int __sgx_encl_ewb(struct sgx_encl *encl, struct sgx_epc_page *epc_page,
>  			  struct sgx_va_page *va_page, unsigned int va_offset,
> -			  unsigned int page_index)
> +			  struct sgx_backing *backing)
>  {
>  	struct sgx_pageinfo pginfo;
> -	struct sgx_backing b;
>  	int ret;
>  
> -	ret = sgx_encl_get_backing(encl, page_index, &b);
> -	if (ret)
> -		return ret;
> -
>  	pginfo.addr = 0;
> -	pginfo.contents = (unsigned long)kmap_atomic(b.contents);
> -	pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + b.pcmd_offset;
>  	pginfo.secs = 0;
> +
> +	pginfo.contents = (unsigned long)kmap_atomic(backing->contents);
> +	pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) +
> +			  backing->pcmd_offset;
> +
>  	ret = __ewb(&pginfo, sgx_epc_addr(epc_page),
>  		    sgx_epc_addr(va_page->epc_page) + va_offset);
> -	kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
> -	kunmap_atomic((void *)(unsigned long)pginfo.contents);
>  
> -	if (!ret) {
> -		set_page_dirty(b.pcmd);
> -		set_page_dirty(b.contents);
> -	}
> -
> -	put_page(b.pcmd);
> -	put_page(b.contents);
> +	kunmap_atomic((void *)(unsigned long)(pginfo.metadata -
> +					      backing->pcmd_offset));
> +	kunmap_atomic((void *)(unsigned long)pginfo.contents);
>  
>  	return ret;
>  }
> @@ -265,7 +257,7 @@ static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
>  }
>  
>  static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
> -			 unsigned int page_index)
> +			 struct sgx_backing *backing)
>  {
>  	struct sgx_encl_page *encl_page = epc_page->owner;
>  	struct sgx_encl *encl = encl_page->encl;
> @@ -281,8 +273,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
>  	if (sgx_va_page_full(va_page))
>  		list_move_tail(&va_page->list, &encl->va_pages);
>  
> -	ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset,
> -			     page_index);
> +	ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset, backing);
>  	if (ret == SGX_NOT_TRACKED) {
>  		ret = __etrack(sgx_epc_addr(encl->secs.epc_page));
>  		if (ret) {
> @@ -292,7 +283,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
>  		}
>  
>  		ret = __sgx_encl_ewb(encl, epc_page, va_page, va_offset,
> -				     page_index);
> +				     backing);
>  		if (ret == SGX_NOT_TRACKED) {
>  			/*
>  			 * Slow path, send IPIs to kick cpus out of the
> @@ -304,7 +295,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
>  			on_each_cpu_mask(sgx_encl_ewb_cpumask(encl),
>  					 sgx_ipi_cb, NULL, 1);
>  			ret = __sgx_encl_ewb(encl, epc_page, va_page,
> -					     va_offset, page_index);
> +					     va_offset, backing);
>  		}
>  	}
>  
> @@ -314,15 +305,20 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
>  
>  		sgx_encl_destroy(encl);
>  	} else {
> +		set_page_dirty(backing->pcmd);
> +		set_page_dirty(backing->contents);
> +
>  		encl_page->desc |= va_offset;
>  		encl_page->va_page = va_page;
>  	}
>  }
>  
> -static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
> +static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
> +				struct sgx_backing *backing)
>  {
>  	struct sgx_encl_page *encl_page = epc_page->owner;
>  	struct sgx_encl *encl = encl_page->encl;
> +	struct sgx_backing secs_backing;
>  	int ret;
>  
>  	mutex_lock(&encl->lock);
> @@ -331,7 +327,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
>  		ret = __eremove(sgx_epc_addr(epc_page));
>  		WARN(ret, "EREMOVE returned %d\n", ret);
>  	} else {
> -		sgx_encl_ewb(epc_page, SGX_ENCL_PAGE_INDEX(encl_page));
> +		sgx_encl_ewb(epc_page, backing);
>  	}
>  
>  	encl_page->epc_page = NULL;
> @@ -340,10 +336,17 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
>  	if (!encl->secs_child_cnt &&
>  	    (atomic_read(&encl->flags) &
>  	     (SGX_ENCL_DEAD | SGX_ENCL_INITIALIZED))) {
> -		sgx_encl_ewb(encl->secs.epc_page, PFN_DOWN(encl->size));
> -		sgx_free_page(encl->secs.epc_page);
> +		ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
> +					   &secs_backing);
> +		if (!ret) {
> +			sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
> +			sgx_free_page(encl->secs.epc_page);
> +
> +			encl->secs.epc_page = NULL;
>  
> -		encl->secs.epc_page = NULL;
> +			put_page(secs_backing.pcmd);
> +			put_page(secs_backing.contents);
> +		}
>  	}
>  
>  	mutex_unlock(&encl->lock);
> @@ -351,17 +354,21 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page)
>  
>  /**
>   * sgx_reclaim_pages() - Reclaim EPC pages from the consumers
> - * Takes a fixed chunk of pages from the global list of consumed EPC pages and
> - * tries to swap them. Only the pages that are either being freed by the
> - * consumer or actively used are skipped.
> + *
> + * Take a fixed number of pages from the head of the active page pool and
> + * reclaim them to the enclave's private shmem files. Skip the pages, which
> + * have been accessed since the last scan. Move those pages to the tail of
> + * active page pool so that the pages get scanned in LRU like fashion.
>   */
>  void sgx_reclaim_pages(void)
>  {
> -	struct sgx_epc_page *chunk[SGX_NR_TO_SCAN + 1];
> +	struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> +	struct sgx_backing backing[SGX_NR_TO_SCAN];
>  	struct sgx_epc_section *section;
>  	struct sgx_encl_page *encl_page;
>  	struct sgx_epc_page *epc_page;
>  	int cnt = 0;
> +	int ret;
>  	int i;
>  
>  	spin_lock(&sgx_active_page_list_lock);
> @@ -388,13 +395,21 @@ void sgx_reclaim_pages(void)
>  		epc_page = chunk[i];
>  		encl_page = epc_page->owner;
>  
> -		if (sgx_can_reclaim(epc_page)) {
> -			mutex_lock(&encl_page->encl->lock);
> -			encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
> -			mutex_unlock(&encl_page->encl->lock);
> -			continue;
> -		}
> +		if (!sgx_can_reclaim(epc_page))

Would it make sense to use a more explicit name for sgx_can_reclaim(),
e.g. sgx_age_epc_page() or something?  "can reclaim" makes it sound like
there are scenarios where reclaim is impossible, but really it's just that
we don't want to reclaim a recently accessed page.

> +			goto skip;
>  
> +		ret = sgx_encl_get_backing(encl_page->encl,
> +					   SGX_ENCL_PAGE_INDEX(encl_page),
> +					   &backing[i]);
> +		if (ret)
> +			goto skip;
> +
> +		mutex_lock(&encl_page->encl->lock);
> +		encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
> +		mutex_unlock(&encl_page->encl->lock);
> +		continue;
> +
> +skip:

Eww.  The call to sgx_encl_get_backing() makes it rather ugly no matter
what, but this seems slightly less ugly:

	for (i = 0; i < cnt; i++) {
		epc_page = chunk[i];
		encl_page = epc_page->owner;

		if (!sgx_can_reclaim(chunk[i]) ||
		    sgx_encl_get_backing(encl_page->encl,
					 SGX_ENCL_PAGE_INDEX(encl_page),
					 &backing[i])) {
			kref_put(&encl_page->encl->refcount, sgx_encl_release);

			spin_lock(&sgx_active_page_list_lock);
			list_add_tail(&epc_page->list, &sgx_active_page_list);
			spin_unlock(&sgx_active_page_list_lock);

			chunk[i] = NULL;
			continue;
		}

		mutex_lock(&encl_page->encl->lock);
		encl_page->desc |= SGX_ENCL_PAGE_RECLAIMED;
		mutex_unlock(&encl_page->encl->lock);
	}

>  		kref_put(&encl_page->encl->refcount, sgx_encl_release);
>  
>  		spin_lock(&sgx_active_page_list_lock);
> @@ -416,7 +431,11 @@ void sgx_reclaim_pages(void)
>  			continue;
>  
>  		encl_page = epc_page->owner;
> -		sgx_reclaimer_write(epc_page);
> +		sgx_reclaimer_write(epc_page, &backing[i]);
> +
> +		put_page(backing->pcmd);
> +		put_page(backing->contents);

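These should be backing[i].pcmd and backing[i].contents; as written,
backing->pcmd and backing->contents always refer to backing[0].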

> +
>  		kref_put(&encl_page->encl->refcount, sgx_encl_release);
>  		epc_page->desc &= ~SGX_EPC_PAGE_RECLAIMABLE;
>  
> -- 
> 2.20.1
> 

  reply	other threads:[~2019-09-17 23:34 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-16 10:17 [PATCH v3 00/17] Fixes and updates for v23 Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 01/17] selftest/x86/sgx: Remove encl_piggy.h Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 02/17] x86/sgx: Clean up internal includes Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 03/17] x86/sgx: Write backing storage only if EWB is successful Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 04/17] x86/sgx: Rename 'j' as 'cnt' in sgx_reclaim_pages() Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 05/17] x86/sgx: Turn encls_failed() as inline function Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 06/17] x86/sgx: Move sgx_einit() to encls.c Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 07/17] x86/sgx: Remove pages in sgx_reclaimer_write() Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 08/17] x86/sgx: Calculate page index " Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 09/17] x86/sgx: Move SGX_ENCL_DEAD check to sgx_reclaimer_write() Jarkko Sakkinen
2019-09-17 23:13   ` Sean Christopherson
2019-09-18  4:15     ` Jarkko Sakkinen
2019-09-17 23:21   ` Sean Christopherson
2019-09-18  4:16     ` Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 10/17] x86/sgx: Free VA slot when the EWB flow fails Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 11/17] x86/sgx: Call sgx_encl_destroy() " Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 12/17] x86/sgx: Open code sgx_reclaimer_get() and sgx_reclaimer_put() Jarkko Sakkinen
2019-09-17 23:07   ` Sean Christopherson
2019-09-18  4:12     ` Jarkko Sakkinen
2019-09-20 13:38       ` Jarkko Sakkinen
2019-09-16 10:17 ` [PATCH v3 13/17] x86/sgx: Introduce sgx_can_reclaim() Jarkko Sakkinen
2019-09-17 23:25   ` Sean Christopherson
2019-09-25 18:28   ` Sean Christopherson
2019-09-27 15:33     ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 14/17] x86/sgx: Replace section->free_cnt with a global sgx_nr_free_pages Jarkko Sakkinen
2019-09-17 22:50   ` Sean Christopherson
2019-09-18  4:07     ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 15/17] x86/sgx: sgx_vma_access(): Do not return -ECANCELED on invalid TCS pages Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 16/17] x86/sgx: Introduce sgx_encl_get_backing() Jarkko Sakkinen
2019-09-17 23:05   ` Sean Christopherson
2019-09-18  4:10     ` Jarkko Sakkinen
2019-09-16 10:18 ` [PATCH v3 17/17] x86/sgx: Fix pages in the BLOCKED state ending up to the free pool Jarkko Sakkinen
2019-09-17 23:34   ` Sean Christopherson [this message]
2019-09-18  4:21     ` Jarkko Sakkinen
2019-09-25  0:27       ` Jarkko Sakkinen
2019-09-25 18:33         ` Sean Christopherson
2019-09-27 15:39           ` Jarkko Sakkinen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190917233435.GI10319@linux.intel.com \
    --to=sean.j.christopherson@intel.com \
    --cc=jarkko.sakkinen@linux.intel.com \
    --cc=linux-sgx@vger.kernel.org \
    --cc=serge.ayoun@intel.com \
    --cc=shay.katz-zamir@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.