public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Kristen Carlson Accardi <kristen@linux.intel.com>
To: jarkko@kernel.org, dave.hansen@linux.intel.com, tj@kernel.org,
	linux-kernel@vger.kernel.org, linux-sgx@vger.kernel.org,
	cgroups@vger.kernel.org, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>
Cc: zhiquan1.li@intel.com,
	Kristen Carlson Accardi <kristen@linux.intel.com>,
	Sean Christopherson <seanjc@google.com>
Subject: [PATCH v2 05/18] x86/sgx: Track epc pages on reclaimable or unreclaimable lists
Date: Fri,  2 Dec 2022 10:36:41 -0800	[thread overview]
Message-ID: <20221202183655.3767674-6-kristen@linux.intel.com> (raw)
In-Reply-To: <20221202183655.3767674-1-kristen@linux.intel.com>

Replace functions sgx_mark_page_reclaimable() and
sgx_unmark_page_reclaimable() with sgx_record_epc_page() and
sgx_drop_epc_page(). sgx_record_epc_page() wil add the epc_page
to the correct "reclaimable" or "unreclaimable" list in the
sgx_epc_lru_lists struct. sgx_drop_epc_page() will delete the page
from the LRU list. Tracking pages that are not tracked by
the reclaimer in the sgx_epc_lru_lists "unreclaimable" list allows
an OOM event to cause all the pages in use by an enclave to be freed,
regardless of whether they were reclaimable pages or not.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 10 +++++++---
 arch/x86/kernel/cpu/sgx/ioctl.c | 11 +++++++----
 arch/x86/kernel/cpu/sgx/main.c  | 26 +++++++++++++++-----------
 arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
 arch/x86/kernel/cpu/sgx/virt.c  | 28 ++++++++++++++++++++--------
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 4eaf9d21e71b..4683da9ef4f1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -252,6 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		epc_page = sgx_encl_eldu(&encl->secs, NULL);
 		if (IS_ERR(epc_page))
 			return ERR_CAST(epc_page);
+		sgx_record_epc_page(epc_page, 0);
 	}
 
 	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
@@ -259,7 +260,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		return ERR_CAST(epc_page);
 
 	encl->secs_child_cnt++;
-	sgx_mark_page_reclaimable(entry->epc_page);
+	sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	return entry;
 }
@@ -375,7 +376,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	encl_page->type = SGX_PAGE_TYPE_REG;
 	encl->secs_child_cnt++;
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	phys_addr = sgx_get_epc_phys_addr(epc_page);
 	/*
@@ -687,7 +688,7 @@ void sgx_encl_release(struct kref *ref)
 			 * The page and its radix tree entry cannot be freed
 			 * if the page is being held by the reclaimer.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page))
+			if (sgx_drop_epc_page(entry->epc_page))
 				continue;
 
 			sgx_encl_free_epc_page(entry->epc_page);
@@ -703,6 +704,7 @@ void sgx_encl_release(struct kref *ref)
 	xa_destroy(&encl->page_array);
 
 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 	}
@@ -711,6 +713,7 @@ void sgx_encl_release(struct kref *ref)
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
 		list_del(&va_page->list);
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		kfree(va_page);
 	}
@@ -1218,6 +1221,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
 		sgx_encl_free_epc_page(epc_page);
 		return ERR_PTR(-EFAULT);
 	}
+	sgx_record_epc_page(epc_page, 0);
 
 	return epc_page;
 }
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 9a1bb3c3211a..aca80a3f38a1 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -48,6 +48,7 @@ void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 	encl->page_cnt--;
 
 	if (va_page) {
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		list_del(&va_page->list);
 		kfree(va_page);
@@ -113,6 +114,8 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	encl->attributes = secs->attributes;
 	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
 
+	sgx_record_epc_page(encl->secs.epc_page, 0);
+
 	/* Set only after completion, as encl->lock has not been taken. */
 	set_bit(SGX_ENCL_CREATED, &encl->flags);
 
@@ -322,7 +325,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 			goto err_out;
 	}
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 	mutex_unlock(&encl->lock);
 	mmap_read_unlock(current->mm);
 	return ret;
@@ -958,7 +961,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 			 * Prevent page from being reclaimed while mutex
 			 * is released.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+			if (sgx_drop_epc_page(entry->epc_page)) {
 				ret = -EAGAIN;
 				goto out_entry_changed;
 			}
@@ -973,7 +976,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 
 			mutex_lock(&encl->lock);
 
-			sgx_mark_page_reclaimable(entry->epc_page);
+			sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 		}
 
 		/* Change EPC type */
@@ -1130,7 +1133,7 @@ static long sgx_encl_remove_pages(struct sgx_encl *encl,
 			goto out_unlock;
 		}
 
-		if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+		if (sgx_drop_epc_page(entry->epc_page)) {
 			ret = -EBUSY;
 			goto out_unlock;
 		}
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 447cf4b8580c..ecd7f8e704cc 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -262,7 +262,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 			goto out;
 
 		sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
-
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 
@@ -499,31 +499,35 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
 }
 
 /**
- * sgx_mark_page_reclaimable() - Mark a page as reclaimable
+ * sgx_record_epc_page() - Add a page to the LRU tracking
  * @page:	EPC page
  *
- * Mark a page as reclaimable and add it to the active page list. Pages
- * are automatically removed from the active list when freed.
+ * Mark a page with the specified flags and add it to the appropriate
+ * (un)reclaimable list.
  */
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
 {
 	spin_lock(&sgx_global_lru.lock);
-	page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
-	sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+	page->flags |= flags;
+	if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
+		sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	else
+		sgx_epc_push_unreclaimable(&sgx_global_lru, page);
 	spin_unlock(&sgx_global_lru.lock);
 }
 
 /**
- * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
+ * sgx_drop_epc_page() - Remove a page from a LRU list
  * @page:	EPC page
  *
- * Clear the reclaimable flag and remove the page from the active page list.
+ * Clear the reclaimable flag if set and remove the page from its LRU.
  *
  * Return:
  *   0 on success,
  *   -EBUSY if the page is in the process of being reclaimed
  */
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
+int sgx_drop_epc_page(struct sgx_epc_page *page)
 {
 	spin_lock(&sgx_global_lru.lock);
 	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
@@ -533,9 +537,9 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
 			return -EBUSY;
 		}
 
-		list_del(&page->list);
 		page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
 	}
+	list_del(&page->list);
 	spin_unlock(&sgx_global_lru.lock);
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 5e6d88438fae..ba4338b7303f 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -159,8 +159,8 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void);
 void sgx_free_epc_page(struct sgx_epc_page *page);
 
 void sgx_reclaim_direct(void);
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags);
+int sgx_drop_epc_page(struct sgx_epc_page *page);
 struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
 
 void sgx_ipi_cb(void *info);
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 776ae5c1c032..0eabc4db91d0 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -64,6 +64,8 @@ static int __sgx_vepc_fault(struct sgx_vepc *vepc,
 		goto err_delete;
 	}
 
+	sgx_record_epc_page(epc_page, 0);
+
 	return 0;
 
 err_delete:
@@ -148,6 +150,7 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 		return ret;
 	}
 
+	sgx_drop_epc_page(epc_page);
 	sgx_free_epc_page(epc_page);
 	return 0;
 }
@@ -220,8 +223,15 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 		 * have been removed, the SECS page must have a child on
 		 * another instance.
 		 */
-		if (sgx_vepc_free_page(epc_page))
+		if (sgx_vepc_free_page(epc_page)) {
+			/*
+			 * Drop the page before adding it to the list of SECS
+			 * pages.  Moving the page off the unreclaimable list
+			 * needs to be done under the LRU's spinlock.
+			 */
+			sgx_drop_epc_page(epc_page);
 			list_add_tail(&epc_page->list, &secs_pages);
+		}
 
 		xa_erase(&vepc->page_array, index);
 	}
@@ -236,15 +246,17 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 	mutex_lock(&zombie_secs_pages_lock);
 	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
 		/*
-		 * Speculatively remove the page from the list of zombies,
-		 * if the page is successfully EREMOVE'd it will be added to
-		 * the list of free pages.  If EREMOVE fails, throw the page
-		 * on the local list, which will be spliced on at the end.
+		 * If EREMOVE fails, throw the page on the local list, which
+		 * will be spliced on at the end.
+		 *
+		 * Note, this abuses sgx_drop_epc_page() to delete the page off
+		 * the list of zombies, but this is a very rare path (probably
+		 * never hit in production).  It's not worth special casing the
+		 * free path for this super rare case just to avoid taking the
+		 * LRU's spinlock.
 		 */
-		list_del(&epc_page->list);
-
 		if (sgx_vepc_free_page(epc_page))
-			list_add_tail(&epc_page->list, &secs_pages);
+			list_move_tail(&epc_page->list, &secs_pages);
 	}
 
 	if (!list_empty(&secs_pages))
-- 
2.38.1


  parent reply	other threads:[~2022-12-02 18:37 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-02 18:36 [PATCH v2 00/18] Add Cgroup support for SGX EPC memory Kristen Carlson Accardi
2022-12-02 18:36 ` [PATCH v2 01/18] x86/sgx: Call cond_resched() at the end of sgx_reclaim_pages() Kristen Carlson Accardi
2022-12-02 21:33   ` Dave Hansen
2022-12-02 21:37     ` Kristen Carlson Accardi
2022-12-02 21:45       ` Dave Hansen
2022-12-02 22:17         ` Kristen Carlson Accardi
2022-12-02 22:37           ` Dave Hansen
2022-12-02 18:36 ` [PATCH v2 02/18] x86/sgx: Store struct sgx_encl when allocating new VA pages Kristen Carlson Accardi
2022-12-02 21:35   ` Dave Hansen
2022-12-02 21:40     ` Kristen Carlson Accardi
2022-12-02 21:48       ` Dave Hansen
2022-12-02 22:35         ` Sean Christopherson
2022-12-02 22:47           ` Dave Hansen
2022-12-02 22:49             ` Sean Christopherson
2022-12-02 18:36 ` [PATCH v2 03/18] x86/sgx: Add 'struct sgx_epc_lru_lists' to encapsulate lru list(s) Kristen Carlson Accardi
2022-12-02 21:39   ` Dave Hansen
2022-12-08 15:31   ` Jarkko Sakkinen
2022-12-08 18:03     ` Kristen Carlson Accardi
2022-12-02 18:36 ` [PATCH v2 04/18] x86/sgx: Use sgx_epc_lru_lists for existing active page list Kristen Carlson Accardi
2022-12-02 21:43   ` Dave Hansen
2022-12-02 21:51     ` Kristen Carlson Accardi
2022-12-02 22:10       ` Dave Hansen
2022-12-02 18:36 ` Kristen Carlson Accardi [this message]
2022-12-02 22:13   ` [PATCH v2 05/18] x86/sgx: Track epc pages on reclaimable or unreclaimable lists Dave Hansen
2022-12-02 22:28     ` Sean Christopherson
2022-12-02 18:36 ` [PATCH v2 06/18] x86/sgx: Introduce RECLAIM_IN_PROGRESS flag for EPC pages Kristen Carlson Accardi
2022-12-02 22:15   ` Dave Hansen
2022-12-08 15:46   ` Jarkko Sakkinen
2022-12-08 18:13     ` Kristen Carlson Accardi
2022-12-02 18:36 ` [PATCH v2 07/18] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim Kristen Carlson Accardi
2022-12-02 22:33   ` Dave Hansen
2022-12-05 16:33     ` Kristen Carlson Accardi
2022-12-05 17:03       ` Dave Hansen
2022-12-05 18:25         ` Kristen Carlson Accardi
2022-12-02 18:36 ` [PATCH v2 08/18] x86/sgx: Allow reclaiming up to 32 pages, but scan 16 by default Kristen Carlson Accardi
2022-12-08  9:26   ` Jarkko Sakkinen
2022-12-08  9:27     ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 09/18] x86/sgx: Return the number of EPC pages that were successfully reclaimed Kristen Carlson Accardi
2022-12-08  9:30   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 10/18] x86/sgx: Add option to ignore age of page during EPC reclaim Kristen Carlson Accardi
2022-12-08  9:37   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 11/18] x86/sgx: Prepare for multiple LRUs Kristen Carlson Accardi
2022-12-08  9:42   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 12/18] x86/sgx: Expose sgx_reclaim_pages() for use by EPC cgroup Kristen Carlson Accardi
2022-12-08  9:46   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 13/18] x86/sgx: Add helper to grab pages from an arbitrary EPC LRU Kristen Carlson Accardi
2022-12-08  9:56   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 14/18] x86/sgx: Add EPC OOM path to forcefully reclaim EPC Kristen Carlson Accardi
2022-12-08 15:21   ` Jarkko Sakkinen
2022-12-09 16:05     ` Kristen Carlson Accardi
2022-12-09 16:22       ` Dave Hansen
2022-12-12 18:09         ` Sean Christopherson
2022-12-26 20:43           ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 15/18] cgroup/misc: Add per resource callbacks for css events Kristen Carlson Accardi
2022-12-08 14:53   ` Jarkko Sakkinen
2022-12-08 15:15     ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 16/18] cgroup/misc: Prepare for SGX usage Kristen Carlson Accardi
2022-12-08 15:23   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 17/18] x86/sgx: Add support for misc cgroup controller Kristen Carlson Accardi
2022-12-08 15:30   ` Jarkko Sakkinen
2022-12-02 18:36 ` [PATCH v2 18/18] Docs/x86/sgx: Add description for cgroup support Kristen Carlson Accardi
2023-04-03 21:26 ` [EXTERNAL] [PATCH v2 00/18] Add Cgroup support for SGX EPC memory Anand Krishnamoorthi
2023-04-13 18:49   ` Anand Krishnamoorthi
2023-04-18 16:44     ` Mikko Ylinen
2023-04-27 16:53       ` Anand Krishnamoorthi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221202183655.3767674-6-kristen@linux.intel.com \
    --to=kristen@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=cgroups@vger.kernel.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jarkko@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-sgx@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=x86@kernel.org \
    --cc=zhiquan1.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox