public inbox for cgroups@vger.kernel.org
 help / color / mirror / Atom feed
From: Kristen Carlson Accardi <kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-sgx-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Jarkko Sakkinen <jarkko-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Dave Hansen <dave.hansen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>,
	Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Borislav Petkov <bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org>,
	x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	"H. Peter Anvin" <hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>
Cc: Kristen Carlson Accardi
	<kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	Sean Christopherson
	<seanjc-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Subject: [RFC PATCH 16/20] x86/sgx: Add EPC OOM path to forcefully reclaim EPC
Date: Thu, 22 Sep 2022 10:10:53 -0700	[thread overview]
Message-ID: <20220922171057.1236139-17-kristen@linux.intel.com> (raw)
In-Reply-To: <20220922171057.1236139-1-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

From: Sean Christopherson <sean.j.christopherson-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Introduce the OOM path for killing an enclave with the reclaimer
is no longer able to reclaim enough EPC pages. Find a victim enclave,
which will be an enclave with EPC pages remaining that are not
accessible to the reclaimer ("unreclaimable"). Once a victim is
identified, mark the enclave as OOM and zap the enclaves entire
page range. Release all the enclaves resources except for the
struct sgx_encl memory itself.

Signed-off-by: Sean Christopherson <sean.j.christopherson-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Kristen Carlson Accardi <kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Cc: Sean Christopherson <seanjc-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/kernel/cpu/sgx/encl.c |  74 +++++++++++++++---
 arch/x86/kernel/cpu/sgx/encl.h |   2 +
 arch/x86/kernel/cpu/sgx/main.c | 135 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/sgx.h  |   1 +
 4 files changed, 201 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 672b302f3688..fe6f0a62c4f1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -622,7 +622,8 @@ static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
 	if (!encl)
 		return -EFAULT;
 
-	if (!test_bit(SGX_ENCL_DEBUG, &encl->flags))
+	if (!test_bit(SGX_ENCL_DEBUG, &encl->flags) ||
+	    test_bit(SGX_ENCL_OOM, &encl->flags))
 		return -EFAULT;
 
 	for (i = 0; i < len; i += cnt) {
@@ -668,16 +669,8 @@ const struct vm_operations_struct sgx_vm_ops = {
 	.access = sgx_vma_access,
 };
 
-/**
- * sgx_encl_release - Destroy an enclave instance
- * @ref:	address of a kref inside &sgx_encl
- *
- * Used together with kref_put(). Frees all the resources associated with the
- * enclave and the instance itself.
- */
-void sgx_encl_release(struct kref *ref)
+static void __sgx_encl_release(struct sgx_encl *encl)
 {
-	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
 	struct sgx_va_page *va_page;
 	struct sgx_encl_page *entry;
 	unsigned long index;
@@ -712,7 +705,7 @@ void sgx_encl_release(struct kref *ref)
 	while (!list_empty(&encl->va_pages)) {
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
-		list_del(&va_page->list);
+		list_del_init(&va_page->list);
 		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		kfree(va_page);
@@ -728,10 +721,66 @@ void sgx_encl_release(struct kref *ref)
 	/* Detect EPC page leak's. */
 	WARN_ON_ONCE(encl->secs_child_cnt);
 	WARN_ON_ONCE(encl->secs.epc_page);
+}
+
+/**
+ * sgx_encl_release - Destroy an enclave instance
+ * @ref:	address of a kref inside &sgx_encl
+ *
+ * Used together with kref_put(). Frees all the resources associated with the
+ * enclave and the instance itself.
+ */
+void sgx_encl_release(struct kref *ref)
+{
+	struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount);
+
+	/* if the enclave was OOM killed previously, it just needs to be freed */
+	if (!test_bit(SGX_ENCL_OOM, &encl->flags))
+		__sgx_encl_release(encl);
 
 	kfree(encl);
 }
 
+/**
+ * sgx_encl_destroy - prepare the enclave for release
+ * @encl:	address of the sgx_encl to drain
+ *
+ * Used during oom kill to empty the mm_list entries after they have
+ * been zapped. Release the remaining enclave resources without freeing
+ * struct sgx_encl.
+ */
+void sgx_encl_destroy(struct sgx_encl *encl)
+{
+	struct sgx_encl_mm *encl_mm;
+
+	for ( ; ; )  {
+		spin_lock(&encl->mm_lock);
+
+		if (list_empty(&encl->mm_list)) {
+			encl_mm = NULL;
+		} else {
+			encl_mm = list_first_entry(&encl->mm_list,
+						   struct sgx_encl_mm, list);
+			list_del_rcu(&encl_mm->list);
+		}
+
+		spin_unlock(&encl->mm_lock);
+
+		/* The enclave is no longer mapped by any mm. */
+		if (!encl_mm)
+			break;
+
+		synchronize_srcu(&encl->srcu);
+		mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
+		kfree(encl_mm);
+
+		/* 'encl_mm' is gone, put encl_mm->encl reference: */
+		kref_put(&encl->refcount, sgx_encl_release);
+	}
+
+	__sgx_encl_release(encl);
+}
+
 /*
  * 'mm' is exiting and no longer needs mmu notifications.
  */
@@ -801,6 +850,9 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
 	struct sgx_encl_mm *encl_mm;
 	int ret;
 
+	if (test_bit(SGX_ENCL_OOM, &encl->flags))
+		return -ENOMEM;
+
 	/*
 	 * Even though a single enclave may be mapped into an mm more than once,
 	 * each 'mm' only appears once on encl->mm_list. This is guaranteed by
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 831d63f80f5a..f4935632e53a 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -39,6 +39,7 @@ enum sgx_encl_flags {
 	SGX_ENCL_DEBUG		= BIT(1),
 	SGX_ENCL_CREATED	= BIT(2),
 	SGX_ENCL_INITIALIZED	= BIT(3),
+	SGX_ENCL_OOM		= BIT(4),
 };
 
 struct sgx_encl_mm {
@@ -125,5 +126,6 @@ struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
 					 unsigned long addr);
 struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim);
 void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
+void sgx_encl_destroy(struct sgx_encl *encl);
 
 #endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 151ad720a4ec..082c08228840 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -657,6 +657,141 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
 	atomic_long_inc(&sgx_nr_free_pages);
 }
 
+static bool sgx_oom_get_ref(struct sgx_epc_page *epc_page)
+{
+	struct sgx_encl *encl;
+
+	if (epc_page->flags & SGX_EPC_PAGE_ENCLAVE)
+		encl = ((struct sgx_encl_page *)epc_page->owner)->encl;
+	else if (epc_page->flags & SGX_EPC_PAGE_VERSION_ARRAY)
+		encl = epc_page->owner;
+	else
+		return false;
+
+	return kref_get_unless_zero(&encl->refcount);
+}
+
+static struct sgx_epc_page *sgx_oom_get_victim(struct sgx_epc_lru *lru)
+{
+	struct sgx_epc_page *epc_page, *tmp;
+
+	if (list_empty(&lru->unreclaimable))
+		return NULL;
+
+	list_for_each_entry_safe(epc_page, tmp, &lru->unreclaimable, list) {
+		list_del_init(&epc_page->list);
+
+		if (sgx_oom_get_ref(epc_page))
+			return epc_page;
+	}
+	return NULL;
+}
+
+static void sgx_epc_oom_zap(void *owner, struct mm_struct *mm, unsigned long start,
+			    unsigned long end, const struct vm_operations_struct *ops)
+{
+	struct vm_area_struct *vma, *tmp;
+	unsigned long vm_end;
+
+	vma = find_vma(mm, start);
+	if (!vma || vma->vm_ops != ops || vma->vm_private_data != owner ||
+	    vma->vm_start >= end)
+		return;
+
+	for (tmp = vma; tmp->vm_start < end; tmp = tmp->vm_next) {
+		do {
+			vm_end = tmp->vm_end;
+			tmp = tmp->vm_next;
+		} while (tmp && tmp->vm_ops == ops &&
+			 vma->vm_private_data == owner && tmp->vm_start < end);
+
+		zap_page_range(vma, vma->vm_start, vm_end - vma->vm_start);
+
+		if (!tmp)
+			break;
+	}
+}
+
+static void sgx_oom_encl(struct sgx_encl *encl)
+{
+	unsigned long mm_list_version;
+	struct sgx_encl_mm *encl_mm;
+	int idx;
+
+	set_bit(SGX_ENCL_OOM, &encl->flags);
+
+	if (!test_bit(SGX_ENCL_CREATED, &encl->flags))
+		goto out;
+
+	do {
+		mm_list_version = encl->mm_list_version;
+
+		/* Pairs with smp_rmb() in sgx_encl_mm_add(). */
+		smp_rmb();
+
+		idx = srcu_read_lock(&encl->srcu);
+
+		list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+			if (!mmget_not_zero(encl_mm->mm))
+				continue;
+
+			mmap_read_lock(encl_mm->mm);
+
+			sgx_epc_oom_zap(encl, encl_mm->mm, encl->base,
+					encl->base + encl->size, &sgx_vm_ops);
+
+			mmap_read_unlock(encl_mm->mm);
+
+			mmput_async(encl_mm->mm);
+		}
+
+		srcu_read_unlock(&encl->srcu, idx);
+	} while (WARN_ON_ONCE(encl->mm_list_version != mm_list_version));
+
+	mutex_lock(&encl->lock);
+	sgx_encl_destroy(encl);
+	mutex_unlock(&encl->lock);
+
+out:
+	/*
+	 * This puts the refcount we took when we identified this enclave as
+	 * an OOM victim.
+	 */
+	kref_put(&encl->refcount, sgx_encl_release);
+}
+
+static inline void sgx_oom_encl_page(struct sgx_encl_page *encl_page)
+{
+	return sgx_oom_encl(encl_page->encl);
+}
+
+/**
+ * sgx_epc_oom() - invoke EPC out-of-memory handling on target LRU
+ * @lru:	LRU that is low
+ *
+ * Return:	%true if a victim was found and kicked.
+ */
+bool sgx_epc_oom(struct sgx_epc_lru *lru)
+{
+	struct sgx_epc_page *victim;
+
+	spin_lock(&lru->lock);
+	victim = sgx_oom_get_victim(lru);
+	spin_unlock(&lru->lock);
+
+	if (!victim)
+		return false;
+
+	if (victim->flags & SGX_EPC_PAGE_ENCLAVE)
+		sgx_oom_encl_page(victim->owner);
+	else if (victim->flags & SGX_EPC_PAGE_VERSION_ARRAY)
+		sgx_oom_encl(victim->owner);
+	else
+		WARN_ON_ONCE(1);
+
+	return true;
+}
+
 static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
 					 unsigned long index,
 					 struct sgx_epc_section *section)
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 0598d534371b..a4c7ee0a4958 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -116,6 +116,7 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
 int sgx_reclaim_epc_pages(int nr_to_scan, bool ignore_age);
 void sgx_isolate_epc_pages(struct sgx_epc_lru *lru, int *nr_to_scan,
 			   struct list_head *dst);
+bool sgx_epc_oom(struct sgx_epc_lru *lru);
 
 void sgx_ipi_cb(void *info);
 
-- 
2.37.3


  parent reply	other threads:[~2022-09-22 17:10 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-22 17:10 [RFC PATCH 00/20] Add Cgroup support for SGX EPC memory Kristen Carlson Accardi
     [not found] ` <20220922171057.1236139-1-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-22 17:10   ` [RFC PATCH 01/20] x86/sgx: Call cond_resched() at the end of sgx_reclaim_pages() Kristen Carlson Accardi
     [not found]     ` <20220922171057.1236139-2-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-23 12:32       ` Jarkko Sakkinen
2022-09-23 12:35         ` Jarkko Sakkinen
     [not found]           ` <Yy2oCRfLrePCWjx7-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2022-09-23 12:37             ` Jarkko Sakkinen
2022-09-22 17:10   ` [RFC PATCH 02/20] x86/sgx: Store EPC page owner as a 'void *' to handle multiple users Kristen Carlson Accardi
     [not found]     ` <20220922171057.1236139-3-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-22 18:54       ` Dave Hansen
2022-09-23 12:49       ` Jarkko Sakkinen
2022-09-22 17:10   ` [RFC PATCH 03/20] x86/sgx: Track owning enclave in VA EPC pages Kristen Carlson Accardi
     [not found]     ` <20220922171057.1236139-4-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-22 18:55       ` Dave Hansen
     [not found]         ` <1adb03c8-1274-3898-0677-03015a1f5a5d-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2022-09-22 20:04           ` Kristen Carlson Accardi
     [not found]             ` <f031ac1bd6b16509f1d714cd65e6b017f054940c.camel-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-22 21:39               ` Dave Hansen
2022-09-23 12:52       ` Jarkko Sakkinen
2022-09-22 17:10   ` [RFC PATCH 04/20] x86/sgx: Add 'struct sgx_epc_lru' to encapsulate lru list(s) Kristen Carlson Accardi
     [not found]     ` <20220922171057.1236139-5-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-23 13:20       ` Jarkko Sakkinen
     [not found]         ` <Yy2ynLZ2KX6bOcHr-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2022-09-29 23:04           ` Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 05/20] x86/sgx: Introduce unreclaimable EPC page lists Kristen Carlson Accardi
     [not found]     ` <20220922171057.1236139-6-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-23 13:29       ` Jarkko Sakkinen
2022-09-22 17:10   ` [RFC PATCH 06/20] x86/sgx: Introduce RECLAIM_IN_PROGRESS flag for EPC pages Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 07/20] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 08/20] x86/sgx: Add EPC page flags to identify type of page Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 09/20] x86/sgx: Allow reclaiming up to 32 pages, but scan 16 by default Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 10/20] x86/sgx: Return the number of EPC pages that were successfully reclaimed Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 11/20] x86/sgx: Add option to ignore age of page during EPC reclaim Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 12/20] x86/sgx: Add helper to retrieve SGX EPC LRU given an EPC page Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 13/20] x86/sgx: Prepare for multiple LRUs Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 14/20] x86/sgx: Expose sgx_reclaim_pages() for use by EPC cgroup Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 15/20] x86/sgx: Add helper to grab pages from an arbitrary EPC LRU Kristen Carlson Accardi
2022-09-22 17:10   ` Kristen Carlson Accardi [this message]
2022-09-22 17:10   ` [RFC PATCH 17/20] cgroup, x86/sgx: Add SGX EPC cgroup controller Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 18/20] x86/sgx: Enable EPC cgroup controller in SGX core Kristen Carlson Accardi
2022-09-22 17:10   ` [RFC PATCH 19/20] x86/sgx: Add stats and events interfaces to EPC cgroup controller Kristen Carlson Accardi
2022-09-22 17:41   ` [RFC PATCH 00/20] Add Cgroup support for SGX EPC memory Tejun Heo
     [not found]     ` <YyyeSVSk/lWdo/W4-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2022-09-22 18:59       ` Kristen Carlson Accardi
     [not found]         ` <4b8605533e5deade739249bfb341ab9c06d56a1e.camel-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-09-22 19:08           ` Tejun Heo
     [not found]             ` <YyyykUJQtYbPVctn-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2022-09-22 21:03               ` Dave Hansen
     [not found]                 ` <7ff6d114-a6cc-e3c5-5edb-8ac0e527d8a9-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2022-09-24  0:09                   ` Tejun Heo
     [not found]                     ` <Yy5KwnRTbFjmKE9X-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2022-09-26 18:30                       ` Kristen Carlson Accardi
2022-10-07 16:39                       ` Kristen Carlson Accardi
     [not found]                         ` <0f42e11434b264e555559cab626c1828a9eae09f.camel-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2022-10-07 16:42                           ` Tejun Heo
     [not found]                             ` <Y0BW/GkfXG99+41O-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2022-10-07 16:46                               ` Kristen Carlson Accardi
2022-09-23 12:24   ` Jarkko Sakkinen
2022-09-22 17:10 ` [RFC PATCH 20/20] docs, cgroup, x86/sgx: Add SGX EPC cgroup controller documentation Kristen Carlson Accardi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220922171057.1236139-17-kristen@linux.intel.com \
    --to=kristen-vuqaysv1563yd54fqh9/ca@public.gmane.org \
    --cc=bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=dave.hansen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org \
    --cc=hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org \
    --cc=jarkko-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-sgx-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=seanjc-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org \
    --cc=x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox