public inbox for linux-arm-kernel@lists.infradead.org
 help / color / mirror / Atom feed
From: Alexandru Elisei <alexandru.elisei@arm.com>
To: maz@kernel.org, james.morse@arm.com, suzuki.poulose@arm.com,
	linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, will@kernel.org,
	mark.rutland@arm.com
Subject: [RFC PATCH v5 03/38] KVM: arm64: Implement the memslot lock/unlock functionality
Date: Wed, 17 Nov 2021 15:38:07 +0000	[thread overview]
Message-ID: <20211117153842.302159-4-alexandru.elisei@arm.com> (raw)
In-Reply-To: <20211117153842.302159-1-alexandru.elisei@arm.com>

Pin memory in the process address space and map it in the stage 2 tables as
a result of userspace enabling the KVM_CAP_ARM_LOCK_USER_MEMORY_REGION
capability; and unpin it from the process address space when the capability
is used with the KVM_ARM_LOCK_USER_MEMORY_REGION_FLAGS_UNLOCK flag.

The current implementation has two drawbacks which will be fixed in future
patches:

- The dcache maintenance is done when the memslot is locked, which means
  that it is possible that memory changes made by userspace after the ioctl
  completes won't be visible to a guest running with the MMU off.

- Tag scrubbing is done when the memslot is locked. If the MTE capability
  is enabled after the ioctl, the guest will be able to access unsanitised
  pages. This is prevented by forbidding userspace to enable the MTE
  capability if any memslots are locked.

Only PAGE_SIZE mappings are supported at stage 2.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 Documentation/virt/kvm/api.rst    |   4 +-
 arch/arm64/include/asm/kvm_host.h |  11 ++
 arch/arm64/kvm/arm.c              |  22 +++-
 arch/arm64/kvm/mmu.c              | 204 ++++++++++++++++++++++++++++--
 4 files changed, 226 insertions(+), 15 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 16aa59eae3d9..0ac12a730013 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6979,8 +6979,8 @@ write permissions are specified for a memslot which logs dirty pages.
 
 Enabling this capability causes the memory pinned when locking the memslot
 specified in args[0] to be unpinned, or, optionally, all memslots to be
-unlocked. The IPA range is not unmapped from stage 2.
->>>>>>> 56641eee289e (KVM: arm64: Add lock/unlock memslot user API)
+unlocked. The IPA range is not unmapped from stage 2. It is considered an error
+to attempt to unlock a memslot which is not locked.
 
 8. Other capabilities.
 ======================
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 733621e41900..7fd70ad90c16 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -99,7 +99,18 @@ struct kvm_s2_mmu {
 	struct kvm_arch *arch;
 };
 
+#define KVM_MEMSLOT_LOCK_READ		(1 << 0)
+#define KVM_MEMSLOT_LOCK_WRITE		(1 << 1)
+#define KVM_MEMSLOT_LOCK_MASK		0x3
+
+struct kvm_memory_slot_page {
+	struct list_head list;
+	struct page *page;
+};
+
 struct kvm_arch_memory_slot {
+	struct kvm_memory_slot_page pages;
+	u32 flags;
 };
 
 struct kvm_arch {
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d49905d18cee..b9b8b43835e3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -106,6 +106,25 @@ static int kvm_lock_user_memory_region_ioctl(struct kvm *kvm,
 	}
 }
 
+static bool kvm_arm_has_locked_memslots(struct kvm *kvm)
+{
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *memslot;
+	bool has_locked_memslots = false;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	kvm_for_each_memslot(memslot, slots) {
+		if (memslot->arch.flags & KVM_MEMSLOT_LOCK_MASK) {
+			has_locked_memslots = true;
+			break;
+		}
+	}
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return has_locked_memslots;
+}
+
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			    struct kvm_enable_cap *cap)
 {
@@ -120,7 +139,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	case KVM_CAP_ARM_MTE:
 		mutex_lock(&kvm->lock);
-		if (!system_supports_mte() || kvm->created_vcpus) {
+		if (!system_supports_mte() || kvm->created_vcpus ||
+		    (kvm_arm_lock_memslot_supported() && kvm_arm_has_locked_memslots(kvm))) {
 			r = -EINVAL;
 		} else {
 			r = 0;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index f65bcbc9ae69..b0a8e61315e4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -72,6 +72,11 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
 }
 
+static bool memslot_is_locked(struct kvm_memory_slot *memslot)
+{
+	return memslot->arch.flags & KVM_MEMSLOT_LOCK_MASK;
+}
+
 /**
  * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:	pointer to kvm structure.
@@ -769,6 +774,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
 	if (map_size == PAGE_SIZE)
 		return true;
 
+	/* Allow only PAGE_SIZE mappings for locked memslots */
+	if (memslot_is_locked(memslot))
+		return false;
+
 	size = memslot->npages * PAGE_SIZE;
 
 	gpa_start = memslot->base_gfn << PAGE_SHIFT;
@@ -1296,6 +1305,159 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int try_rlimit_memlock(unsigned long npages)
+{
+	unsigned long lock_limit;
+	bool has_lock_cap;
+	int ret = 0;
+
+	has_lock_cap = capable(CAP_IPC_LOCK);
+	if (has_lock_cap)
+		goto out;
+
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	mmap_read_lock(current->mm);
+	if (npages + current->mm->locked_vm > lock_limit)
+		ret = -ENOMEM;
+	mmap_read_unlock(current->mm);
+
+out:
+	return ret;
+}
+
+static void unpin_memslot_pages(struct kvm_memory_slot *memslot, bool writable)
+{
+	struct kvm_memory_slot_page *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &memslot->arch.pages.list, list) {
+		if (writable)
+			set_page_dirty_lock(entry->page);
+		unpin_user_page(entry->page);
+		kfree(entry);
+	}
+}
+
+static int lock_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			u64 flags)
+{
+	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
+	struct kvm_memory_slot_page *page_entry;
+	bool writable = flags & KVM_ARM_LOCK_MEM_WRITE;
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
+	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	struct vm_area_struct *vma;
+	unsigned long npages = memslot->npages;
+	unsigned int pin_flags = FOLL_LONGTERM;
+	unsigned long i, hva, ipa, mmu_seq;
+	int ret;
+
+	ret = try_rlimit_memlock(npages);
+	if (ret)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&memslot->arch.pages.list);
+
+	if (writable) {
+		prot |= KVM_PGTABLE_PROT_W;
+		pin_flags |= FOLL_WRITE;
+	}
+
+	hva = memslot->userspace_addr;
+	ipa = memslot->base_gfn << PAGE_SHIFT;
+
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	for (i = 0; i < npages; i++) {
+		page_entry = kzalloc(sizeof(*page_entry), GFP_KERNEL);
+		if (!page_entry) {
+			unpin_memslot_pages(memslot, writable);
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		mmap_read_lock(current->mm);
+		ret = pin_user_pages(hva, 1, pin_flags, &page_entry->page, &vma);
+		if (ret != 1) {
+			mmap_read_unlock(current->mm);
+			unpin_memslot_pages(memslot, writable);
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		if (kvm_has_mte(kvm)) {
+			if (vma->vm_flags & VM_SHARED) {
+				ret = -EFAULT;
+			} else {
+				ret = sanitise_mte_tags(kvm,
+					page_to_pfn(page_entry->page),
+					PAGE_SIZE);
+			}
+			if (ret) {
+				mmap_read_unlock(current->mm);
+				goto out_err;
+			}
+		}
+		mmap_read_unlock(current->mm);
+
+		ret = kvm_mmu_topup_memory_cache(&cache, kvm_mmu_cache_min_pages(kvm));
+		if (ret) {
+			unpin_memslot_pages(memslot, writable);
+			goto out_err;
+		}
+
+		spin_lock(&kvm->mmu_lock);
+		if (mmu_notifier_retry(kvm, mmu_seq)) {
+			spin_unlock(&kvm->mmu_lock);
+			unpin_memslot_pages(memslot, writable);
+			ret = -EAGAIN;
+			goto out_err;
+		}
+
+		ret = kvm_pgtable_stage2_map(pgt, ipa, PAGE_SIZE,
+					     page_to_phys(page_entry->page),
+					     prot, &cache);
+		spin_unlock(&kvm->mmu_lock);
+
+		if (ret) {
+			kvm_pgtable_stage2_unmap(pgt, memslot->base_gfn << PAGE_SHIFT,
+						 i << PAGE_SHIFT);
+			unpin_memslot_pages(memslot, writable);
+			goto out_err;
+		}
+		list_add(&page_entry->list, &memslot->arch.pages.list);
+
+		hva += PAGE_SIZE;
+		ipa += PAGE_SIZE;
+	}
+
+
+	/*
+	 * Even though we've checked the limit at the start, we can still exceed
+	 * it if userspace locked other pages in the meantime or if the
+	 * CAP_IPC_LOCK capability has been revoked.
+	 */
+	ret = account_locked_vm(current->mm, npages, true);
+	if (ret) {
+		kvm_pgtable_stage2_unmap(pgt, memslot->base_gfn << PAGE_SHIFT,
+					 npages << PAGE_SHIFT);
+		unpin_memslot_pages(memslot, writable);
+		goto out_err;
+	}
+
+	memslot->arch.flags = KVM_MEMSLOT_LOCK_READ;
+	if (writable)
+		memslot->arch.flags |= KVM_MEMSLOT_LOCK_WRITE;
+
+	kvm_mmu_free_memory_cache(&cache);
+
+	return 0;
+
+out_err:
+	kvm_mmu_free_memory_cache(&cache);
+	return ret;
+}
+
 int kvm_mmu_lock_memslot(struct kvm *kvm, u64 slot, u64 flags)
 {
 	struct kvm_memory_slot *memslot;
@@ -1325,7 +1487,12 @@ int kvm_mmu_lock_memslot(struct kvm *kvm, u64 slot, u64 flags)
 		goto out_unlock_slots;
 	}
 
-	ret = -EINVAL;
+	if (memslot_is_locked(memslot)) {
+		ret = -EBUSY;
+		goto out_unlock_slots;
+	}
+
+	ret = lock_memslot(kvm, memslot, flags);
 
 out_unlock_slots:
 	mutex_unlock(&kvm->slots_lock);
@@ -1335,11 +1502,22 @@ int kvm_mmu_lock_memslot(struct kvm *kvm, u64 slot, u64 flags)
 	return ret;
 }
 
+static void unlock_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+	bool writable = memslot->arch.flags & KVM_MEMSLOT_LOCK_WRITE;
+	unsigned long npages = memslot->npages;
+
+	unpin_memslot_pages(memslot, writable);
+	account_locked_vm(current->mm, npages, false);
+
+	memslot->arch.flags &= ~KVM_MEMSLOT_LOCK_MASK;
+}
+
 int kvm_mmu_unlock_memslot(struct kvm *kvm, u64 slot, u64 flags)
 {
 	bool unlock_all = flags & KVM_ARM_UNLOCK_MEM_ALL;
 	struct kvm_memory_slot *memslot;
-	int ret;
+	int ret = 0;
 
 	if (!unlock_all && slot >= KVM_MEM_SLOTS_NUM)
 		return -EINVAL;
@@ -1347,18 +1525,20 @@ int kvm_mmu_unlock_memslot(struct kvm *kvm, u64 slot, u64 flags)
 	mutex_lock(&kvm->slots_lock);
 
 	if (unlock_all) {
-		ret = -EINVAL;
-		goto out_unlock_slots;
-	}
-
-	memslot = id_to_memslot(kvm_memslots(kvm), slot);
-	if (!memslot) {
-		ret = -EINVAL;
-		goto out_unlock_slots;
+		kvm_for_each_memslot(memslot, kvm_memslots(kvm)) {
+			if (!memslot_is_locked(memslot))
+				continue;
+			unlock_memslot(kvm, memslot);
+		}
+	} else {
+		memslot = id_to_memslot(kvm_memslots(kvm), slot);
+		if (!memslot || !memslot_is_locked(memslot)) {
+			ret = -EINVAL;
+			goto out_unlock_slots;
+		}
+		unlock_memslot(kvm, memslot);
 	}
 
-	ret = -EINVAL;
-
 out_unlock_slots:
 	mutex_unlock(&kvm->slots_lock);
 	return ret;
-- 
2.33.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2021-11-17 15:40 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-17 15:38 [RFC PATCH v5 00/38] KVM: arm64: Add Statistical Profiling Extension (SPE) support Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 01/38] KVM: arm64: Make lock_all_vcpus() available to the rest of KVM Alexandru Elisei
2022-02-15  5:34   ` Reiji Watanabe
2022-02-15 10:34     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 02/38] KVM: arm64: Add lock/unlock memslot user API Alexandru Elisei
2022-02-15  5:59   ` Reiji Watanabe
2022-02-15 11:03     ` Alexandru Elisei
2022-02-15 12:02       ` Marc Zyngier
2022-02-15 12:13         ` Alexandru Elisei
2022-02-17  7:35       ` Reiji Watanabe
2022-02-17 10:31         ` Alexandru Elisei
2022-02-18  4:41           ` Reiji Watanabe
2021-11-17 15:38 ` Alexandru Elisei [this message]
2022-02-15  7:46   ` [RFC PATCH v5 03/38] KVM: arm64: Implement the memslot lock/unlock functionality Reiji Watanabe
2022-02-15 11:26     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 04/38] KVM: arm64: Defer CMOs for locked memslots until a VCPU is run Alexandru Elisei
2022-02-24  5:56   ` Reiji Watanabe
2022-03-21 17:10     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 05/38] KVM: arm64: Perform CMOs on locked memslots when userspace resets VCPUs Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 06/38] KVM: arm64: Delay tag scrubbing for locked memslots until a VCPU runs Alexandru Elisei
2022-03-18  5:03   ` Reiji Watanabe
2022-03-21 17:17     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 07/38] KVM: arm64: Unmap unlocked memslot from stage 2 if kvm_mmu_has_pending_ops() Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 08/38] KVM: arm64: Unlock memslots after stage 2 tables are freed Alexandru Elisei
2022-03-18  5:19   ` Reiji Watanabe
2022-03-21 17:29     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 09/38] KVM: arm64: Deny changes to locked memslots Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 10/38] KVM: Add kvm_warn{,_ratelimited} macros Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 11/38] KVM: arm64: Print a warning for unexpected faults on locked memslots Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 12/38] KVM: arm64: Allow userspace to lock and unlock memslots Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 13/38] KVM: arm64: Add CONFIG_KVM_ARM_SPE Kconfig option Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 14/38] KVM: arm64: Add SPE capability and VCPU feature Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 15/38] perf: arm_spe_pmu: Move struct arm_spe_pmu to a separate header file Alexandru Elisei
2022-07-05 16:57   ` Calvin Owens
2022-07-06 10:51     ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 16/38] KVM: arm64: Allow SPE emulation when the SPE hardware is present Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 17/38] KVM: arm64: Allow userspace to set the SPE feature only if SPE " Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 18/38] KVM: arm64: Expose SPE version to guests Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 19/38] KVM: arm64: Do not run a VCPU on a CPU without SPE Alexandru Elisei
2022-01-10 11:40   ` Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 20/38] KVM: arm64: Add a new VCPU device control group for SPE Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 21/38] KVM: arm64: Add SPE VCPU device attribute to set the interrupt number Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 22/38] KVM: arm64: Add SPE VCPU device attribute to initialize SPE Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 23/38] KVM: arm64: debug: Configure MDCR_EL2 when a VCPU has SPE Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 24/38] KVM: arm64: Move accesses to MDCR_EL2 out of __{activate, deactivate}_traps_common Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 25/38] KVM: arm64: VHE: Change MDCR_EL2 at world switch if VCPU has SPE Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 26/38] KVM: arm64: Add SPE system registers to VCPU context Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 27/38] KVM: arm64: nVHE: Save PMSCR_EL1 to the host context Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 28/38] KVM: arm64: Rename DEBUG_STATE_SAVE_SPE -> DEBUG_SAVE_SPE_BUFFER flags Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 29/38] KVM: arm64: nVHE: Context switch SPE state if VCPU has SPE Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 30/38] KVM: arm64: VHE: " Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 31/38] KVM: arm64: Save/restore PMSNEVFR_EL1 on VCPU put/load Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 32/38] KVM: arm64: Allow guest to use physical timestamps if perfmon_capable() Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 33/38] KVM: arm64: Emulate SPE buffer management interrupt Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 34/38] KVM: arm64: Add an userspace API to stop a VCPU profiling Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 35/38] KVM: arm64: Implement " Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 36/38] KVM: arm64: Add PMSIDR_EL1 to the SPE register context Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 37/38] KVM: arm64: Make CONFIG_KVM_ARM_SPE depend on !CONFIG_NUMA_BALANCING Alexandru Elisei
2021-11-17 15:38 ` [RFC PATCH v5 38/38] KVM: arm64: Allow userspace to enable SPE for guests Alexandru Elisei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211117153842.302159-4-alexandru.elisei@arm.com \
    --to=alexandru.elisei@arm.com \
    --cc=james.morse@arm.com \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=suzuki.poulose@arm.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox