linux-coco.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
From: Suzuki K Poulose <suzuki.poulose@arm.com>
To: Steven Price <steven.price@arm.com>,
	kvm@vger.kernel.org, kvmarm@lists.linux.dev
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Marc Zyngier <maz@kernel.org>, Will Deacon <will@kernel.org>,
	James Morse <james.morse@arm.com>,
	Oliver Upton <oliver.upton@linux.dev>,
	Zenghui Yu <yuzenghui@huawei.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, Joey Gouly <joey.gouly@arm.com>,
	Alexandru Elisei <alexandru.elisei@arm.com>,
	Christoffer Dall <christoffer.dall@arm.com>,
	Fuad Tabba <tabba@google.com>,
	linux-coco@lists.linux.dev,
	Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>,
	Gavin Shan <gshan@redhat.com>,
	Shanker Donthineni <sdonthineni@nvidia.com>,
	Alper Gun <alpergun@google.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@kernel.org>
Subject: Re: [PATCH v5 17/43] arm64: RME: Allow VMM to set RIPAS
Date: Wed, 16 Oct 2024 09:46:26 +0100	[thread overview]
Message-ID: <4075a8bc-2f1e-441d-815e-aaf83e88d3d0@arm.com> (raw)
In-Reply-To: <20241004152804.72508-18-steven.price@arm.com>

Hi Steven

On 04/10/2024 16:27, Steven Price wrote:
> Each page within the protected region of the realm guest can be marked
> as either RAM or EMPTY. Allow the VMM to control this before the guest
> has started and provide the equivalent functions to change this (with
> the guest's approval) at runtime.
> 
> When transitioning from RIPAS RAM (1) to RIPAS EMPTY (0) the memory is
> unmapped from the guest and undelegated allowing the memory to be reused
> by the host. When transitioning to RIPAS RAM the actual population of
> the leaf RTTs is done later on stage 2 fault, however it may be
> necessary to allocate additional RTTs to allow the RMM track the RIPAS
> for the requested range.
> 
> When freeing a block mapping it is necessary to temporarily unfold the
> RTT which requires delegating an extra page to the RMM, this page can
> then be recovered once the contents of the block mapping have been
> freed. A spare, delegated page (spare_page) is used for this purpose.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
> Changes from v2:
>   * {alloc,free}_delegated_page() moved from previous patch to this one.
>   * alloc_delegated_page() now takes a gfp_t flags parameter.
>   * Fix the reference counting of guestmem pages to avoid leaking memory.
>   * Several misc code improvements and extra comments.
> ---
>   arch/arm64/include/asm/kvm_rme.h |  17 ++
>   arch/arm64/kvm/mmu.c             |   8 +-
>   arch/arm64/kvm/rme.c             | 481 ++++++++++++++++++++++++++++++-
>   3 files changed, 501 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index 3a3aaf5d591c..c064bfb080ad 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -96,6 +96,15 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits);
>   int kvm_create_rec(struct kvm_vcpu *vcpu);
>   void kvm_destroy_rec(struct kvm_vcpu *vcpu);
>   
> +void kvm_realm_unmap_range(struct kvm *kvm,
> +			   unsigned long ipa,
> +			   u64 size,
> +			   bool unmap_private);
> +int realm_set_ipa_state(struct kvm_vcpu *vcpu,
> +			unsigned long addr, unsigned long end,
> +			unsigned long ripas,
> +			unsigned long *top_ipa);
> +
>   #define RME_RTT_BLOCK_LEVEL	2
>   #define RME_RTT_MAX_LEVEL	3
>   
> @@ -114,4 +123,12 @@ static inline unsigned long rme_rtt_level_mapsize(int level)
>   	return (1UL << RME_RTT_LEVEL_SHIFT(level));
>   }
>   
> +static inline bool realm_is_addr_protected(struct realm *realm,
> +					   unsigned long addr)
> +{
> +	unsigned int ia_bits = realm->ia_bits;
> +
> +	return !(addr & ~(BIT(ia_bits - 1) - 1));
> +}
> +
>   #endif
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index a26cdac59eb3..23346b1d29cb 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -310,6 +310,7 @@ static void invalidate_icache_guest_page(void *va, size_t size)
>    * @start: The intermediate physical base address of the range to unmap
>    * @size:  The size of the area to unmap
>    * @may_block: Whether or not we are permitted to block
> + * @only_shared: If true then protected mappings should not be unmapped
>    *
>    * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
>    * be called while holding mmu_lock (unless for freeing the stage2 pgd before
> @@ -317,7 +318,7 @@ static void invalidate_icache_guest_page(void *va, size_t size)
>    * with things behind our backs.
>    */
>   static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
> -				 bool may_block)
> +				 bool may_block, bool only_shared)
>   {
>   	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
>   	phys_addr_t end = start + size;
> @@ -330,7 +331,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
>   
>   void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
>   {
> -	__unmap_stage2_range(mmu, start, size, true);
> +	__unmap_stage2_range(mmu, start, size, true, false);
>   }
>   
>   void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
> @@ -1919,7 +1920,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
>   
>   	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
>   			     (range->end - range->start) << PAGE_SHIFT,
> -			     range->may_block);
> +			     range->may_block,
> +			     range->only_shared);
>   
>   	kvm_nested_s2_unmap(kvm);
>   	return false;
> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index 6f0ced6e0cc1..1fa9991d708b 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -47,9 +47,197 @@ static int rmi_check_version(void)
>   	return 0;
>   }
>   
> -u32 kvm_realm_ipa_limit(void)
> +static phys_addr_t alloc_delegated_page(struct realm *realm,
> +					struct kvm_mmu_memory_cache *mc,
> +					gfp_t flags)
>   {
> -	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
> +	phys_addr_t phys = PHYS_ADDR_MAX;
> +	void *virt;
> +
> +	if (realm->spare_page != PHYS_ADDR_MAX) {
> +		swap(realm->spare_page, phys);
> +		goto out;
> +	}
> +
> +	if (mc)
> +		virt = kvm_mmu_memory_cache_alloc(mc);
> +	else
> +		virt = (void *)__get_free_page(flags);
> +
> +	if (!virt)
> +		goto out;
> +
> +	phys = virt_to_phys(virt);
> +
> +	if (rmi_granule_delegate(phys)) {
> +		free_page((unsigned long)virt);
> +
> +		phys = PHYS_ADDR_MAX;
> +	}
> +
> +out:
> +	return phys;
> +}
> +
> +static void free_delegated_page(struct realm *realm, phys_addr_t phys)
> +{
> +	if (realm->spare_page == PHYS_ADDR_MAX) {
> +		realm->spare_page = phys;
> +		return;
> +	}
> +

---  Cut here --

> +	if (WARN_ON(rmi_granule_undelegate(phys))) {
> +		/* Undelegate failed: leak the page */
> +		return;
> +	}
> +
> +	free_page((unsigned long)phys_to_virt(phys));

The above pattern of undelegating and reclaiming a granule (or leaking it
if the undelegate fails) appears elsewhere in the KVM support code. Is it
worth having a common helper to do the same ?

something like: reclaim_delegated_granule()


> +}
> +
> +static int realm_rtt_create(struct realm *realm,
> +			    unsigned long addr,
> +			    int level,
> +			    phys_addr_t phys)
> +{
> +	addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1));
> +	return rmi_rtt_create(virt_to_phys(realm->rd), phys, addr, level);
> +}
> +
> +static int realm_rtt_fold(struct realm *realm,
> +			  unsigned long addr,
> +			  int level,
> +			  phys_addr_t *rtt_granule)
> +{
> +	unsigned long out_rtt;
> +	int ret;
> +
> +	ret = rmi_rtt_fold(virt_to_phys(realm->rd), addr, level, &out_rtt);
> +
> +	if (RMI_RETURN_STATUS(ret) == RMI_SUCCESS && rtt_granule)
> +		*rtt_granule = out_rtt;
> +
> +	return ret;
> +}
> +
> +static int realm_destroy_protected(struct realm *realm,
> +				   unsigned long ipa,
> +				   unsigned long *next_addr)
> +{
> +	unsigned long rd = virt_to_phys(realm->rd);
> +	unsigned long addr;
> +	phys_addr_t rtt;
> +	int ret;
> +
> +loop:
> +	ret = rmi_data_destroy(rd, ipa, &addr, next_addr);
> +	if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
> +		if (*next_addr > ipa)
> +			return 0; /* UNASSIGNED */
> +		rtt = alloc_delegated_page(realm, NULL, GFP_KERNEL);
> +		if (WARN_ON(rtt == PHYS_ADDR_MAX))
> +			return -1;
> +		/*
> +		 * ASSIGNED - ipa is mapped as a block, so split. The index
> +		 * from the return code should be 2 otherwise it appears
> +		 * there's a huge page bigger than allowed

The comment could be misleading. The RMM allows folding up to Level 1, but
the KVM RMM driver doesn't do that yet.

> +		 */
> +		WARN_ON(RMI_RETURN_INDEX(ret) != 2);

I am not sure if we should relax it to something like :

		WARN_ON(RMI_RETURN_INDEX(ret) < 1); ?

and

> +		ret = realm_rtt_create(realm, ipa, 3, rtt);

Use realm_create_rtt_levels(realm, ipa, ...); ?

Agree that it complicates the whole code as we need at least two rtts 
and it becomes horribly complex for something we don't support yet.

So, the best approach would be, in the short term, to fix the comment
to make it explicit that it is something the KVM doesn't do yet.

> +		if (WARN_ON(ret)) {
> +			free_delegated_page(realm, rtt);
> +			return -1;
> +		}
> +		/* retry */
> +		goto loop;
> +	} else if (WARN_ON(ret)) {
> +		return -1;
> +	}


> +	ret = rmi_granule_undelegate(addr);
> +
> +	/*
> +	 * If the undelegate fails then something has gone seriously
> +	 * wrong: take an extra reference to just leak the page
> +	 */
> +	if (!WARN_ON(ret))
> +		put_page(phys_to_page(addr));

The same pattern of "reclaim_delegated_page()" repeats.

> +
> +	return 0;
> +}
> +
> +static void realm_unmap_range_shared(struct kvm *kvm,
> +				     int level,
> +				     unsigned long start,
> +				     unsigned long end)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	unsigned long rd = virt_to_phys(realm->rd);
> +	ssize_t map_size = rme_rtt_level_mapsize(level);
> +	unsigned long next_addr, addr;
> +	unsigned long shared_bit = BIT(realm->ia_bits - 1);
> +
> +	if (WARN_ON(level > RME_RTT_MAX_LEVEL))
> +		return;
> +
> +	start |= shared_bit;
> +	end |= shared_bit;
> +
> +	for (addr = start; addr < end; addr = next_addr) {
> +		unsigned long align_addr = ALIGN(addr, map_size);
> +		int ret;
> +
> +		next_addr = ALIGN(addr + 1, map_size);
> +
> +		if (align_addr != addr || next_addr > end) {
> +			/* Need to recurse deeper */
> +			if (addr < align_addr)
> +				next_addr = align_addr;
> +			realm_unmap_range_shared(kvm, level + 1, addr,
> +						 min(next_addr, end));
> +			continue;
> +		}
> +
> +		ret = rmi_rtt_unmap_unprotected(rd, addr, level, &next_addr);
> +		switch (RMI_RETURN_STATUS(ret)) {
> +		case RMI_SUCCESS:
> +			break;
> +		case RMI_ERROR_RTT:
> +			if (next_addr == addr) {
> +				/*
> +				 * There's a mapping here, but it's not a block
> +				 * mapping, so reset next_addr to the next block
> +				 * boundary and recurse to clear out the pages
> +				 * one level deeper.
> +				 */
> +				next_addr = ALIGN(addr + 1, map_size);

> +				realm_unmap_range_shared(kvm, level + 1, addr,
> +							 next_addr);

In this particular case, we could simply destroy the RTT at level + 1, 
instead of unmapping individual entries at a lower level. Given we have
verified that the entry at "level" covers the range that we want to tear
down and there is an RTT down there. For unprotected IPA range this is
safe and efficient.

> +			}
> +			break;
> +		default:
> +			WARN_ON(1);
> +			return;
> +		}
> +	}
> +}
> +
> +static void realm_unmap_range_private(struct kvm *kvm,
> +				      unsigned long start,
> +				      unsigned long end)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	ssize_t map_size = RME_PAGE_SIZE;
> +	unsigned long next_addr, addr;
> +
> +	for (addr = start; addr < end; addr = next_addr) {
> +		int ret;
> +
> +		next_addr = ALIGN(addr + 1, map_size);
> +
> +		ret = realm_destroy_protected(realm, addr, &next_addr);
> +
> +		if (WARN_ON(ret))
> +			break;

minor nit: We seem to be leaving the "spare" page dangling there, to be
collected later. Maybe we could try to reclaim the RTT pages once
"addr" has advanced a "table's worth" of entries from "start". Or, given
we deal with entries at L3, we could at least reclaim the L3 tables by
checking:

		if (IS_ALIGNED(next_addr, RME_L2_BLOCK_SIZE) &&
		    ALIGN(start, RME_L2_BLOCK_SIZE) != next_addr)
			/* Fold rtt for (addr, level=3) */

> +	}
>   }
>   
>   static int get_start_level(struct realm *realm)
> @@ -57,6 +245,26 @@ static int get_start_level(struct realm *realm)
>   	return 4 - stage2_pgtable_levels(realm->ia_bits);
>   }
>   
> +static void realm_unmap_range(struct kvm *kvm,
> +			      unsigned long start,
> +			      unsigned long end,
> +			      bool unmap_private)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +
> +	if (realm->state == REALM_STATE_NONE)
> +		return;
> +
> +	realm_unmap_range_shared(kvm, get_start_level(realm), start, end);

Could we use the "find_map_level(start, end)" instead of starting at the
top level ?

> +	if (unmap_private)
> +		realm_unmap_range_private(kvm, start, end);
> +}
> +
> +u32 kvm_realm_ipa_limit(void)
> +{
> +	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
> +}
> +
>   static int realm_create_rd(struct kvm *kvm)
>   {
>   	struct realm *realm = &kvm->arch.realm;
> @@ -140,6 +348,30 @@ static int realm_rtt_destroy(struct realm *realm, unsigned long addr,
>   	return ret;
>   }
>   
> +static int realm_create_rtt_levels(struct realm *realm,
> +				   unsigned long ipa,
> +				   int level,
> +				   int max_level,
> +				   struct kvm_mmu_memory_cache *mc)
> +{
> +	if (WARN_ON(level == max_level))
> +		return 0;
> +
> +	while (level++ < max_level) {
> +		phys_addr_t rtt = alloc_delegated_page(realm, mc, GFP_KERNEL);
> +
> +		if (rtt == PHYS_ADDR_MAX)
> +			return -ENOMEM;
> +
> +		if (realm_rtt_create(realm, ipa, level, rtt)) {
> +			free_delegated_page(realm, rtt);
> +			return -ENXIO;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static int realm_tear_down_rtt_level(struct realm *realm, int level,
>   				     unsigned long start, unsigned long end)
>   {
> @@ -231,6 +463,90 @@ static int realm_tear_down_rtt_range(struct realm *realm,
>   					 start, end);
>   }
>   
> +/*
> + * Returns 0 on successful fold, a negative value on error, a positive value if
> + * we were not able to fold all tables at this level.
> + */
> +static int realm_fold_rtt_level(struct realm *realm, int level,
> +				unsigned long start, unsigned long end)
> +{
> +	int not_folded = 0;
> +	ssize_t map_size;
> +	unsigned long addr, next_addr;
> +
> +	if (WARN_ON(level > RME_RTT_MAX_LEVEL))
> +		return -EINVAL;
> +
> +	map_size = rme_rtt_level_mapsize(level - 1);
> +
> +	for (addr = start; addr < end; addr = next_addr) {
> +		phys_addr_t rtt_granule;
> +		int ret;
> +		unsigned long align_addr = ALIGN(addr, map_size);
> +
> +		next_addr = ALIGN(addr + 1, map_size);
> +
> +		ret = realm_rtt_fold(realm, align_addr, level, &rtt_granule);
> +
> +		switch (RMI_RETURN_STATUS(ret)) {
> +		case RMI_SUCCESS:
> +			if (!WARN_ON(rmi_granule_undelegate(rtt_granule)))
> +				free_page((unsigned long)phys_to_virt(rtt_granule));

Same pattern for reclaim_delegated_page()

> +			break;
> +		case RMI_ERROR_RTT:
> +			if (level == RME_RTT_MAX_LEVEL ||
> +			    RMI_RETURN_INDEX(ret) < level) {
> +				not_folded++;
> +				break;
> +			}
> +			/* Recurse a level deeper */
> +			ret = realm_fold_rtt_level(realm,
> +						   level + 1,
> +						   addr,
> +						   next_addr);
> +			if (ret < 0)
> +				return ret;
> +			else if (ret == 0)
> +				/* Try again at this level */
> +				next_addr = addr;
> +			break;
> +		default:
> +			WARN_ON(1);
> +			return -ENXIO;
> +		}
> +	}
> +
> +	return not_folded;
> +}
> +
> +static int realm_fold_rtt_range(struct realm *realm,
> +				unsigned long start, unsigned long end)
> +{
> +	return realm_fold_rtt_level(realm, get_start_level(realm) + 1,

Could we use find_map_level() for this too ?

> +				    start, end);
> +}
> +
> +static void ensure_spare_page(struct realm *realm)
> +{
> +	phys_addr_t tmp_rtt;
> +
> +	/*
> +	 * Make sure we have a spare delegated page for tearing down the
> +	 * block mappings. We do this by allocating then freeing a page.
> +	 * We must use Atomic allocations as we are called with kvm->mmu_lock
> +	 * held.
> +	 */
> +	tmp_rtt = alloc_delegated_page(realm, NULL, GFP_ATOMIC);
> +
> +	/*
> +	 * If the allocation failed, continue as we may not have a block level
> +	 * mapping so it may not be fatal, otherwise free it to assign it
> +	 * to the spare page.
> +	 */
> +	if (tmp_rtt != PHYS_ADDR_MAX)

Ah, it took a while to understand this logic :-(. "free it to assign it"
sounds unconventional, but it works.

> +		free_delegated_page(realm, tmp_rtt);
> +}
> +
>   void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits)
>   {
>   	struct realm *realm = &kvm->arch.realm;
> @@ -238,6 +554,155 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits)
>   	WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits)));
>   }
>   
> +void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, u64 size,
> +			   bool unmap_private)
> +{
> +	unsigned long end = ipa + size;
> +	struct realm *realm = &kvm->arch.realm;
> +
> +	end = min(BIT(realm->ia_bits - 1), end);
> +
> +	ensure_spare_page(realm);
> +
> +	realm_unmap_range(kvm, ipa, end, unmap_private);
> +
> +	if (unmap_private)
> +		realm_fold_rtt_range(realm, ipa, end);

I see this (and it ate up my aggressive RTT reclaim thoughts at
realm_unmap_range_private()), but we may need more than one spare
rtt if the range is sufficiently big, given the "spare" rtt is not
reclaimed until we finish the range ? Maybe we could reclaim the
"spare" rtt alone as we make progress, as commented above ?

> +}
> +
> +static int find_map_level(struct realm *realm,
> +			  unsigned long start,
> +			  unsigned long end)
> +{

As mentioned above, this is useful for unmap_shared() to
start at a level that is suitable.

> +	int level = RME_RTT_MAX_LEVEL;
> +
> +	while (level > get_start_level(realm)) {
> +		unsigned long map_size = rme_rtt_level_mapsize(level - 1);
> +
> +		if (!IS_ALIGNED(start, map_size) ||
> +		    (start + map_size) > end)
> +			break;
> +
> +		level--;
> +	}
> +
> +	return level;
> +}
> +
> +int realm_set_ipa_state(struct kvm_vcpu *vcpu,
> +			unsigned long start,
> +			unsigned long end,
> +			unsigned long ripas,
> +			unsigned long *top_ipa)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct realm *realm = &kvm->arch.realm;
> +	struct realm_rec *rec = &vcpu->arch.rec;
> +	phys_addr_t rd_phys = virt_to_phys(realm->rd);
> +	phys_addr_t rec_phys = virt_to_phys(rec->rec_page);
> +	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
> +	unsigned long ipa = start;
> +	int ret = 0;
> +
> +	while (ipa < end) {
> +		unsigned long next;
> +
> +		ret = rmi_rtt_set_ripas(rd_phys, rec_phys, ipa, end, &next);
> +
> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
> +			int walk_level = RMI_RETURN_INDEX(ret);
> +			int level = find_map_level(realm, ipa, end);
> +
> +			/*
> +			 * If the RMM walk ended early then more tables are
> +			 * needed to reach the required depth to set the RIPAS.
> +			 */
> +			if (walk_level < level) {
> +				ret = realm_create_rtt_levels(realm, ipa,
> +							      walk_level,
> +							      level,
> +							      memcache);
> +				/* Retry with RTTs created */
> +				if (!ret)
> +					continue;
> +			} else {
> +				ret = -EINVAL;
> +			}
> +
> +			break;
> +		} else if (RMI_RETURN_STATUS(ret) != RMI_SUCCESS) {
> +			WARN(1, "Unexpected error in %s: %#x\n", __func__,
> +			     ret);
> +			ret = -EINVAL;
> +			break;
> +		}
> +		ipa = next;
> +	}
> +
> +	*top_ipa = ipa;
> +
> +	if (ripas == RMI_EMPTY && ipa != start) {
> +		realm_unmap_range_private(kvm, start, ipa);
> +		realm_fold_rtt_range(realm, start, ipa);

minor nit: Could we not move the realm_fold_rtt_range() call into
realm_unmap_range_private() ? In both cases (from here as well as from
kvm_realm_unmap_range()) we seem to be doing it.

Suzuki



> +	}
> +
> +	return ret;
> +}
> +
> +static int realm_init_ipa_state(struct realm *realm,
> +				unsigned long ipa,
> +				unsigned long end)
> +{
> +	phys_addr_t rd_phys = virt_to_phys(realm->rd);
> +	int ret;
> +
> +	while (ipa < end) {
> +		unsigned long next;
> +
> +		ret = rmi_rtt_init_ripas(rd_phys, ipa, end, &next);
> +
> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
> +			int err_level = RMI_RETURN_INDEX(ret);
> +			int level = find_map_level(realm, ipa, end);
> +
> +			if (WARN_ON(err_level >= level))
> +				return -ENXIO;
> +
> +			ret = realm_create_rtt_levels(realm, ipa,
> +						      err_level,
> +						      level, NULL);
> +			if (ret)
> +				return ret;
> +			/* Retry with the RTT levels in place */
> +			continue;
> +		} else if (WARN_ON(ret)) {
> +			return -ENXIO;
> +		}
> +
> +		ipa = next;
> +	}
> +
> +	return 0;
> +}
> +
> +static int kvm_init_ipa_range_realm(struct kvm *kvm,
> +				    struct kvm_cap_arm_rme_init_ipa_args *args)
> +{
> +	gpa_t addr, end;
> +	struct realm *realm = &kvm->arch.realm;
> +
> +	addr = args->init_ipa_base;
> +	end = addr + args->init_ipa_size;
> +
> +	if (end < addr)
> +		return -EINVAL;
> +
> +	if (kvm_realm_state(kvm) != REALM_STATE_NEW)
> +		return -EINVAL;
> +
> +	return realm_init_ipa_state(realm, addr, end);
> +}
> +
>   /* Protects access to rme_vmid_bitmap */
>   static DEFINE_SPINLOCK(rme_vmid_lock);
>   static unsigned long *rme_vmid_bitmap;
> @@ -363,6 +828,18 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>   	case KVM_CAP_ARM_RME_CREATE_RD:
>   		r = kvm_create_realm(kvm);
>   		break;
> +	case KVM_CAP_ARM_RME_INIT_IPA_REALM: {
> +		struct kvm_cap_arm_rme_init_ipa_args args;
> +		void __user *argp = u64_to_user_ptr(cap->args[1]);
> +
> +		if (copy_from_user(&args, argp, sizeof(args))) {
> +			r = -EFAULT;
> +			break;
> +		}
> +
> +		r = kvm_init_ipa_range_realm(kvm, &args);
> +		break;
> +	}
>   	default:
>   		r = -EINVAL;
>   		break;


  reply	other threads:[~2024-10-16  8:46 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-04 15:27 [PATCH v5 00/43] arm64: Support for Arm CCA in KVM Steven Price
2024-10-04 15:27 ` [PATCH v5 01/43] KVM: Prepare for handling only shared mappings in mmu_notifier events Steven Price
2024-10-04 15:27 ` [PATCH v5 02/43] kvm: arm64: pgtable: Track the number of pages in the entry level Steven Price
2024-10-23  4:03   ` Gavin Shan
2024-10-23 14:35     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 03/43] kvm: arm64: Include kvm_emulate.h in kvm/arm_psci.h Steven Price
2024-10-04 15:27 ` [PATCH v5 04/43] arm64: RME: Handle Granule Protection Faults (GPFs) Steven Price
2024-10-24 14:17   ` Aneesh Kumar K.V
2024-10-25 13:24     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 05/43] arm64: RME: Add SMC definitions for calling the RMM Steven Price
2024-10-07  8:54   ` Suzuki K Poulose
2024-10-25  6:37   ` Gavin Shan
2024-10-25 13:24     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 06/43] arm64: RME: Add wrappers for RMI calls Steven Price
2024-10-25  7:03   ` Gavin Shan
2024-10-25 13:24     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 07/43] arm64: RME: Check for RME support at KVM init Steven Price
2024-10-07 10:34   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 08/43] arm64: RME: Define the user ABI Steven Price
2024-10-04 15:27 ` [PATCH v5 09/43] arm64: RME: ioctls to create and configure realms Steven Price
2024-10-08 16:31   ` Suzuki K Poulose
2024-10-30  7:55   ` Aneesh Kumar K.V
2024-11-01 16:22     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 10/43] kvm: arm64: Expose debug HW register numbers for Realm Steven Price
2024-10-04 15:27 ` [PATCH v5 11/43] arm64: kvm: Allow passing machine type in KVM creation Steven Price
2024-10-04 15:27 ` [PATCH v5 12/43] arm64: RME: Keep a spare page delegated to the RMM Steven Price
2024-10-04 15:27 ` [PATCH v5 13/43] arm64: RME: RTT tear down Steven Price
2024-10-15 11:25   ` Suzuki K Poulose
2024-11-01 16:35     ` Steven Price
2024-10-04 15:27 ` [PATCH v5 14/43] arm64: RME: Allocate/free RECs to match vCPUs Steven Price
2024-10-15 12:48   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 15/43] arm64: RME: Support for the VGIC in realms Steven Price
2024-10-15 13:02   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 16/43] KVM: arm64: Support timers in realm RECs Steven Price
2024-10-04 15:27 ` [PATCH v5 17/43] arm64: RME: Allow VMM to set RIPAS Steven Price
2024-10-16  8:46   ` Suzuki K Poulose [this message]
2024-10-30  7:52     ` Aneesh Kumar K.V
2024-10-04 15:27 ` [PATCH v5 18/43] arm64: RME: Handle realm enter/exit Steven Price
2024-10-17 13:00   ` Suzuki K Poulose
2024-11-29 12:18     ` Steven Price
2024-11-29 13:45       ` Suzuki K Poulose
2024-11-29 14:55         ` Steven Price
2024-10-04 15:27 ` [PATCH v5 19/43] KVM: arm64: Handle realm MMIO emulation Steven Price
2024-10-07  4:31   ` Aneesh Kumar K.V
2024-10-07 10:22     ` Steven Price
2024-10-17 11:59   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 20/43] arm64: RME: Allow populating initial contents Steven Price
2024-10-04 15:27 ` [PATCH v5 21/43] arm64: RME: Runtime faulting of memory Steven Price
2024-10-22  5:36   ` Aneesh Kumar K.V
2024-10-23  5:50   ` Aneesh Kumar K.V
2024-10-24 13:51   ` Suzuki K Poulose
2024-10-24 14:30   ` Aneesh Kumar K.V
2024-10-04 15:27 ` [PATCH v5 22/43] KVM: arm64: Handle realm VCPU load Steven Price
2024-10-04 15:27 ` [PATCH v5 23/43] KVM: arm64: Validate register access for a Realm VM Steven Price
2024-10-17 15:32   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 24/43] KVM: arm64: Handle Realm PSCI requests Steven Price
2024-10-04 15:27 ` [PATCH v5 25/43] KVM: arm64: WARN on injected undef exceptions Steven Price
2024-10-04 15:27 ` [PATCH v5 26/43] arm64: Don't expose stolen time for realm guests Steven Price
2024-10-18 13:17   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 27/43] arm64: rme: allow userspace to inject aborts Steven Price
2024-10-04 15:27 ` [PATCH v5 28/43] arm64: rme: support RSI_HOST_CALL Steven Price
2024-10-04 15:27 ` [PATCH v5 29/43] arm64: rme: Allow checking SVE on VM instance Steven Price
2024-10-04 15:27 ` [PATCH v5 30/43] arm64: RME: Always use 4k pages for realms Steven Price
2024-10-04 15:27 ` [PATCH v5 31/43] arm64: rme: Prevent Device mappings for Realms Steven Price
2024-10-18 13:30   ` Suzuki K Poulose
2024-10-04 15:27 ` [PATCH v5 32/43] arm_pmu: Provide a mechanism for disabling the physical IRQ Steven Price
2024-10-04 15:27 ` [PATCH v5 33/43] arm64: rme: Enable PMU support with a realm guest Steven Price
2024-10-04 15:27 ` [PATCH v5 34/43] kvm: rme: Hide KVM_CAP_READONLY_MEM for realm guests Steven Price
2024-10-04 15:27 ` [PATCH v5 35/43] arm64: RME: Propagate number of breakpoints and watchpoints to userspace Steven Price
2024-10-04 15:27 ` [PATCH v5 36/43] arm64: RME: Set breakpoint parameters through SET_ONE_REG Steven Price
2024-10-04 15:27 ` [PATCH v5 37/43] arm64: RME: Initialize PMCR.N with number counter supported by RMM Steven Price
2024-10-04 15:27 ` [PATCH v5 38/43] arm64: RME: Propagate max SVE vector length from RMM Steven Price
2024-10-04 15:28 ` [PATCH v5 39/43] arm64: RME: Configure max SVE vector length for a Realm Steven Price
2024-10-04 15:28 ` [PATCH v5 40/43] arm64: RME: Provide register list for unfinalized RME RECs Steven Price
2024-10-04 15:28 ` [PATCH v5 41/43] arm64: RME: Provide accurate register list Steven Price
2024-10-04 15:28 ` [PATCH v5 42/43] arm64: kvm: Expose support for private memory Steven Price
2024-10-09  7:03   ` kernel test robot
2024-10-04 15:28 ` [PATCH v5 43/43] KVM: arm64: Allow activating realms Steven Price
2024-12-02  5:10 ` [PATCH v5 00/43] arm64: Support for Arm CCA in KVM Itaru Kitayama
2024-12-02  8:54   ` Steven Price
2024-12-02 10:26     ` Jean-Philippe Brucker
2024-12-02 10:42       ` Itaru Kitayama

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4075a8bc-2f1e-441d-815e-aaf83e88d3d0@arm.com \
    --to=suzuki.poulose@arm.com \
    --cc=alexandru.elisei@arm.com \
    --cc=alpergun@google.com \
    --cc=aneesh.kumar@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=christoffer.dall@arm.com \
    --cc=gankulkarni@os.amperecomputing.com \
    --cc=gshan@redhat.com \
    --cc=james.morse@arm.com \
    --cc=joey.gouly@arm.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=sdonthineni@nvidia.com \
    --cc=steven.price@arm.com \
    --cc=tabba@google.com \
    --cc=will@kernel.org \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).