Linux Confidential Computing Development

Linux Confidential Computing Development
 help / color / mirror / Atom feed

* Re: [PATCH v14 29/44] arm64: RMI: Runtime faulting of memory
From: Gavin Shan @ 2026-06-25 13:53 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: Steven Price, kvm, kvmarm, Catalin Marinas, Marc Zyngier,
	Will Deacon, James Morse, Oliver Upton, Suzuki K Poulose,
	Zenghui Yu, linux-arm-kernel, linux-kernel, Joey Gouly,
	Alexandru Elisei, Christoffer Dall, Fuad Tabba, linux-coco,
	Ganapatrao Kulkarni, Shanker Donthineni, Alper Gun,
	Aneesh Kumar K . V, Emi Kisanuki, Vishal Annapurve, WeiLin.Chang,
	Lorenzo.Pieralisi2
In-Reply-To: <aiLes2ecZSr17UwZ@lpieralisi>

On 6/6/26 12:35 AM, Lorenzo Pieralisi wrote:
> On Fri, Jun 05, 2026 at 06:11:11PM +1000, Gavin Shan wrote:
>> On 6/5/26 5:28 PM, Lorenzo Pieralisi wrote:
>>> On Fri, Jun 05, 2026 at 04:23:15PM +1000, Gavin Shan wrote:
>>>
>>> [...]
>>>
>>>>> +static int realm_map_ipa(struct kvm *kvm, phys_addr_t ipa,
>>>>> +			 kvm_pfn_t pfn, unsigned long map_size,
>>>>> +			 enum kvm_pgtable_prot prot,
>>>>> +			 struct kvm_mmu_memory_cache *memcache)
>>>>> +{
>>>>> +	struct realm *realm = &kvm->arch.realm;
>>>>> +
>>>>> +	/*
>>>>> +	 * Write permission is required for now even though it's possible to
>>>>> +	 * map unprotected pages (granules) as read-only. It's impossible to
>>>>> +	 * map protected pages (granules) as read-only.
>>>>> +	 */
>>>>> +	if (WARN_ON(!(prot & KVM_PGTABLE_PROT_W)))
>>>>> +		return -EFAULT;
>>>>> +
>>>>
>>>> I'm a bit concerned with this. We don't have KVM_PGTABLE_PROT_W set in @prot
>>>> if the stage2 fault is raised due to memory read. With -EFAULT returned to VMM
>>>> (e.g. QEMU), the vCPU continuous execution is stopped and system won't be
>>>> working any more.
>>>>
>>>>> +	ipa = ALIGN_DOWN(ipa, PAGE_SIZE);
>>>>> +	if (!kvm_realm_is_private_address(realm, ipa))
>>>>> +		return realm_map_non_secure(realm, ipa, pfn, map_size, prot,
>>>>> +					    memcache);
>>>>> +
>>>>> +	return realm_map_protected(kvm, ipa, pfn, map_size, memcache);
>>>>> +}
>>>>> +
>>>>>     static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
>>>>>     {
>>>>>     	switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
>>>>> @@ -1604,27 +1641,52 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>>     	bool write_fault, exec_fault;
>>>>>     	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
>>>>>     	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
>>>>> -	struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
>>>>> +	struct kvm_vcpu *vcpu = s2fd->vcpu;
>>>>> +	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
>>>>> +	gpa_t gpa = kvm_gpa_from_fault(vcpu->kvm, s2fd->fault_ipa);
>>>>>     	unsigned long mmu_seq;
>>>>>     	struct page *page;
>>>>> -	struct kvm *kvm = s2fd->vcpu->kvm;
>>>>> +	struct kvm *kvm = vcpu->kvm;
>>>>>     	void *memcache;
>>>>>     	kvm_pfn_t pfn;
>>>>>     	gfn_t gfn;
>>>>>     	int ret;
>>>>> -	memcache = get_mmu_memcache(s2fd->vcpu);
>>>>> -	ret = topup_mmu_memcache(s2fd->vcpu, memcache);
>>>>> +	if (kvm_is_realm(vcpu->kvm)) {
>>>>> +		/* check for memory attribute mismatch */
>>>>> +		bool is_priv_gfn = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
>>>>> +		/*
>>>>> +		 * For Realms, the shared address is an alias of the private
>>>>> +		 * PA with the top bit set. Thus if the fault address matches
>>>>> +		 * the GPA then it is the private alias.
>>>>> +		 */
>>>>> +		bool is_priv_fault = (gpa == s2fd->fault_ipa);
>>>>> +
>>>>> +		if (is_priv_gfn != is_priv_fault) {
>>>>> +			kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>>>>> +						      kvm_is_write_fault(vcpu),
>>>>> +						      false,
>>>>> +						      is_priv_fault);
>>>>> +			/*
>>>>> +			 * KVM_EXIT_MEMORY_FAULT requires an return code of
>>>>> +			 * -EFAULT, see the API documentation
>>>>> +			 */
>>>>> +			return -EFAULT;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	memcache = get_mmu_memcache(vcpu);
>>>>> +	ret = topup_mmu_memcache(vcpu, memcache);
>>>>>     	if (ret)
>>>>>     		return ret;
>>>>>     	if (s2fd->nested)
>>>>>     		gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
>>>>>     	else
>>>>> -		gfn = s2fd->fault_ipa >> PAGE_SHIFT;
>>>>> +		gfn = gpa >> PAGE_SHIFT;
>>>>> -	write_fault = kvm_is_write_fault(s2fd->vcpu);
>>>>> -	exec_fault = kvm_vcpu_trap_is_exec_fault(s2fd->vcpu);
>>>>> +	write_fault = kvm_is_write_fault(vcpu);
>>>>> +	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
>>>>>     	VM_WARN_ON_ONCE(write_fault && exec_fault);
>>>>> @@ -1634,7 +1696,7 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>>     	ret = kvm_gmem_get_pfn(kvm, s2fd->memslot, gfn, &pfn, &page, NULL);
>>>>>     	if (ret) {
>>>>> -		kvm_prepare_memory_fault_exit(s2fd->vcpu, s2fd->fault_ipa, PAGE_SIZE,
>>>>> +		kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>>>>>     					      write_fault, exec_fault, false);
>>>>>     		return ret;
>>>>>     	}
>>>>> @@ -1654,14 +1716,20 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>>     	kvm_fault_lock(kvm);
>>>>>     	if (mmu_invalidate_retry(kvm, mmu_seq)) {
>>>>>     		ret = -EAGAIN;
>>>>> -		goto out_unlock;
>>>>> +		goto out_release_page;
>>>>> +	}
>>>>> +
>>>>> +	if (kvm_is_realm(kvm)) {
>>>>> +		ret = realm_map_ipa(kvm, s2fd->fault_ipa, pfn,
>>>>> +				    PAGE_SIZE, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W, memcache);
>>>>> +		goto out_release_page;
>>>>>     	}
>>>>>     	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
>>>>>     						 __pfn_to_phys(pfn), prot,
>>>>>     						 memcache, flags);
>>>>> -out_unlock:
>>>>> +out_release_page:
>>>>>     	kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
>>>>>     	kvm_fault_unlock(kvm);
>>>>> @@ -1847,7 +1915,7 @@ static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
>>>>>     	 * mapping size to ensure we find the right PFN and lay down the
>>>>>     	 * mapping in the right place.
>>>>>     	 */
>>>>> -	s2vi->gfn = ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
>>>>> +	s2vi->gfn = kvm_gpa_from_fault(kvm, ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize)) >> PAGE_SHIFT;
>>>>>     	s2vi->mte_allowed = kvm_vma_mte_allowed(vma);
>>>>> @@ -2056,6 +2124,9 @@ static int kvm_s2_fault_map(const struct kvm_s2_fault_desc *s2fd,
>>>>>     		prot &= ~KVM_NV_GUEST_MAP_SZ;
>>>>>     		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, gfn_to_gpa(gfn),
>>>>>     								 prot, flags);
>>>>> +	} else if (kvm_is_realm(kvm)) {
>>>>> +		ret = realm_map_ipa(kvm, s2fd->fault_ipa, pfn, mapping_size,
>>>>> +				    prot, memcache);
>>>>>     	} else {
>>>>>     		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, gfn_to_gpa(gfn), mapping_size,
>>>>>     							 __pfn_to_phys(pfn), prot,
>>>>
>>>> For the case kvm_is_realm(), do we need to adjust 's2fd->fault_ipa' for the sake of
>>>> huge pages? In kvm_s2_fault_map(), @gfn and @pfn may have been adjusted by
>>>> transparent_hugepage_adjust() to be aligned with the huge page size. If the
>>>> adjustment happened in transparent_hugepage_adjust(), we need to align
>>>> s2fd->fault_ipa down to the huge page size as well.
>>>
>>> All of the above + some RMM changes are needed to get QEmu VMM going
>>> with anon pages guest memory backing - currently testing various
>>> configurations in the background.
>>>
>>
>> I tried to rebase Jean's latest QEMU series [1] to upstream QEMU, and found
>> that memory slots backed by THP are broken. With THP disabled on the host and
>> other fixes (mentioned in my previous replies) applied on the top of this (v14)
>> series, I'm able to boot a realm guest with rebased QEMU series [2], plus more
>> fixes on the top.
>>
>> [1] https://git.codelinaro.org/linaro/dcap/qemu.git  (branch: cca/latest)
>> [2] https://git.qemu.org/git/qemu.git                (branch: cca/gavin)
>>
>> Lorenzo, You may be saying there is someone making QEMU to support ARM/CCA?
> 
> Mathieu and I are working on that yes and with Steven/Suzuki to fix the THP
> issues you pointed out above.
> 
>> If so, I'm not sure if there is a QEMU repository for me to try?
> 
> We should be able to submit patches by end of June - we shall let you know
> whether we can make something available earlier.
> 

I'm not sure whether there are other known issues in this series, but it seems the
stage2 page fault handling on the shared space isn't working well. In my test, the
vring (struct vring_desc) of virtio-net-pci is updated by the guest, but the
data isn't seen by QEMU. I wonder whether the host page frame number is properly
resolved in the s2 page fault handler for the shared (unprotected) space.

- I rebased Jean's latest qemu branch to the upstream qemu;

- On the host, which is emulated by qemu/tcg, the THP (transparent huge page) is
   disabled.

- On the guest, I can see the virtio vring (struct vring_desc) is updated. The
   S1 page-table entry looks correct because the corresponding physical address
   0x10046880000 is a sane shared (unprotected) space address.

   [   52.094143] software IO TLB: Memory encryption is active and system is using DMA bounce buffers
   [   52.289746] virtqueue_add_desc_split: desc[0]@0xffff000006880000, [00000100b983f000  00000640  0002  0001]
   [   52.432150] PTE 0x00e8010046880707 at address 0xffff000006880000

- On the host, the s2 page-table-entry is unmapped due to attribute transition (private -> shared).
   A subsequent S2 page fault is raised against the address and the s2 page-table-entry is built.

   [  109.259077] ====> realm_unmap_shared_range: tracked_unprot_addr=0x10046880000
   [  109.260249] realm_unmap_shared_range: unmapped shared range at 0x10046880000
   [  109.317786] realm_unmap_shared_range: unmapped shared range at 0x10046880000
   [  109.629939] ====> kvm_handle_guest_abort: fault_ipa=0x10046880000, esr=0x92000007
   [  109.630245] realm_map_non_secure: ipa=0x10046880000, pfn=0xb8b59, size=0x1000, prot=0xf
   [  109.630331] realm_map_non_secure: ipa=0x10046880000, ipa_top=0x10046881000, flags=0x1e0001, range_desc=0xb8b59004

- On QEMU, the updated vring (struct vring_desc) at GPA 0x46880000 isn't seen. All the
   data at that address is zero.

   ====> virtqueue_split_pop: vdev=<virtio-net>, sz=0x38, queue_index=0x0, vq->vring.num=0x100
   virtqueue_split_pop: last_avail_idx=0x0, head=0x0
   address_space_read_cached_slow: cache@0xffff1c036440, addr=0x0, buf=0xffffeee34880, len=0x10
   address_space_read_cached_slow: cache: ptr=0x0, xlat=0x10046880000, len=0x1000, mrs=<realm-dma-region>, is_write=no
   address_space_read_cached_slow: translated to mr=<mach-virt.ram>, mr_addr=0x6880000, l=0x10
   flatview_read_continue_step: mr=<mach-virt.ram>, host=0xffff23e00000, mr_addr=0x6880000, ram_ptr=0xffff2a680000
   virtqueue_split_pop: desc: 0000000000000000 - 00000000 - 00000000 - 00000000
   qemu-system-aarch64: virtio: zero sized buffers are not allowed


Thanks,
Gavin


^ permalink raw reply

* Re: [PATCH v8 18/46] KVM: guest_memfd: Handle lru_add fbatch refcounts during conversion safety check
From: David Hildenbrand (Arm) @ 2026-06-25 12:57 UTC (permalink / raw)
  To: Sean Christopherson, Ackerley Tng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, jmattson,
	jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, tabba, willy,
	wyihan, yan.y.zhao, forkloop, pratyush, suzuki.poulose,
	aneesh.kumar, liam, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Jonathan Corbet, Shuah Khan,
	Shuah Khan, Vishal Annapurve, Andrew Morton, Chris Li,
	Kairui Song, Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen,
	Yuanchu Xie, Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt,
	Kiryl Shutsemau, Baoquan He, Jason Gunthorpe, Vlastimil Babka,
	kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <ajx3vmNPRf-M9kR6@google.com>

On 6/25/26 02:35, Sean Christopherson wrote:
> On Wed, Jun 24, 2026, Ackerley Tng wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>>
>>>
>>> Under what circumstances does this happen,
>>
>> It happened 100% of the time in selftests. Perhaps it's because in the
>> selftests the pages are almost always freshly allocated and so the
>> lru_add fbatch isn't full yet? (and that the host isn't super busy so
>> lru_add fbatch doesn't get drained yet).
> 
> I chatted with Ackerley about this.  What I wanted to understand is why guest_memfd
> pages were getting put onto per-CPU batches for lru_add(), given that guest_memfd
> pages are unevictable.  The answer (assuming I read the code right), is that
> lruvec_add_folio() updates stats and other per-lru metadata for the unevictable
> lru, and does so under a per-lru lock.  I.e. we don't want to skip that stuff
> entirely.

Hm. Our pages don't participate in any LRU activity (including
isolation+migration). Isolation+migration would only apply once we'd support
page migration.

But yes, secretmem also does it like that: filemap_add_folio() will call
folio_add_lru().

Traditionally we used the unevictable LRU only for mlock purposes.

But yeah, there are "unevictable" stats involved ....

> 
> One thought I had, to avoid the IPIs that draining all per-CPU caches requires,
> was to disallow putting guest_memfd pages in folio batches, e.g. by hacking
> something into folio_may_be_lru_cached().  But due to taking a per-lru lock,
> that would penalize the relatively hot path and definitely common operation of
> faulting in guest memory.  On the other hand, memory conversion is already a
> relatively slow operation and is relatively uncommon compared to page faults,
> (and likely very uncommon for real world setups).  I.e. having to drain all
> caches if conversion isn't safe penalizes a relatively slow, relatively uncommon
> path.

Yeah, the lru_add_drain_all is rather messy.

We have similar code in

collect_longterm_unpinnable_folios(), where we first try a lru_add_drain(), to
then escalate to a lru_add_drain_all().

Maybe we could factor that (suboptimal code) out to not have to reinvent the
same thing multiple times?

-- 
Cheers,

David

^ permalink raw reply

* Re: [PATCH v7 10/42] KVM: guest_memfd: Ensure pages are not in use before conversion
From: David Hildenbrand (Arm) @ 2026-06-25 12:36 UTC (permalink / raw)
  To: Ackerley Tng, Vlastimil Babka (SUSE), aik, andrew.jones,
	binbin.wu, brauner, chao.p.peng, ira.weiny, jmattson, jthoughton,
	michael.roth, oupton, pankaj.gupta, qperret, rick.p.edgecombe,
	rientjes, shivankg, steven.price, tabba, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Baoquan He, Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu,
	Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
	Jason Gunthorpe
  Cc: kvm, linux-kernel, linux-trace-kernel, linux-doc, linux-kselftest,
	linux-mm, linux-coco
In-Reply-To: <CAEvNRgHM4a66Jx9++6iioQLpFY-KgPvjY5+bg_X97DfSjpXzRQ@mail.gmail.com>

On 6/19/26 02:17, Ackerley Tng wrote:
> "Vlastimil Babka (SUSE)" <vbabka@kernel.org> writes:
> 
>> On 5/23/26 02:17, Ackerley Tng via B4 Relay wrote:
>>> From: Ackerley Tng <ackerleytng@google.com>
>>>
>>> When converting memory to private in guest_memfd, it is necessary to ensure
>>> that the pages are not currently being accessed by any other part of the
>>> kernel or userspace to avoid any current user writing to guest private
>>> memory.
>>>
>>> guest_memfd checks for unexpected refcounts to determine whether a page is
>>> still in use. The only expected refcounts after unmapping the range
>>> requested for conversion are those that are held by guest_memfd itself.
>>
>> Is it sufficient to only check, and not also freeze the refcount? (i.e.
>> using folio_ref_freeze()), because without freezing, anything (e.g.
>> compaction's pfn-based scanner) could do a speculative folio_try_get() and
>> the checked refcount becomes stale.
>>
> 
> I believe there's no issue here, since the main thing here is to check
> for long-term pins on the folio. Perhaps David can help me verify. :)

I think I raised this in the past as well: ideally, we'd be freezing the
refcount, then, there is no need to worry about any concurrent access.

However, we could really only get additional page references through PFN walkers
(or speculative references), not through page tables or GUP pins, which is what
we care about.

So if we can tolerate a speculative bump+release of a folio reference, likely
we're good.

-- 
Cheers,

David

^ permalink raw reply

* [Invitation] bi-weekly guest_memfd upstream call on 2026-06-25
From: David Hildenbrand (Arm) @ 2026-06-25 12:12 UTC (permalink / raw)
  To: linux-coco@lists.linux.dev, linux-mm@kvack.org, KVM

Hi,

very late reminder :/

Our next guest_memfd upstream call is scheduled for today, Thursday,
2026-06-25 8:00 - 9:00am (GMT-07:00) Pacific Time - Vancouver.

So far we don't have a lot of topics, so maybe this could be one of these rare
short meetings :)

If we have the right people in the call, I would like to continue the discussion
on proposed memory hot(un)plug/virtio-mem support.

We'll be using the following Google meet:
http://meet.google.com/wxp-wtju-jzw

The meeting notes can be found at [1], where we also link recordings and
collect current guest_memfd upstream proposals. If you want a Google
Calendar invitation that also covers all future meetings, just write me
or Ackerley a mail.

To put something to discuss onto the agenda, reply to this mail or add
them to the "Topics/questions for next meeting(s)" section in the
meeting notes as a comment.

[1]
https://docs.google.com/document/d/1M6766BzdY1Lhk7LiR5IqVR8B8mG3cr-cxTxOrAosPOk/edit?usp=sharing
-- 
Cheers,

David

^ permalink raw reply

* Re: [PATCH v8 24/46] KVM: guest_memfd: Make in-place conversion the default
From: Yan Zhao @ 2026-06-25 10:57 UTC (permalink / raw)
  To: Sean Christopherson, Ackerley Tng, aik, andrew.jones, binbin.wu,
	brauner, chao.p.peng, david, jmattson, jthoughton, michael.roth,
	oupton, pankaj.gupta, qperret, rick.p.edgecombe, rientjes,
	shivankg, steven.price, tabba, willy, wyihan, forkloop, pratyush,
	suzuki.poulose, aneesh.kumar, liam, Paolo Bonzini,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
	Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <ajyJhZcgfYFtGfS2@yzhao56-desk.sh.intel.com>

On Thu, Jun 25, 2026 at 09:51:01AM +0800, Yan Zhao wrote:
> On Wed, Jun 24, 2026 at 05:41:58PM -0700, Sean Christopherson wrote:
> > On Wed, Jun 24, 2026, Ackerley Tng wrote:
> > > Yan Zhao <yan.y.zhao@intel.com> writes:
> > > > With gmem_in_place_conversion=true, userspace can create guest_memfd without the
> > > > MMAP flag. In such cases, shared memory is allocated from different backends.
> > > > This means this module parameter only enables per-gmem memory attribute and does
> > > > not guarantee that gmem in-place conversion will actually occur.
> > 
> > KVM module params are pretty much always about what KVM supports, not what is
> > guaranteed to happen.
> > 
> >   - enable_mmio_caching doesn't guarantee there will actually be MMIO SPTEs,
> >     because maybe the guest never accesses emulated MMIO.
> >   - enable_pmu doesn't guarantee VMs will get a PMU, because userspace may elect
> >     not to advertise one.
> >   - and so on and so forth...
> > 
> > Yes, there's a small mental jump to get from "KVM supports in-place conversion"
> > to "I need to set memory attributes on the guest_memfd instance, not the VM",
> > but I don't see that as a big hurdle, certainly not in the long term.  And once
> > the VMM code is written, I really do think most people are going to care about
> > whether or not KVM supports in-place conversion, not where PRIVATE is tracked.
> Sorry, I just saw this mail after posting my reply in [1].
> 
> I'm ok with gmem_in_place_conversion=true just means KVM supports in-place
> conversion, while we can still create VMs with shared memory not from gmem.
Or what about "allow_gmem_in_place_conversion" ?


> Though it still feels a bit odd to require TDX huge pages to depend on
> gmem_in_place_conversion=true when shared memory is not currently allocated from
> gmem, it should become more natural over time once gmem supports in-place
> conversions for huge page.
> 
> [1] https://lore.kernel.org/all/ajyCn0PnFtQK+Nka@yzhao56-desk.sh.intel.com
> 
> 
> > > > To avoid confusion, could we rename this module parameter to something more
> > > > accurate, such as gmem_memory_attribute?
> > > 
> > > I asked Sean about this after getting some fixes off list. Sean said
> > > gmem_in_place_conversion is named for a host admin to use, and something
> > > like gmem_memory_attributes is too much implementation details for the
> > > admin.
> > > 
> > > Sean, would you reconsider since Yan also asked? If the admin compiled
> > > the kernel knowing what CONFIG_KVM_VM_MEMORY_ATTRIBUTES means, then the
> > > admin would also be able to use a param like gmem_memory_attributes?
> > 
> > No, because it's not all memory attributes, it's very specifically the PRIVATE
> > attribute that will get moved to guest_memfd.  I don't want to pick a name that
> > will become stale and confusing when RWX attributes come along.  The RWX bits
> > will be per-VM, while PRIVATE will be per-guest_memfd.

^ permalink raw reply

* Re: [PATCH v2 03/17] x86/virt/tdx: Detect if the extensions initialization is required
From: Xu Yilun @ 2026-06-25 10:57 UTC (permalink / raw)
  To: Tony Lindgren
  Cc: x86, kvm, linux-coco, linux-kernel, djbw, kas, rick.p.edgecombe,
	yilun.xu, xiaoyao.li, sohil.mehta, adrian.hunter, kishen.maloor,
	peter.fang, baolu.lu, zhenzhong.duan, dave.hansen, dave.hansen,
	seanjc
In-Reply-To: <ajy6VMlPK08K7kIT@tlindgre-MOBL1>

On Thu, Jun 25, 2026 at 08:19:16AM +0300, Tony Lindgren wrote:
> On Thu, Jun 18, 2026 at 04:13:41PM +0800, Xu Yilun wrote:
> > TDX module extensions support extension SEAMCALLs that are preemptible
> > and resumable, unlike normal SEAMCALLs that run to completion while
> > monopolizing the CPU. This allows for higher-level API constructions,
> > so better supports some add-on features that implement higher order
> > security protocols.
> 
> How about "TDX module extension SEAMCALLs are preemptible and resumable..."
> above to make it easier to read?

Included, thanks.

> 
> Other than that:
> 
> Reviewed-by: Tony Lindgren <tony.lindgren@linux.intel.com>

^ permalink raw reply

* Re: [PATCH v2 02/17] x86/virt/tdx: Configure add-on features on TDX module init and update
From: Xu Yilun @ 2026-06-25 10:50 UTC (permalink / raw)
  To: Chao Gao
  Cc: x86, kvm, linux-coco, linux-kernel, djbw, kas, rick.p.edgecombe,
	yilun.xu, xiaoyao.li, sohil.mehta, adrian.hunter, kishen.maloor,
	tony.lindgren, peter.fang, baolu.lu, zhenzhong.duan, dave.hansen,
	dave.hansen, seanjc
In-Reply-To: <ajpHRNaq+z5bdn+R@intel.com>

> >For runtime update, Linux applies a policy that no newer features should
> >be added after update to avoid disrupting live TDX operations. To adhere
> >to this, TDH.SYS.UPDATE must configure the same features as the
> >TDH.SYS.CONFIG. Record the kernel required add-on feature bitmap in a
> >global var so that both phases can use it.
> 
> Actually, we do not need another global variable here. tdx_features0 is cached
> and is not updated across a runtime update, so the derived add-on feature
> bitmap will be the same before and after the update.

I think a global var "static u64 tdx_addon_feature0 *__ro_after_init*;"
better illustrates the policy that add-on feature bitmap should be decided at
boot up and never change later. It will also be used to decide if a specific
add-on feature initialization is needed. We don't want to calculate the bitmap
again and again, though the result must be the same.

Maybe I should strengthen the commit message:

  ... both phases can use it. This actually mirrors a TDX module internal state
  so that kernel knows which add-on TDX operations (for example, quoting
  SEAMCALLs, which will be added in later patches) are valid.

> 
> 
> > static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
> > 				    u64 global_keyid)
> > {
> >+	u64 seamcall_fn = TDH_SYS_CONFIG_V0;
> > 	struct tdx_module_args args = {};
> > 	u64 *tdmr_pa_array;
> > 	size_t array_sz;
> >@@ -1032,7 +1042,15 @@ static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
> > 	args.rcx = __pa(tdmr_pa_array);
> > 	args.rdx = tdmr_list->nr_consumed_tdmrs;
> > 	args.r8 = global_keyid;
> >-	ret = seamcall_prerr(TDH_SYS_CONFIG, &args);
> >+
> >+	set_tdx_addon_features();
> >+
> >+	if (tdx_addon_feature0) {
> >+		args.r9 = tdx_addon_feature0;
> 
> How about moving this r9 assignment out of the if block and placing it next to
> 'args.r8 = global_keyid;'? There is no need to guard it, because args.r9 will
> be 0 when no add-on features are enabled, which is perfectly fine.

I'd prefer to keep the r9 assignment in the block: it clearly shows which
SEAMCALL version needs which parameters, and helps people map the code to
the TDX module spec.

> 
> >+		seamcall_fn = TDH_SYS_CONFIG;
> >+	}
> >+
> >+	ret = seamcall_prerr(seamcall_fn, &args);

^ permalink raw reply

* Re: [PATCH v8 46/46] KVM: selftests: Update private memory exits test to work with per-gmem attributes
From: Fuad Tabba @ 2026-06-25  9:56 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-46-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Skip setting memory to private in the private memory exits test when using
> per-gmem memory attributes, as memory is initialized to private by default
> for guest_memfd, and using vm_mem_set_private() on a guest_memfd instance
> requires creating guest_memfd with GUEST_MEMFD_FLAG_MMAP (which is totally
> doable, but would need to be conditional and is ultimately unnecessary).
>
> Expect an emulated MMIO instead of a memory fault exit when attributes are
> per-gmem, as deleting the memslot effectively drops the private status,
> i.e. the GPA becomes shared and thus supports emulated MMIO.
>
> Skip the "memslot not private" test entirely, as private vs. shared state
> for x86 software-protected VMs comes from the memory attributes themselves,
> and so when doing in-place conversions there can never be a disconnect
> between the expected and actual states.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../selftests/kvm/x86/private_mem_kvm_exits_test.c | 36 ++++++++++++++++++----
>  1 file changed, 30 insertions(+), 6 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> index 10db9fe6d9063..70ed16066c63e 100644
> --- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
> @@ -62,8 +62,9 @@ static void test_private_access_memslot_deleted(void)
>
>         virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
>
> -       /* Request to access page privately */
> -       vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
> +       /* Request to access page privately. */
> +       if (!kvm_has_gmem_attributes)
> +               vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
>
>         pthread_create(&vm_thread, NULL,
>                        (void *(*)(void *))run_vcpu_get_exit_reason,
> @@ -74,10 +75,26 @@ static void test_private_access_memslot_deleted(void)
>         pthread_join(vm_thread, &thread_return);
>         exit_reason = (u32)(u64)thread_return;
>
> -       TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
> -       TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
> -       TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
> -       TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
> +       /*
> +        * If attributes are tracked per-gmem, deleting the memslot that points
> +        * at the gmem instance effectively makes the memory shared, and so the
> +        * read should trigger emulated MMIO.
> +        *
> +        * If attributes are tracked per-VM, deleting the memslot shouldn't
> +        * affect the private attribute, and so KVM should generate a memory
> +        * fault exit (emulated MMIO on private GPAs is disallowed).
> +        */
> +       if (kvm_has_gmem_attributes) {
> +               TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MMIO);
> +               TEST_ASSERT_EQ(vcpu->run->mmio.phys_addr, EXITS_TEST_GPA);
> +               TEST_ASSERT_EQ(vcpu->run->mmio.len, sizeof(u64));
> +               TEST_ASSERT_EQ(vcpu->run->mmio.is_write, false);
> +       } else {
> +               TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
> +               TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
> +               TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
> +               TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
> +       }
>
>         kvm_vm_free(vm);
>  }
> @@ -88,6 +105,13 @@ static void test_private_access_memslot_not_private(void)
>         struct kvm_vcpu *vcpu;
>         u32 exit_reason;
>
> +       /*
> +        * Accessing non-private memory as private with a software-protected VM
> +        * isn't possible when doing in-place conversions.
> +        */
> +       if (kvm_has_gmem_attributes)
> +               return;
> +
>         vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
>                                            guest_repeatedly_read);
>
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 45/46] KVM: selftests: Update private_mem_conversions_test to mmap() guest_memfd
From: Fuad Tabba @ 2026-06-25  9:43 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-45-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Update the private memory conversions selftest to also test conversions
> that are done "in-place" via per-guest_memfd memory attributes. In-place
> conversions require the host to be able to mmap() the guest_memfd so that
> the host and guest can share the same backing physical memory.
>
> This includes several updates, that are conditioned on the system
> supporting per-guest_memfd attributes (kvm_has_gmem_attributes):
>
> 1. Set up guest_memfd requesting MMAP and INIT_SHARED.
>
> 2. With in-place conversions, the host's mapping points directly to the
>    guest's memory. When the guest converts a region to private, host access
>    to that region is blocked. Update the test to expect a SIGBUS when
>    attempting to access the host virtual address (HVA) of private memory.
>
> 3. Use vm_mem_set_memory_attributes(), which chooses how to set memory
>    attributes based on whether kvm_has_gmem_attributes.
>
> Restrict the test to using VM_MEM_SRC_SHMEM because guest_memfd's required
> mmap() flags and page sizes happens to align with those of
> VM_MEM_SRC_SHMEM. As long as VM_MEM_SRC_SHMEM is used for src_type,
> vm_mem_add() works as intended.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../kvm/x86/private_mem_conversions_test.c         | 44 ++++++++++++++++++----
>  1 file changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> index 289ad10063fca..4308c67952310 100644
> --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> @@ -306,9 +306,12 @@ static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
>         if (do_fallocate)
>                 vm_guest_mem_fallocate(vm, gpa, size, map_shared);
>
> -       if (set_attributes)
> -               vm_set_memory_attributes(vm, gpa, size,
> -                                        map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
> +       if (set_attributes) {
> +               u64 attrs = map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE;
> +
> +               vm_mem_set_memory_attributes(vm, gpa, size, attrs);
> +       }
> +
>         run->hypercall.ret = 0;
>  }
>
> @@ -352,8 +355,20 @@ static void *__test_mem_conversions(void *__vcpu)
>                                 size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
>                                 u8 *hva = addr_gpa2hva(vm, gpa + i);
>
> -                               /* In all cases, the host should observe the shared data. */
> -                               memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
> +                               /*
> +                                * When using per-guest_memfd memory attributes,
> +                                * i.e. in-place conversion, host accesses will
> +                                * point at guest memory and should SIGBUS when
> +                                * guest memory is private.  When using per-VM
> +                                * attributes, i.e. separate backing for shared
> +                                * vs. private, the host should always observe
> +                                * the shared data.
> +                                */
> +                               if (kvm_has_gmem_attributes &&
> +                                   uc.args[0] == SYNC_PRIVATE)
> +                                       TEST_EXPECT_SIGBUS(READ_ONCE(*hva));
> +                               else
> +                                       memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
>
>                                 /* For shared, write the new pattern to guest memory. */
>                                 if (uc.args[0] == SYNC_SHARED)
> @@ -382,6 +397,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v
>         const size_t slot_size = memfd_size / nr_memslots;
>         struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
>         pthread_t threads[KVM_MAX_VCPUS];
> +       u64 gmem_flags;
>         struct kvm_vm *vm;
>         int memfd, i;
>
> @@ -397,12 +413,17 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_v
>
>         vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
>
> -       memfd = vm_create_guest_memfd(vm, memfd_size, 0);
> +       if (kvm_has_gmem_attributes)
> +               gmem_flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED;
> +       else
> +               gmem_flags = 0;
> +
> +       memfd = vm_create_guest_memfd(vm, memfd_size, gmem_flags);
>
>         for (i = 0; i < nr_memslots; i++)
>                 vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
>                            BASE_DATA_SLOT + i, slot_size / vm->page_size,
> -                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i, 0);
> +                          KVM_MEM_GUEST_MEMFD, memfd, slot_size * i, gmem_flags);
>
>         for (i = 0; i < nr_vcpus; i++) {
>                 gpa_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
> @@ -452,17 +473,24 @@ static void usage(const char *cmd)
>
>  int main(int argc, char *argv[])
>  {
> -       enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
> +       enum vm_mem_backing_src_type src_type;
>         u32 nr_memslots = 1;
>         u32 nr_vcpus = 1;
>         int opt;
>
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> +       src_type = kvm_has_gmem_attributes ? VM_MEM_SRC_SHMEM :
> +                                            DEFAULT_VM_MEM_SRC;
> +
>         while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
>                 switch (opt) {
>                 case 's':
>                         src_type = parse_backing_src_type(optarg);
> +                       TEST_ASSERT(!kvm_has_gmem_attributes ||
> +                                   src_type == VM_MEM_SRC_SHMEM,
> +                                   "Testing in-place conversions, only %s mem_type supported\n",
> +                                   vm_mem_backing_src_alias(VM_MEM_SRC_SHMEM)->name);
>                         break;
>                 case 'n':
>                         nr_vcpus = atoi_positive("nr_vcpus", optarg);
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 44/46] KVM: selftests: Make TEST_EXPECT_SIGBUS thread-safe
From: Fuad Tabba @ 2026-06-25  9:30 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-44-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> The TEST_EXPECT_SIGBUS macro is not thread-safe as it uses a global
> sigjmp_buf and installs a global SIGBUS signal handler. If multiple threads
> execute the macro concurrently, they will race on installing the signal
> handler and stomp on other threads' jump buffers, leading to incorrect test
> behavior.
>
> Make TEST_EXPECT_SIGBUS thread-safe with the following changes:
>
> Share the KVM tests' global signal handler. sigaction() applies to all
> threads; without sharing a global signal handler, one thread may have
> removed the signal handler that another thread added, hence leading to
> unexpected signals.
>
> The alternative of layering signal handlers was considered, but calling
> sigaction() within TEST_EXPECT_SIGBUS() necessarily creates a race. To
> avoid adding new setup and teardown routines to do sigaction() and keep
> usage of TEST_EXPECT_SIGBUS() simple, share the KVM tests' global signal
> handler.
>
> Opportunistically rename report_unexpected_signal to
> catchall_signal_handler.
>
> To continue to only expect SIGBUS within specific regions of code, use a
> thread-specific variable, expecting_sigbus, to replace installing and
> removing signal handlers.
>
> Make the execution environment for the thread, sigjmp_buf, a
> thread-specific variable.
>
> As part of TEST_EXPECT_SIGBUS(), assert the prerequisite for this setup,
> that the current signal handler is the catchall_signal_handler.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/include/test_util.h | 32 +++++++++++++------------
>  tools/testing/selftests/kvm/lib/kvm_util.c      | 18 ++++++++++----
>  tools/testing/selftests/kvm/lib/test_util.c     |  7 ------
>  3 files changed, 30 insertions(+), 27 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
> index 51287fac8138a..bd75162ec868d 100644
> --- a/tools/testing/selftests/kvm/include/test_util.h
> +++ b/tools/testing/selftests/kvm/include/test_util.h
> @@ -82,21 +82,23 @@ do {                                                                        \
>         __builtin_unreachable(); \
>  } while (0)
>
> -extern sigjmp_buf expect_sigbus_jmpbuf;
> -void expect_sigbus_handler(int signum);
> -
> -#define TEST_EXPECT_SIGBUS(action)                                             \
> -do {                                                                           \
> -       struct sigaction sa_old, sa_new = {                                     \
> -               .sa_handler = expect_sigbus_handler,                            \
> -       };                                                                      \
> -                                                                               \
> -       sigaction(SIGBUS, &sa_new, &sa_old);                                    \
> -       if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) {                          \
> -               action;                                                         \
> -               TEST_FAIL("'%s' should have triggered SIGBUS", #action);        \
> -       }                                                                       \
> -       sigaction(SIGBUS, &sa_old, NULL);                                       \
> +extern __thread sigjmp_buf expect_sigbus_jmpbuf;
> +extern __thread volatile sig_atomic_t expecting_sigbus;
> +extern void catchall_signal_handler(int signum);
> +
> +#define TEST_EXPECT_SIGBUS(action)                                     \
> +do {                                                                   \
> +       struct sigaction __sa = {};                                     \
> +                                                                       \
> +       TEST_ASSERT_EQ(sigaction(SIGBUS, NULL, &__sa), 0);              \
> +       TEST_ASSERT_EQ(__sa.sa_handler, &catchall_signal_handler);      \
> +                                                                       \
> +       expecting_sigbus = true;                                        \
> +       if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) {                  \
> +               action;                                                 \
> +               TEST_FAIL("'%s' should have triggered SIGBUS", #action);\
> +       }                                                               \
> +       expecting_sigbus = false;                                       \
>  } while (0)
>
>  size_t parse_size(const char *size);
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 6b304e8a0e0d5..b4f104436875b 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -2292,13 +2292,20 @@ __weak void kvm_selftest_arch_init(void)
>  {
>  }
>
> -static void report_unexpected_signal(int signum)
> +__thread sigjmp_buf expect_sigbus_jmpbuf;
> +__thread volatile sig_atomic_t expecting_sigbus;
> +
> +void catchall_signal_handler(int signum)
>  {
> +       switch (signum) {
> +       case SIGBUS: {
> +               if (expecting_sigbus)
> +                       siglongjmp(expect_sigbus_jmpbuf, 1);
> +
> +               TEST_FAIL("Unexpected SIGBUS (%d)\n", signum);
> +       }
>  #define KVM_CASE_SIGNUM(sig)                                   \
>         case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
> -
> -       switch (signum) {
> -       KVM_CASE_SIGNUM(SIGBUS);
>         KVM_CASE_SIGNUM(SIGSEGV);
>         KVM_CASE_SIGNUM(SIGILL);
>         KVM_CASE_SIGNUM(SIGFPE);
> @@ -2310,12 +2317,13 @@ static void report_unexpected_signal(int signum)
>  void __attribute((constructor)) kvm_selftest_init(void)
>  {
>         struct sigaction sig_sa = {
> -               .sa_handler = report_unexpected_signal,
> +               .sa_handler = catchall_signal_handler,
>         };
>
>         /* Tell stdout not to buffer its content. */
>         setbuf(stdout, NULL);
>
> +       expecting_sigbus = false;
>         sigaction(SIGBUS, &sig_sa, NULL);
>         sigaction(SIGSEGV, &sig_sa, NULL);
>         sigaction(SIGILL, &sig_sa, NULL);
> diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
> index bab1bd2b775b6..30eb701e4becd 100644
> --- a/tools/testing/selftests/kvm/lib/test_util.c
> +++ b/tools/testing/selftests/kvm/lib/test_util.c
> @@ -18,13 +18,6 @@
>
>  #include "test_util.h"
>
> -sigjmp_buf expect_sigbus_jmpbuf;
> -
> -void __attribute__((used)) expect_sigbus_handler(int signum)
> -{
> -       siglongjmp(expect_sigbus_jmpbuf, 1);
> -}
> -
>  /*
>   * Random number generator that is usable from guest code. This is the
>   * Park-Miller LCG using standard constants.
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 43/46] KVM: selftests: Check fd/flags provided to mmap() when setting up memslot
From: Fuad Tabba @ 2026-06-25  9:20 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-43-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Check that a valid fd provided to mmap() must be accompanied by MAP_SHARED.
>
> With an invalid fd (usually used for anonymous mappings), there are no
> constraints on mmap() flags.
>
> Add this check to make sure that when a guest_memfd is used as region->fd,
> the flag provided to mmap() will include MAP_SHARED.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> [Rephrase assertion message.]
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/lib/kvm_util.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 0b2256ea65ff9..6b304e8a0e0d5 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -1110,6 +1110,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
>                                              src_type == VM_MEM_SRC_SHARED_HUGETLB);
>         }
>
> +       TEST_ASSERT(region->fd == -1 || backing_src_is_shared(src_type),
> +                   "A valid fd provided to mmap() must be accompanied by MAP_SHARED.");
> +
>         region->mmap_start = __kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
>                                         vm_mem_backing_src_alias(src_type)->flag,
>                                         region->fd, mmap_offset);
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 42/46] KVM: selftests: Provide common function to set memory attributes
From: Fuad Tabba @ 2026-06-25  9:09 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-42-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Introduce vm_mem_set_memory_attributes(), which handles setting of memory
> attributes for a range of guest physical addresses, regardless of whether
> the attributes should be set via guest_memfd or via the memory attributes
> at the VM level.
>
> Refactor existing vm_mem_set_{shared,private} functions to use the new
> function. Opportunistically update the size parameter to use size_t instead
> of u64.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/include/kvm_util.h | 46 +++++++++++++++++++-------
>  1 file changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index 3a6b1fa7f26ef..db1442da21bb1 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -454,18 +454,6 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
>         vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
>  }
>
> -static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
> -                                     u64 size)
> -{
> -       vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
> -}
> -
> -static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
> -                                    u64 size)
> -{
> -       vm_set_memory_attributes(vm, gpa, size, 0);
> -}
> -
>  static inline int __gmem_set_memory_attributes(int fd, u64 offset,
>                                                size_t size, u64 attributes,
>                                                u64 *error_offset)
> @@ -532,6 +520,40 @@ static inline void gmem_set_shared(int fd, u64 offset, size_t size)
>         gmem_set_memory_attributes(fd, offset, size, 0);
>  }
>
> +static inline void vm_mem_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
> +                                               size_t size, u64 attrs)
> +{
> +       if (kvm_has_gmem_attributes) {
> +               gpa_t end = gpa + size;
> +               off_t fd_offset;
> +               gpa_t addr;
> +               size_t len;
> +               int fd;
> +
> +               for (addr = gpa; addr < end; addr += len) {
> +                       fd = kvm_gpa_to_guest_memfd(vm, addr, &fd_offset, &len);
> +                       len = min(end - addr, len);
> +
> +                       gmem_set_memory_attributes(fd, fd_offset, len, attrs);
> +               }
> +       } else {
> +               vm_set_memory_attributes(vm, gpa, size, attrs);
> +       }
> +}
> +
> +static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
> +                                     size_t size)
> +{
> +       vm_mem_set_memory_attributes(vm, gpa, size,
> +                                    KVM_MEMORY_ATTRIBUTE_PRIVATE);
> +}
> +
> +static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
> +                                    size_t size)
> +{
> +       vm_mem_set_memory_attributes(vm, gpa, size, 0);
> +}
> +
>  void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size,
>                             bool punch_hole);
>
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 41/46] KVM: selftests: Provide function to look up guest_memfd details from gpa
From: Fuad Tabba @ 2026-06-25  8:58 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-41-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Introduce a new helper, kvm_gpa_to_guest_memfd(), to find the
> guest_memfd-related details of a memory region that contains a given guest
> physical address (GPA).
>
> The function returns the file descriptor for the memfd, the offset into
> the file that corresponds to the GPA, and the number of bytes remaining
> in the region from that GPA.
>
> kvm_gpa_to_guest_memfd() was factored out from vm_guest_mem_fallocate();
> refactor vm_guest_mem_fallocate() to use the new helper.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/include/kvm_util.h |  3 +++
>  tools/testing/selftests/kvm/lib/kvm_util.c     | 37 ++++++++++++++++----------
>  2 files changed, 26 insertions(+), 14 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index 79ab64ac8b869..3a6b1fa7f26ef 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -428,6 +428,9 @@ static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0)
>         vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
>  }
>
> +int kvm_gpa_to_guest_memfd(struct kvm_vm *vm, gpa_t gpa, off_t *fd_offset,
> +                          size_t *nr_bytes);
> +
>  /*
>   * KVM_SET_MEMORY_ATTRIBUTES{,2} overwrites _all_ attributes.  These
>   * flows need significant enhancements to support multiple attributes.
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 524ef97d634bf..0b2256ea65ff9 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -1305,27 +1305,20 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size,
>                             bool punch_hole)
>  {
>         const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
> -       struct userspace_mem_region *region;
>         u64 end = base + size;
> -       gpa_t gpa, len;
>         off_t fd_offset;
> -       int ret;
> +       int fd, ret;
> +       size_t len;
> +       gpa_t gpa;
>
>         for (gpa = base; gpa < end; gpa += len) {
> -               u64 offset;
> -
> -               region = userspace_mem_region_find(vm, gpa, gpa);
> -               TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
> -                           "Private memory region not found for GPA 0x%lx", gpa);
> +               fd = kvm_gpa_to_guest_memfd(vm, gpa, &fd_offset, &len);
> +               len = min(end - gpa, len);
>
> -               offset = gpa - region->region.guest_phys_addr;
> -               fd_offset = region->region.guest_memfd_offset + offset;
> -               len = min_t(u64, end - gpa, region->region.memory_size - offset);
> -
> -               ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
> +               ret = fallocate(fd, mode, fd_offset, len);
>                 TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
>                             punch_hole ? "punch hole" : "allocate", gpa, len,
> -                           region->region.guest_memfd, mode, fd_offset);
> +                           fd, mode, fd_offset);
>         }
>  }
>
> @@ -1662,6 +1655,22 @@ void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa)
>         return (void *) ((uintptr_t) region->host_alias + offset);
>  }
>
> +int kvm_gpa_to_guest_memfd(struct kvm_vm *vm, gpa_t gpa, off_t *fd_offset,
> +                          size_t *nr_bytes)
> +{
> +       struct userspace_mem_region *region;
> +       gpa_t gpa_offset;
> +
> +       region = userspace_mem_region_find(vm, gpa, gpa);
> +       TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
> +                   "guest_memfd memory region not found for GPA 0x%lx", gpa);
> +
> +       gpa_offset = gpa - region->region.guest_phys_addr;
> +       *fd_offset = region->region.guest_memfd_offset + gpa_offset;
> +       *nr_bytes = region->region.memory_size - gpa_offset;
> +       return region->region.guest_memfd;
> +}
> +
>  /* Create an interrupt controller chip for the specified VM. */
>  void vm_create_irqchip(struct kvm_vm *vm)
>  {
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 40/46] KVM: selftests: Reset shared memory after hole-punching
From: Fuad Tabba @ 2026-06-25  8:46 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-40-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> private_mem_conversions_test used to reset the shared memory that was used
> for the test to an initial pattern at the end of each test iteration. Then,
> it would punch out the pages, which would zero memory.
>
> Without in-place conversion, the resetting would write shared memory, and
> hole-punching would zero private memory, hence resetting the test to the
> state at the beginning of the for loop.
>
> With in-place conversion, resetting writes memory as shared, and
> hole-punching zeroes the same physical memory, hence undoing the reset
> done before the hole punch.
>
> Move the resetting after the hole-punching, and reset the entire
> PER_CPU_DATA_SIZE instead of just the tested range.
>
> With in-place conversion, this zeroes and then resets the same physical
> memory. Without in-place conversion, the private memory is zeroed, and the
> shared memory is reset to init_p.
>
> This is sufficient since at each test stage, the memory is assumed to start
> as shared, and private memory is always assumed to start zeroed. Conversion
> zeroes memory, so the future test stages will work as expected.
>
> Fixes: 43f623f350ce1 ("KVM: selftests: Add x86-only selftest for private memory conversions")
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/x86/private_mem_conversions_test.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> index 861baff201e78..289ad10063fca 100644
> --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
> @@ -202,15 +202,18 @@ static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate)
>                 guest_sync_shared(gpa, size, p3, p4);
>                 memcmp_g(gpa, p4, size);
>
> -               /* Reset the shared memory back to the initial pattern. */
> -               memset((void *)gpa, init_p, size);
> -
>                 /*
>                  * Free (via PUNCH_HOLE) *all* private memory so that the next
>                  * iteration starts from a clean slate, e.g. with respect to
>                  * whether or not there are pages/folios in guest_mem.
>                  */
>                 guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
> +
> +               /*
> +                * Hole-punching above zeroed private memory. Reset shared
> +                * memory in preparation for the next GUEST_STAGE.
> +                */
> +               memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
>         }
>  }
>
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 39/46] KVM: selftests: Test conversion with elevated page refcount
From: Fuad Tabba @ 2026-06-25  8:04 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-39-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Add a selftest to verify that converting a shared guest_memfd page to a
> private page fails if the page has an elevated reference count.
>
> When KVM converts a shared page to a private one, it expects the page to
> have a reference count equal to the reference counts taken by the
> filemap. If another kernel subsystem holds a reference to the page, the
> conversion must be aborted.
>
> The test asserts that both bulk and single-page conversion attempts
> correctly fail with EAGAIN for the pinned page. After the page is unpinned,
> the test verifies that subsequent conversions succeed.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Not sure Sashiko's concern is worth it.

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../kvm/x86/guest_memfd_conversions_test.c         | 56 ++++++++++++++++++++++
>  1 file changed, 56 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index 99b0023609670..4ebbd29029526 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -441,6 +441,62 @@ GMEM_CONVERSION_TEST_INIT_SHARED(forked_accesses)
>  #undef TEST_STATE_AWAIT
>  }
>
> +static void test_convert_to_private_fails(test_data_t *t, u64 pgoff,
> +                                         size_t nr_pages,
> +                                         u64 expected_error_offset)
> +{
> +       /* +1 to make it anything but expected_error_offset. */
> +       u64 error_offset = expected_error_offset + 1;
> +       u64 offset = pgoff * page_size;
> +       int ret;
> +
> +       do {
> +               ret = __gmem_set_private(t->gmem_fd, offset,
> +                                        nr_pages * page_size, &error_offset);
> +       } while (ret == -1 && errno == EINTR);
> +       TEST_ASSERT(ret == -1 && errno == EAGAIN,
> +                   "Wanted EAGAIN on page %lu, got %d (ret = %d)", pgoff,
> +                   errno, ret);
> +       TEST_ASSERT_EQ(error_offset, expected_error_offset);
> +}
> +
> +GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(elevated_refcount, 4)
> +{
> +       int i;
> +
> +       pin_pages(t->mem + test_page * page_size, page_size);
> +
> +       for (i = 0; i < nr_pages; i++)
> +               test_shared(t, i, 0, 'A', 'B');
> +
> +       /*
> +        * Converting in bulk should fail as long as any page in the range has
> +        * unexpected refcounts.
> +        */
> +       test_convert_to_private_fails(t, 0, nr_pages, test_page * page_size);
> +
> +       for (i = 0; i < nr_pages; i++) {
> +               /*
> +                * Converting page-wise should also fail as long as any page in
> +                * the range has unexpected refcounts.
> +                */
> +               if (i == test_page)
> +                       test_convert_to_private_fails(t, i, 1, test_page * page_size);
> +               else
> +                       test_convert_to_private(t, i, 'B', 'C');
> +       }
> +
> +       unpin_pages();
> +
> +       gmem_set_private(t->gmem_fd, 0, nr_pages * page_size);
> +
> +       for (i = 0; i < nr_pages; i++) {
> +               char expected = i == test_page ? 'B' : 'C';
> +
> +               test_private(t, i, expected, 'D');
> +       }
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 38/46] KVM: selftests: Add helpers to pin pages with CONFIG_GUP_TEST
From: Fuad Tabba @ 2026-06-25  7:40 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-38-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Add helper functions to allow KVM selftests to pin memory using
> CONFIG_GUP_TEST. This is useful for testing scenarios where some page has
> an increased refcount, such as in guest_memfd in-place conversion tests.
>
> The helpers open /sys/kernel/debug/gup_test and invoke the
> PIN_LONGTERM_TEST_START and PIN_LONGTERM_TEST_STOP ioctls. Since this
> functionality depends on the kernel being built with CONFIG_GUP_TEST,
> provide stub implementations that trigger a test failure if the
> configuration is missing.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>

nit below, otherwise:

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  tools/testing/selftests/kvm/include/kvm_util.h |  3 +++
>  tools/testing/selftests/kvm/lib/kvm_util.c     | 23 +++++++++++++++++++++++
>  2 files changed, 26 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
> index 323d06b5699ec..79ab64ac8b869 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util.h
> @@ -1195,6 +1195,9 @@ static inline int pin_self_to_any_cpu(void)
>         return pin_task_to_any_cpu(pthread_self());
>  }
>
> +void pin_pages(void *vaddr, uint64_t size);
> +void unpin_pages(void);
> +
>  void kvm_print_vcpu_pinning_help(void);
>  void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[],
>                             int nr_vcpus);
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index b73817f7bc803..524ef97d634bf 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -18,6 +18,8 @@
>  #include <unistd.h>
>  #include <linux/kernel.h>
>
> +#include "../../../../mm/gup_test.h"
> +
>  #define KVM_UTIL_MIN_PFN       2
>
>  u32 guest_random_seed;
> @@ -639,6 +641,27 @@ int __pin_task_to_cpu(pthread_t task, int cpu)
>         return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
>  }
>
> +static int gup_test_fd = -1;
> +
> +void pin_pages(void *vaddr, uint64_t size)
> +{
> +       const struct pin_longterm_test args = {
> +               .addr = (uint64_t)vaddr,
> +               .size = size,
> +               .flags = PIN_LONGTERM_TEST_FLAG_USE_WRITE,
> +       };
> +
> +       gup_test_fd = __open_path_or_exit("/sys/kernel/debug/gup_test", O_RDWR,
> +                                         "Is CONFIG_GUP_TEST enabled?");

nit: should you close this/reset it to -1 after the tests?

> +
> +       TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_START, &args), 0);
> +}
> +
> +void unpin_pages(void)
> +{
> +       TEST_ASSERT_EQ(ioctl(gup_test_fd, PIN_LONGTERM_TEST_STOP), 0);
> +}
> +
>  static u32 parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
>  {
>         u32 pcpu = atoi_non_negative("CPU number", cpu_str);
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 37/46] KVM: selftests: Test that shared/private status is consistent across processes
From: Fuad Tabba @ 2026-06-25  7:14 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-37-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Add a test to verify that a guest_memfd's shared/private status is
> consistent across processes, and that any shared pages previously mapped in
> any process are unmapped from all processes.
>
> The test forks a child process after creating the shared guest_memfd
> region so that the second process exists alongside the main process for the
> entire test.
>
> The processes then take turns to access memory to check that the
> shared/private status is consistent across processes.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> ---

Two things below, otherwise:

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad


>  .../kvm/x86/guest_memfd_conversions_test.c         | 118 +++++++++++++++++++++
>  1 file changed, 118 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index f03af2c46426f..99b0023609670 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -2,6 +2,8 @@
>  /*
>   * Copyright (c) 2024, Google LLC.
>   */
> +#include <pthread.h>
> +#include <time.h>
>  #include <sys/mman.h>
>  #include <unistd.h>

nit: include order

>
> @@ -323,6 +325,122 @@ GMEM_CONVERSION_TEST_INIT_SHARED(truncate)
>         test_private(t, 0, 0, 'A');
>  }
>
> +/* Test that shared/private memory protections work and are seen from any process. */
> +GMEM_CONVERSION_TEST_INIT_SHARED(forked_accesses)
> +{
> +       enum test_state {
> +               STATE_INIT,
> +               STATE_CHECK_SHARED,
> +               STATE_DONE_CHECKING_SHARED,
> +               STATE_CHECK_PRIVATE,
> +               STATE_DONE_CHECKING_PRIVATE,
> +       };
> +
> +       struct sync_state {
> +               pthread_mutex_t mutex;
> +               pthread_cond_t cond;
> +               enum test_state step;
> +       } *sync;
> +
> +       pthread_mutexattr_t mattr;
> +       pthread_condattr_t cattr;
> +       pid_t child_pid, parent_pid;
> +       int status;
> +
> +       sync = kvm_mmap(sizeof(*sync), PROT_READ | PROT_WRITE,
> +                       MAP_SHARED | MAP_ANONYMOUS, -1);
> +
> +       pthread_mutexattr_init(&mattr);
> +       pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
> +       pthread_mutex_init(&sync->mutex, &mattr);
> +       pthread_mutexattr_destroy(&mattr);
> +
> +       pthread_condattr_init(&cattr);
> +       pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
> +       pthread_cond_init(&sync->cond, &cattr);
> +       pthread_condattr_destroy(&cattr);
> +
> +       sync->step = STATE_INIT;
> +
> +#define TEST_STATE_AWAIT(__state)                                              \
> +       do {                                                                    \
> +               pthread_mutex_lock(&sync->mutex);                               \
> +               while (sync->step != (__state)) {                               \
> +                       struct timespec ts, stop;                               \
> +                       int ret;                                                \
> +                                                                               \
> +                       clock_gettime(CLOCK_REALTIME, &ts);                     \
> +                       stop = timespec_add_ns(ts, 100 * 1000000UL);            \
> +                                                                               \
> +                       ret = pthread_cond_timedwait(&sync->cond, &sync->mutex, &stop); \
> +                       if (ret == ETIMEDOUT) {                                 \
> +                               bool alive = (child_pid == 0) ?                 \
> +                                            (getppid() == parent_pid) :                \
> +                                            (waitpid(child_pid, NULL, WNOHANG) == 0); \

Not sure it's worth it, but if you want to silence Sashiko, waitid()
with WNOWAIT might be the way to go (not tested, just from looking at
the man page). This is very unlikely though; I'm only mentioning it
since Sashiko complained.


> +                               TEST_ASSERT(alive, "Other process exited prematurely"); \
> +                       } else {                                                \
> +                               TEST_ASSERT(!ret, "pthread_cond_timedwait failed"); \
> +                       }                                                       \
> +               }                                                               \
> +               pthread_mutex_unlock(&sync->mutex);                             \
> +       } while (0)
> +
> +#define TEST_STATE_SET(__state)                                                        \
> +       do {                                                                    \
> +               pthread_mutex_lock(&sync->mutex);                               \
> +               sync->step = (__state);                                         \
> +               pthread_cond_broadcast(&sync->cond);                            \
> +               pthread_mutex_unlock(&sync->mutex);                             \
> +       } while (0)
> +
> +       parent_pid = getpid();
> +       child_pid = fork();
> +       TEST_ASSERT(child_pid != -1, "fork failed");
> +
> +       if (child_pid == 0) {
> +               const char inconsequential = 0xdd;
> +
> +               TEST_STATE_AWAIT(STATE_CHECK_SHARED);
> +
> +               /*
> +                * This maps the pages into the child process as well, and tests
> +                * that the conversion process will unmap the guest_memfd memory
> +                * from all processes.
> +                */
> +               host_do_rmw(t->mem, 0, 0xB, 0xC);
> +
> +               TEST_STATE_SET(STATE_DONE_CHECKING_SHARED);
> +               TEST_STATE_AWAIT(STATE_CHECK_PRIVATE);
> +
> +               TEST_EXPECT_SIGBUS(READ_ONCE(t->mem[0]));
> +               TEST_EXPECT_SIGBUS(WRITE_ONCE(t->mem[0], inconsequential));
> +
> +               TEST_STATE_SET(STATE_DONE_CHECKING_PRIVATE);
> +               exit(0);
> +       }
> +
> +       test_shared(t, 0, 0, 0xA, 0xB);
> +
> +       TEST_STATE_SET(STATE_CHECK_SHARED);
> +       TEST_STATE_AWAIT(STATE_DONE_CHECKING_SHARED);
> +
> +       test_convert_to_private(t, 0, 0xC, 0xD);
> +
> +       TEST_STATE_SET(STATE_CHECK_PRIVATE);
> +       TEST_STATE_AWAIT(STATE_DONE_CHECKING_PRIVATE);
> +
> +       TEST_ASSERT_EQ(waitpid(child_pid, &status, 0), child_pid);
> +       TEST_ASSERT(WIFEXITED(status) && WEXITSTATUS(status) == 0,
> +                   "Child exited with unexpected status");
> +
> +       pthread_mutex_destroy(&sync->mutex);
> +       pthread_cond_destroy(&sync->cond);
> +       kvm_munmap(sync, sizeof(*sync));
> +
> +#undef TEST_STATE_SET
> +#undef TEST_STATE_AWAIT
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 36/46] KVM: selftests: Test that truncation does not change shared/private status
From: Fuad Tabba @ 2026-06-25  7:03 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-36-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Add a test to verify that deallocating a page in a guest memfd region via
> fallocate() with FALLOC_FL_PUNCH_HOLE does not alter the shared or private
> status of the corresponding memory range.
>
> When a page backing a guest memfd mapping is deallocated, e.g., by punching
> a hole or truncating the file, and then subsequently faulted back in, the
> new page must inherit the correct shared/private status tracked by
> guest_memfd.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../selftests/kvm/x86/guest_memfd_conversions_test.c       | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index 0b024fb7227f0..f03af2c46426f 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -10,6 +10,7 @@
>  #include <linux/sizes.h>
>
>  #include "kvm_util.h"
> +#include "kvm_syscalls.h"
>  #include "kselftest_harness.h"
>  #include "test_util.h"
>  #include "ucall_common.h"
> @@ -309,6 +310,19 @@ GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(unallocated_folios, 8)
>                 test_convert_to_shared(t, i, 'B', 'C', 'D');
>  }
>
> +/* Truncation should not affect shared/private status. */
> +GMEM_CONVERSION_TEST_INIT_SHARED(truncate)
> +{
> +       host_do_rmw(t->mem, 0, 0, 'A');
> +       kvm_fallocate(t->gmem_fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
> +       host_do_rmw(t->mem, 0, 0, 'A');
> +
> +       test_convert_to_private(t, 0, 'A', 'B');
> +
> +       kvm_fallocate(t->gmem_fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
> +       test_private(t, 0, 0, 'A');
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 35/46] KVM: selftests: Convert with allocated folios in different layouts
From: Fuad Tabba @ 2026-06-25  7:03 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-35-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Add a guest_memfd selftest to verify that memory conversions work
> correctly with allocated folios in different layouts.
>
> By iterating through which pages are initially faulted, the test covers
> various layouts of contiguous allocated and unallocated regions, exercising
> conversion with different range layouts.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../kvm/x86/guest_memfd_conversions_test.c         | 30 ++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index b43ac196330f1..0b024fb7227f0 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -279,6 +279,36 @@ GMEM_CONVERSION_TEST_INIT_PRIVATE(before_allocation_private)
>         test_convert_to_shared(t, 0, 0, 'A', 'B');
>  }
>
> +/*
> + * Test that when some of the folios in the conversion range are allocated,
> + * conversion requests are handled correctly in guest_memfd.  Vary the ranges
> + * allocated before conversion, using test_page, to cover various layouts of
> + * contiguous allocated and unallocated regions.
> + */
> +GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(unallocated_folios, 8)
> +{
> +       const int second_page_to_fault = 4;
> +       int i;
> +
> +       /*
> +        * Fault 2 of the pages to test filemap range operations except when
> +        * test_page == second_page_to_fault.
> +        */
> +       host_do_rmw(t->mem, test_page, 0, 'A');
> +       if (test_page != second_page_to_fault)
> +               host_do_rmw(t->mem, second_page_to_fault, 0, 'A');
> +
> +       gmem_set_private(t->gmem_fd, 0, nr_pages * page_size);
> +       for (i = 0; i < nr_pages; ++i) {
> +               char expected = (i == test_page || i == second_page_to_fault) ? 'A' : 0;
> +
> +               test_private(t, i, expected, 'B');
> +       }
> +
> +       for (i = 0; i < nr_pages; ++i)
> +               test_convert_to_shared(t, i, 'B', 'C', 'D');
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 34/46] KVM: selftests: Test conversion before allocation
From: Fuad Tabba @ 2026-06-25  7:00 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-34-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> Add two test cases to the guest_memfd conversions selftest to cover
> the scenario where a conversion is requested before any memory has been
> allocated in the guest_memfd region.
>
> The KVM_SET_MEMORY_ATTRIBUTES2 ioctl can be called on a memory region at
> any time. If the guest had not yet faulted in any pages for that region,
> the kernel must record the conversion request and apply the requested state
> when the pages are eventually allocated.
>
> The new tests cover both conversion directions.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

> ---
>  .../selftests/kvm/x86/guest_memfd_conversions_test.c       | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index 8e17d5c08aeb8..b43ac196330f1 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -265,6 +265,20 @@ GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(indexing, 4)
>  #undef combine
>  }
>
> +/*
> + * Test that even if there are no folios yet, conversion requests are recorded
> + * in guest_memfd.
> + */
> +GMEM_CONVERSION_TEST_INIT_SHARED(before_allocation_shared)
> +{
> +       test_convert_to_private(t, 0, 0, 'A');
> +}
> +
> +GMEM_CONVERSION_TEST_INIT_PRIVATE(before_allocation_private)
> +{
> +       test_convert_to_shared(t, 0, 0, 'A', 'B');
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 33/46] KVM: selftests: Test conversion precision in guest_memfd
From: Fuad Tabba @ 2026-06-25  6:57 UTC (permalink / raw)
  To: ackerleytng
  Cc: aik, andrew.jones, binbin.wu, brauner, chao.p.peng, david,
	jmattson, jthoughton, michael.roth, oupton, pankaj.gupta, qperret,
	rick.p.edgecombe, rientjes, shivankg, steven.price, willy, wyihan,
	yan.y.zhao, forkloop, pratyush, suzuki.poulose, aneesh.kumar,
	liam, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, Shuah Khan, Vishal Annapurve,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu, Youngjun Park,
	Qi Zheng, Shakeel Butt, Kiryl Shutsemau, Baoquan He,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <20260618-gmem-inplace-conversion-v8-33-9d2959357853@google.com>

On Fri, 19 Jun 2026 at 01:32, Ackerley Tng via B4 Relay
<devnull+ackerleytng.google.com@kernel.org> wrote:
>
> From: Ackerley Tng <ackerleytng@google.com>
>
> The existing guest_memfd conversion tests only use single-page memory
> regions. This provides no coverage for multi-page guest_memfd objects,
> specifically whether KVM correctly handles the page index for conversion
> operations. An incorrect implementation could, for example, always operate
> on the first page regardless of the index provided.
>
> Add a new test case to verify that conversions between private and shared
> memory correctly target the specified page within a multi-page guest_memfd.
>
> This test also verifies the precision of memory conversions by converting a
> single page and then iterating through all other pages to ensure they remain in
> their original state.
>
> To support this test, add a new GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED
> macro that handles setting up and tearing down the VM for each page
> iteration. The teardown logic is adjusted to prevent a double-free in this
> new scenario.
>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad


> ---
>  .../kvm/x86/guest_memfd_conversions_test.c         | 66 ++++++++++++++++++++++
>  1 file changed, 66 insertions(+)
>
> diff --git a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> index 5b070d3374eae..8e17d5c08aeb8 100644
> --- a/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> +++ b/tools/testing/selftests/kvm/x86/guest_memfd_conversions_test.c
> @@ -61,8 +61,13 @@ static void gmem_conversions_do_setup(test_data_t *t, int nr_pages,
>
>  static void gmem_conversions_do_teardown(test_data_t *t)
>  {
> +       /* Use NULL to avoid second free in FIXTURE_TEARDOWN (multipage tests). */
> +       if (!t->vcpu)
> +               return;
> +
>         /* No need to close gmem_fd, it's owned by the VM structure. */
>         kvm_vm_free(t->vcpu->vm);
> +       t->vcpu = NULL;
>  }
>
>  FIXTURE_TEARDOWN(gmem_conversions)
> @@ -101,6 +106,29 @@ static void __gmem_conversions_##test(test_data_t *t, int nr_pages)                \
>  #define GMEM_CONVERSION_TEST_INIT_SHARED(test)                                 \
>         __GMEM_CONVERSION_TEST_INIT_SHARED(test, 1)
>
> +/*
> + * Repeats test over nr_pages in a guest_memfd of size nr_pages, providing each
> + * test iteration with test_page, the index of the page under test in
> + * guest_memfd. test_page takes values 0..(nr_pages - 1) inclusive.
> + */
> +#define GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(test, __nr_pages)           \
> +static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages,  \
> +                                               const int test_page);           \
> +                                                                               \
> +TEST_F(gmem_conversions, test)                                                 \
> +{                                                                              \
> +       const u64 flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED; \
> +       int i;                                                                  \
> +                                                                               \
> +       for (i = 0; i < __nr_pages; ++i) {                                      \
> +               gmem_conversions_do_setup(self, __nr_pages, flags);             \
> +               __gmem_conversions_multipage_##test(self, __nr_pages, i);       \
> +               gmem_conversions_do_teardown(self);                             \
> +       }                                                                       \
> +}                                                                              \
> +static void __gmem_conversions_multipage_##test(test_data_t *t, int nr_pages,  \
> +                                               const int test_page)
> +
>  struct guest_check_data {
>         void *mem;
>         char expected_val;
> @@ -199,6 +227,44 @@ GMEM_CONVERSION_TEST_INIT_SHARED(init_shared)
>         test_convert_to_shared(t, 0, 'C', 'D', 'E');
>  }
>
> +GMEM_CONVERSION_MULTIPAGE_TEST_INIT_SHARED(indexing, 4)
> +{
> +       int i;
> +
> +       /* Get a char that varies with both i and n. */
> +#define combine(x, n) ((x << 4) + (n))
> +#define i_(n) (combine(i, n))
> +#define t_(n) (combine(test_page, n))
> +
> +       /*
> +        * Start with the highest index, to catch any errors when, perhaps, the
> +        * first page is returned even for the last index.
> +        */
> +       for (i = nr_pages - 1; i >= 0; --i)
> +               test_shared(t, i, 0, i_(0), i_(2));
> +
> +       test_convert_to_private(t, test_page, t_(2), t_(3));
> +
> +       for (i = 0; i < nr_pages; ++i) {
> +               if (i == test_page)
> +                       test_private(t, test_page, t_(3), t_(4));
> +               else
> +                       test_shared(t, i, i_(2), i_(3), i_(4));
> +       }
> +
> +       test_convert_to_shared(t, test_page, t_(4), t_(5), t_(6));
> +
> +       for (i = 0; i < nr_pages; ++i) {
> +               char expected = i == test_page ? t_(6) : i_(4);
> +
> +               test_shared(t, i, expected, i_(7), i_(8));
> +       }
> +
> +#undef t_
> +#undef i_
> +#undef combine
> +}
> +
>  int main(int argc, char *argv[])
>  {
>         TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
>
> --
> 2.55.0.rc0.738.g0c8ab3ebcc-goog
>
>

^ permalink raw reply

* Re: [PATCH v8 15/46] KVM: guest_memfd: Call arch invalidate hooks on conversion
From: Fuad Tabba @ 2026-06-25  6:48 UTC (permalink / raw)
  To: Ackerley Tng
  Cc: Sean Christopherson, aik, andrew.jones, binbin.wu, brauner,
	chao.p.peng, david, jmattson, jthoughton, michael.roth, oupton,
	pankaj.gupta, qperret, rick.p.edgecombe, rientjes, shivankg,
	steven.price, willy, wyihan, yan.y.zhao, forkloop, pratyush,
	suzuki.poulose, aneesh.kumar, liam, Paolo Bonzini,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Vishal Annapurve, Andrew Morton, Chris Li, Kairui Song,
	Kemeng Shi, Nhat Pham, Barry Song, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Youngjun Park, Qi Zheng, Shakeel Butt, Kiryl Shutsemau,
	Baoquan He, Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm,
	linux-coco
In-Reply-To: <CAEvNRgGX3GkazCWM=6y9YLgn=YemXuG==Oo+L58cac1Fd86_TQ@mail.gmail.com>

On Wed, 24 Jun 2026 at 18:46, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Sean Christopherson <seanjc@google.com> writes:
>
> > On Fri, Jun 19, 2026, Fuad Tabba wrote:
> >> On Fri, 19 Jun 2026 at 01:31, Ackerley Tng via B4 Relay
> >> <devnull+ackerleytng.google.com@kernel.org> wrote:
> >> >
> >> > From: Ackerley Tng <ackerleytng@google.com>
> >> >
> >> > When memory in guest_memfd is converted from private to shared, the
> >> > platform-specific state associated with the guest-private pages must be
> >> > invalidated or cleaned up.
> >> >
> >> > Iterate over the folios in the affected range and call the
> >> > kvm_arch_gmem_invalidate() hook for each PFN range. This allows
> >> > architectures to perform necessary teardown, such as updating hardware
> >> > metadata or encryption states, before the pages are transitioned to the
> >> > shared state.
> >> >
> >> > Invoke this helper after indicating to KVM's mmu code that an invalidation
> >> > is in progress to stop in-flight page faults from succeeding.
> >> >
> >> > Reviewed-by: Fuad Tabba <tabba@google.com>
> >> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> >>
> >> Coming back to this after working through the arm64/pKVM side. My
> >> Reviewed-by here is from the previous round and the patch hasn't
> >> changed, but I missed an implication for arm64.
> >>
> >> kvm_arch_gmem_invalidate() is now called from two paths with the same
> >> (start, end) signature: folio teardown (kvm_gmem_free_folio) and
> >> private->shared conversion (here). For SNP/TDX that's fine, conversion is
> >> destructive anyway. For pKVM the two need opposite content semantics:
> >> conversion must preserve the page in place (same physical page, the point
> >> of in-place conversion without encryption), while teardown must scrub it
> >> before returning it to the host.
> >>
> >> The hook gets only a pfn range with no indication of which caller it's
> >> serving, so arm64 can't give the two paths the behaviour they need. It
> >> would help to signal intent on the conversion path: a reason/flag, a
> >> separate hook, or not routing non-destructive conversion through the
> >> teardown hook.
> >>
> >> arm64 isn't here yet, so this isn't urgent, but the hook is gaining a
> >> second caller now, and it's cheaper to leave room for the distinction
> >> than to change a generic contract other arches depend on later.
> >
> > Crud.  It may not be urgent for arm64, but it's urgent for other reasons that
> > I "can't" describe in detail at the moment, and even if that weren't the case, I
> > think we should clean things up now.  More below.
> >
> >> >  virt/kvm/guest_memfd.c | 41 +++++++++++++++++++++++++++++++++++++++++
> >> >  1 file changed, 41 insertions(+)
> >> >
> >> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> >> > index 433f79047b9d1..3c94442bc8131 100644
> >> > --- a/virt/kvm/guest_memfd.c
> >> > +++ b/virt/kvm/guest_memfd.c
> >> > @@ -607,6 +607,42 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
> >> >         return safe;
> >> >  }
> >> >
> >> > +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
> >> > +static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end)
> >
> > Not your fault, but kvm_arch_gmem_invalidate() is badly misnamed.  It's not
> > "invalidating" anything, it's much more of a "free" callback, as SNP uses it to
> > put physical pages back into a shared state when a maybe-private folio is freed.
> >
> > As Fuad points out, (ab)using that hook for the private=>shared conversion case
> > "works", but not broadly.  And it makes the bad name worse, because it's called
> > from code that _is_ doing true invalidations.  For pKVM, it may not even need to
> > do anything invalidation-like.
> >
>
> Thanks, I also didn't like the naming of kvm_gmem_invalidate(),
> especially when conversions also call
> kvm_gmem_invalidate_{start,end}() and those do different things.
>
> > To avoid a conflict with patches that are going to have priority over this series,
> > to set the stage for arm64 support, and to avoid bleeding vendor details
> > into guest_memfd, as if they are core guest_memfd behavior (only SNP needs the
> > "invalidation" on this specific transition), I think we should add an arch hook
> > to do conversions straightaway.
> >
> > Unless there's a clever option I'm missing, it'll mean adding yet another
> > HAVE_KVM_ARCH_GMEM_XXX flag?  Hmm, especially because IIUC, arm64/pKVM doesn't
> > need a callback for this case, only the free_folio case.
> >
> >> > +{
> >> > +       struct folio_batch fbatch;
> >> > +       pgoff_t next = start;
> >> > +       int i;
> >> > +
> >> > +       folio_batch_init(&fbatch);
> >> > +       while (filemap_get_folios(inode->i_mapping, &next, end - 1, &fbatch)) {
> >> > +               for (i = 0; i < folio_batch_count(&fbatch); ++i) {
> >> > +                       struct folio *folio = fbatch.folios[i];
> >> > +                       pgoff_t start_index, end_index;
> >> > +                       kvm_pfn_t start_pfn, end_pfn;
> >> > +
> >> > +                       start_index = max(start, folio->index);
> >> > +                       end_index = min(end, folio_next_index(folio));
> >> > +                       /*
> >> > +                        * end_index is either in folio or points to
> >> > +                        * the first page of the next folio. Hence,
> >> > +                        * all pages in range [start_index, end_index)
> >> > +                        * are contiguous.
> >> > +                        */
> >> > +                       start_pfn = folio_file_pfn(folio, start_index);
> >> > +                       end_pfn = start_pfn + end_index - start_index;
> >> > +
> >> > +                       kvm_arch_gmem_invalidate(start_pfn, end_pfn);
> >> > +               }
> >> > +
> >> > +               folio_batch_release(&fbatch);
> >> > +               cond_resched();
> >> > +       }
> >> > +}
> >> > +#else
> >> > +static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end) {}
> >> > +#endif
> >> > +
> >> >  static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> >> >                                      size_t nr_pages, uint64_t attrs,
> >> >                                      pgoff_t *err_index)
> >> > @@ -647,7 +683,12 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> >> >          */
> >> >
> >> >         kvm_gmem_invalidate_start(inode, start, end);
> >> > +
> >> > +       if (!to_private)
> >> > +               kvm_gmem_invalidate(inode, start, end);
> >
> > E.g. instead make this something like this?
> >
> >       kvm_gmem_set_pfn_attributes(...)
> >
> > Hrm, though that wastes folio lookups in the to_private case.  So maybe just this,
> > assuming pKVM doesn't need to take additional action on conversions?
> >
> >       if (!to_private)
> >               kvm_gmem_make_shared(...)
> >
> > Actually, if we do that, then we don't need a separate arch hook, just a separate
> > config.  It'll still bleed SNP details into guest_memfd, but it'll at least be
> > done in a way that's more explicitly arch specific (and it's no different than
> > what we already do for PREPARE...).
> >
>
> pKVM needs some arch guest_memfd lifecycle functions that
>
> + for conversion, doesn't do anything,
> + for teardown, resets page state (IIUC it'll be reset to
>   PKVM_PAGE_OWNED (by the host))
>
> So I think we need different functions for those two stages in the
> lifecycle of a page with guest_memfd? What if we have

Yes, the split is what I was after. One PFN-range hook for both
teardown and private->shared conversion can't tell them apart, and for
pKVM the two want opposite content semantics.

Two configs rather than one is right, since the needs are independent.
pKVM wants teardown but not conversion.

>
> CONFIG_HAVE_KVM_ARCH_GMEM_SET_PFN_ATTRIBUTES, which gates
>
> + kvm_gmem_should_set_pfn_attributes(attributes) and
>   .gmem_should_set_pfn_attributes
> + kvm_gmem_set_pfn_attributes(start_pfn, end_pfn, attributes) and
>   .gmem_set_pfn_attributes
>
> CONFIG_HAVE_KVM_ARCH_GMEM_TEARDOWN, which gates
>
> + kvm_gmem_teardown() and .gmem_teardown
>
> SNP:
>
> + .gmem_should_set_pfn_attributes = sev_gmem_should_set_pfn_attributes,
>   and sev_gmem_should_set_pfn_attributes returns !is_private
> + Rename .gmem_invalidate and sev_gmem_invalidate to *set_pfn_attributes
> + .gmem_teardown = sev_gmem_set_pfn_attributes
>
> TDX:
>
> + Disable CONFIG_HAVE_KVM_ARCH_GMEM_SET_PFN_ATTRIBUTES
> + Disable CONFIG_HAVE_KVM_ARCH_GMEM_TEARDOWN
>
> pKVM:
>
> + Disable CONFIG_HAVE_KVM_ARCH_GMEM_SET_PFN_ATTRIBUTES
> + .gmem_teardown = pkvm_gmem_set_pfn_attributes

Right for pKVM:

- teardown is not a no-op: it scrubs the page and resets the host
  state to PKVM_PAGE_OWNED before the page returns to the host. Your
  "reset to PKVM_PAGE_OWNED" reading is correct.

- the arch conversion hook is a no-op, so disabling SET_PFN_ATTRIBUTES
  is correct. Conversions in pKVM are guest-initiated: the
  share/unshare hypercall does the stage-2 and page-state transition
  at EL2. The host still runs the generic conversion path (safety
  check, attribute update) and accepts the conversion, but EL2 has
  already done the transition, so there is nothing arch-specific left
  for a hook to do. The page is preserved in place (no scrub).

  If pKVM does turn out to need a step on conversion, it stays
  non-destructive either way, and it can opt in later without touching
  a contract others depend on.


Folding the direction check behind .gmem_should_set_pfn_attributes is
a good cleanup, it keeps the !to_private check out of generic gmem.

On naming: gmem_teardown is better. gmem_set_pfn_attributes reads a
bit close to KVM_SET_MEMORY_ATTRIBUTES, but naming is hard. :)

>
> Suzuki, does this work for ARM CCA?
>
> This way,
>
> + The if (is_private) check doesn't leak SNP details into guest_memfd
> + .gmem_make_shared doesn't stick out without a .gmem_make_private
> + .gmem_set_pfn_attributes, .gmem_prepare and .gmem_teardown are aligned
>   conceptually as lifecycle hooks
>
> + I think the private/shared check for prepare can also be folded into
>   preparation.
>     + Preparation perhaps doesn't need a should_prepare equivalent since
>       there's no iteration and getting the gfn is just doing some math?
>     + In another patch series?

Agreed, separate series.

Thank you Ackerley!


/fuad

>
> > E.g. this?  There will still be a looming rename conflict, but that's easy enough
> > to handle.
> >
> > diff --git virt/kvm/guest_memfd.c virt/kvm/guest_memfd.c
> > index 9ce5be7843f2..8aead0abd788 100644
> > --- virt/kvm/guest_memfd.c
> > +++ virt/kvm/guest_memfd.c
> > @@ -648,8 +648,8 @@ static bool kvm_gmem_is_safe_for_conversion(struct inode *inode, pgoff_t start,
> >         return safe;
> >  }
> >
> > -#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
> > -static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end)
> > +#ifdef CONFIG_KVM_ARCH_GMEM_FREE_ON_SHARED_CONVERSION
> > +static void kvm_gmem_make_shared(struct inode *inode, pgoff_t start, pgoff_t end)
> >  {
> >         struct folio_batch fbatch;
> >         pgoff_t next = start;
> > @@ -681,7 +681,7 @@ static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end)
> >         }
> >  }
> >  #else
> > -static void kvm_gmem_invalidate(struct inode *inode, pgoff_t start, pgoff_t end) {}
> > +static void kvm_gmem_make_shared(struct inode *inode, pgoff_t start, pgoff_t end) { }
> >  #endif
> >
> >  static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> > @@ -729,7 +729,7 @@ static int __kvm_gmem_set_attributes(struct inode *inode, pgoff_t start,
> >         kvm_gmem_invalidate_start(inode, start, end);
> >
> >         if (!to_private)
> > -               kvm_gmem_invalidate(inode, start, end);
> > +               kvm_gmem_make_shared(inode, start, end);
> >
> >         mas_store_prealloc(&mas, xa_mk_value(attrs));

^ permalink raw reply

* RE: [RFCv2 PATCH 2/6] efi/unaccepted: Set unaccepted bits for all hotplug memory
From: Duan, Zhenzhong @ 2026-06-25  6:38 UTC (permalink / raw)
  To: Kiryl Shutsemau
  Cc: marcandre.lureau@redhat.com, david@kernel.org, Edgecombe, Rick P,
	prsampat@amd.com, pbonzini@redhat.com, mst@redhat.com,
	peterx@redhat.com, Qiang, Chenyi, Reshetova, Elena,
	michael.roth@amd.com, ackerleytng@google.com,
	linux-kernel@vger.kernel.org, linux-coco@lists.linux.dev,
	virtualization@lists.linux.dev, x86@kernel.org, Xu, Yilun,
	Li, Xiaoyao, Peng, Chao P
In-Reply-To: <ajvNXyYwb7FXAJhP@thinkstation>



>-----Original Message-----
>From: Kiryl Shutsemau <kas@kernel.org>
>Subject: Re: [RFCv2 PATCH 2/6] efi/unaccepted: Set unaccepted bits for all hotplug
>memory
>
>On Tue, Jun 23, 2026 at 06:17:33AM -0400, Zhenzhong Duan wrote:
>> In coco guests, hotpluggable memory ranges are initially unaccepted.
>> While a previous change expanded the unaccepted memory bitmap boundaries
>> to include these hotplug spaces, the actual bits inside the bitmap are
>> not yet marked as unaccepted.
>>
>> Walk the SRAT a second time after the bitmap is allocated and set the bits
>> corresponding to hotpluggable ranges.
>>
>> This ensures the bitmap state accurately reflects all static and hotplug
>> memory ranges before booting kernel.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  .../firmware/efi/libstub/unaccepted_memory.c   | 18 ++++++++++++++++++
>>  1 file changed, 18 insertions(+)
>>
>> diff --git a/drivers/firmware/efi/libstub/unaccepted_memory.c
>b/drivers/firmware/efi/libstub/unaccepted_memory.c
>> index bfbb78bd7b8a..01bed8e751ca 100644
>> --- a/drivers/firmware/efi/libstub/unaccepted_memory.c
>> +++ b/drivers/firmware/efi/libstub/unaccepted_memory.c
>> @@ -92,6 +92,23 @@ static void update_mem_boundaries(struct
>acpi_srat_mem_affinity *mem, struct sra
>>  		*(ctx->mem_end) = range_end;
>>  }
>>
>> +static void mark_hotplug_memory_unaccepted(struct acpi_srat_mem_affinity
>*mem,
>> +					   struct srat_parse_ctx *ctx)
>> +{
>> +	u64 unit_size = unaccepted_table->unit_size;
>> +	u64 start, end;
>> +
>> +	start = round_up(mem->base_address, unit_size);
>> +	end = round_down(mem->base_address + mem->length, unit_size);
>
>We can get here with start > end if the SRAT range is less than unit_size.

Will add a check to ignore ranges smaller than unit_size:

+       if (start >= end)
+               return;
+

Thanks
Zhenzhong

^ permalink raw reply

* Re: [PATCH v2 02/17] x86/virt/tdx: Configure add-on features on TDX module init and update
From: Xu Yilun @ 2026-06-25  6:33 UTC (permalink / raw)
  To: Peter Fang
  Cc: Dave Hansen, x86, kvm, linux-coco, linux-kernel, djbw, kas,
	rick.p.edgecombe, yilun.xu, xiaoyao.li, sohil.mehta,
	adrian.hunter, kishen.maloor, tony.lindgren, baolu.lu,
	zhenzhong.duan, dave.hansen, seanjc
In-Reply-To: <20260624221037.GD923079@pedri>

On Wed, Jun 24, 2026 at 03:10:37PM -0700, Peter Fang wrote:
> On Wed, Jun 24, 2026 at 08:00:39PM +0800, Xu Yilun wrote:
> > > There's also zero stopping us from putting version in args:
> > > 
> > > 	struct tdx_module_args args = {};
> > >   	int ret;
> > > 
> > > 	if (tdx_addon_feature0) {
> > > 		args.r9 = tdx_addon_feature0;
> > > 		args.version = 1;
> > > 	}
> > > 
> > > 	ret = seamcall_prerr(TDH_SYS_UPDATE, &args);
> > > 
> > > Eh?
> > > 
> > > That gives args.version==0 in all the normal cases which just happens to
> > > be the exact behavior we want. It also avoids having to plumb version
> > > through all the seamcall*() wrappers.
> > 
> > Ah, on 2nd reading, I'm pretty sure now I understand your logical argument in
> > patch 1 and 2. It's good to me. I append my diff at the end.
> > 
> 
> [ ... ]
> 
> > diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
> > index 016a2a1ec1d6..d1d3d40c5614 100644
> > --- a/arch/x86/virt/vmx/tdx/tdxcall.S
> > +++ b/arch/x86/virt/vmx/tdx/tdxcall.S
> > @@ -48,6 +48,14 @@
> >         /* Move Leaf ID to RAX */
> >         mov %rdi, %rax
> > 
> > +       /*
> > +        * Extract the version from 'struct tdx_module_args', append it to
> > +        * RAX[23:16]
> > +        */
> > +       movzbl  TDX_MODULE_version(%rsi), %ecx
> > +       shll    $16, %ecx
> > +       orq     %rcx, %rax
> > +
> >         /* Move other input regs from 'struct tdx_module_args' */
> >         movq    TDX_MODULE_rcx(%rsi), %rcx
> >         movq    TDX_MODULE_rdx(%rsi), %rdx
> 
> This approach looks much cleaner to me. Would it be better to have a
> small C helper to encode the final RAX value instead of operating on RAX
> directly in asm? Looking at the May 2026 edition of the ABI spec,
> SEAMCALL RAX encoding is starting to get quite complex. Just thinking
> about this from a readability standpoint.

I'm also fine with that. I made a diff for your proposal. The additional
effort here is to update some comments and parameter names to reflect
the difference between "function/func/fn" (the unversioned number) and
the final composite "fn_code" for RAX.

-----8<-------

diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index f20e91d7ac35..c26eca18fded 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -143,6 +143,8 @@ struct tdx_module_args {
 	u64 rbx;
 	u64 rdi;
 	u64 rsi;
+	/* for leaf encoding */
+	u8  version;
 };
 
 /* Used to communicate with the TDX module */
diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S
index 6854c52c374b..5cf3993e98f4 100644
--- a/arch/x86/virt/vmx/tdx/seamcall.S
+++ b/arch/x86/virt/vmx/tdx/seamcall.S
@@ -10,8 +10,8 @@
  *
  * __seamcall() function ABI:
  *
- * @fn   (RDI)  - SEAMCALL Leaf number, moved to RAX
- * @args (RSI)  - struct tdx_module_args for input
+ * @fn_code (RDI)  - SEAMCALL composite leaf code, moved to RAX
+ * @args    (RSI)  - struct tdx_module_args for input
  *
  * Only RCX/RDX/R8-R11 are used as input registers.
  *
@@ -29,8 +29,8 @@ SYM_FUNC_END(__seamcall)
  *
  * __seamcall_ret() function ABI:
  *
- * @fn   (RDI)  - SEAMCALL Leaf number, moved to RAX
- * @args (RSI)  - struct tdx_module_args for input and output
+ * @fn_code (RDI)  - SEAMCALL composite leaf code, moved to RAX
+ * @args    (RSI)  - struct tdx_module_args for input and output
  *
  * Only RCX/RDX/R8-R11 are used as input/output registers.
  *
@@ -51,8 +51,8 @@ SYM_FUNC_END(__seamcall_ret)
  *
  * __seamcall_saved_ret() function ABI:
  *
- * @fn   (RDI)  - SEAMCALL Leaf number, moved to RAX
- * @args (RSI)  - struct tdx_module_args for input and output
+ * @fn_code (RDI)  - SEAMCALL composite leaf code, moved to RAX
+ * @args    (RSI)  - struct tdx_module_args for input and output
  *
  * All registers in @args are used as input/output registers.
  *
diff --git a/arch/x86/virt/vmx/tdx/seamcall_internal.h b/arch/x86/virt/vmx/tdx/seamcall_internal.h
index be5f446467df..bb17d965b453 100644
--- a/arch/x86/virt/vmx/tdx/seamcall_internal.h
+++ b/arch/x86/virt/vmx/tdx/seamcall_internal.h
@@ -11,17 +11,28 @@
 #ifndef _X86_VIRT_SEAMCALL_INTERNAL_H
 #define _X86_VIRT_SEAMCALL_INTERNAL_H
 
+#include <linux/bitfield.h>
 #include <linux/printk.h>
 #include <linux/types.h>
 #include <asm/archrandom.h>
 #include <asm/processor.h>
 #include <asm/tdx.h>
 
-u64 __seamcall(u64 fn, struct tdx_module_args *args);
-u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
-u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
+u64 __seamcall(u64 fn_code, struct tdx_module_args *args);
+u64 __seamcall_ret(u64 fn_code, struct tdx_module_args *args);
+u64 __seamcall_saved_ret(u64 fn_code, struct tdx_module_args *args);
 
-typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
+typedef u64 (*sc_func_t)(u64 fn_code, struct tdx_module_args *args);
+
+#define SEAMCALL_VERSION_MASK		GENMASK_U64(23, 16)
+
+static __always_inline u64 __seamcall_fn_encoding(sc_func_t func, u64 fn,
+						  struct tdx_module_args *args)
+{
+	FIELD_MODIFY(SEAMCALL_VERSION_MASK, &fn, args->version);
+
+	return func(fn, args);
+}
 
 static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
 						  struct tdx_module_args *args)
@@ -39,7 +50,7 @@ static __always_inline u64 __seamcall_dirty_cache(sc_func_t func, u64 fn,
 	 */
 	this_cpu_write(cache_state_incoherent, true);
 
-	return func(fn, args);
+	return __seamcall_fn_encoding(func, fn, args);
 }
 
 static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S
index 016a2a1ec1d6..b0f7867bcd1c 100644
--- a/arch/x86/virt/vmx/tdx/tdxcall.S
+++ b/arch/x86/virt/vmx/tdx/tdxcall.S
@@ -24,7 +24,7 @@
  *-------------------------------------------------------------------------
  * Input Registers:
  *
- * RAX                        - TDCALL/SEAMCALL Leaf number.
+ * RAX                        - TDCALL/SEAMCALL composite Leaf code.
  * RCX,RDX,RDI,RSI,RBX,R8-R15 - TDCALL/SEAMCALL Leaf specific input registers.
  *
  * Output Registers:
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 6a1c4fe202bb..8c1a5b7f603a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1019,7 +1019,6 @@ static __init void set_tdx_addon_features(void)
 static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
 				    u64 global_keyid)
 {
-	u64 seamcall_fn = TDH_SYS_CONFIG_V0;
 	struct tdx_module_args args = {};
 	u64 *tdmr_pa_array;
 	size_t array_sz;
@@ -1042,18 +1041,18 @@ static __init int config_tdx_module(struct tdmr_info_list *tdmr_list,
 	for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
 		tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));
 
+	set_tdx_addon_features();
+
 	args.rcx = __pa(tdmr_pa_array);
 	args.rdx = tdmr_list->nr_consumed_tdmrs;
 	args.r8 = global_keyid;
 
-	set_tdx_addon_features();
-
 	if (tdx_addon_feature0) {
 		args.r9 = tdx_addon_feature0;
-		seamcall_fn = TDH_SYS_CONFIG;
+		args.version = 1;
 	}
 
-	ret = seamcall_prerr(seamcall_fn, &args);
+	ret = seamcall_prerr(TDH_SYS_CONFIG, &args);
 
 	/* Free the array as it is not required anymore. */
 	kfree(tdmr_pa_array);
@@ -1515,16 +1514,15 @@ int tdx_module_shutdown(void)
 
 int tdx_module_run_update(void)
 {
-	u64 seamcall_fn = TDH_SYS_UPDATE_V0;
 	struct tdx_module_args args = {};
 	int ret;
 
 	if (tdx_addon_feature0) {
 		args.r9 = tdx_addon_feature0;
-		seamcall_fn = TDH_SYS_UPDATE;
+		args.version = 1;
 	}
 
-	ret = seamcall_prerr(seamcall_fn, &args);
+	ret = seamcall_prerr(TDH_SYS_UPDATE, &args);
 	if (ret)
 		return ret;
 
@@ -2112,6 +2110,7 @@ u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
 		.rcx = vp->tdvpr_pa,
 		.rdx = initial_rcx,
 		.r8 = x2apicid,
+		.version = 1,
 	};
 
 	return seamcall(TDH_VP_INIT, &args);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 2deb0a5c902e..1f43d2eb2345 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -2,7 +2,6 @@
 #ifndef _X86_VIRT_TDX_H
 #define _X86_VIRT_TDX_H
 
-#include <linux/bitfield.h>
 #include <linux/bits.h>
 
 /*
@@ -12,18 +11,6 @@
  * architectural definitions come first.
  */
 
-/*
- * SEAMCALL leaf:
- *
- * Bit 15:0	Leaf number
- * Bit 23:16	Version number
- */
-#define SEAMCALL_LEAF			GENMASK(15, 0)
-#define SEAMCALL_VER			GENMASK(23, 16)
-
-#define SEAMCALL_LEAF_VER(l, v)		(FIELD_PREP(SEAMCALL_LEAF, l) | \
-					 FIELD_PREP(SEAMCALL_VER, v))
-
 /*
  * TDX module SEAMCALL leaf functions
  */
@@ -44,7 +31,7 @@
 #define TDH_VP_CREATE			10
 #define TDH_MNG_KEY_FREEID		20
 #define TDH_MNG_INIT			21
-#define TDH_VP_INIT			SEAMCALL_LEAF_VER(22, 1)
+#define TDH_VP_INIT			22
 #define TDH_PHYMEM_PAGE_RDMD		24
 #define TDH_VP_RD			26
 #define TDH_PHYMEM_PAGE_RECLAIM		28
@@ -58,11 +45,9 @@
 #define TDH_PHYMEM_CACHE_WB		40
 #define TDH_PHYMEM_PAGE_WBINVD		41
 #define TDH_VP_WR			43
-#define TDH_SYS_CONFIG_V0		45
-#define TDH_SYS_CONFIG			SEAMCALL_LEAF_VER(TDH_SYS_CONFIG_V0, 1)
+#define TDH_SYS_CONFIG			45
 #define TDH_SYS_SHUTDOWN		52
-#define TDH_SYS_UPDATE_V0		53
-#define TDH_SYS_UPDATE			SEAMCALL_LEAF_VER(TDH_SYS_UPDATE_V0, 1)
+#define TDH_SYS_UPDATE			53
 #define TDH_EXT_INIT			60
 #define TDH_EXT_MEM_ADD			61
 #define TDH_SYS_DISABLE			69



^ permalink raw reply related

* Re: [PATCH v2 17/17] KVM: TDX: Support event-notify interrupts only with userspace Quoting
From: Tony Lindgren @ 2026-06-25  6:28 UTC (permalink / raw)
  To: Xu Yilun
  Cc: x86, kvm, linux-coco, linux-kernel, djbw, kas, rick.p.edgecombe,
	yilun.xu, xiaoyao.li, sohil.mehta, adrian.hunter, kishen.maloor,
	peter.fang, baolu.lu, zhenzhong.duan, dave.hansen, dave.hansen,
	seanjc
In-Reply-To: <20260618081355.3253581-18-yilun.xu@linux.intel.com>

On Thu, Jun 18, 2026 at 04:13:55PM +0800, Xu Yilun wrote:
> From: Peter Fang <peter.fang@intel.com>
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -202,8 +202,15 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
>  
>  	caps->cpuid.nent = td_conf->num_cpuid_config;
>  
> -	caps->user_tdvmcallinfo_1_r11 =
> -		TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT;
> +	/*
> +	 * Don't advertise userspace event-notify interrupt support if TDX
> +	 * quoting service is enabled, as quote generation will be handled
> +	 * entirely in the kernel. Support in the kernel can be added later.
> +	 */
> +	if (!tdx_quote_enabled()) {
> +		caps->user_tdvmcallinfo_1_r11 |=
> +			TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT;
> +	}

Can you also use kvm_tdx->get_quote_in_kernel above? Or should it maybe
be initialized here if not used earlier?
  
> @@ -1684,9 +1691,16 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu)
>  
>  static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
>  	struct vcpu_tdx *tdx = to_tdx(vcpu);
>  	u64 vector = tdx->vp_enter_args.r12;
>  
> +	/* See comment in init_kvm_tdx_caps() */
> +	if (kvm_tdx->get_quote_in_kernel) {
> +		tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED);
> +		return 1;
> +	}
> +

I'm asking since you're using kvm_tdx->get_quote_in_kernel here.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox