All of lore.kernel.org
 help / color / mirror / Atom feed
From: Patrick Roy <patrick.roy@linux.dev>
To: "Roy, Patrick" <roypat@amazon.co.uk>
Cc: "pbonzini@redhat.com" <pbonzini@redhat.com>,
	"corbet@lwn.net" <corbet@lwn.net>,
	"maz@kernel.org" <maz@kernel.org>,
	"oliver.upton@linux.dev" <oliver.upton@linux.dev>,
	"joey.gouly@arm.com" <joey.gouly@arm.com>,
	"suzuki.poulose@arm.com" <suzuki.poulose@arm.com>,
	"yuzenghui@huawei.com" <yuzenghui@huawei.com>,
	"catalin.marinas@arm.com" <catalin.marinas@arm.com>,
	"will@kernel.org" <will@kernel.org>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"hpa@zytor.com" <hpa@zytor.com>,
	"luto@kernel.org" <luto@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"willy@infradead.org" <willy@infradead.org>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"david@redhat.com" <david@redhat.com>,
	"lorenzo.stoakes@oracle.com" <lorenzo.stoakes@oracle.com>,
	"Liam.Howlett@oracle.com" <Liam.Howlett@oracle.com>,
	"vbabka@suse.cz" <vbabka@suse.cz>,
	"rppt@kernel.org" <rppt@kernel.org>,
	"surenb@google.com" <surenb@google.com>,
	"mhocko@suse.com" <mhocko@suse.com>,
	"song@kernel.org" <song@kernel.org>,
	"jolsa@kernel.org" <jolsa@kernel.org>,
	"ast@kernel.org" <ast@kernel.org>,
	"daniel@iogearbox.net" <daniel@iogearbox.net>,
	"andrii@kernel.org" <andrii@kernel.org>,
	"martin.lau@linux.dev" <martin.lau@linux.dev>,
	"eddyz87@gmail.com" <eddyz87@gmail.com>,
	"yonghong.song@linux.dev" <yonghong.song@linux.dev>,
	"john.fastabend@gmail.com" <john.fastabend@gmail.com>,
	"kpsingh@kernel.org" <kpsingh@kernel.org>,
	"sdf@fomichev.me" <sdf@fomichev.me>,
	"haoluo@google.com" <haoluo@google.com>,
	"jgg@ziepe.ca" <jgg@ziepe.ca>,
	"jhubbard@nvidia.com" <jhubbard@nvidia.com>,
	"peterx@redhat.com" <peterx@redhat.com>,
	"jannh@google.com" <jannh@google.com>,
	"pfalcato@suse.de" <pfalcato@suse.de>,
	"shuah@kernel.org" <shuah@kernel.org>,
	"seanjc@google.com" <seanjc@google.com>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	"linux-doc@vger.kernel.org" <linux-doc@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-arm-kernel@lists.infradead.org"
	<linux-arm-kernel@lists.infradead.org>,
	"kvmarm@lists.linux.dev" <kvmarm@lists.linux.dev>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"bpf@vger.kernel.org" <bpf@vger.kernel.org>,
	"linux-kselftest@vger.kernel.org"
	<linux-kselftest@vger.kernel.org>,
	"Cali, Marco" <xmarcalx@amazon.co.uk>,
	"Kalyazin, Nikita" <kalyazin@amazon.co.uk>,
	"Thomson, Jack" <jackabt@amazon.co.uk>,
	"derekmn@amazon.co.uk" <derekmn@amazon.co.uk>,
	"tabba@google.com" <tabba@google.com>,
	"ackerleytng@google.com" <ackerleytng@google.com>
Subject: Re: [PATCH v7 05/12] KVM: guest_memfd: Add flag to remove from direct map
Date: Fri, 26 Sep 2025 15:49:13 +0100	[thread overview]
Message-ID: <32608c1b-6da5-4a06-9790-58dfd4ba2011@linux.dev> (raw)
In-Reply-To: <20250924152214.7292-2-roypat@amazon.co.uk>



On Wed, 2025-09-24 at 16:22 +0100, "Roy, Patrick" wrote:

[...]

> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 55b8d739779f..b7129c4868c5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,9 @@
>  #include <linux/kvm_host.h>
>  #include <linux/pagemap.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/set_memory.h>
> +
> +#include <asm/tlbflush.h>
>  
>  #include "kvm_mm.h"
>  
> @@ -42,6 +45,44 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
>  	return 0;
>  }
>  
> +#define KVM_GMEM_FOLIO_NO_DIRECT_MAP BIT(0)
> +
> +static bool kvm_gmem_folio_no_direct_map(struct folio *folio)
> +{
> +	return ((u64) folio->private) & KVM_GMEM_FOLIO_NO_DIRECT_MAP;
> +}
> +
> +static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
> +{
> +	if (kvm_gmem_folio_no_direct_map(folio))
> +		return 0;
> +
> +	int r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> +					 false);
> +
> +	if (!r) {
> +		unsigned long addr = (unsigned long) folio_address(folio);
> +		folio->private = (void *) ((u64) folio->private & KVM_GMEM_FOLIO_NO_DIRECT_MAP);
> +		flush_tlb_kernel_range(addr, addr + folio_size(folio));
> +	}
> +
> +	return r;
> +}

No idea how I managed to mess this function up so completely, but it
should be more like

static int kvm_gmem_folio_zap_direct_map(struct folio *folio)
{
	int r = 0;
	unsigned long addr = (unsigned long) folio_address(folio);
	u64 gmem_flags = (u64) folio_inode(folio)->i_private;

	if (kvm_gmem_folio_no_direct_map(folio) || !(gmem_flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP))
		goto out;

	r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), false);

	if (r)
		goto out;

	folio->private = (void *) KVM_GMEM_FOLIO_NO_DIRECT_MAP;
	flush_tlb_kernel_range(addr, addr + folio_size(folio));

out:
	return r;
}

the version I sent (a) does not respect the flags passed to guest_memfd
on creation, and (b) does not correctly set the bit in folio->private.

> +static void kvm_gmem_folio_restore_direct_map(struct folio *folio)
> +{
> +	/*
> +	 * Direct map restoration cannot fail, as the only error condition
> +	 * for direct map manipulation is failure to allocate page tables
> +	 * when splitting huge pages, but this split would have already
> +	 * happened in set_direct_map_invalid_noflush() in kvm_gmem_folio_zap_direct_map().
> +	 * Thus set_direct_map_valid_noflush() here only updates prot bits.
> +	 */
> +	if (kvm_gmem_folio_no_direct_map(folio))
> +		set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
> +					 true);
> +}
> +
>  static inline void kvm_gmem_mark_prepared(struct folio *folio)
>  {
>  	folio_mark_uptodate(folio);
> @@ -324,13 +365,14 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>  	struct inode *inode = file_inode(vmf->vma->vm_file);
>  	struct folio *folio;
>  	vm_fault_t ret = VM_FAULT_LOCKED;
> +	int err;
>  
>  	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
>  		return VM_FAULT_SIGBUS;
>  
>  	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
>  	if (IS_ERR(folio)) {
> -		int err = PTR_ERR(folio);
> +		err = PTR_ERR(folio);
>  
>  		if (err == -EAGAIN)
>  			return VM_FAULT_RETRY;
> @@ -348,6 +390,13 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>  		kvm_gmem_mark_prepared(folio);
>  	}
>  
> +	err = kvm_gmem_folio_zap_direct_map(folio);
> +
> +	if (err) {
> +		ret = vmf_error(err);
> +		goto out_folio;
> +	}
> +
>  	vmf->page = folio_file_page(folio, vmf->pgoff);
>  
>  out_folio:
> @@ -435,6 +484,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
>  	kvm_pfn_t pfn = page_to_pfn(page);
>  	int order = folio_order(folio);
>  
> +	kvm_gmem_folio_restore_direct_map(folio);
> +
>  	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
>  }
>  
> @@ -499,6 +550,9 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	/* Unmovable mappings are supposed to be marked unevictable as well. */
>  	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>  
> +	if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
> +		mapping_set_no_direct_map(inode->i_mapping);
> +
>  	kvm_get_kvm(kvm);
>  	gmem->kvm = kvm;
>  	xa_init(&gmem->bindings);
> @@ -523,6 +577,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>  	if (kvm_arch_supports_gmem_mmap(kvm))
>  		valid_flags |= GUEST_MEMFD_FLAG_MMAP;
>  
> +	if (kvm_arch_gmem_supports_no_direct_map())
> +		valid_flags |= GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
> +
>  	if (flags & ~valid_flags)
>  		return -EINVAL;
>  
> @@ -687,6 +744,8 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
>  	if (!is_prepared)
>  		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
>  
> +	kvm_gmem_folio_zap_direct_map(folio);
> +
>  	folio_unlock(folio);
>  
>  	if (!r)

[...]

  parent reply	other threads:[~2025-09-26 14:49 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-24 15:10 [PATCH v7 00/12] Direct Map Removal Support for guest_memfd Patrick Roy
2025-09-24 15:10 ` [PATCH v7 01/12] arch: export set_direct_map_valid_noflush to KVM module Patrick Roy
2025-09-24 15:10   ` Patrick Roy
2025-09-24 15:10 ` [PATCH v7 02/12] x86/tlb: export flush_tlb_kernel_range " Patrick Roy
2025-09-24 15:10 ` [PATCH v7 03/12] mm: introduce AS_NO_DIRECT_MAP Patrick Roy
2025-09-24 15:22   ` [PATCH v7 04/12] KVM: guest_memfd: Add stub for kvm_arch_gmem_invalidate Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 05/12] KVM: guest_memfd: Add flag to remove from direct map Roy, Patrick
2025-09-25 11:00       ` David Hildenbrand
2025-09-25 15:52         ` Roy, Patrick
2025-09-25 19:28           ` David Hildenbrand
2025-09-26 14:49       ` Patrick Roy [this message]
2025-10-31 17:30       ` Brendan Jackman
2025-11-01  9:39         ` Mike Rapoport
2025-11-03 10:35           ` Brendan Jackman
2025-11-03 10:50             ` Mike Rapoport
2025-11-04 11:08               ` Brendan Jackman
2025-11-10 12:34                 ` Mike Rapoport
2025-11-03  7:57       ` Aneesh Kumar K.V
2025-12-05 17:23         ` Nikita Kalyazin
2025-09-24 15:22     ` [PATCH v7 06/12] KVM: guest_memfd: add module param for disabling TLB flushing Roy, Patrick
2025-09-25 11:02       ` David Hildenbrand
2025-09-25 15:50         ` Roy, Patrick
2025-09-25 19:32           ` David Hildenbrand
2025-09-25 18:27       ` Dave Hansen
2025-09-25 19:20         ` David Hildenbrand
2025-09-25 19:59           ` Dave Hansen
2025-09-25 20:13             ` David Hildenbrand
2025-09-26  9:46               ` Patrick Roy
2025-09-26 10:53                 ` Will Deacon
2025-09-26 20:09                   ` David Hildenbrand
2025-09-27  7:38                     ` Patrick Roy
2025-09-29 10:20                       ` David Hildenbrand
2025-10-11 14:32                         ` Patrick Roy
2025-11-07 15:29                           ` Ackerley Tng
2025-11-07 17:22                             ` Nikita Kalyazin
2025-11-07 17:21                           ` Nikita Kalyazin
2025-10-30 16:05         ` Brendan Jackman
2025-10-31 18:31           ` Brendan Jackman
2025-09-24 15:22     ` [PATCH v7 07/12] KVM: selftests: load elf via bounce buffer Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 08/12] KVM: selftests: set KVM_MEM_GUEST_MEMFD in vm_mem_add() if guest_memfd != -1 Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 09/12] KVM: selftests: Add guest_memfd based vm_mem_backing_src_types Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 10/12] KVM: selftests: cover GUEST_MEMFD_FLAG_NO_DIRECT_MAP in existing selftests Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 11/12] KVM: selftests: stuff vm_mem_backing_src_type into vm_shape Roy, Patrick
2025-09-24 15:22     ` [PATCH v7 12/12] KVM: selftests: Test guest execution from direct map removed gmem Roy, Patrick
2025-10-30 17:18       ` Brendan Jackman
2025-12-05 17:24         ` Nikita Kalyazin
2025-09-25 10:26     ` [PATCH v7 04/12] KVM: guest_memfd: Add stub for kvm_arch_gmem_invalidate David Hildenbrand
2025-09-25 10:25   ` [PATCH v7 03/12] mm: introduce AS_NO_DIRECT_MAP David Hildenbrand
2025-09-24 15:29 ` [PATCH v7 00/12] Direct Map Removal Support for guest_memfd Roy, Patrick
2025-09-24 15:38   ` David Hildenbrand
2025-11-07 15:54 ` Brendan Jackman
2025-11-07 17:23   ` Nikita Kalyazin
2025-11-07 18:04     ` Brendan Jackman
2025-11-07 18:11       ` Nikita Kalyazin
2025-11-10 15:36         ` Brendan Jackman
2025-11-07 17:37   ` Brendan Jackman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=32608c1b-6da5-4a06-9790-58dfd4ba2011@linux.dev \
    --to=patrick.roy@linux.dev \
    --cc=Liam.Howlett@oracle.com \
    --cc=ackerleytng@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bp@alien8.de \
    --cc=bpf@vger.kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=corbet@lwn.net \
    --cc=daniel@iogearbox.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=derekmn@amazon.co.uk \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=hpa@zytor.com \
    --cc=jackabt@amazon.co.uk \
    --cc=jannh@google.com \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=joey.gouly@arm.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kalyazin@amazon.co.uk \
    --cc=kpsingh@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=maz@kernel.org \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=oliver.upton@linux.dev \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pfalcato@suse.de \
    --cc=roypat@amazon.co.uk \
    --cc=rppt@kernel.org \
    --cc=sdf@fomichev.me \
    --cc=seanjc@google.com \
    --cc=shuah@kernel.org \
    --cc=song@kernel.org \
    --cc=surenb@google.com \
    --cc=suzuki.poulose@arm.com \
    --cc=tabba@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=xmarcalx@amazon.co.uk \
    --cc=yonghong.song@linux.dev \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.