Linux ARM-MSM sub-architecture
 help / color / mirror / Atom feed
From: Mike Day <michael.day@amd.com>
To: Elliot Berman <quic_eberman@quicinc.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Sean Christopherson <seanjc@google.com>,
	Fuad Tabba <tabba@google.com>,
	Ackerley Tng <ackerleytng@google.com>,
	Mike Rapoport <rppt@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Jonathan Corbet <corbet@lwn.net>,
	Trond Myklebust <trondmy@kernel.org>,
	Anna Schumaker <anna@kernel.org>,
	Mike Marshall <hubcap@omnibond.com>,
	Martin Brandenburg <martin@omnibond.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>
Cc: James Gowans <jgowans@amazon.com>,
	linux-fsdevel@vger.kernel.org, kvm@vger.kernel.org,
	linux-coco@lists.linux.dev, linux-arm-msm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-doc@vger.kernel.org, linux-nfs@vger.kernel.org,
	devel@lists.orangefs.org, linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v4 2/2] mm: guestmem: Convert address_space operations to guestmem library
Date: Thu, 21 Nov 2024 11:40:59 -0600	[thread overview]
Message-ID: <3349f838-2c73-4ef0-aa30-a21e41fb39e5@amd.com> (raw)
In-Reply-To: <20241120145527130-0800.eberman@hu-eberman-lv.qualcomm.com>



On 11/21/24 10:43, Elliot Berman wrote:
> On Wed, Nov 20, 2024 at 10:12:08AM -0800, Elliot Berman wrote:
>> diff --git a/mm/guestmem.c b/mm/guestmem.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..19dd7e5d498f07577ec5cec5b52055f7435980f4
>> --- /dev/null
>> +++ b/mm/guestmem.c
>> @@ -0,0 +1,196 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * guestmem library
>> + *
>> + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
>> + */
>> +
>> +#include <linux/fs.h>
>> +#include <linux/guestmem.h>
>> +#include <linux/mm.h>
>> +#include <linux/pagemap.h>
>> +
>> +struct guestmem {
>> +	const struct guestmem_ops *ops;
>> +};
>> +
>> +static inline struct guestmem *folio_to_guestmem(struct folio *folio)
>> +{
>> +	struct address_space *mapping = folio->mapping;
>> +
>> +	return mapping->i_private_data;
>> +}
>> +
>> +static inline bool __guestmem_release_folio(struct address_space *mapping,
>> +					    struct folio *folio)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +
>> +	if (gmem->ops->release_folio) {
>> +		list_for_each(entry, &mapping->i_private_list) {
>> +			if (!gmem->ops->release_folio(entry, folio))
>> +				return false;
>> +		}
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +static inline int
>> +__guestmem_invalidate_begin(struct address_space *const mapping, pgoff_t start,
>> +			    pgoff_t end)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +	int ret = 0;
>> +
>> +	list_for_each(entry, &mapping->i_private_list) {
>> +		ret = gmem->ops->invalidate_begin(entry, start, end);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static inline void
>> +__guestmem_invalidate_end(struct address_space *const mapping, pgoff_t start,
>> +			  pgoff_t end)
>> +{
>> +	struct guestmem *gmem = mapping->i_private_data;
>> +	struct list_head *entry;
>> +
>> +	if (gmem->ops->invalidate_end) {
>> +		list_for_each(entry, &mapping->i_private_list)
>> +			gmem->ops->invalidate_end(entry, start, end);
>> +	}
>> +}
>> +
>> +static void guestmem_free_folio(struct address_space *mapping,
>> +				struct folio *folio)
>> +{
>> +	WARN_ON_ONCE(!__guestmem_release_folio(mapping, folio));
>> +}
>> +
>> +static int guestmem_error_folio(struct address_space *mapping,
>> +				struct folio *folio)
>> +{
>> +	pgoff_t start, end;
>> +	int ret;
>> +
>> +	filemap_invalidate_lock_shared(mapping);
>> +
>> +	start = folio->index;
>> +	end = start + folio_nr_pages(folio);
>> +
>> +	ret = __guestmem_invalidate_begin(mapping, start, end);
>> +	if (ret)
>> +		goto out;
>> +
>> +	/*
>> +	 * Do not truncate the range, what action is taken in response to the
>> +	 * error is userspace's decision (assuming the architecture supports
>> +	 * gracefully handling memory errors).  If/when the guest attempts to
>> +	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
>> +	 * at which point KVM can either terminate the VM or propagate the
>> +	 * error to userspace.
>> +	 */
>> +
>> +	__guestmem_invalidate_end(mapping, start, end);
>> +
>> +out:
>> +	filemap_invalidate_unlock_shared(mapping);
>> +	return ret ? MF_DELAYED : MF_FAILED;
>> +}
>> +
>> +static int guestmem_migrate_folio(struct address_space *mapping,
>> +				  struct folio *dst, struct folio *src,
>> +				  enum migrate_mode mode)
>> +{
>> +	WARN_ON_ONCE(1);
>> +	return -EINVAL;
>> +}
>> +
>> +static const struct address_space_operations guestmem_aops = {
>> +	.dirty_folio = noop_dirty_folio,
>> +	.free_folio = guestmem_free_folio,
>> +	.error_remove_folio = guestmem_error_folio,
>> +	.migrate_folio = guestmem_migrate_folio,
>> +};
>> +
>> +int guestmem_attach_mapping(struct address_space *mapping,
>> +			    const struct guestmem_ops *const ops,
>> +			    struct list_head *data)
>> +{
>> +	struct guestmem *gmem;
>> +
>> +	if (mapping->a_ops == &guestmem_aops) {
>> +		gmem = mapping->i_private_data;
>> +		if (gmem->ops != ops)
>> +			return -EINVAL;
>> +
>> +		goto add;
>> +	}
>> +
>> +	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
>> +	if (!gmem)
>> +		return -ENOMEM;
>> +
>> +	gmem->ops = ops;
>> +
>> +	mapping->a_ops = &guestmem_aops;
>> +	mapping->i_private_data = gmem;
>> +
>> +	mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
>> +	mapping_set_inaccessible(mapping);
>> +	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> +	WARN_ON_ONCE(!mapping_unevictable(mapping));
>> +
>> +add:
>> +	list_add(data, &mapping->i_private_list);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_attach_mapping);
>> +
>> +void guestmem_detach_mapping(struct address_space *mapping,
>> +			     struct list_head *data)
>> +{
>> +	list_del(data);
>> +
>> +	if (list_empty(&mapping->i_private_list)) {
>> +		kfree(mapping->i_private_data);
> 
> Mike was helping me test this out for SEV-SNP. They helped find a bug
> here. Right now, when the file closes, KVM calls
> guestmem_detach_mapping() which will uninstall the ops. When that
> happens, it's possible that not all of the folios have been removed
> from the filemap yet, and so our free_folio() callback isn't invoked.
> This means that we skip updating the RMP entry back to shared/KVM-owned.
> 
> There are a few approaches I could take:
> 
> 1. Create a guestmem superblock so I can register guestmem-specific
>     destroy_inode() to do the kfree() above. This requires a lot of
>     boilerplate code, and I think it's not the preferred approach.
> 2. Update how KVM tracks the memory so it is back in "shared" state when
>     the file closes. This requires some significant rework about the page
>     state compared to current guest_memfd. That rework might be useful
>     for the shared/private state machine.
> 3. Call truncate_inode_pages(mapping, 0) to force pages to be freed
>     here. It might be possible that a page is allocated after this
>     point. In order for that to be a problem, KVM would need to update
>     RMP entry as guest-owned, and I don't believe that's possible after
>     the last guestmem_detach_mapping().
> 
> My preference is to go with #3 as it is the easiest thing to do.

#3 is my preference as well. The semantics are that the guest is "closing" the gmem
object, which means all the memory is being released from the guest.

Mike
> 
>> +		mapping->i_private_data = NULL;
>> +		mapping->a_ops = &empty_aops;
>> +	}
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_detach_mapping);
>> +
>> +struct folio *guestmem_grab_folio(struct address_space *mapping, pgoff_t index)
>> +{
>> +	/* TODO: Support huge pages. */
>> +	return filemap_grab_folio(mapping, index);
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_grab_folio);
>> +
>> +int guestmem_punch_hole(struct address_space *mapping, loff_t offset,
>> +			loff_t len)
>> +{
>> +	pgoff_t start = offset >> PAGE_SHIFT;
>> +	pgoff_t end = (offset + len) >> PAGE_SHIFT;
>> +	int ret;
>> +
>> +	filemap_invalidate_lock(mapping);
>> +	ret = __guestmem_invalidate_begin(mapping, start, end);
>> +	if (ret)
>> +		goto out;
>> +
>> +	truncate_inode_pages_range(mapping, offset, offset + len - 1);
>> +
>> +	__guestmem_invalidate_end(mapping, start, end);
>> +
>> +out:
>> +	filemap_invalidate_unlock(mapping);
>> +	return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(guestmem_punch_hole);
>> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
>> index fd6a3010afa833e077623065b80bdbb5b1012250..1339098795d2e859b2ee0ef419b29045aedc8487 100644
>> --- a/virt/kvm/Kconfig
>> +++ b/virt/kvm/Kconfig
>> @@ -106,6 +106,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES
>>   
>>   config KVM_PRIVATE_MEM
>>          select XARRAY_MULTI
>> +       select GUESTMEM
>>          bool
>>   
>>   config KVM_GENERIC_PRIVATE_MEM
>> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
>> index 24dcbad0cb76e353509cf4718837a1999f093414..edf57d5662cb8634bbd9ca3118b293c4f7ca229a 100644
>> --- a/virt/kvm/guest_memfd.c
>> +++ b/virt/kvm/guest_memfd.c
>> @@ -1,6 +1,7 @@
>>   // SPDX-License-Identifier: GPL-2.0
>>   #include <linux/backing-dev.h>
>>   #include <linux/falloc.h>
>> +#include <linux/guestmem.h>
>>   #include <linux/kvm_host.h>
>>   #include <linux/pagemap.h>
>>   #include <linux/anon_inodes.h>
>> @@ -98,8 +99,7 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
>>    */
>>   static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
>>   {
>> -	/* TODO: Support huge pages. */
>> -	return filemap_grab_folio(inode->i_mapping, index);
>> +	return guestmem_grab_folio(inode->i_mapping, index);
>>   }
>>   
>>   static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
>> @@ -151,28 +151,7 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
>>   
>>   static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
>>   {
>> -	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
>> -	pgoff_t start = offset >> PAGE_SHIFT;
>> -	pgoff_t end = (offset + len) >> PAGE_SHIFT;
>> -	struct kvm_gmem *gmem;
>> -
>> -	/*
>> -	 * Bindings must be stable across invalidation to ensure the start+end
>> -	 * are balanced.
>> -	 */
>> -	filemap_invalidate_lock(inode->i_mapping);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_begin(gmem, start, end);
>> -
>> -	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_end(gmem, start, end);
>> -
>> -	filemap_invalidate_unlock(inode->i_mapping);
>> -
>> -	return 0;
>> +	return guestmem_punch_hole(inode->i_mapping, offset, len);
>>   }
>>   
>>   static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
>> @@ -277,7 +256,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
>>   	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
>>   	kvm_gmem_invalidate_end(gmem, 0, -1ul);
>>   
>> -	list_del(&gmem->entry);
>> +	guestmem_detach_mapping(inode->i_mapping, &gmem->entry);
>>   
>>   	filemap_invalidate_unlock(inode->i_mapping);
>>   
>> @@ -318,63 +297,42 @@ void kvm_gmem_init(struct module *module)
>>   	kvm_gmem_fops.owner = module;
>>   }
>>   
>> -static int kvm_gmem_migrate_folio(struct address_space *mapping,
>> -				  struct folio *dst, struct folio *src,
>> -				  enum migrate_mode mode)
>> +static int kvm_guestmem_invalidate_begin(struct list_head *entry, pgoff_t start,
>> +					 pgoff_t end)
>>   {
>> -	WARN_ON_ONCE(1);
>> -	return -EINVAL;
>> -}
>> +	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
>>   
>> -static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
>> -{
>> -	struct list_head *gmem_list = &mapping->i_private_list;
>> -	struct kvm_gmem *gmem;
>> -	pgoff_t start, end;
>> -
>> -	filemap_invalidate_lock_shared(mapping);
>> -
>> -	start = folio->index;
>> -	end = start + folio_nr_pages(folio);
>> -
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_begin(gmem, start, end);
>> -
>> -	/*
>> -	 * Do not truncate the range, what action is taken in response to the
>> -	 * error is userspace's decision (assuming the architecture supports
>> -	 * gracefully handling memory errors).  If/when the guest attempts to
>> -	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
>> -	 * at which point KVM can either terminate the VM or propagate the
>> -	 * error to userspace.
>> -	 */
>> +	kvm_gmem_invalidate_begin(gmem, start, end);
>>   
>> -	list_for_each_entry(gmem, gmem_list, entry)
>> -		kvm_gmem_invalidate_end(gmem, start, end);
>> +	return 0;
>> +}
>>   
>> -	filemap_invalidate_unlock_shared(mapping);
>> +static void kvm_guestmem_invalidate_end(struct list_head *entry, pgoff_t start,
>> +					pgoff_t end)
>> +{
>> +	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
>>   
>> -	return MF_DELAYED;
>> +	kvm_gmem_invalidate_end(gmem, start, end);
>>   }
>>   
>>   #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
>> -static void kvm_gmem_free_folio(struct address_space *mapping,
>> -				struct folio *folio)
>> +static bool kvm_gmem_release_folio(struct list_head *entry, struct folio *folio)
>>   {
>>   	struct page *page = folio_page(folio, 0);
>>   	kvm_pfn_t pfn = page_to_pfn(page);
>>   	int order = folio_order(folio);
>>   
>>   	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
>> +
>> +	return true;
>>   }
>>   #endif
>>   
>> -static const struct address_space_operations kvm_gmem_aops = {
>> -	.dirty_folio = noop_dirty_folio,
>> -	.migrate_folio	= kvm_gmem_migrate_folio,
>> -	.error_remove_folio = kvm_gmem_error_folio,
>> +static const struct guestmem_ops kvm_guestmem_ops = {
>> +	.invalidate_begin = kvm_guestmem_invalidate_begin,
>> +	.invalidate_end = kvm_guestmem_invalidate_end,
>>   #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
>> -	.free_folio = kvm_gmem_free_folio,
>> +	.release_folio = kvm_gmem_release_folio,
>>   #endif
>>   };
>>   
>> @@ -430,22 +388,22 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>>   
>>   	inode->i_private = (void *)(unsigned long)flags;
>>   	inode->i_op = &kvm_gmem_iops;
>> -	inode->i_mapping->a_ops = &kvm_gmem_aops;
>>   	inode->i_mode |= S_IFREG;
>>   	inode->i_size = size;
>> -	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> -	mapping_set_inaccessible(inode->i_mapping);
>> -	/* Unmovable mappings are supposed to be marked unevictable as well. */
>> -	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +	err = guestmem_attach_mapping(inode->i_mapping, &kvm_guestmem_ops,
>> +				      &gmem->entry);
>> +	if (err)
>> +		goto err_putfile;
>>   
>>   	kvm_get_kvm(kvm);
>>   	gmem->kvm = kvm;
>>   	xa_init(&gmem->bindings);
>> -	list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>>   
>>   	fd_install(fd, file);
>>   	return fd;
>>   
>> +err_putfile:
>> +	fput(file);
>>   err_gmem:
>>   	kfree(gmem);
>>   err_fd:
>>
>> -- 
>> 2.34.1
>>

  reply	other threads:[~2024-11-21 17:41 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-20 18:12 [PATCH v4 0/2] mm: Refactor KVM guest_memfd to introduce guestmem library Elliot Berman
2024-11-20 18:12 ` [PATCH v4 1/2] filemap: Pass address_space mapping to ->free_folio() Elliot Berman
2024-11-21 12:23   ` David Hildenbrand
2024-11-20 18:12 ` [PATCH v4 2/2] mm: guestmem: Convert address_space operations to guestmem library Elliot Berman
2024-11-21 16:43   ` Elliot Berman
2024-11-21 17:40     ` Mike Day [this message]
2024-11-22 11:03       ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3349f838-2c73-4ef0-aa30-a21e41fb39e5@amd.com \
    --to=michael.day@amd.com \
    --cc=ackerleytng@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=anna@kernel.org \
    --cc=brauner@kernel.org \
    --cc=corbet@lwn.net \
    --cc=david@redhat.com \
    --cc=devel@lists.orangefs.org \
    --cc=hpa@zytor.com \
    --cc=hubcap@omnibond.com \
    --cc=jack@suse.cz \
    --cc=jgowans@amazon.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=martin@omnibond.com \
    --cc=pbonzini@redhat.com \
    --cc=quic_eberman@quicinc.com \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=tabba@google.com \
    --cc=trondmy@kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox