linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Shivank Garg <shivankg@amd.com>
To: <seanjc@google.com>, <david@redhat.com>, <vbabka@suse.cz>,
	<willy@infradead.org>, <akpm@linux-foundation.org>,
	<shuah@kernel.org>, <pbonzini@redhat.com>, <brauner@kernel.org>,
	<viro@zeniv.linux.org.uk>
Cc: <ackerleytng@google.com>, <paul@paul-moore.com>,
	<jmorris@namei.org>, <serge@hallyn.com>, <pvorel@suse.cz>,
	<bfoster@redhat.com>, <tabba@google.com>, <vannapurve@google.com>,
	<chao.gao@intel.com>, <bharata@amd.com>, <nikunj@amd.com>,
	<michael.day@amd.com>, <shdhiman@amd.com>, <yan.y.zhao@intel.com>,
	<Neeraj.Upadhyay@amd.com>, <thomas.lendacky@amd.com>,
	<michael.roth@amd.com>, <aik@amd.com>, <jgg@nvidia.com>,
	<kalyazin@amazon.com>, <peterx@redhat.com>, <shivankg@amd.com>,
	<jack@suse.cz>, <rppt@kernel.org>, <hch@infradead.org>,
	<cgzones@googlemail.com>, <ira.weiny@intel.com>,
	<rientjes@google.com>, <roypat@amazon.co.uk>, <ziy@nvidia.com>,
	<matthew.brost@intel.com>, <joshua.hahnjy@gmail.com>,
	<rakie.kim@sk.com>, <byungchul@sk.com>, <gourry@gourry.net>,
	<kent.overstreet@linux.dev>, <ying.huang@linux.alibaba.com>,
	<apopple@nvidia.com>, <chao.p.peng@intel.com>,
	<amit@infradead.org>, <ddutile@redhat.com>,
	<dan.j.williams@intel.com>, <ashish.kalra@amd.com>,
	<gshan@redhat.com>, <jgowans@amazon.com>, <pankaj.gupta@amd.com>,
	<papaluri@amd.com>, <yuzhao@google.com>, <suzuki.poulose@arm.com>,
	<quic_eberman@quicinc.com>, <aneeshkumar.kizhakeveetil@arm.com>,
	<linux-fsdevel@vger.kernel.org>, <linux-mm@kvack.org>,
	<linux-kernel@vger.kernel.org>,
	<linux-security-module@vger.kernel.org>, <kvm@vger.kernel.org>,
	<linux-kselftest@vger.kernel.org>, <linux-coco@lists.linux.dev>
Subject: [PATCH V9 6/7] KVM: guest_memfd: Enforce NUMA mempolicy using shared policy
Date: Sun, 13 Jul 2025 17:43:40 +0000	[thread overview]
Message-ID: <20250713174339.13981-9-shivankg@amd.com> (raw)
In-Reply-To: <20250713174339.13981-2-shivankg@amd.com>

Previously, guest-memfd allocations followed the local NUMA node ID in the
absence of a process mempolicy, resulting in arbitrary memory allocation.
Moreover, mbind() couldn't be used by the VMM as guest memory wasn't
mapped into userspace when allocation occurred.

Enable NUMA policy support by implementing vm_ops for guest-memfd mmap
operation. This allows the VMM to map the memory and use mbind() to set the
desired NUMA policy. The policy is stored in the inode structure via
kvm_gmem_inode_info, as memory policy is a property of the memory (struct
inode) itself. The policy is then retrieved via mpol_shared_policy_lookup()
and passed to __filemap_get_folio_mpol() to ensure that allocations follow
the specified memory policy.

This enables the VMM to control guest memory NUMA placement by calling
mbind() on the mapped memory regions, providing fine-grained control over
guest memory allocation across NUMA nodes.

The policy change only affects future allocations and does not migrate
existing memory. This matches mbind(2)'s default behavior which affects
only new allocations unless overridden with MPOL_MF_MOVE/MPOL_MF_MOVE_ALL
flags, which are not supported for guest_memfd as it is unmovable.

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
 virt/kvm/guest_memfd.c | 67 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 989e2b26b344..5c9a5eb5c13f 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,7 @@
 #include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/kvm_host.h>
+#include <linux/mempolicy.h>
 #include <linux/pseudo_fs.h>
 #include <linux/pagemap.h>
 
@@ -18,6 +19,7 @@ struct kvm_gmem {
 };
 
 struct kvm_gmem_inode_info {
+	struct shared_policy policy;
 	struct inode vfs_inode;
 };
 
@@ -26,6 +28,9 @@ static inline struct kvm_gmem_inode_info *KVM_GMEM_I(struct inode *inode)
 	return container_of(inode, struct kvm_gmem_inode_info, vfs_inode);
 }
 
+static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
+						   pgoff_t index);
+
 /**
  * folio_file_pfn - like folio_file_page, but return a pfn.
  * @folio: The folio which contains this index.
@@ -112,7 +117,25 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 {
 	/* TODO: Support huge pages. */
-	return filemap_grab_folio(inode->i_mapping, index);
+	struct mempolicy *policy;
+	struct folio *folio;
+
+	/*
+	 * Fast-path: See if folio is already present in mapping to avoid
+	 * policy_lookup.
+	 */
+	folio = __filemap_get_folio(inode->i_mapping, index,
+				    FGP_LOCK | FGP_ACCESSED, 0);
+	if (!IS_ERR(folio))
+		return folio;
+
+	policy = kvm_gmem_get_pgoff_policy(KVM_GMEM_I(inode), index);
+	folio = __filemap_get_folio_mpol(inode->i_mapping, index,
+					 FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+					 mapping_gfp_mask(inode->i_mapping), policy);
+	mpol_cond_put(policy);
+
+	return folio;
 }
 
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -375,8 +398,45 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 	return ret;
 }
 
+#ifdef CONFIG_NUMA
+static int kvm_gmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+
+	return mpol_set_shared_policy(&KVM_GMEM_I(inode)->policy, vma, mpol);
+}
+
+static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
+					     unsigned long addr, pgoff_t *pgoff)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+
+	*pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+	return mpol_shared_policy_lookup(&KVM_GMEM_I(inode)->policy, *pgoff);
+}
+
+static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
+						   pgoff_t index)
+{
+	struct mempolicy *mpol;
+
+	mpol = mpol_shared_policy_lookup(&info->policy, index);
+	return mpol ? mpol : get_task_policy(current);
+}
+#else
+static struct mempolicy *kvm_gmem_get_pgoff_policy(struct kvm_gmem_inode_info *info,
+						   pgoff_t index)
+{
+	return NULL;
+}
+#endif /* CONFIG_NUMA */
+
 static const struct vm_operations_struct kvm_gmem_vm_ops = {
-	.fault = kvm_gmem_fault_user_mapping,
+	.fault		= kvm_gmem_fault_user_mapping,
+#ifdef CONFIG_NUMA
+	.get_policy	= kvm_gmem_get_policy,
+	.set_policy	= kvm_gmem_set_policy,
+#endif
 };
 
 static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
@@ -411,11 +471,14 @@ static struct inode *kvm_gmem_alloc_inode(struct super_block *sb)
 	if (!info)
 		return NULL;
 
+	mpol_shared_policy_init(&info->policy, NULL);
+
 	return &info->vfs_inode;
 }
 
 static void kvm_gmem_destroy_inode(struct inode *inode)
 {
+	mpol_free_shared_policy(&KVM_GMEM_I(inode)->policy);
 }
 
 static void kvm_gmem_free_inode(struct inode *inode)
-- 
2.43.0


  parent reply	other threads:[~2025-07-13 17:49 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-13 17:43 [PATCH V9 0/7] Add NUMA mempolicy support for KVM guest-memfd Shivank Garg
2025-07-13 17:43 ` [PATCH V9 1/7] KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes Shivank Garg
2025-07-22 15:18   ` David Hildenbrand
2025-08-07 21:34     ` Ackerley Tng
2025-08-07 22:14       ` Ackerley Tng
2025-08-11  8:02       ` Garg, Shivank
2025-07-13 17:43 ` [PATCH V9 2/7] mm/filemap: Add NUMA mempolicy support to filemap_alloc_folio() Shivank Garg
2025-07-22 15:20   ` David Hildenbrand
2025-07-13 17:43 ` [PATCH V9 3/7] mm/filemap: Extend __filemap_get_folio() to support NUMA memory policies Shivank Garg
2025-07-22 15:21   ` David Hildenbrand
2025-07-13 17:43 ` [PATCH V9 4/7] mm/mempolicy: Export memory policy symbols Shivank Garg
2025-07-13 17:43 ` [PATCH V9 5/7] KVM: guest_memfd: Add slab-allocated inode cache Shivank Garg
2025-07-21 11:44   ` Vlastimil Babka
2025-07-22  5:03     ` Shivank Garg
2025-07-13 17:43 ` Shivank Garg [this message]
2025-07-21 13:30   ` [PATCH V9 6/7] KVM: guest_memfd: Enforce NUMA mempolicy using shared policy Vlastimil Babka
2025-07-22 15:24   ` David Hildenbrand
2025-07-13 17:43 ` [PATCH V9 7/7] KVM: guest_memfd: selftests: Add tests for mmap and NUMA policy support Shivank Garg
2025-07-22 14:40 ` [PATCH V9 0/7] Add NUMA mempolicy support for KVM guest-memfd David Hildenbrand
2025-07-22 14:45   ` Sean Christopherson
2025-07-22 15:51     ` David Hildenbrand
2025-07-22 23:07       ` Sean Christopherson
2025-07-23  8:20         ` David Hildenbrand
2025-07-22 15:49   ` Shivank Garg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250713174339.13981-9-shivankg@amd.com \
    --to=shivankg@amd.com \
    --cc=Neeraj.Upadhyay@amd.com \
    --cc=ackerleytng@google.com \
    --cc=aik@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=amit@infradead.org \
    --cc=aneeshkumar.kizhakeveetil@arm.com \
    --cc=apopple@nvidia.com \
    --cc=ashish.kalra@amd.com \
    --cc=bfoster@redhat.com \
    --cc=bharata@amd.com \
    --cc=brauner@kernel.org \
    --cc=byungchul@sk.com \
    --cc=cgzones@googlemail.com \
    --cc=chao.gao@intel.com \
    --cc=chao.p.peng@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=gourry@gourry.net \
    --cc=gshan@redhat.com \
    --cc=hch@infradead.org \
    --cc=ira.weiny@intel.com \
    --cc=jack@suse.cz \
    --cc=jgg@nvidia.com \
    --cc=jgowans@amazon.com \
    --cc=jmorris@namei.org \
    --cc=joshua.hahnjy@gmail.com \
    --cc=kalyazin@amazon.com \
    --cc=kent.overstreet@linux.dev \
    --cc=kvm@vger.kernel.org \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=matthew.brost@intel.com \
    --cc=michael.day@amd.com \
    --cc=michael.roth@amd.com \
    --cc=nikunj@amd.com \
    --cc=pankaj.gupta@amd.com \
    --cc=papaluri@amd.com \
    --cc=paul@paul-moore.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=pvorel@suse.cz \
    --cc=quic_eberman@quicinc.com \
    --cc=rakie.kim@sk.com \
    --cc=rientjes@google.com \
    --cc=roypat@amazon.co.uk \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=serge@hallyn.com \
    --cc=shdhiman@amd.com \
    --cc=shuah@kernel.org \
    --cc=suzuki.poulose@arm.com \
    --cc=tabba@google.com \
    --cc=thomas.lendacky@amd.com \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    --cc=yan.y.zhao@intel.com \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yuzhao@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).