linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chao Peng <chao.p.peng@linux.intel.com>
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Hugh Dickins <hughd@google.com>, Jeff Layton <jlayton@kernel.org>,
	"J . Bruce Fields" <bfields@fieldses.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Yu Zhang <yu.c.zhang@linux.intel.com>,
	Chao Peng <chao.p.peng@linux.intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	luto@kernel.org, john.ji@intel.com, susie.li@intel.com,
	jun.nakajima@intel.com, dave.hansen@intel.com,
	ak@linux.intel.com, david@redhat.com
Subject: [RFC PATCH 6/6] KVM: add KVM_SPLIT_MEMORY_REGION
Date: Thu, 11 Nov 2021 22:13:45 +0800	[thread overview]
Message-ID: <20211111141352.26311-7-chao.p.peng@linux.intel.com> (raw)
In-Reply-To: <20211111141352.26311-1-chao.p.peng@linux.intel.com>

This new ioctl lets userspace split an existing memory region into two
parts. The first part reuses the existing memory region but has a
reduced size. The second part is a newly created region.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 arch/x86/kvm/x86.c       |   3 +-
 include/linux/kvm_host.h |   4 ++
 include/uapi/linux/kvm.h |  16 +++++
 virt/kvm/kvm_main.c      | 147 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 98dbe602f47b..1d490c3d7766 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11020,7 +11020,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
 				enum kvm_mr_change change)
 {
-	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
+	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE ||
+	    change == KVM_MR_SHRINK)
 		return kvm_alloc_memslot_metadata(memslot,
 						  mem->memory_size >> PAGE_SHIFT);
 	return 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 17fabb4f53bf..8b5a9217231b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -752,6 +752,9 @@ static inline bool memslot_is_private(const struct kvm_memory_slot *slot)
  *   -- move it in the guest physical memory space
  *   -- just change its flags
  *
+ * KVM_SPLIT_MEMORY_REGION ioctl allows the following operation:
+ * - shrink an existing memory slot
+ *
  * Since flags can be changed by some of these operations, the following
  * differentiation is the best we can do for __kvm_set_memory_region():
  */
@@ -760,6 +763,7 @@ enum kvm_mr_change {
 	KVM_MR_DELETE,
 	KVM_MR_MOVE,
 	KVM_MR_FLAGS_ONLY,
+	KVM_MR_SHRINK,
 };
 
 int kvm_set_memory_region(struct kvm *kvm,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 470c472a9451..e61c0eac91e7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1108,6 +1108,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING 192
 #define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
+#define KVM_CAP_MEMORY_REGION_SPLIT 195
 
 #define KVM_CAP_VM_TYPES 1000
 
@@ -1885,4 +1886,19 @@ struct kvm_dirty_gfn {
 #define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
 #define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
 
+/**
+ * struct kvm_split_memory_region_info - Information for a memory region split.
+ * @slot1: The slot to be split.  The low 16 bits hold the slot id and the
+ *         upper 16 bits hold the address space id.
+ * @slot2: The slot for the newly split part.  Encoded like @slot1; it must
+ *         use a different slot id in the same address space as @slot1.
+ * @offset: The offset (in bytes) in @slot1 at which to split.  Must be a
+ *          multiple of the page size.
+ */
+struct kvm_split_memory_region_info {
+	__u32 slot1;
+	__u32 slot2;
+	__u64 offset;
+};
+
+/*
+ * VM ioctl: split one memory slot into two.  Userspace passes the request in
+ * a struct kvm_split_memory_region_info.
+ */
+#define KVM_SPLIT_MEMORY_REGION _IOW(KVMIO, 0xcf, \
+					struct kvm_split_memory_region_info)
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e8e2c5b28aa4..11b0f3d8b9ee 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1467,6 +1467,140 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 	return kvm_set_memory_region(kvm, mem);
 }
 
+/*
+ * Describe @slot as a kvm_userspace_memory_region in @mem.
+ *
+ * The slot field packs the address space id into bits 16-31 and the slot id
+ * into the low 16 bits.  The guest address and the size are converted from
+ * page units (base_gfn, npages) to bytes.
+ */
+static void memslot_to_memory_region(struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot *slot)
+{
+	mem->slot = (u32)slot->as_id << 16 | slot->id;
+	mem->flags = slot->flags;
+	/* gfn -> gpa is a left shift; a right shift yields a bogus address. */
+	mem->guest_phys_addr = slot->base_gfn << PAGE_SHIFT;
+	mem->memory_size = slot->npages << PAGE_SHIFT;
+	mem->userspace_addr = slot->userspace_addr;
+}
+
+/*
+ * Split memslot @id1 of address space @as_id into two slots.
+ *
+ * @offset is the split point in pages, counted from the start of slot @id1.
+ * On success slot @id1 keeps its first @offset pages and a new slot @id2
+ * covers the remaining pages, inheriting flags, backing file and private_ops
+ * from the original slot.
+ *
+ * The only caller in this file holds kvm->slots_lock around the call.
+ *
+ * Return: 0 on success; -EINVAL if slot @id1 does not exist, slot @id2 is
+ * already in use, or @offset is not strictly inside slot @id1; -ENOMEM if the
+ * memslot array cannot be duplicated; otherwise the error reported by
+ * kvm_alloc_dirty_bitmap() or kvm_arch_prepare_memory_region().
+ */
+static int kvm_split_memory_region(struct kvm *kvm, int as_id, int id1, int id2,
+					gfn_t offset)
+{
+	struct kvm_memory_slot *slot1;
+	/*
+	 * slot2 and shrunk start zeroed so that fields which are never set
+	 * below (notably dirty_bitmap) are NULL rather than stack garbage.
+	 */
+	struct kvm_memory_slot slot2 = {}, shrunk = {}, old;
+	struct kvm_userspace_memory_region mem;
+	struct kvm_memslots *active, *slots;
+	int r;
+
+	active = __kvm_memslots(kvm, as_id);
+
+	/* The slot to split must exist and the new slot id must be free. */
+	slot1 = id_to_memslot(active, id1);
+	if (!slot1 || id_to_memslot(active, id2))
+		return -EINVAL;
+
+	/* Make a full copy of the old memslot. */
+	old = *slot1;
+
+	/* @offset is relative to the slot; both halves must be non-empty. */
+	if (!offset || offset >= old.npages)
+		return -EINVAL;
+
+	/* Prepare the second half. */
+	slot2.as_id = as_id;
+	slot2.id = id2;
+	slot2.base_gfn = old.base_gfn + offset;
+	slot2.npages = old.npages - offset;
+	slot2.flags = old.flags;
+	/* @offset is in pages, userspace_addr is in bytes. */
+	slot2.userspace_addr = old.userspace_addr + (offset << PAGE_SHIFT);
+	slot2.file = old.file;
+	slot2.private_ops = old.private_ops;
+
+	if ((old.flags & KVM_MEM_LOG_DIRTY_PAGES) && !kvm->dirty_ring_size) {
+		/*
+		 * Allocate the bitmap for the shrunk first half through a
+		 * scratch copy.  The active memslot must stay untouched until
+		 * the new memslot array is installed.
+		 */
+		shrunk = old;
+		shrunk.npages = offset;
+		r = kvm_alloc_dirty_bitmap(&shrunk);
+		if (r)
+			return r;
+
+		r = kvm_alloc_dirty_bitmap(&slot2);
+		if (r)
+			goto out_bitmap;
+
+		/* TODO: copy dirty_bitmap or return -EINVAL if logging is running */
+	}
+
+	/*
+	 * NOTE(review): kvm->slots_arch_lock is not taken here; the RFC left
+	 * the lock/unlock calls commented out.  Confirm whether
+	 * install_new_memslots() expects that lock to be held.
+	 */
+	slots = kvm_dup_memslots(active, KVM_MR_CREATE);
+	if (!slots) {
+		r = -ENOMEM;
+		goto out_bitmap;
+	}
+
+	/* From here on slot1 refers to the copy inside the new array. */
+	slot1 = id_to_memslot(slots, id1);
+	slot1->npages = offset;
+	slot1->dirty_bitmap = shrunk.dirty_bitmap;
+
+	memslot_to_memory_region(&mem, slot1);
+	r = kvm_arch_prepare_memory_region(kvm, slot1, &mem, KVM_MR_SHRINK);
+	if (r)
+		goto out_slots;
+
+	/*
+	 * NOTE(review): if the call below fails, whatever the call above
+	 * allocated for slot1 is presumably leaked when @slots is freed;
+	 * verify against the arch implementation.
+	 */
+	memslot_to_memory_region(&mem, &slot2);
+	r = kvm_arch_prepare_memory_region(kvm, &slot2, &mem, KVM_MR_CREATE);
+	if (r)
+		goto out_slots;
+
+	update_memslots(slots, slot1, KVM_MR_SHRINK);
+	update_memslots(slots, &slot2, KVM_MR_CREATE);
+
+	/* install_new_memslots() hands back the array that was replaced. */
+	slots = install_new_memslots(kvm, as_id, slots);
+
+	kvm_free_memslot(kvm, &old);
+
+	kvfree(slots);
+	return 0;
+
+out_slots:
+	kvfree(slots);
+out_bitmap:
+	/*
+	 * Release the new bitmaps through the local descriptors; slot1 may
+	 * point into the array that was just freed.
+	 */
+	if (shrunk.dirty_bitmap)
+		kvm_destroy_dirty_bitmap(&shrunk);
+	if (slot2.dirty_bitmap)
+		kvm_destroy_dirty_bitmap(&slot2);
+
+	return r;
+}
+
+/*
+ * Handle KVM_SPLIT_MEMORY_REGION: sanity-check the request in @info and, if
+ * it is acceptable, split the slot under kvm->slots_lock.
+ *
+ * Each of @info->slot1 and @info->slot2 carries a slot id in its low 16 bits
+ * and an address space id in the bits above.  The request is rejected with
+ * -EINVAL when either slot id is out of range, the byte offset is not page
+ * aligned, the two slots are in different (or invalid) address spaces, or
+ * both name the same slot id.  Otherwise the result of
+ * kvm_split_memory_region() is returned.
+ */
+static int kvm_vm_ioctl_split_memory_region(struct kvm *kvm,
+				struct kvm_split_memory_region_info *info)
+{
+	int src_as = info->slot1 >> 16;
+	int dst_as = info->slot2 >> 16;
+	int src_id = (u16)info->slot1;
+	int dst_id = (u16)info->slot2;
+	int ret;
+
+	/* Only user-visible slot ids may be named by userspace. */
+	if (src_id >= KVM_USER_MEM_SLOTS || dst_id >= KVM_USER_MEM_SLOTS)
+		return -EINVAL;
+
+	/* The split point is given in bytes and must sit on a page boundary. */
+	if (info->offset & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	/* Both halves live in one, valid, address space. */
+	if (src_as != dst_as)
+		return -EINVAL;
+	if (src_as >= KVM_ADDRESS_SPACE_NUM)
+		return -EINVAL;
+
+	/* The new half needs a slot id of its own. */
+	if (src_id == dst_id)
+		return -EINVAL;
+	if (src_id >= KVM_MEM_SLOTS_NUM || dst_id >= KVM_MEM_SLOTS_NUM)
+		return -EINVAL;
+
+	/* The callee works in pages, so convert the byte offset. */
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_split_memory_region(kvm, src_as, src_id, dst_id,
+				      info->offset >> PAGE_SHIFT);
+	mutex_unlock(&kvm->slots_lock);
+
+	return ret;
+}
+
 #ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 /**
  * kvm_get_dirty_log - get a snapshot of dirty pages
@@ -3765,6 +3899,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #else
 		return 0;
 #endif
+	case KVM_CAP_MEMORY_REGION_SPLIT:
+		return 1;
 	default:
 		break;
 	}
@@ -3901,6 +4037,17 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
 		break;
 	}
+	case KVM_SPLIT_MEMORY_REGION: {
+		struct kvm_split_memory_region_info info;
+
+		r = -EFAULT;
+		if (copy_from_user(&info, argp, sizeof(info)))
+			goto out;
+
+		r = kvm_vm_ioctl_split_memory_region(kvm, &info);
+		break;
+	}
+
 	case KVM_GET_DIRTY_LOG: {
 		struct kvm_dirty_log log;
 
-- 
2.17.1


  parent reply	other threads:[~2021-11-11 14:15 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-11 14:13 [RFC PATCH 0/6] KVM: mm: fd-based approach for supporting KVM guest private memory Chao Peng
2021-11-11 14:13 ` [RFC PATCH 1/6] mm: Add F_SEAL_GUEST to shmem/memfd Chao Peng
2021-11-12 19:28   ` Kirill A. Shutemov
2021-11-11 14:13 ` [RFC PATCH 2/6] kvm: x86: Introduce guest private memory address space to memslot Chao Peng
2021-11-11 14:13 ` [RFC PATCH 3/6] kvm: x86: add private_ops " Chao Peng
2021-11-11 14:13 ` [RFC PATCH 4/6] kvm: x86: implement private_ops for memfd backing store Chao Peng
2021-11-11 14:13 ` [RFC PATCH 5/6] kvm: x86: add KVM_EXIT_MEMORY_ERROR exit Chao Peng
2021-11-11 15:08   ` Mika Penttilä
2021-11-12  5:50     ` Chao Peng
2021-11-11 14:13 ` Chao Peng [this message]
2021-11-11 14:13 ` [RFC PATCH 07/13] linux-headers: Update Chao Peng
2021-11-11 14:13 ` [RFC PATCH 08/13] hostmem: Add guest private memory to memory backend Chao Peng
2021-11-11 14:13 ` [RFC PATCH 09/13] qmp: Include "guest-private" property for memory backends Chao Peng
2021-11-11 14:13 ` [RFC PATCH 10/13] softmmu/physmem: Add private memory address space Chao Peng
2022-01-18  9:41   ` Philippe Mathieu-Daudé
2021-11-11 14:13 ` [RFC PATCH 11/13] kvm: register private memory slots Chao Peng
2021-11-11 14:13 ` [RFC PATCH 12/13] kvm: handle private to shared memory conversion Chao Peng
2021-11-11 14:13 ` [RFC PATCH 13/13] machine: Add 'private-memory-backend' property Chao Peng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211111141352.26311-7-chao.p.peng@linux.intel.com \
    --to=chao.p.peng@linux.intel.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfields@fieldses.org \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jlayton@kernel.org \
    --cc=jmattson@google.com \
    --cc=john.ji@intel.com \
    --cc=joro@8bytes.org \
    --cc=jun.nakajima@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=seanjc@google.com \
    --cc=susie.li@intel.com \
    --cc=tglx@linutronix.de \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    --cc=yu.c.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).