All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Gleb Natapov <gleb@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: [PATCH] KVM: x86: fix vcpu->mmio_fragments overflow
Date: Fri, 19 Oct 2012 15:37:32 +0800	[thread overview]
Message-ID: <5081033C.4060503@linux.vnet.ibm.com> (raw)

After commit b3356bf0dbb349 (KVM: emulator: optimize "rep ins" handling),
the pieces of io data can be collected and write them to the guest memory
or MMIO together.

Unfortunately, kvm splits the mmio access into 8 bytes and store them to
vcpu->mmio_fragments. If the guest uses "rep ins" to move large data, it
will cause vcpu->mmio_fragments overflow

The bug can be exposed by isapc (-M isapc):

[23154.818733] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC
[ ......]
[23154.858083] Call Trace:
[23154.859874]  [<ffffffffa04f0e17>] kvm_get_cr8+0x1d/0x28 [kvm]
[23154.861677]  [<ffffffffa04fa6d4>] kvm_arch_vcpu_ioctl_run+0xcda/0xe45 [kvm]
[23154.863604]  [<ffffffffa04f5a1a>] ? kvm_arch_vcpu_load+0x17b/0x180 [kvm]


Actually, we can use one mmio_fragment to store a large mmio access for the
mmio access is always continuous then split it when we pass the mmio-exit-info
to userspace. After that, we only need two entries to store mmio info for
the cross-mmio pages access

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
---
 arch/x86/kvm/x86.c       |  127 +++++++++++++++++++++++++++++-----------------
 include/linux/kvm_host.h |   16 +-----
 2 files changed, 84 insertions(+), 59 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b90dd5..41ceb51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3779,9 +3779,6 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 			   void *val, int bytes)
 {
-	struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
-
-	memcpy(vcpu->run->mmio.data, frag->data, frag->len);
 	return X86EMUL_CONTINUE;
 }

@@ -3799,6 +3796,64 @@ static const struct read_write_emulator_ops write_emultor = {
 	.write = true,
 };

+static bool get_current_mmio_info(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				  unsigned *len, void **data)
+{
+	struct kvm_mmio_fragment *frag;
+	int cur = vcpu->mmio_cur_fragment;
+
+	if (cur >= vcpu->mmio_nr_fragments)
+		return false;
+
+	frag = &vcpu->mmio_fragments[cur];
+	if (frag->pos >= frag->len) {
+		if (++vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments)
+			return false;
+		frag++;
+	}
+
+	*gpa = frag->gpa + frag->pos;
+	*data = frag->data + frag->pos;
+	*len = min(8u, frag->len - frag->pos);
+	return true;
+}
+
+static void complete_current_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmio_fragment *frag;
+	gpa_t gpa;
+	unsigned len;
+	void *data;
+
+	get_current_mmio_info(vcpu, &gpa, &len, &data);
+
+	if (!vcpu->mmio_is_write)
+		memcpy(data, vcpu->run->mmio.data, len);
+
+	/* Increase frag->pos to switch to the next mmio. */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
+	frag->pos += len;
+}
+
+static bool vcpu_fill_mmio_exit_info(struct kvm_vcpu *vcpu)
+{
+	gpa_t gpa;
+	unsigned len;
+	void *data;
+
+	if (!get_current_mmio_info(vcpu, &gpa, &len, &data))
+		return false;
+
+	vcpu->run->mmio.len = len;
+	vcpu->run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->run->exit_reason = KVM_EXIT_MMIO;
+	vcpu->run->mmio.phys_addr = gpa;
+
+	if (vcpu->mmio_is_write)
+		memcpy(vcpu->run->mmio.data, data, len);
+	return true;
+}
+
 static int emulator_read_write_onepage(unsigned long addr, void *val,
 				       unsigned int bytes,
 				       struct x86_exception *exception,
@@ -3834,18 +3889,12 @@ mmio:
 	bytes -= handled;
 	val += handled;

-	while (bytes) {
-		unsigned now = min(bytes, 8U);
-
-		frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
-		frag->gpa = gpa;
-		frag->data = val;
-		frag->len = now;
-
-		gpa += now;
-		val += now;
-		bytes -= now;
-	}
+	WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
+	frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+	frag->pos = 0;
+	frag->gpa = gpa;
+	frag->data = val;
+	frag->len = bytes;
 	return X86EMUL_CONTINUE;
 }

@@ -3855,7 +3904,6 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 			const struct read_write_emulator_ops *ops)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-	gpa_t gpa;
 	int rc;

 	if (ops->read_write_prepare &&
@@ -3887,17 +3935,13 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 	if (!vcpu->mmio_nr_fragments)
 		return rc;

-	gpa = vcpu->mmio_fragments[0].gpa;
-
 	vcpu->mmio_needed = 1;
 	vcpu->mmio_cur_fragment = 0;
+	vcpu->mmio_is_write = ops->write;

-	vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
-	vcpu->run->exit_reason = KVM_EXIT_MMIO;
-	vcpu->run->mmio.phys_addr = gpa;
-
-	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+	vcpu_fill_mmio_exit_info(vcpu);
+	return ops->read_write_exit_mmio(vcpu, vcpu->mmio_fragments[0].gpa,
+					 val, bytes);
 }

 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -5524,43 +5568,34 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu)
  *
  * read:
  *   for each fragment
- *     write gpa, len
- *     exit
- *     copy data
+ *      for each mmio piece in the fragment
+ *        write gpa, len
+ *        exit
+ *        copy data
  *   execute insn
  *
  * write:
  *   for each fragment
- *      write gpa, len
- *      copy data
- *      exit
+ *      for each mmio piece in the fragment
+ *         write gpa, len
+ *         copy data
+ *   exit
  */
 static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
-	struct kvm_mmio_fragment *frag;
-
 	BUG_ON(!vcpu->mmio_needed);

-	/* Complete previous fragment */
-	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-	if (!vcpu->mmio_is_write)
-		memcpy(frag->data, run->mmio.data, frag->len);
-	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+	complete_current_mmio(vcpu);
+
+	/* Initiate next fragment */
+	if (!vcpu_fill_mmio_exit_info(vcpu)) {
 		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
 			return 1;
 		vcpu->mmio_read_completed = 1;
 		return complete_emulated_io(vcpu);
 	}
-	/* Initiate next fragment */
-	++frag;
-	run->exit_reason = KVM_EXIT_MMIO;
-	run->mmio.phys_addr = frag->gpa;
-	if (vcpu->mmio_is_write)
-		memcpy(run->mmio.data, frag->data, frag->len);
-	run->mmio.len = frag->len;
-	run->mmio.is_write = vcpu->mmio_is_write;
+
 	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	return 0;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e235068..f6c3c2f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -42,19 +42,8 @@
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)

-/*
- * If we support unaligned MMIO, at most one fragment will be split into two:
- */
-#ifdef KVM_UNALIGNED_MMIO
-#  define KVM_EXTRA_MMIO_FRAGMENTS 1
-#else
-#  define KVM_EXTRA_MMIO_FRAGMENTS 0
-#endif
-
-#define KVM_USER_MMIO_SIZE 8
-
-#define KVM_MAX_MMIO_FRAGMENTS \
-	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
+/* Two fragments for cross MMIO pages. */
+#define KVM_MAX_MMIO_FRAGMENTS	2

 /*
  * For the normal pfn, the highest 12 bits should be zero,
@@ -203,6 +192,7 @@ struct kvm_mmio_fragment {
 	gpa_t gpa;
 	void *data;
 	unsigned len;
+	unsigned pos;
 };

 struct kvm_vcpu {
-- 
1.7.7.6

             reply	other threads:[~2012-10-19  7:37 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-19  7:37 Xiao Guangrong [this message]
2012-10-19  7:39 ` [PATCH] emulator test: add "rep ins" mmio access test Xiao Guangrong
2012-11-01  0:05   ` Marcelo Tosatti
2012-10-22  9:16 ` [PATCH] KVM: x86: fix vcpu->mmio_fragments overflow Gleb Natapov
2012-10-22 11:09   ` Xiao Guangrong
2012-10-22 11:23     ` Gleb Natapov
2012-10-22 11:35       ` Jan Kiszka
2012-10-22 11:43         ` Gleb Natapov
2012-10-22 11:45           ` Jan Kiszka
2012-10-22 12:18             ` Avi Kivity
2012-10-22 12:45               ` Jan Kiszka
2012-10-22 12:53                 ` Gleb Natapov
2012-10-22 12:55                   ` Jan Kiszka
2012-10-22 12:58                     ` Avi Kivity
2012-10-22 13:05                       ` Jan Kiszka
2012-10-22 13:08                         ` Gleb Natapov
2012-10-22 13:25                           ` Jan Kiszka
2012-10-22 14:00                             ` Gleb Natapov
2012-10-22 14:23                               ` Jan Kiszka
2012-10-22 15:36                               ` Avi Kivity
2012-10-22 12:58                     ` Gleb Natapov
2012-10-22 12:55                   ` Avi Kivity
2012-10-22 13:01                     ` Gleb Natapov
2012-10-22 13:02                       ` Avi Kivity
2012-10-22 13:05                         ` Gleb Natapov
2012-10-22 12:56                 ` Avi Kivity
2012-10-22 13:58 ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5081033C.4060503@linux.vnet.ibm.com \
    --to=xiaoguangrong@linux.vnet.ibm.com \
    --cc=avi@redhat.com \
    --cc=gleb@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.