From: Gleb Natapov <gleb@redhat.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>, kvm@vger.kernel.org
Subject: Re: [PATCH] KVM: Fix page-crossing MMIO
Date: Thu, 19 Apr 2012 11:59:15 +0300 [thread overview]
Message-ID: <20120419085915.GX11918@redhat.com> (raw)
In-Reply-To: <1334766167-25162-1-git-send-email-avi@redhat.com>
On Wed, Apr 18, 2012 at 07:22:47PM +0300, Avi Kivity wrote:
> MMIO that are split across a page boundary are currently broken - the
> code does not expect to be aborted by the exit to userspace for the
> first MMIO fragment.
>
> This patch fixes the problem by generalizing the current code for handling
> 16-byte MMIOs to handle a number of "fragments", and changes the MMIO
> code to create those fragments.
>
For a 16-byte MMIO, userspace will see two 8-byte writes. Is this OK? Is there
real code that does this kind of IO, or is the patch for correctness
only?
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/ia64/include/asm/kvm_host.h | 2 +
> arch/ia64/kvm/kvm-ia64.c | 10 ++--
> arch/x86/kvm/x86.c | 114 +++++++++++++++++++++++++++-----------
> include/linux/kvm_host.h | 31 +++++++++--
> 4 files changed, 115 insertions(+), 42 deletions(-)
>
> diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
> index c4b4bac..6d6a5ac 100644
> --- a/arch/ia64/include/asm/kvm_host.h
> +++ b/arch/ia64/include/asm/kvm_host.h
> @@ -449,6 +449,8 @@ struct kvm_vcpu_arch {
> char log_buf[VMM_LOG_LEN];
> union context host;
> union context guest;
> +
> + char mmio_data[8];
> };
>
> struct kvm_vm_stat {
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 9d80ff8..882ab21 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
> goto mmio;
> vcpu->mmio_needed = 1;
> - vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
> - vcpu->mmio_size = kvm_run->mmio.len = p->size;
> + vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
> + vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
> vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
>
> if (vcpu->mmio_is_write)
> - memcpy(vcpu->mmio_data, &p->data, p->size);
> + memcpy(vcpu->arch.mmio_data, &p->data, p->size);
> memcpy(kvm_run->mmio.data, &p->data, p->size);
> kvm_run->exit_reason = KVM_EXIT_MMIO;
> return 0;
> @@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
> struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
>
> if (!vcpu->mmio_is_write)
> - memcpy(&p->data, vcpu->mmio_data, 8);
> + memcpy(&p->data, vcpu->arch.mmio_data, 8);
> p->state = STATE_IORESP_READY;
> }
>
> @@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> }
>
> if (vcpu->mmio_needed) {
> - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
> + memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
> kvm_set_mmio_data(vcpu);
> vcpu->mmio_read_completed = 1;
> vcpu->mmio_needed = 0;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0d9a578..4de705c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3718,9 +3718,8 @@ struct read_write_emulator_ops {
> static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
> {
> if (vcpu->mmio_read_completed) {
> - memcpy(val, vcpu->mmio_data, bytes);
> trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
> - vcpu->mmio_phys_addr, *(u64 *)val);
> + vcpu->mmio_fragments[0].gpa, *(u64 *)val);
> vcpu->mmio_read_completed = 0;
> return 1;
> }
> @@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
> static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
> void *val, int bytes)
> {
> - memcpy(vcpu->mmio_data, val, bytes);
> - memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
> + struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
> +
> + memcpy(vcpu->run->mmio.data, frag->data, frag->len);
> return X86EMUL_CONTINUE;
> }
>
> @@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> gpa_t gpa;
> int handled, ret;
> bool write = ops->write;
> -
> - if (ops->read_write_prepare &&
> - ops->read_write_prepare(vcpu, val, bytes))
> - return X86EMUL_CONTINUE;
> + struct kvm_mmio_fragment *frag;
>
> ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>
> @@ -3813,15 +3810,19 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
> bytes -= handled;
> val += handled;
>
> - vcpu->mmio_needed = 1;
> - vcpu->run->exit_reason = KVM_EXIT_MMIO;
> - vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
> - vcpu->mmio_size = bytes;
> - vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
> - vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
> - vcpu->mmio_index = 0;
> + while (bytes) {
> + unsigned now = min(bytes, 8U);
>
> - return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
> + frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
> + frag->gpa = gpa;
> + frag->data = val;
> + frag->len = now;
> +
> + gpa += now;
> + val += now;
> + bytes -= now;
> + }
> + return X86EMUL_CONTINUE;
> }
>
> int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
> @@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
> struct read_write_emulator_ops *ops)
> {
> struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> + gpa_t gpa;
> + int rc;
> +
> + if (ops->read_write_prepare &&
> + ops->read_write_prepare(vcpu, val, bytes))
> + return X86EMUL_CONTINUE;
> +
> + vcpu->mmio_nr_fragments = 0;
>
> /* Crossing a page boundary? */
> if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
> - int rc, now;
> + int now;
>
> now = -addr & ~PAGE_MASK;
> rc = emulator_read_write_onepage(addr, val, now, exception,
> @@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
> bytes -= now;
> }
>
> - return emulator_read_write_onepage(addr, val, bytes, exception,
> - vcpu, ops);
> + rc = emulator_read_write_onepage(addr, val, bytes, exception,
> + vcpu, ops);
> + if (rc != X86EMUL_CONTINUE)
> + return rc;
> +
> + if (!vcpu->mmio_nr_fragments)
> + return rc;
> +
> + gpa = vcpu->mmio_fragments[0].gpa;
> +
> + vcpu->mmio_needed = 1;
> + vcpu->mmio_cur_fragment = 0;
> +
> + vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
> + vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
> + vcpu->run->exit_reason = KVM_EXIT_MMIO;
> + vcpu->run->mmio.phys_addr = gpa;
> +
> + return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
> }
>
> static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
> @@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
> return r;
> }
>
> +/*
> + * Implements the following, as a state machine:
> + *
> + * read:
> + * for each fragment
> + * write gpa, len
> + * exit
> + * copy data
> + * execute insn
> + *
> + * write:
> + * for each fragment
> + * write gpa, len
> + * copy data
> + * exit
> + */
> static int complete_mmio(struct kvm_vcpu *vcpu)
> {
> struct kvm_run *run = vcpu->run;
> + struct kvm_mmio_fragment *frag;
> int r;
>
> if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
> return 1;
>
> if (vcpu->mmio_needed) {
> - vcpu->mmio_needed = 0;
> + /* Complete previous fragment */
> + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
> if (!vcpu->mmio_is_write)
> - memcpy(vcpu->mmio_data + vcpu->mmio_index,
> - run->mmio.data, 8);
> - vcpu->mmio_index += 8;
> - if (vcpu->mmio_index < vcpu->mmio_size) {
> - run->exit_reason = KVM_EXIT_MMIO;
> - run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
> - memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
> - run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
> - run->mmio.is_write = vcpu->mmio_is_write;
> - vcpu->mmio_needed = 1;
> - return 0;
> + memcpy(frag->data, run->mmio.data, frag->len);
> + if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> + vcpu->mmio_needed = 0;
> + if (vcpu->mmio_is_write)
> + return 1;
> + vcpu->mmio_read_completed = 1;
> + goto done;
> }
> + /* Initiate next fragment */
> + ++frag;
> + run->exit_reason = KVM_EXIT_MMIO;
> + run->mmio.phys_addr = frag->gpa;
> if (vcpu->mmio_is_write)
> - return 1;
> - vcpu->mmio_read_completed = 1;
> + memcpy(run->mmio.data, frag->data, frag->len);
> + run->mmio.len = frag->len;
> + run->mmio.is_write = vcpu->mmio_is_write;
> + return 0;
> +
> }
> +done:
> vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
> r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
> srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 49c2f2f..738b5ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -35,6 +35,20 @@
> #endif
>
> /*
> + * If we support unaligned MMIO, at most one fragment will be split into two:
> + */
> +#ifdef KVM_UNALIGNED_MMIO
> +# define KVM_EXTRA_MMIO_FRAGMENTS 1
> +#else
> +# define KVM_EXTRA_MMIO_FRAGMENTS 0
> +#endif
> +
> +#define KVM_USER_MMIO_SIZE 8
> +
> +#define KVM_MAX_MMIO_FRAGMENTS \
> + (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
> +
> +/*
> * vcpu->requests bit members
> */
> #define KVM_REQ_TLB_FLUSH 0
> @@ -117,6 +131,16 @@ enum {
> EXITING_GUEST_MODE
> };
>
> +/*
> + * Sometimes a large or cross-page mmio needs to be broken up into separate
> + * exits for userspace servicing.
> + */
> +struct kvm_mmio_fragment {
> + gpa_t gpa;
> + void *data;
> + unsigned len;
> +};
> +
> struct kvm_vcpu {
> struct kvm *kvm;
> #ifdef CONFIG_PREEMPT_NOTIFIERS
> @@ -144,10 +168,9 @@ struct kvm_vcpu {
> int mmio_needed;
> int mmio_read_completed;
> int mmio_is_write;
> - int mmio_size;
> - int mmio_index;
> - unsigned char mmio_data[KVM_MMIO_SIZE];
> - gpa_t mmio_phys_addr;
> + int mmio_cur_fragment;
> + int mmio_nr_fragments;
> + struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
> #endif
>
> #ifdef CONFIG_KVM_ASYNC_PF
> --
> 1.7.10
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
next prev parent reply other threads:[~2012-04-19 8:59 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-18 16:22 [PATCH] KVM: Fix page-crossing MMIO Avi Kivity
2012-04-19 8:59 ` Gleb Natapov [this message]
2012-04-19 9:07 ` Avi Kivity
2012-04-19 23:35 ` Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120419085915.GX11918@redhat.com \
--to=gleb@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox