From: Paul Durrant <Paul.Durrant@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Subject: Re: [PATCH v2] x86/HVM: use available linear->phys translations in REP MOVS/STOS handling
Date: Mon, 20 Jun 2016 12:22:47 +0000
Message-ID: <5b125d0397f448eaa25f7cc1fdf099a8@AMSPEX02CL03.citrite.net>
In-Reply-To: <5767EFA602000078000F6A2E@prv-mh.provo.novell.com>
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 20 June 2016 12:29
> To: xen-devel
> Cc: Paul Durrant
> Subject: [PATCH v2] x86/HVM: use available linear->phys translations in REP
> MOVS/STOS handling
>
> If we have the translation result available already, we should also use
> it here. In my tests with Linux guests this eliminates all calls to
> hvmemul_linear_to_phys() from the STOS path and most from the MOVS
> one.
>
> Also record the translation for re-use at least during response
> processing.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> v2: Restrict to single iteration or address incrementing variant upon
> initial invocation, and don't bound *reps upon response processing.
> Latch translation only for the actual MMIO part of the accesses (to
> match the purpose of the per-vCPU fields; any further translation
> caching should probably go into hvmemul_linear_to_phys() itself).
> Also this time tested on AMD as well (albeit it still escapes me
> why behavior here would be CPU vendor dependent).
>
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1156,6 +1156,7 @@ static int hvmemul_rep_movs(
>  {
>      struct hvm_emulate_ctxt *hvmemul_ctxt =
>          container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> +    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
>      unsigned long saddr, daddr, bytes;
>      paddr_t sgpa, dgpa;
>      uint32_t pfec = PFEC_page_present;
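
The per-vCPU latch state consulted throughout this patch lives in struct
hvm_vcpu_io. Abridged, and reconstructed from memory of the contemporary
headers, so treat this as a sketch rather than an authoritative excerpt:

    struct hvm_vcpu_io {
        /* ... */
        ioreq_t io_req;             /* in-flight I/O request state */
        /* Cached linear->physical translation for the current MMIO access. */
        unsigned long mmio_gla;     /* page-aligned guest linear address */
        unsigned long mmio_gpfn;    /* corresponding guest frame number */
        struct npfec mmio_access;   /* .read_access / .write_access flags */
        /* ... */
    };
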
> @@ -1178,16 +1179,41 @@ static int hvmemul_rep_movs(
>      if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
>          pfec |= PFEC_user_mode;
>
> -    rc = hvmemul_linear_to_phys(
> -        saddr, &sgpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
> +    if ( vio->mmio_access.read_access &&
> +         (vio->mmio_gla == (saddr & PAGE_MASK)) &&
> +         /*
> +          * Upon initial invocation don't truncate large batches just because
> +          * of a hit for the translation: Doing the guest page table walk is
> +          * cheaper than multiple round trips through the device model. Yet
> +          * when processing a response we can always re-use the translation.
> +          */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
> +    else
> +    {
> +        rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
> +                                    hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
> +    }
>
> -    rc = hvmemul_linear_to_phys(
> -        daddr, &dgpa, bytes_per_rep, reps,
> -        pfec | PFEC_write_access, hvmemul_ctxt);
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
> +    bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
> +    if ( vio->mmio_access.write_access &&
> +         (vio->mmio_gla == (daddr & PAGE_MASK)) &&
> +         /* See comment above. */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
> +    else
> +    {
> +        rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
> +                                    pfec | PFEC_write_access, hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
> +    }
>
>      /* Check for MMIO ops */
>      (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt);
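
The source- and destination-side checks above are symmetric. Distilled
into a hypothetical helper (purely illustrative; the function and its
name are not part of the patch), the reuse condition reads:

    /* May the latched translation be reused for this batch at @gla? */
    static bool_t may_reuse_translation(const struct hvm_vcpu_io *vio,
                                        unsigned long gla, bool_t df,
                                        unsigned long reps,
                                        unsigned int bytes_per_rep,
                                        bool_t write)
    {
        if ( !(write ? vio->mmio_access.write_access
                     : vio->mmio_access.read_access) ||
             vio->mmio_gla != (gla & PAGE_MASK) )
            return 0;

        /* Response processing: the translation was just used; reuse it. */
        if ( vio->io_req.state == STATE_IORESP_READY )
            return 1;

        /*
         * Initial invocation: reuse only if the access ascends (or is a
         * single iteration) and the whole batch fits in the latched page,
         * so that large batches aren't needlessly truncated.
         */
        return (!df || reps == 1) &&
               PAGE_SIZE - (gla & ~PAGE_MASK) >= reps * bytes_per_rep;
    }
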
> @@ -1198,12 +1224,18 @@ static int hvmemul_rep_movs(
>          return X86EMUL_UNHANDLEABLE;
>
>      if ( sp2mt == p2m_mmio_dm )
> +    {
> +        latch_linear_to_phys(vio, saddr, sgpa, 0);
>          return hvmemul_do_mmio_addr(
>              sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
> +    }
>
>      if ( dp2mt == p2m_mmio_dm )
> +    {
> +        latch_linear_to_phys(vio, daddr, dgpa, 1);
>          return hvmemul_do_mmio_addr(
>              dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
> +    }
>
>      /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */
>      bytes = *reps * bytes_per_rep;
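
latch_linear_to_phys() itself is introduced by an earlier patch in this
area; judging purely from the call sites here, it records the translation
roughly along these lines (a sketch under that assumption, not the actual
implementation):

    static void latch_linear_to_phys(struct hvm_vcpu_io *vio,
                                     unsigned long gla, paddr_t gpa,
                                     bool_t write)
    {
        /* Keep an already-latched translation. */
        if ( vio->mmio_access.gla_valid )
            return;

        vio->mmio_gla = gla & PAGE_MASK;
        vio->mmio_gpfn = PFN_DOWN(gpa);
        vio->mmio_access = (struct npfec){ .gla_valid = 1,
                                           .read_access = 1,
                                           .write_access = write };
    }
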
> @@ -1279,25 +1311,37 @@ static int hvmemul_rep_stos(
>  {
>      struct hvm_emulate_ctxt *hvmemul_ctxt =
>          container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> -    unsigned long addr;
> +    struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
> +    unsigned long addr, bytes;
>      paddr_t gpa;
>      p2m_type_t p2mt;
>      bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
>      int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
>                                         hvm_access_write, hvmemul_ctxt, &addr);
>
> -    if ( rc == X86EMUL_OKAY )
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
> +    if ( vio->mmio_access.write_access &&
> +         (vio->mmio_gla == (addr & PAGE_MASK)) &&
> +         /* See respective comment in MOVS processing. */
> +         (vio->io_req.state == STATE_IORESP_READY ||
> +          ((!df || *reps == 1) &&
> +           PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
> +        gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
> +    else
>      {
>          uint32_t pfec = PFEC_page_present | PFEC_write_access;
>
>          if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
>              pfec |= PFEC_user_mode;
>
> -        rc = hvmemul_linear_to_phys(
> -            addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
> +        rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
> +                                    hvmemul_ctxt);
> +        if ( rc != X86EMUL_OKAY )
> +            return rc;
>      }
> -    if ( rc != X86EMUL_OKAY )
> -        return rc;
>
>      /* Check for MMIO op */
>      (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
> @@ -1371,6 +1415,7 @@ static int hvmemul_rep_stos(
>          return X86EMUL_UNHANDLEABLE;
>
>      case p2m_mmio_dm:
> +        latch_linear_to_phys(vio, addr, gpa, 1);
>          return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep, IOREQ_WRITE, df,
>                                        p_data);
>      }
>
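
To make the single-page check concrete: a dword rep store with
*reps == 64 and bytes_per_rep == 4 starting at page offset 0xf00 covers
PAGE_SIZE - 0xf00 = 0x100 = 256 bytes, exactly 64 * 4, so the latched
translation may be reused on the initial invocation; starting at 0xf04
only 252 bytes remain in the page, so the page walk path is taken
instead. Likewise, with EFLAGS.DF set the batch descends out of the
latched page, so only *reps == 1 qualifies up front, though once a
response is being processed the translation is reused regardless.
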
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel