From: Tomasz Wroblewski <tomasz.wroblewski@gmail.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: xen-devel@lists.xenproject.org
Subject: Re: GPU passthrough performance regression in >4GB vms due to XSA-60 changes
Date: Mon, 19 May 2014 16:20:12 +0200
Message-ID: <537A131C.8010303@gmail.com>
In-Reply-To: <537A18B90200007800013A09@mail.emea.novell.com>
[-- Attachment #1: Type: text/plain, Size: 1561 bytes --]
On 05/19/2014 02:44 PM, Jan Beulich wrote:
>>>> On 19.05.14 at 14:17, <tomasz.wroblewski@gmail.com> wrote:
>> On 05/19/2014 02:06 PM, Jan Beulich wrote:
>>>>>> On 19.05.14 at 13:32, <tomasz.wroblewski@gmail.com> wrote:
>>>> Yeah, I spent about a day trying to port us onto unstable and test
>>>> there, but it sadly looks to be a bigger job, so I'm leaving that as
>>>> a last resort (though I plan to spend a couple more days on it soon).
>>> Then as an alternative did you try pulling over the EPT changes
>>> from -unstable?
>> That would indeed be preferable. I've looked over them but couldn't
>> figure out which particular change fixes the EPT update after MTRR
>> enable. Do you remember which one that was? I could test it and try to
>> narrow down any other commits it'd require (it seems there were a lot
>> of EPT-related changes).
> I used plural for a reason - I'm afraid you would need to start out with
> taking them all, and then possibly determine which ones to drop as
> being unrelated to the issue at hand.
It looks like a partial backport of your commit

commit aa9114edd97b292cd89b3616e3f2089471fd2201
Author: Jan Beulich <jbeulich@suse.com>
Date:   Thu Apr 10 16:01:41 2014 +0200

    x86/EPT: force re-evaluation of memory type as necessary

is all that's necessary; I'm attaching it against 4.3.2. I only kept the
memory_type_changed() calls in the MTRR-related areas, since those are
the only ones relevant to this particular issue. This is probably good
enough for us, thanks for the pointers! Do you think this one is
relatively safe for the stable branches?
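
For the record, the mechanism this relies on is roughly the following
(a simplified sketch condensed from the attached patch, using its names;
not literal tree code):

    /*
     * 1. A guest MTRR MSR write lands in mtrr_def_type_msr_set(),
     *    mtrr_fix_range_msr_set() or mtrr_var_range_msr_set(), which
     *    now call memory_type_changed() whenever the cached MTRR state
     *    actually changes.
     *
     * 2. memory_type_changed() only acts when EPT memory types matter,
     *    i.e. with an IOMMU active and no snoop control:
     */
    void memory_type_changed(struct domain *d)
    {
        if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
            p2m_memory_type_changed(d); /* -> ept_memory_type_changed() */
    }

    /*
     * 3. ept_memory_type_changed() runs ept_invalidate_emt() on the EPT
     *    root page only, stamping each present top-level entry with the
     *    invalid type MTRR_NUM_TYPES.  Such entries are misconfigured,
     *    so the next guest access to that range takes an
     *    EXIT_REASON_EPT_MISCONFIG VM exit, and ept_handle_misconfig()
     *    recomputes the EMT via epte_get_entry_emt(), pushing the
     *    invalidation one level further down each time.  The cost is
     *    thus proportional to what the guest touches after the MTRR
     *    change, instead of a full p2m revalidation on every MSR write,
     *    which is where the >4GB regression was biting.
     */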
[-- Attachment #2: ept-bp-aa9114edd97b292cd89b3616e3f2089471fd2201 --]
[-- Type: text/plain, Size: 13286 bytes --]
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3249,13 +3250,13 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
case MSR_MTRRdefType:
if ( !mtrr )
goto gp_fault;
- if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
+ if ( !mtrr_def_type_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, msr_content) )
goto gp_fault;
break;
case MSR_MTRRfix64K_00000:
if ( !mtrr )
goto gp_fault;
- if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
+ if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr, 0, msr_content) )
goto gp_fault;
break;
case MSR_MTRRfix16K_80000:
@@ -3263,7 +3264,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
if ( !mtrr )
goto gp_fault;
index = msr - MSR_MTRRfix16K_80000 + 1;
- if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+ if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr,
index, msr_content) )
goto gp_fault;
break;
@@ -3271,7 +3272,7 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
if ( !mtrr )
goto gp_fault;
index = msr - MSR_MTRRfix4K_C0000 + 3;
- if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+ if ( !mtrr_fix_range_msr_set(v->domain, &v->arch.hvm_vcpu.mtrr,
index, msr_content) )
goto gp_fault;
break;
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 83ff1ff..6763322 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -403,7 +403,7 @@ uint32_t get_pat_flags(struct vcpu *v,
return pat_type_2_pte_flags(pat_entry_value);
}
-bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content)
+bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m, uint64_t msr_content)
{
uint8_t def_type = msr_content & 0xff;
uint8_t enabled = (msr_content >> 10) & 0x3;
@@ -422,13 +422,17 @@ bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content)
return 0;
}
- m->enabled = enabled;
- m->def_type = def_type;
+ if ( m->enabled != enabled || m->def_type != def_type )
+ {
+ m->enabled = enabled;
+ m->def_type = def_type;
+ memory_type_changed(d);
+ }
return 1;
}
-bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
+bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *m, uint32_t row,
uint64_t msr_content)
{
uint64_t *fixed_range_base = (uint64_t *)m->fixed_ranges;
@@ -447,6 +451,7 @@ bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
}
fixed_range_base[row] = msr_content;
+ memory_type_changed(d);
}
return 1;
@@ -488,6 +493,8 @@ bool_t mtrr_var_range_msr_set(
m->overlapped = is_var_mtrr_overlapped(m);
+ memory_type_changed(d);
+
return 1;
}
@@ -662,7 +669,7 @@ static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
mtrr_state->mtrr_cap = hw_mtrr.msr_mtrr_cap;
for ( i = 0; i < NUM_FIXED_MSR; i++ )
- mtrr_fix_range_msr_set(mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);
+ mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);
for ( i = 0; i < MTRR_VCNT; i++ )
{
@@ -672,7 +679,7 @@ static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
MTRRphysMask_MSR(i), hw_mtrr.msr_mtrr_var[i*2+1]);
}
- mtrr_def_type_msr_set(mtrr_state, hw_mtrr.msr_mtrr_def_type);
+ mtrr_def_type_msr_set(d, mtrr_state, hw_mtrr.msr_mtrr_def_type);
return 0;
}
@@ -680,6 +687,12 @@ static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
1, HVMSR_PER_VCPU);
+void memory_type_changed(struct domain *d)
+{
+ if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
+ p2m_memory_type_changed(d);
+}
+
uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
uint8_t *ipat, bool_t direct_mmio)
{
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 03216b6..2e9d12d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2907,6 +2907,14 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
break;
}
+ case EXIT_REASON_EPT_MISCONFIG:
+ {
+ paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+ if ( !ept_handle_misconfig(gpa) )
+ goto exit_and_crash;
+ break;
+ }
+
case EXIT_REASON_MONITOR_TRAP_FLAG:
v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
vmx_update_cpu_exec_control(v);
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 92d9e2d..16ba317 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -271,6 +271,125 @@ static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
return GUEST_TABLE_NORMAL_PAGE;
}
+static bool_t ept_invalidate_emt(mfn_t mfn)
+{
+ ept_entry_t *epte = map_domain_page(mfn_x(mfn));
+ unsigned int i;
+ bool_t changed = 0;
+
+ for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+ {
+ ept_entry_t e = atomic_read_ept_entry(&epte[i]);
+
+ if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
+ e.emt == MTRR_NUM_TYPES )
+ continue;
+
+ e.emt = MTRR_NUM_TYPES;
+ atomic_write_ept_entry(&epte[i], e);
+ changed = 1;
+ }
+
+ unmap_domain_page(epte);
+
+ return changed;
+}
+
+bool_t ept_handle_misconfig(uint64_t gpa)
+{
+ struct vcpu *curr = current;
+ struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
+ struct ept_data *ept = &p2m->ept;
+ unsigned int level = ept_get_wl(ept);
+ unsigned long gfn = PFN_DOWN(gpa);
+ unsigned long mfn = ept_get_asr(ept);
+ ept_entry_t *epte;
+ int okay;
+
+ if ( !mfn )
+ return 0;
+
+ p2m_lock(p2m);
+
+ okay = -curr->arch.hvm_vmx.ept_spurious_misconfig;
+ for ( ; ; --level )
+ {
+ ept_entry_t e;
+ unsigned int i;
+
+ epte = map_domain_page(mfn);
+ i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
+ e = atomic_read_ept_entry(&epte[i]);
+
+ if ( level == 0 || is_epte_superpage(&e) )
+ {
+ uint8_t ipat = 0;
+
+ if ( e.emt != MTRR_NUM_TYPES )
+ break;
+
+ if ( level == 0 )
+ {
+ for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
+ {
+ e = atomic_read_ept_entry(&epte[i]);
+ if ( e.emt == MTRR_NUM_TYPES )
+ e.emt = 0;
+ if ( !is_epte_valid(&e) || !is_epte_present(&e) )
+ continue;
+ e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
+ _mfn(e.mfn), &ipat,
+ e.sa_p2mt == p2m_mmio_direct);
+ e.ipat = ipat;
+ atomic_write_ept_entry(&epte[i], e);
+ }
+ }
+ else
+ {
+ e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
+ &ipat,
+ e.sa_p2mt == p2m_mmio_direct);
+ e.ipat = ipat;
+ atomic_write_ept_entry(&epte[i], e);
+ }
+
+ okay = 1;
+ break;
+ }
+
+ if ( e.emt == MTRR_NUM_TYPES )
+ {
+ ASSERT(is_epte_present(&e));
+ ept_invalidate_emt(_mfn(e.mfn));
+ smp_wmb();
+ e.emt = 0;
+ atomic_write_ept_entry(&epte[i], e);
+ unmap_domain_page(epte);
+ okay = 1;
+ }
+ else if ( is_epte_present(&e) && !e.emt )
+ unmap_domain_page(epte);
+ else
+ break;
+
+ mfn = e.mfn;
+ }
+
+ unmap_domain_page(epte);
+ if ( okay > 0 )
+ {
+ struct vcpu *v;
+
+ for_each_vcpu ( curr->domain, v )
+ v->arch.hvm_vmx.ept_spurious_misconfig = 1;
+ }
+ curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
+ ept_sync_domain(p2m);
+ p2m_unlock(p2m);
+
+ return !!okay;
+}
+
/*
* ept_set_entry() computes 'need_modify_vtd_table' for itself,
* by observing whether any gfn->mfn translations are modified.
@@ -687,6 +806,17 @@ static void ept_change_entry_type_global(struct p2m_domain *p2m,
ept_sync_domain(p2m);
}
+static void ept_memory_type_changed(struct p2m_domain *p2m)
+{
+ unsigned long mfn = ept_get_asr(&p2m->ept);
+
+ if ( !mfn )
+ return;
+
+ if ( ept_invalidate_emt(_mfn(mfn)) )
+ ept_sync_domain(p2m);
+}
+
static void __ept_sync_domain(void *info)
{
struct ept_data *ept = &((struct p2m_domain *)info)->ept;
@@ -724,6 +854,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
p2m->set_entry = ept_set_entry;
p2m->get_entry = ept_get_entry;
p2m->change_entry_type_global = ept_change_entry_type_global;
+ p2m->memory_type_changed = ept_memory_type_changed;
p2m->audit_p2m = NULL;
/* Set the memory type used when accessing EPT paging structures. */
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index f5ddd20..a3ecb36 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -200,6 +200,18 @@ void p2m_change_entry_type_global(struct domain *d,
p2m_unlock(p2m);
}
+void p2m_memory_type_changed(struct domain *d)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ if ( p2m->memory_type_changed )
+ {
+ p2m_lock(p2m);
+ p2m->memory_type_changed(p2m);
+ p2m_unlock(p2m);
+ }
+}
+
mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
unsigned int *page_order, bool_t locked)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 4d55573..0d85347 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -124,6 +124,9 @@ struct arch_vmx_struct {
unsigned long host_cr0;
+ /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
+ bool_t ept_spurious_misconfig;
+
/* Is the guest in real mode? */
uint8_t vmx_realmode;
/* Are we emulating rather than VMENTERing? */
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index f4d759b..55daed9 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -455,6 +455,7 @@ int ept_p2m_init(struct p2m_domain *p2m);
void ept_p2m_uninit(struct p2m_domain *p2m);
void ept_walk_table(struct domain *d, unsigned long gfn);
+bool_t ept_handle_misconfig(uint64_t gpa);
void setup_ept_dump(void);
void update_guest_eip(void);
diff --git a/xen/include/asm-x86/mtrr.h b/xen/include/asm-x86/mtrr.h
index 6b4d632..a6f426e 100644
--- a/xen/include/asm-x86/mtrr.h
+++ b/xen/include/asm-x86/mtrr.h
@@ -78,10 +78,11 @@ extern void mtrr_bp_restore(void);
extern bool_t mtrr_var_range_msr_set(
struct domain *d, struct mtrr_state *m,
uint32_t msr, uint64_t msr_content);
-extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v,
+extern bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *v,
uint32_t row, uint64_t msr_content);
-extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, uint64_t msr_content);
+extern bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *v, uint64_t msr_content);
extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr);
+extern void memory_type_changed(struct domain *);
bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index f4e7253..facd318 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -233,6 +233,7 @@ struct p2m_domain {
void (*change_entry_type_global)(struct p2m_domain *p2m,
p2m_type_t ot,
p2m_type_t nt);
+ void (*memory_type_changed)(struct p2m_domain *p2m);
void (*write_p2m_entry)(struct p2m_domain *p2m,
unsigned long gfn, l1_pgentry_t *p,
@@ -506,6 +507,9 @@ void p2m_change_type_range(struct domain *d,
p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
p2m_type_t ot, p2m_type_t nt);
+/* Report a change affecting memory types. */
+void p2m_memory_type_changed(struct domain *d);
+
/* Set mmio addresses in the p2m table (for pass-through) */
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);