* [PATCH] KVM: VMX: Allocate MSR Bitmap for each vcpu
2008-09-08 11:42 [PATCH 0/4] Memory type support for EPT Sheng Yang
@ 2008-09-08 11:42 ` Sheng Yang
2008-09-08 11:42 ` [PATCH] KVM: VMX: Add PAT support for EPT Sheng Yang
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-09-08 11:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Sheng Yang
Use a separate msr_bitmap for each vcpu, in preparation for guest PAT support.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++----------------------
1 files changed, 30 insertions(+), 23 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14671f4..7f53fb7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,7 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+ struct page *msr_bitmap;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -106,7 +107,6 @@ static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
static struct page *vmx_io_bitmap_a;
static struct page *vmx_io_bitmap_b;
-static struct page *vmx_msr_bitmap;
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2083,6 +2083,25 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
kunmap(msr_bitmap);
}
+static int setup_msr_bitmap(struct page *msr_bitmap)
+{
+ void *va;
+
+ va = kmap(msr_bitmap);
+ if (!va)
+ return -EINVAL;
+ memset(va, 0xff, PAGE_SIZE);
+ kunmap(msr_bitmap);
+
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP);
+
+ return 0;
+}
+
/*
* Sets up the vmcs for emulated real mode.
*/
@@ -2100,8 +2119,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
- if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+ if (cpu_has_vmx_msr_bitmap()) {
+ setup_msr_bitmap(vmx->msr_bitmap);
+ vmcs_write64(MSR_BITMAP, page_to_phys(vmx->msr_bitmap));
+ }
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -3362,6 +3383,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
vmx_free_vmcs(vcpu);
kfree(vmx->host_msrs);
kfree(vmx->guest_msrs);
+ __free_page(vmx->msr_bitmap);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
}
@@ -3397,6 +3419,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmcs_clear(vmx->vmcs);
+ vmx->msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!vmx->msr_bitmap)
+ goto free_vmcs;
+
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
err = vmx_vcpu_setup(vmx);
@@ -3518,12 +3544,6 @@ static int __init vmx_init(void)
goto out;
}
- vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (!vmx_msr_bitmap) {
- r = -ENOMEM;
- goto out1;
- }
-
/*
* Allow direct access to the PC debug port (it is often used for I/O
* delays, but the vmexits simply slow things down).
@@ -3537,21 +3557,11 @@ static int __init vmx_init(void)
memset(va, 0xff, PAGE_SIZE);
kunmap(vmx_io_bitmap_b);
- va = kmap(vmx_msr_bitmap);
- memset(va, 0xff, PAGE_SIZE);
- kunmap(vmx_msr_bitmap);
-
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out2;
-
- vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
- vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
- vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
- vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
- vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
+ goto out1;
if (vm_need_ept()) {
bypass_guest_pf = 0;
@@ -3572,8 +3582,6 @@ static int __init vmx_init(void)
return 0;
-out2:
- __free_page(vmx_msr_bitmap);
out1:
__free_page(vmx_io_bitmap_b);
out:
@@ -3583,7 +3591,6 @@ out:
static void __exit vmx_exit(void)
{
- __free_page(vmx_msr_bitmap);
__free_page(vmx_io_bitmap_b);
__free_page(vmx_io_bitmap_a);
--
1.5.4.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH] KVM: VMX: Add PAT support for EPT
2008-09-08 11:42 [PATCH 0/4] Memory type support for EPT Sheng Yang
2008-09-08 11:42 ` [PATCH] KVM: VMX: Allocate MSR Bitmap for each vcpu Sheng Yang
@ 2008-09-08 11:42 ` Sheng Yang
2008-09-09 14:33 ` Avi Kivity
2008-09-08 11:42 ` [PATCH] KVM: Improve MTRR structure Sheng Yang
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Sheng Yang @ 2008-09-08 11:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Sheng Yang
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/vmx.c | 15 ++++++++++++---
arch/x86/kvm/vmx.h | 7 +++++++
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7f53fb7..b74b43f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1173,12 +1173,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
- opt = 0;
+ opt = VM_EXIT_LOAD_GUEST_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
- min = opt = 0;
+ min = 0;
+ opt = VM_ENTRY_SAVE_GUEST_PAT | VM_ENTRY_LOAD_HOST_PAT;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -2107,8 +2108,9 @@ static int setup_msr_bitmap(struct page *msr_bitmap)
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
- u32 host_sysenter_cs;
+ u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
+ u64 host_pat;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2196,6 +2198,13 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(HOST_IA32_SYSENTER_ESP, a); /* 22.2.3 */
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
+ if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_HOST_PAT) {
+ rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
+ host_pat = msr_low | ((u64) msr_high << 32);
+ vmcs_write64(HOST_PAT, host_pat);
+ vmx_disable_intercept_for_msr(vmx->msr_bitmap,
+ MSR_IA32_CR_PAT);
+ }
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 0c22e5f..16b3cfb 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -62,11 +62,14 @@
#define PIN_BASED_VIRTUAL_NMIS 0x00000020
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_GUEST_PAT 0x00004000
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_SAVE_GUEST_PAT 0x00040000
+#define VM_ENTRY_LOAD_HOST_PAT 0x00080000
/* VMCS Encodings */
enum vmcs_field {
@@ -112,6 +115,8 @@ enum vmcs_field {
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_PAT = 0x00002804,
+ GUEST_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
@@ -120,6 +125,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ HOST_PAT = 0x00002c00,
+ HOST_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
--
1.5.4.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] KVM: VMX: Add PAT support for EPT
2008-09-08 11:42 ` [PATCH] KVM: VMX: Add PAT support for EPT Sheng Yang
@ 2008-09-09 14:33 ` Avi Kivity
2008-09-09 14:36 ` Avi Kivity
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2008-09-09 14:33 UTC (permalink / raw)
To: Sheng Yang; +Cc: kvm
Sheng Yang wrote:
(some text wanted here)
> diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
> index 0c22e5f..16b3cfb 100644
> --- a/arch/x86/kvm/vmx.h
> +++ b/arch/x86/kvm/vmx.h
> @@ -62,11 +62,14 @@
> #define PIN_BASED_VIRTUAL_NMIS 0x00000020
>
> #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
> +#define VM_EXIT_LOAD_GUEST_PAT 0x00004000
> #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
>
> #define VM_ENTRY_IA32E_MODE 0x00000200
> #define VM_ENTRY_SMM 0x00000400
> #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
> +#define VM_ENTRY_SAVE_GUEST_PAT 0x00040000
> +#define VM_ENTRY_LOAD_HOST_PAT 0x00080000
>
> /* VMCS Encodings */
> enum vmcs_field {
> @@ -112,6 +115,8 @@ enum vmcs_field {
> VMCS_LINK_POINTER_HIGH = 0x00002801,
> GUEST_IA32_DEBUGCTL = 0x00002802,
> GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
> + GUEST_PAT = 0x00002804,
> + GUEST_PAT_HIGH = 0x00002805,
> GUEST_PDPTR0 = 0x0000280a,
> GUEST_PDPTR0_HIGH = 0x0000280b,
> GUEST_PDPTR1 = 0x0000280c,
> @@ -120,6 +125,8 @@ enum vmcs_field {
> GUEST_PDPTR2_HIGH = 0x0000280f,
> GUEST_PDPTR3 = 0x00002810,
> GUEST_PDPTR3_HIGH = 0x00002811,
> + HOST_PAT = 0x00002c00,
> + HOST_PAT_HIGH = 0x00002c01,
> PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
> CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
> EXCEPTION_BITMAP = 0x00004004,
>
This appears to be a new feature? My documentation (a bit old) doesn't
show it. If so, we need a check to see that it is available.
An alternative to switching PAT is to translate a guest PAT index into a
host PAT index. But I guess that doesn't work with EPT?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] KVM: VMX: Add PAT support for EPT
2008-09-09 14:33 ` Avi Kivity
@ 2008-09-09 14:36 ` Avi Kivity
2008-09-10 11:14 ` Yang, Sheng
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2008-09-09 14:36 UTC (permalink / raw)
To: Sheng Yang; +Cc: kvm
Avi Kivity wrote:
>
> This appears to be a new feature? My documentation (a bit old)
> doesn't show it. If so, we need a check to see that it is available.
The check is actually there.
If the feature is present, we need to expose it via
KVM_GET_SUPPORTED_CPUID, and add save/restore support for the msr via
msrs_to_save.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] KVM: VMX: Add PAT support for EPT
2008-09-09 14:36 ` Avi Kivity
@ 2008-09-10 11:14 ` Yang, Sheng
0 siblings, 0 replies; 9+ messages in thread
From: Yang, Sheng @ 2008-09-10 11:14 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm
On Tuesday 09 September 2008 22:36:22 Avi Kivity wrote:
> Avi Kivity wrote:
> > This appears to be a new feature? My documentation (a bit old)
> > doesn't show it. If so, we need a check to see that it is available.
>
> The check is actually there.
>
> If the feature is present, we need to expose it via
> KVM_GET_SUPPORTED_CPUID, and add save/restore support for the msr via
> msrs_to_save.
Yeah, it's a feature that comes with EPT. Thanks for the reminder! I will update the
patch soon.
PS: The latest spec is available at
http://www.intel.com/products/processor/manuals/
It contains EPT and VPID and other new things on Nehalem. I will work on
cleaning up the code according to the latest spec soon (yeah, we also only
got it a few days ago...)
--
regards
Yang, Sheng
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] KVM: Improve MTRR structure
2008-09-08 11:42 [PATCH 0/4] Memory type support for EPT Sheng Yang
2008-09-08 11:42 ` [PATCH] KVM: VMX: Allocate MSR Bitmap for each vcpu Sheng Yang
2008-09-08 11:42 ` [PATCH] KVM: VMX: Add PAT support for EPT Sheng Yang
@ 2008-09-08 11:42 ` Sheng Yang
2008-09-08 11:42 ` [PATCH] KVM: VMX: Add MTRR support for EPT Sheng Yang
2008-09-09 15:07 ` [PATCH 0/4] Memory type " Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-09-08 11:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Sheng Yang
Most structure copied from x86 MTRR.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/x86.c | 57 ++++++++++++++++++++++++++++++++++++++++++-
include/asm-x86/kvm_host.h | 18 +++++++++++++-
2 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..5d7ea05 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,9 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
@@ -865,10 +868,34 @@ static bool msr_mtrr_valid(unsigned msr)
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- vcpu->arch.mtrr[msr - 0x200] = data;
+ if (msr == MSR_MTRRdefType) {
+ vcpu->arch.mtrr_state.def_type = data;
+ vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+ } else if (msr == MSR_MTRRfix64K_00000)
+ p[0] = data;
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ p[1 + msr - MSR_MTRRfix16K_80000] = data;
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pt = data;
+ }
return 0;
}
@@ -966,10 +993,35 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
+ u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
if (!msr_mtrr_valid(msr))
return 1;
- *pdata = vcpu->arch.mtrr[msr - 0x200];
+ if (msr == MSR_MTRRdefType)
+ *pdata = vcpu->arch.mtrr_state.def_type +
+ (vcpu->arch.mtrr_state.enabled << 10);
+ else if (msr == MSR_MTRRfix64K_00000)
+ *pdata = p[0];
+ else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+ *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+ else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+ *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+ else { /* Variable MTRRs */
+ int idx, is_mtrr_mask;
+ u64 *pt;
+
+ idx = (msr - 0x200) / 2;
+ is_mtrr_mask = msr - 0x200 - 2 * idx;
+ if (!is_mtrr_mask)
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+ else
+ pt =
+ (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+ *pdata = *pt;
+ }
+
return 0;
}
@@ -4121,6 +4173,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
+ vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 815efc3..2d65df2 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -86,6 +86,7 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
+#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
@@ -227,6 +228,21 @@ struct kvm_mmu {
u64 *pae_root;
};
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
+ u8 fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ u8 def_type;
+};
+
struct kvm_vcpu_arch {
u64 host_tsc;
int interrupt_window_open;
@@ -322,7 +338,7 @@ struct kvm_vcpu_arch {
bool nmi_pending;
bool nmi_injected;
- u64 mtrr[0x100];
+ struct mtrr_state_type mtrr_state;
};
struct kvm_mem_alias {
--
1.5.4.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH] KVM: VMX: Add MTRR support for EPT
2008-09-08 11:42 [PATCH 0/4] Memory type support for EPT Sheng Yang
` (2 preceding siblings ...)
2008-09-08 11:42 ` [PATCH] KVM: Improve MTRR structure Sheng Yang
@ 2008-09-08 11:42 ` Sheng Yang
2008-09-09 15:07 ` [PATCH 0/4] Memory type " Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-09-08 11:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Sheng Yang
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/mmu.c | 115 +++++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/svm.c | 6 ++
arch/x86/kvm/vmx.c | 24 ++++++++-
arch/x86/kvm/x86.c | 2 +-
include/asm-x86/kvm_host.h | 3 +-
5 files changed, 144 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a87a11e..2cd772a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -33,6 +33,7 @@
#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
+#include <asm/mtrr.h>
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@@ -159,6 +160,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
+static u64 __read_mostly shadow_mt_mask;
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
@@ -174,13 +176,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask)
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
+ shadow_mt_mask = mt_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -1143,6 +1146,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
return page;
}
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int guest_mtrr_type_lookup(struct kvm_vcpu *vcpu, u64 start, u64 end)
+{
+ int i;
+ u64 base, mask;
+ u8 prev_match, curr_match;
+ int num_var_ranges = KVM_NR_VAR_MTRR;
+ struct mtrr_state_type mtrr_state = vcpu->arch.mtrr_state;
+
+ if (!mtrr_state.enabled)
+ return 0xFF;
+
+ /* Make end inclusive, instead of exclusive */
+ end--;
+
+ /* Look in fixed ranges. Just return the type as per start */
+ if (mtrr_state.have_fixed && (start < 0x100000)) {
+ int idx;
+
+ if (start < 0x80000) {
+ idx = 0;
+ idx += (start >> 16);
+ return mtrr_state.fixed_ranges[idx];
+ } else if (start < 0xC0000) {
+ idx = 1 * 8;
+ idx += ((start - 0x80000) >> 14);
+ return mtrr_state.fixed_ranges[idx];
+ } else if (start < 0x1000000) {
+ idx = 3 * 8;
+ idx += ((start - 0xC0000) >> 12);
+ return mtrr_state.fixed_ranges[idx];
+ }
+ }
+
+ /*
+ * Look in variable ranges
+ * Look of multiple ranges matching this address and pick type
+ * as per MTRR precedence
+ */
+ if (!(mtrr_state.enabled & 2))
+ return mtrr_state.def_type;
+
+ prev_match = 0xFF;
+ for (i = 0; i < num_var_ranges; ++i) {
+ unsigned short start_state, end_state;
+
+ if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
+ continue;
+
+ base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) +
+ (mtrr_state.var_ranges[i].base_lo & PAGE_MASK);
+ mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) +
+ (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK);
+
+ start_state = ((start & mask) == (base & mask));
+ end_state = ((end & mask) == (base & mask));
+ if (start_state != end_state)
+ return 0xFE;
+
+ if ((start & mask) != (base & mask))
+ continue;
+
+ curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
+ if (prev_match == 0xFF) {
+ prev_match = curr_match;
+ continue;
+ }
+
+ if (prev_match == MTRR_TYPE_UNCACHABLE ||
+ curr_match == MTRR_TYPE_UNCACHABLE)
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ((prev_match == MTRR_TYPE_WRBACK &&
+ curr_match == MTRR_TYPE_WRTHROUGH) ||
+ (prev_match == MTRR_TYPE_WRTHROUGH &&
+ curr_match == MTRR_TYPE_WRBACK)) {
+ prev_match = MTRR_TYPE_WRTHROUGH;
+ curr_match = MTRR_TYPE_WRTHROUGH;
+ }
+
+ if (prev_match != curr_match)
+ return MTRR_TYPE_UNCACHABLE;
+ }
+
+ if (prev_match != 0xFF)
+ return prev_match;
+
+ return mtrr_state.def_type;
+}
+
+static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ u8 mtrr;
+
+ mtrr = guest_mtrr_type_lookup(vcpu, gfn << PAGE_SHIFT,
+ (gfn << PAGE_SHIFT) + PAGE_SIZE);
+ if (mtrr == 0xfe || mtrr == 0xff)
+ mtrr = MTRR_TYPE_WRBACK;
+ return mtrr;
+}
+
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
@@ -1152,6 +1259,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 spte;
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
+ u64 mt_mask = shadow_mt_mask;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
@@ -1199,6 +1307,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
+ if (mt_mask) {
+ mt_mask = get_memory_type(vcpu, gfn) <<
+ kvm_x86_ops->get_mt_mask_shift();
+ spte |= mt_mask;
+ }
spte |= (u64)pfn << PAGE_SHIFT;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6f7f316..341f47e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1899,6 +1899,11 @@ static int get_npt_level(void)
#endif
}
+static int svm_get_mt_mask_shift(void)
+{
+ return 0;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -1954,6 +1959,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
+ .get_mt_mask_shift = svm_get_mt_mask_shift,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b74b43f..0a86ca3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1508,10 +1508,17 @@ static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
*hw_cr4 &= ~X86_CR4_PAE;
}
+static void vmx_mtrr_update(struct kvm_vcpu *vcpu)
+{
+ if (vm_need_ept())
+ kvm_mmu_reset_context(vcpu);
+}
+
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
KVM_VM_CR0_ALWAYS_ON;
+ unsigned long last_cr0 = vmcs_readl(CR0_READ_SHADOW);
vmx_fpu_deactivate(vcpu);
@@ -1539,6 +1546,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
vmx_fpu_activate(vcpu);
+
+ /* According to SDM 10.11.8, check whether we need to update the MTRR state */
+ if ((last_cr0 & X86_CR0_CD) && !(last_cr0 & X86_CR0_NW) &&
+ !(cr0 & X86_CR0_CD) && !(cr0 & X86_CR0_NW))
+ vmx_mtrr_update(vcpu);
}
static u64 construct_eptp(unsigned long root_hpa)
@@ -3481,6 +3493,11 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
+static int vmx_get_mt_mask_shift(void)
+{
+ return VMX_EPT_MT_EPTE_SHIFT;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -3536,6 +3553,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
+ .get_mt_mask_shift = vmx_get_mt_mask_shift,
};
static int __init vmx_init(void)
@@ -3575,11 +3593,11 @@ static int __init vmx_init(void)
if (vm_need_ept()) {
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
- VMX_EPT_WRITABLE_MASK |
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+ VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
VMX_EPT_FAKE_DIRTY_MASK, 0ull,
- VMX_EPT_EXECUTABLE_MASK);
+ VMX_EPT_EXECUTABLE_MASK,
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
kvm_enable_tdp();
} else
kvm_disable_tdp();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5d7ea05..231d6fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2831,7 +2831,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0);
+ PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
return 0;
out:
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 2d65df2..6369f52 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -502,6 +502,7 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
+ int (*get_mt_mask_shift)(void);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -515,7 +516,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask);
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
--
1.5.4.5
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH 0/4] Memory type support for EPT
2008-09-08 11:42 [PATCH 0/4] Memory type support for EPT Sheng Yang
` (3 preceding siblings ...)
2008-09-08 11:42 ` [PATCH] KVM: VMX: Add MTRR support for EPT Sheng Yang
@ 2008-09-09 15:07 ` Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2008-09-09 15:07 UTC (permalink / raw)
To: Sheng Yang; +Cc: kvm
Sheng Yang wrote:
> Hi, Avi
>
> This patchset add memory type support for EPT, including MTRR and PAT.
>
> The patch restruct the MTRR structure to make it easy to use, also take host
> kernel MTRR as a reference.
>
> The function guest_mtrr_type_lookup() is copied from host kernel rather than
> reuse, for I think it may be much more complicate if we try to reuse the
> function.
>
> The patchset have been tested with VT-d and EPT.
>
> The MTRR support for shadow page table is also under development, based on this
> patchset.
>
Please send with git send-email -n -- that preserves the patch order for
me to apply.
(and thanks for getting git send-email to work, it's much more pleasant
to review and apply those emails).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread