* [PATCH 2/4] KVM: MMU: Use __gfn_to_rmap() in kvm_handle_hva()
2012-01-23 10:42 [PATCH 0/4] KVM: Decouple rmap_pde from lpage_info write_count Takuya Yoshikawa
2012-01-23 10:43 ` [PATCH 1/4] KVM: MMU: Use gfn_to_rmap() in audit_write_protection() Takuya Yoshikawa
@ 2012-01-23 10:43 ` Takuya Yoshikawa
2012-01-23 10:44 ` [PATCH 3/4] KVM: Introduce gfn_to_index() which returns the index for a given level Takuya Yoshikawa
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Takuya Yoshikawa @ 2012-01-23 10:43 UTC (permalink / raw)
To: avi, mtosatti; +Cc: kvm, kvm-ppc
We can hide the implementation details and treat every level uniformly.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
arch/x86/kvm/mmu.c | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 844fcce..0e82d9d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1133,14 +1133,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
gfn_t gfn = memslot->base_gfn + gfn_offset;
- ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+ ret = 0;
- for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
- struct kvm_lpage_info *linfo;
+ for (j = PT_PAGE_TABLE_LEVEL;
+ j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+ unsigned long *rmapp;
- linfo = lpage_info_slot(gfn, memslot,
- PT_DIRECTORY_LEVEL + j);
- ret |= handler(kvm, &linfo->rmap_pde, data);
+ rmapp = __gfn_to_rmap(gfn, j, memslot);
+ ret |= handler(kvm, rmapp, data);
}
trace_kvm_age_page(hva, memslot, ret);
retval |= ret;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 3/4] KVM: Introduce gfn_to_index() which returns the index for a given level
2012-01-23 10:42 [PATCH 0/4] KVM: Decouple rmap_pde from lpage_info write_count Takuya Yoshikawa
2012-01-23 10:43 ` [PATCH 1/4] KVM: MMU: Use gfn_to_rmap() in audit_write_protection() Takuya Yoshikawa
2012-01-23 10:43 ` [PATCH 2/4] KVM: MMU: Use __gfn_to_rmap() in kvm_handle_hva() Takuya Yoshikawa
@ 2012-01-23 10:44 ` Takuya Yoshikawa
2012-01-23 10:45 ` [RFC PATCH 4/4] KVM: Decouple rmap_pde from lpage_info write_count Takuya Yoshikawa
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Takuya Yoshikawa @ 2012-01-23 10:44 UTC (permalink / raw)
To: avi, mtosatti; +Cc: kvm, kvm-ppc
We can also use this for PT_PAGE_TABLE_LEVEL to treat every level
uniformly.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
arch/x86/kvm/mmu.c | 3 +--
include/linux/kvm_host.h | 7 +++++++
virt/kvm/kvm_main.c | 4 +---
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0e82d9d..12f5c99 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -688,8 +688,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
{
unsigned long idx;
- idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
return &slot->lpage_info[level - 2][idx];
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eada8e6..06d4e41 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -656,6 +656,13 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
return gfn_to_memslot(kvm, gfn)->id;
}
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9f32bff..4f2574f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -803,9 +803,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (new.lpage_info[i])
continue;
- lpages = 1 + ((base_gfn + npages - 1)
- >> KVM_HPAGE_GFN_SHIFT(level));
- lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
+ lpages = gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1;
new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread* [RFC PATCH 4/4] KVM: Decouple rmap_pde from lpage_info write_count
2012-01-23 10:42 [PATCH 0/4] KVM: Decouple rmap_pde from lpage_info write_count Takuya Yoshikawa
` (2 preceding siblings ...)
2012-01-23 10:44 ` [PATCH 3/4] KVM: Introduce gfn_to_index() which returns the index for a given level Takuya Yoshikawa
@ 2012-01-23 10:45 ` Takuya Yoshikawa
2012-01-23 18:40 ` [PATCH 0/4] " Marcelo Tosatti
2012-01-24 11:24 ` Avi Kivity
5 siblings, 0 replies; 9+ messages in thread
From: Takuya Yoshikawa @ 2012-01-23 10:45 UTC (permalink / raw)
To: avi, mtosatti; +Cc: kvm, kvm-ppc
Though we have one rmap array for every level, those for large pages,
called rmap_pde, are coupled with write_count information and constitute
lpage_info arrays.
To hide these implementation details, we are now using __gfn_to_rmap()
which includes likely(level == PT_PAGE_TABLE_LEVEL) heuristics; this
is not good because we know that it always fails for higher levels.
Furthermore, when we traverse rmap arrays to write protect pages during
dirty logging, the current layout reduces the locality of their elements
by placing write_count next to rmap_pde in lpage_info.
This patch mitigates this problem by decoupling rmap_pde from lpage_info
write_count and making the rmap array two dimensional which holds the
old rmap_pde elements in it.
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
---
arch/ia64/kvm/kvm-ia64.c | 8 ++++----
arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +++---
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 4 ++--
arch/x86/kvm/mmu.c | 9 +++------
arch/x86/kvm/x86.c | 4 ++--
include/linux/kvm_host.h | 3 +--
virt/kvm/kvm_main.c | 25 ++++++++++++++++---------
7 files changed, 31 insertions(+), 28 deletions(-)
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8ca7261..b17eaa1 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1376,8 +1376,8 @@ static void kvm_release_vm_pages(struct kvm *kvm)
kvm_for_each_memslot(memslot, slots) {
base_gfn = memslot->base_gfn;
for (j = 0; j < memslot->npages; j++) {
- if (memslot->rmap[j])
- put_page((struct page *)memslot->rmap[j]);
+ if (memslot->rmap[0][j])
+ put_page((struct page *)memslot->rmap[0][j]);
}
}
}
@@ -1591,12 +1591,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
kvm_set_pmt_entry(kvm, base_gfn + i,
pfn << PAGE_SHIFT,
_PAGE_AR_RWX | _PAGE_MA_WB);
- memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
+ memslot->rmap[0][i] = (unsigned long)pfn_to_page(pfn);
} else {
kvm_set_pmt_entry(kvm, base_gfn + i,
GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
_PAGE_MA_UC);
- memslot->rmap[i] = 0;
+ memslot->rmap[0][i] = 0;
}
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 783cd35..81f9036 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -631,7 +631,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
- rmap = &memslot->rmap[gfn - memslot->base_gfn];
+ rmap = &memslot->rmap[0][gfn - memslot->base_gfn];
lock_rmap(rmap);
/* Check if we might have been invalidated; let the guest retry if so */
@@ -693,7 +693,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- ret = handler(kvm, &memslot->rmap[gfn_offset],
+ ret = handler(kvm, &memslot->rmap[0][gfn_offset],
memslot->base_gfn + gfn_offset);
retval |= ret;
}
@@ -928,7 +928,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
unsigned long *rmapp, *map;
preempt_disable();
- rmapp = memslot->rmap;
+ rmapp = memslot->rmap[0];
map = memslot->dirty_bitmap;
for (i = 0; i < memslot->npages; ++i) {
if (kvm_test_clear_dirty(kvm, rmapp))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5f3c60b..4df9b4a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -103,7 +103,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return;
- rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+ rmap = real_vmalloc_addr(&memslot->rmap[0][gfn - memslot->base_gfn]);
lock_rmap(rmap);
head = *rmap & KVMPPC_RMAP_INDEX;
@@ -199,7 +199,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
if (!slot_is_aligned(memslot, psize))
return H_PARAMETER;
slot_fn = gfn - memslot->base_gfn;
- rmap = &memslot->rmap[slot_fn];
+ rmap = &memslot->rmap[0][slot_fn];
if (!kvm->arch.using_mmu_notifiers) {
physp = kvm->arch.slot_phys[memslot->id];
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 12f5c99..61c66d2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -948,13 +948,10 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
- struct kvm_lpage_info *linfo;
-
- if (likely(level == PT_PAGE_TABLE_LEVEL))
- return &slot->rmap[gfn - slot->base_gfn];
+ gfn_t idx;
- linfo = lpage_info_slot(gfn, slot, level);
- return &linfo->rmap_pde;
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
+ return &slot->rmap[level - PT_PAGE_TABLE_LEVEL][idx];
}
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c912f0b..136d965 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6137,7 +6137,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*x86 needs to hanlde !user_alloc case.
*/
if (!user_alloc) {
- if (npages && !old.rmap) {
+ if (npages && !old.rmap[0]) {
unsigned long userspace_addr;
down_write(&current->mm->mmap_sem);
@@ -6167,7 +6167,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
- if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+ if (!user_alloc && !old.user_alloc && old.rmap[0] && !npages) {
int ret;
down_write(&current->mm->mmap_sem);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 06d4e41..1f9eb1d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -172,7 +172,6 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
struct kvm_lpage_info {
- unsigned long rmap_pde;
int write_count;
};
@@ -180,10 +179,10 @@ struct kvm_memory_slot {
gfn_t base_gfn;
unsigned long npages;
unsigned long flags;
- unsigned long *rmap;
unsigned long *dirty_bitmap;
unsigned long *dirty_bitmap_head;
unsigned long nr_dirty_pages;
+ unsigned long *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned long userspace_addr;
int user_alloc;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4f2574f..27e7a89 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -537,8 +537,12 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
{
int i;
- if (!dont || free->rmap != dont->rmap)
- vfree(free->rmap);
+ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+ if (!dont || free->rmap[i] != dont->rmap[i]) {
+ vfree(free->rmap[i]);
+ free->rmap[i] = NULL;
+ }
+ }
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
@@ -552,7 +556,6 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
}
free->npages = 0;
- free->rmap = NULL;
}
void kvm_free_physmem(struct kvm *kvm)
@@ -779,10 +782,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Allocate if a slot is being created */
#ifndef CONFIG_S390
- if (npages && !new.rmap) {
- new.rmap = vzalloc(npages * sizeof(*new.rmap));
-
- if (!new.rmap)
+ if (npages && !new.rmap[0]) {
+ new.rmap[0] = vzalloc(npages * sizeof(*new.rmap[0]));
+ if (!new.rmap[0])
goto out_free;
new.user_alloc = user_alloc;
@@ -806,10 +808,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
lpages = gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1;
new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
-
if (!new.lpage_info[i])
goto out_free;
+ new.rmap[i + 1] = vzalloc(lpages * sizeof(*new.rmap[i + 1]));
+ if (!new.rmap[i + 1])
+ goto out_free;
+
if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
new.lpage_info[i][0].write_count = 1;
if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
@@ -886,7 +891,9 @@ skip_lpage:
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
- new.rmap = NULL;
+ for (i = 0; i < KVM_NR_PAGE_SIZES; ++i)
+ new.rmap[i] = NULL;
+
new.dirty_bitmap = NULL;
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
new.lpage_info[i] = NULL;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 9+ messages in thread