[PATCH] kvm mmu: reduce 50% memory usage

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] kvm mmu: reduce 50% memory usage
@ 2010-04-28 11:57 Lai Jiangshan
  2010-04-28 13:01 ` Avi Kivity
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Lai Jiangshan @ 2010-04-28 11:57 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti, LKML, kvm


I think users will enable tdp when their hardwares support ept or npt.
This patch can reduce about 50% kvm mmu memory usage for they.

This simple patch use the fact that:

When sp->role.direct is set, sp->gfns does not contain any essential
information, leaf sptes reachable from this sp are for a continuate
guest physical memory range(a linear range).
So sp->gfns[i](if it was set) equals to sp->gfn + i. (PT_PAGE_TABLE_LEVEL)
Obviously, it is not essential information, we can calculate it when need.

It means we don't need sp->gfns when sp->role.direct=1,
Thus we can save one page usage for every kvm_mmu_page.

Note:
Access to sp->gfns must be wrapped by kvm_mmu_page_get_gfn()
or kvm_mmu_page_set_gfn().
It is only exposed in FNAME(sync_page).

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 mmu.c         |   38 +++++++++++++++++++++++++++++---------
 paging_tmpl.h |    3 +++
 2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ddfa865..a5c6719 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -393,6 +393,22 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 	kfree(rd);
 }
 
+static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
+{
+	if (!sp->role.direct)
+		return sp->gfns[index];
+
+	return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS));
+}
+
+static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+{
+	if (sp->role.direct)
+		BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
+	else
+		sp->gfns[index] = gfn;
+}
+
 /*
  * Return the pointer to the largepage write count for a given
  * gfn, handling slots that are not large page aligned.
@@ -543,7 +559,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
-	sp->gfns[spte - sp->spt] = gfn;
+	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
@@ -601,6 +617,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	struct kvm_rmap_desc *prev_desc;
 	struct kvm_mmu_page *sp;
 	pfn_t pfn;
+	gfn_t gfn;
 	unsigned long *rmapp;
 	int i;
 
@@ -612,7 +629,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writable_pte(*spte))
 		kvm_set_pfn_dirty(pfn);
-	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
+	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
+	rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
@@ -896,7 +914,8 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	ASSERT(is_empty_shadow_page(sp->spt));
 	list_del(&sp->link);
 	__free_page(virt_to_page(sp->spt));
-	__free_page(virt_to_page(sp->gfns));
+	if (!sp->role.direct)
+		__free_page(virt_to_page(sp->gfns));
 	kfree(sp);
 	++kvm->arch.n_free_mmu_pages;
 }
@@ -907,13 +926,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
 }
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
-					       u64 *parent_pte)
+					       u64 *parent_pte, int direct)
 {
 	struct kvm_mmu_page *sp;
 
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
 	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
-	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+	if (!direct)
+		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
+						  PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
@@ -1347,7 +1368,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			return sp;
 		}
 	++vcpu->kvm->stat.mmu_cache_miss;
-	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
+	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
 	if (!sp)
 		return sp;
 	sp->gfn = gfn;
@@ -3314,7 +3335,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 
 	if (*sptep & PT_WRITABLE_MASK) {
 		rev_sp = page_header(__pa(sptep));
-		gfn = rev_sp->gfns[sptep - rev_sp->spt];
+		gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
 
 		if (!gfn_to_memslot(kvm, gfn)) {
 			if (!printk_ratelimit())
@@ -3328,8 +3349,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 			return;
 		}
 
-		rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt],
-				    rev_sp->role.level);
+		rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
 		if (!*rmapp) {
 			if (!printk_ratelimit())
 				return;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d0cc07e..702c016 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -576,6 +576,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 	offset = nr_present = 0;
 
+	/* direct kvm_mmu_page can not be unsync. */
+	BUG_ON(sp->role.direct);
+
 	if (PTTYPE == 32)
 		offset = sp->role.quadrant << PT64_LEVEL_BITS;
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-28 11:57 [PATCH] kvm mmu: reduce 50% memory usage Lai Jiangshan
@ 2010-04-28 13:01 ` Avi Kivity
  2010-04-28 18:05 ` Marcelo Tosatti
  2010-04-29 18:09 ` Marcelo Tosatti
  2 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2010-04-28 13:01 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Marcelo Tosatti, LKML, kvm

On 04/28/2010 02:57 PM, Lai Jiangshan wrote:
> I think users will enable tdp when their hardwares support ept or npt.
> This patch can reduce about 50% kvm mmu memory usage for they.
>
>    

Good one!

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-28 11:57 [PATCH] kvm mmu: reduce 50% memory usage Lai Jiangshan
  2010-04-28 13:01 ` Avi Kivity
@ 2010-04-28 18:05 ` Marcelo Tosatti
  2010-04-29 18:09 ` Marcelo Tosatti
  2 siblings, 0 replies; 14+ messages in thread
From: Marcelo Tosatti @ 2010-04-28 18:05 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Avi Kivity, LKML, kvm

On Wed, Apr 28, 2010 at 07:57:01PM +0800, Lai Jiangshan wrote:
> 
> I think users will enable tdp when their hardwares support ept or npt.
> This patch can reduce about 50% kvm mmu memory usage for they.
> 
> This simple patch use the fact that:
> 
> When sp->role.direct is set, sp->gfns does not contain any essential
> information, leaf sptes reachable from this sp are for a continuate
> guest physical memory range(a linear range).
> So sp->gfns[i](if it was set) equals to sp->gfn + i. (PT_PAGE_TABLE_LEVEL)
> Obviously, it is not essential information, we can calculate it when need.
> 
> It means we don't need sp->gfns when sp->role.direct=1,
> Thus we can save one page usage for every kvm_mmu_page.
> 
> Note:
> Access to sp->gfns must be wrapped by kvm_mmu_page_get_gfn()
> or kvm_mmu_page_set_gfn().
> It is only exposed in FNAME(sync_page).
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>

Applied, thanks.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-28 11:57 [PATCH] kvm mmu: reduce 50% memory usage Lai Jiangshan
  2010-04-28 13:01 ` Avi Kivity
  2010-04-28 18:05 ` Marcelo Tosatti
@ 2010-04-29 18:09 ` Marcelo Tosatti
  2010-04-29 18:43   ` Avi Kivity
  2 siblings, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2010-04-29 18:09 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Avi Kivity, LKML, kvm

On Wed, Apr 28, 2010 at 07:57:01PM +0800, Lai Jiangshan wrote:
> 
> I think users will enable tdp when their hardwares support ept or npt.
> This patch can reduce about 50% kvm mmu memory usage for they.
> 
> This simple patch use the fact that:
> 
> When sp->role.direct is set, sp->gfns does not contain any essential
> information, leaf sptes reachable from this sp are for a continuate
> guest physical memory range(a linear range).
> So sp->gfns[i](if it was set) equals to sp->gfn + i. (PT_PAGE_TABLE_LEVEL)
> Obviously, it is not essential information, we can calculate it when need.
> 
> It means we don't need sp->gfns when sp->role.direct=1,
> Thus we can save one page usage for every kvm_mmu_page.
> 
> Note:
> Access to sp->gfns must be wrapped by kvm_mmu_page_get_gfn()
> or kvm_mmu_page_set_gfn().
> It is only exposed in FNAME(sync_page).

Lai,

You missed quadrant on 4mb large page emulation with shadow (see updated
patch below). Also for some reason i can't understand the assumption
does not hold for large sptes with TDP, so reverted for now.

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3266d73..a9edfdb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -393,6 +393,27 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 	kfree(rd);
 }
 
+static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
+{
+	gfn_t gfn;
+
+	if (!sp->role.direct)
+		return sp->gfns[index];
+
+	gfn = sp->gfn + index * (1 << (sp->role.level - 1) * PT64_LEVEL_BITS);
+	gfn += sp->role.quadrant << PT64_LEVEL_BITS;
+
+	return gfn;
+}
+
+static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+{
+	if (sp->role.direct)
+		BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
+	else
+		sp->gfns[index] = gfn;
+}
+
 /*
  * Return the pointer to the largepage write count for a given
  * gfn, handling slots that are not large page aligned.
@@ -543,7 +564,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
-	sp->gfns[spte - sp->spt] = gfn;
+	kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
@@ -601,6 +622,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	struct kvm_rmap_desc *prev_desc;
 	struct kvm_mmu_page *sp;
 	pfn_t pfn;
+	gfn_t gfn;
 	unsigned long *rmapp;
 	int i;
 
@@ -612,7 +634,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 		kvm_set_pfn_accessed(pfn);
 	if (is_writable_pte(*spte))
 		kvm_set_pfn_dirty(pfn);
-	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
+	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
+	rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
@@ -896,7 +919,8 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	ASSERT(is_empty_shadow_page(sp->spt));
 	list_del(&sp->link);
 	__free_page(virt_to_page(sp->spt));
-	__free_page(virt_to_page(sp->gfns));
+	if (!sp->role.direct)
+		__free_page(virt_to_page(sp->gfns));
 	kfree(sp);
 	++kvm->arch.n_free_mmu_pages;
 }
@@ -907,13 +931,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
 }
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
-					       u64 *parent_pte)
+					       u64 *parent_pte, int direct)
 {
 	struct kvm_mmu_page *sp;
 
 	sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
 	sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
-	sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+	if (!direct)
+		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
+						  PAGE_SIZE);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
@@ -1352,7 +1378,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	if (role.direct)
 		role.cr4_pae = 0;
 	role.access = access;
-	if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+	if (vcpu->arch.mmu.root_level == PT32_ROOT_LEVEL) {
 		quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
@@ -1379,7 +1405,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			return sp;
 		}
 	++vcpu->kvm->stat.mmu_cache_miss;
-	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
+	sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct);
 	if (!sp)
 		return sp;
 	sp->gfn = gfn;
@@ -3371,7 +3399,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 
 	if (*sptep & PT_WRITABLE_MASK) {
 		rev_sp = page_header(__pa(sptep));
-		gfn = rev_sp->gfns[sptep - rev_sp->spt];
+		gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
 
 		if (!gfn_to_memslot(kvm, gfn)) {
 			if (!printk_ratelimit())
@@ -3385,8 +3413,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 			return;
 		}
 
-		rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt],
-				    rev_sp->role.level);
+		rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
 		if (!*rmapp) {
 			if (!printk_ratelimit())
 				return;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 624b38f..2091590 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -592,6 +592,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 	offset = nr_present = 0;
 
+	/* direct kvm_mmu_page can not be unsync. */
+	BUG_ON(sp->role.direct);
+
 	if (PTTYPE == 32)
 		offset = sp->role.quadrant << PT64_LEVEL_BITS;
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-29 18:09 ` Marcelo Tosatti
@ 2010-04-29 18:43   ` Avi Kivity
       [not found]     ` <4BDA3F9B.8040708@cn.fujitsu.com>
  2010-04-30 15:44     ` Marcelo Tosatti
  0 siblings, 2 replies; 14+ messages in thread
From: Avi Kivity @ 2010-04-29 18:43 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Lai Jiangshan, LKML, kvm

On 04/29/2010 09:09 PM, Marcelo Tosatti wrote:
>
> You missed quadrant on 4mb large page emulation with shadow (see updated
> patch below).

Good catch.

> Also for some reason i can't understand the assumption
> does not hold for large sptes with TDP, so reverted for now.
>    

It's unrelated to TDP, same issue with shadow.  I think the calculation 
is correct.  For example the 4th spte for a level=2 page will yield 
gfn=4*512.

> @@ -393,6 +393,27 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
>   	kfree(rd);
>   }
>
> +static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> +{
> +	gfn_t gfn;
> +
> +	if (!sp->role.direct)
> +		return sp->gfns[index];
> +
> +	gfn = sp->gfn + index * (1<<  (sp->role.level - 1) * PT64_LEVEL_BITS);
> +	gfn += sp->role.quadrant<<  PT64_LEVEL_BITS;
>    

PT64_LEVEL_BITS * level

> +
> +	return gfn;
> +}
> +
>
>    


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
       [not found]     ` <4BDA3F9B.8040708@cn.fujitsu.com>
@ 2010-04-30  7:26       ` Avi Kivity
       [not found]         ` <4BDA9AAC.6060303@cn.fujitsu.com>
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2010-04-30  7:26 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Marcelo Tosatti, LKML, kvm

On 04/30/2010 05:25 AM, Lai Jiangshan wrote:
>
>> It's unrelated to TDP, same issue with shadow.  I think the calculation
>> is correct.  For example the 4th spte for a level=2 page will yield
>> gfn=4*512.
>>      
> Avi, Marcelo
> Thank you very much.
>
> The calculation I used is correct.
>    

Yes.  btw, can you update the patch to also correct mmu.txt?

> The fault is comes from the FNAME(fetch) when using shadow page.
>
> I have fix it in the patch
> "[RFC PATCH] kvm: calculate correct gfn for small host pages which emulates large guest pages"
>
> (It seems that the mail lists is unreachable, did you receive it?)
>    

The lists are down at the moment.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
       [not found]         ` <4BDA9AAC.6060303@cn.fujitsu.com>
@ 2010-04-30 10:00           ` Avi Kivity
  0 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2010-04-30 10:00 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Marcelo Tosatti, LKML, kvm

On 04/30/2010 11:54 AM, Lai Jiangshan wrote:
> Avi Kivity wrote:
>    
>> On 04/30/2010 05:25 AM, Lai Jiangshan wrote:
>>      
>>>        
>>>> It's unrelated to TDP, same issue with shadow.  I think the calculation
>>>> is correct.  For example the 4th spte for a level=2 page will yield
>>>> gfn=4*512.
>>>>
>>>>          
>>> Avi, Marcelo
>>> Thank you very much.
>>>
>>> The calculation I used is correct.
>>>
>>>        
>> Yes.  btw, can you update the patch to also correct mmu.txt?
>>      
> The corresponding content in mmu.txt are:
>    role.direct:
>      If set, leaf sptes reachable from this page are for a linear range.
>      Examples include real mode translation, large guest pages backed by small
>      host pages, and gpa->hpa translations when NPT or EPT is active.
>      The linear range starts at (gfn<<  PAGE_SHIFT) and its size is determined
>      by role.level (2MB for first level, 1GB for second level, 0.5TB for third
>      level, 256TB for fourth level)
>      If clear, this page corresponds to a guest page table denoted by the gfn
>      field.
>
>    gfn:
>      Either the guest page table containing the translations shadowed by this
>      page, or the base page frame for linear translations.  See role.direct.
>
> These are correct. My patch is fully base on this document.
> I think it is not need to be fixed.
>
> Did I miss something?
>
>    

sp->gfns can now be NULL, so the documentation of this field needs to be 
updated.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-29 18:43   ` Avi Kivity
       [not found]     ` <4BDA3F9B.8040708@cn.fujitsu.com>
@ 2010-04-30 15:44     ` Marcelo Tosatti
  2010-05-06  7:03       ` Lai Jiangshan
  1 sibling, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2010-04-30 15:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Lai Jiangshan, LKML, kvm

On Thu, Apr 29, 2010 at 09:43:40PM +0300, Avi Kivity wrote:
> On 04/29/2010 09:09 PM, Marcelo Tosatti wrote:
> >
> >You missed quadrant on 4mb large page emulation with shadow (see updated
> >patch below).
> 
> Good catch.
> 
> >Also for some reason i can't understand the assumption
> >does not hold for large sptes with TDP, so reverted for now.
> 
> It's unrelated to TDP, same issue with shadow.  I think the
> calculation is correct.  For example the 4th spte for a level=2 page
> will yield gfn=4*512.

Under testing i see sp at level 2, with sp->gfn == 4096, mmu_set_spte
setting index 8 to gfn 4096 (whereas kvm_mmu_page_get_gfn returns 4096 +
8*512).

Lai, can you please take a look at it? You should see the
kvm_mmu_page_set_gfn BUG_ON by using -mem-path on hugetlbfs.

> >@@ -393,6 +393,27 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
> >  	kfree(rd);
> >  }
> >
> >+static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> >+{
> >+	gfn_t gfn;
> >+
> >+	if (!sp->role.direct)
> >+		return sp->gfns[index];
> >+
> >+	gfn = sp->gfn + index * (1<<  (sp->role.level - 1) * PT64_LEVEL_BITS);
> >+	gfn += sp->role.quadrant<<  PT64_LEVEL_BITS;
> 
> PT64_LEVEL_BITS * level
> 
> >+
> >+	return gfn;
> >+}
> >+
> >
> 
> 
> -- 
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-04-30 15:44     ` Marcelo Tosatti
@ 2010-05-06  7:03       ` Lai Jiangshan
  2010-05-06 19:04         ` Marcelo Tosatti
  0 siblings, 1 reply; 14+ messages in thread
From: Lai Jiangshan @ 2010-05-06  7:03 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, LKML, kvm

Marcelo Tosatti wrote:
> On Thu, Apr 29, 2010 at 09:43:40PM +0300, Avi Kivity wrote:
>> On 04/29/2010 09:09 PM, Marcelo Tosatti wrote:
>>> You missed quadrant on 4mb large page emulation with shadow (see updated
>>> patch below).
>> Good catch.
>>
>>> Also for some reason i can't understand the assumption
>>> does not hold for large sptes with TDP, so reverted for now.
>> It's unrelated to TDP, same issue with shadow.  I think the
>> calculation is correct.  For example the 4th spte for a level=2 page
>> will yield gfn=4*512.
> 
> Under testing i see sp at level 2, with sp->gfn == 4096, mmu_set_spte
> setting index 8 to gfn 4096 (whereas kvm_mmu_page_get_gfn returns 4096 +
> 8*512).
> 
> Lai, can you please take a look at it? You should see the
> kvm_mmu_page_set_gfn BUG_ON by using -mem-path on hugetlbfs.
> 

Could you tell me how you test it? It will be better if I follow
your test steps.

I also hit the kvm_mmu_page_set_gfn BUG_ON, It is because
FNAME(fetch)() set sp->gfn wrong. The patch:
[PATCH] kvm: calculate correct gfn for small host pages which emulates large guest pages
fix it.

I can not hit kvm_mmu_page_set_gfn BUG_ON after this patch also
applied.

So could you tell me your test steps:
The host: ept/npt enabled? 64bit? testing codes in host?
The guest: OS? PAE? 32bit? 64bit? testing codes in guest?

Lai

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-05-06  7:03       ` Lai Jiangshan
@ 2010-05-06 19:04         ` Marcelo Tosatti
  2010-05-07  8:25           ` Lai Jiangshan
  2010-05-26  8:48           ` [RESEND PATCH 2/3] kvm, tdp: calculate correct base gfn for non-DIR level Lai Jiangshan
  0 siblings, 2 replies; 14+ messages in thread
From: Marcelo Tosatti @ 2010-05-06 19:04 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Avi Kivity, LKML, kvm

On Thu, May 06, 2010 at 03:03:48PM +0800, Lai Jiangshan wrote:
> Marcelo Tosatti wrote:
> > On Thu, Apr 29, 2010 at 09:43:40PM +0300, Avi Kivity wrote:
> >> On 04/29/2010 09:09 PM, Marcelo Tosatti wrote:
> >>> You missed quadrant on 4mb large page emulation with shadow (see updated
> >>> patch below).
> >> Good catch.
> >>
> >>> Also for some reason i can't understand the assumption
> >>> does not hold for large sptes with TDP, so reverted for now.
> >> It's unrelated to TDP, same issue with shadow.  I think the
> >> calculation is correct.  For example the 4th spte for a level=2 page
> >> will yield gfn=4*512.
> > 
> > Under testing i see sp at level 2, with sp->gfn == 4096, mmu_set_spte
> > setting index 8 to gfn 4096 (whereas kvm_mmu_page_get_gfn returns 4096 +
> > 8*512).
> > 
> > Lai, can you please take a look at it? You should see the
> > kvm_mmu_page_set_gfn BUG_ON by using -mem-path on hugetlbfs.
> > 
> 
> Could you tell me how you test it? It will be better if I follow
> your test steps.

mount -t hugetlbfs none /mnt/
echo xyz > /proc/sys/vm/nr_hugepages
qemu-kvm parameters -mem-path /mnt/

> I also hit the kvm_mmu_page_set_gfn BUG_ON, It is because
> FNAME(fetch)() set sp->gfn wrong. The patch:
> [PATCH] kvm: calculate correct gfn for small host pages which emulates large guest pages
> fix it.
> 
> I can not hit kvm_mmu_page_set_gfn BUG_ON after this patch also
> applied.
> 
> So could you tell me your test steps:
> The host: ept/npt enabled? 64bit? testing codes in host?

Intel EPT enabled.

> The guest: OS? PAE? 32bit? 64bit? testing codes in guest?

FC12 guest.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-05-06 19:04         ` Marcelo Tosatti
@ 2010-05-07  8:25           ` Lai Jiangshan
  2010-05-07 20:14             ` Marcelo Tosatti
  2010-05-26  8:48           ` [RESEND PATCH 2/3] kvm, tdp: calculate correct base gfn for non-DIR level Lai Jiangshan
  1 sibling, 1 reply; 14+ messages in thread
From: Lai Jiangshan @ 2010-05-07  8:25 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, LKML, kvm

Marcelo Tosatti wrote:
> On Thu, May 06, 2010 at 03:03:48PM +0800, Lai Jiangshan wrote:
>> Marcelo Tosatti wrote:
>>> On Thu, Apr 29, 2010 at 09:43:40PM +0300, Avi Kivity wrote:
>>>> On 04/29/2010 09:09 PM, Marcelo Tosatti wrote:
>>>>> You missed quadrant on 4mb large page emulation with shadow (see updated
>>>>> patch below).
>>>> Good catch.
>>>>
>>>>> Also for some reason i can't understand the assumption
>>>>> does not hold for large sptes with TDP, so reverted for now.
>>>> It's unrelated to TDP, same issue with shadow.  I think the
>>>> calculation is correct.  For example the 4th spte for a level=2 page
>>>> will yield gfn=4*512.
>>> Under testing i see sp at level 2, with sp->gfn == 4096, mmu_set_spte
>>> setting index 8 to gfn 4096 (whereas kvm_mmu_page_get_gfn returns 4096 +
>>> 8*512).
>>>
>>> Lai, can you please take a look at it? You should see the
>>> kvm_mmu_page_set_gfn BUG_ON by using -mem-path on hugetlbfs.
>>>
>> Could you tell me how you test it? It will be better if I follow
>> your test steps.
> 
> mount -t hugetlbfs none /mnt/
> echo xyz > /proc/sys/vm/nr_hugepages
> qemu-kvm parameters -mem-path /mnt/
> 
>> I also hit the kvm_mmu_page_set_gfn BUG_ON, It is because
>> FNAME(fetch)() set sp->gfn wrong. The patch:
>> [PATCH] kvm: calculate correct gfn for small host pages which emulates large guest pages
>> fix it.
>>
>> I can not hit kvm_mmu_page_set_gfn BUG_ON after this patch also
>> applied.
>>
>> So could you tell me your test steps:
>> The host: ept/npt enabled? 64bit? testing codes in host?
> 
> Intel EPT enabled.
> 
>> The guest: OS? PAE? 32bit? 64bit? testing codes in guest?
> 
> FC12 guest.
> 


I forgot to check if the calculation of base gfn is correct
in __direct_map().

Subject: [PATCH] kvm, tdp: calculate correct base gfn for non-DIR level

the base gfn calculation is incorrect in __direct_map(),
it does not calculate correctly when level=3 or 4.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
---
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5c6719..6986a6f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1955,7 +1956,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		}
 
 		if (*iterator.sptep == shadow_trap_nonpresent_pte) {
-			pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+			u64 base_addr = iterator.addr;
+
+			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
+			pseudo_gfn = base_addr >> PAGE_SHIFT;
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] kvm mmu: reduce 50% memory usage
  2010-05-07  8:25           ` Lai Jiangshan
@ 2010-05-07 20:14             ` Marcelo Tosatti
  0 siblings, 0 replies; 14+ messages in thread
From: Marcelo Tosatti @ 2010-05-07 20:14 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Avi Kivity, LKML, kvm

On Fri, May 07, 2010 at 04:25:36PM +0800, Lai Jiangshan wrote:
> Subject: [PATCH] kvm, tdp: calculate correct base gfn for non-DIR level
> 
> the base gfn calculation is incorrect in __direct_map(),
> it does not calculate correctly when level=3 or 4.
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
> ---
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a5c6719..6986a6f 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1955,7 +1956,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
>  		}
>  
>  		if (*iterator.sptep == shadow_trap_nonpresent_pte) {
> -			pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
> +			u64 base_addr = iterator.addr;
> +
> +			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
> +			pseudo_gfn = base_addr >> PAGE_SHIFT;
>  			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
>  					      iterator.level - 1,
>  					      1, ACC_ALL, iterator.sptep);

Looks good, please resend the whole patch.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RESEND PATCH 2/3]  kvm, tdp: calculate correct base gfn for non-DIR level
  2010-05-06 19:04         ` Marcelo Tosatti
  2010-05-07  8:25           ` Lai Jiangshan
@ 2010-05-26  8:48           ` Lai Jiangshan
  2010-05-26 10:15             ` Avi Kivity
  1 sibling, 1 reply; 14+ messages in thread
From: Lai Jiangshan @ 2010-05-26  8:48 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, LKML, kvm


In Document/kvm/mmu.txt:
  gfn:
    Either the guest page table containing the translations shadowed by this
    page, or the base page frame for linear translations. See role.direct.

But in __direct_map(), the base gfn calculation is incorrect,
it does not calculate correctly when level=3 or 4.

Reported-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5c6719..6986a6f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1955,7 +1956,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 		}
 
 		if (*iterator.sptep == shadow_trap_nonpresent_pte) {
-			pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+			u64 base_addr = iterator.addr;
+
+			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
+			pseudo_gfn = base_addr >> PAGE_SHIFT;
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);





^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RESEND PATCH 2/3]  kvm, tdp: calculate correct base gfn for non-DIR level
  2010-05-26  8:48           ` [RESEND PATCH 2/3] kvm, tdp: calculate correct base gfn for non-DIR level Lai Jiangshan
@ 2010-05-26 10:15             ` Avi Kivity
  0 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2010-05-26 10:15 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Marcelo Tosatti, LKML, kvm

On 05/26/2010 11:48 AM, Lai Jiangshan wrote:
> In Document/kvm/mmu.txt:
>    gfn:
>      Either the guest page table containing the translations shadowed by this
>      page, or the base page frame for linear translations. See role.direct.
>
> But in __direct_map(), the base gfn calculation is incorrect,
> it does not calculate correctly when level=3 or 4.
>    

Applied, thanks.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-05-26 10:15 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-28 11:57 [PATCH] kvm mmu: reduce 50% memory usage Lai Jiangshan
2010-04-28 13:01 ` Avi Kivity
2010-04-28 18:05 ` Marcelo Tosatti
2010-04-29 18:09 ` Marcelo Tosatti
2010-04-29 18:43   ` Avi Kivity
     [not found]     ` <4BDA3F9B.8040708@cn.fujitsu.com>
2010-04-30  7:26       ` Avi Kivity
     [not found]         ` <4BDA9AAC.6060303@cn.fujitsu.com>
2010-04-30 10:00           ` Avi Kivity
2010-04-30 15:44     ` Marcelo Tosatti
2010-05-06  7:03       ` Lai Jiangshan
2010-05-06 19:04         ` Marcelo Tosatti
2010-05-07  8:25           ` Lai Jiangshan
2010-05-07 20:14             ` Marcelo Tosatti
2010-05-26  8:48           ` [RESEND PATCH 2/3] kvm, tdp: calculate correct base gfn for non-DIR level Lai Jiangshan
2010-05-26 10:15             ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).