public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 0/2] switch to get_user_pages_fast
@ 2008-09-11 13:43 Marcelo Tosatti
  2008-09-11 13:43 ` [patch 1/2] KVM: opencode gfn_to_page in kvm_vm_fault Marcelo Tosatti
  2008-09-11 13:43 ` [patch 2/2] KVM: switch to get_user_pages_fast Marcelo Tosatti
  0 siblings, 2 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2008-09-11 13:43 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm


-- 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 1/2] KVM: opencode gfn_to_page in kvm_vm_fault
  2008-09-11 13:43 [patch 0/2] switch to get_user_pages_fast Marcelo Tosatti
@ 2008-09-11 13:43 ` Marcelo Tosatti
  2008-09-11 13:43 ` [patch 2/2] KVM: switch to get_user_pages_fast Marcelo Tosatti
  1 sibling, 0 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2008-09-11 13:43 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Marcelo Tosatti

[-- Attachment #1: kvm-vm-fault --]
[-- Type: text/plain, Size: 1132 bytes --]

kvm_vm_fault is invoked with mmap_sem held in read mode. Since gfn_to_page
will be converted to get_user_pages_fast, which requires this lock NOT 
to be held, switch to opencoded get_user_pages.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm.tip/virt/kvm/kvm_main.c
===================================================================
--- kvm.tip.orig/virt/kvm/kvm_main.c
+++ kvm.tip/virt/kvm/kvm_main.c
@@ -1387,17 +1387,22 @@ out:
 
 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page[1];
+	unsigned long addr;
+	int npages;
+	gfn_t gfn = vmf->pgoff;
 	struct kvm *kvm = vma->vm_file->private_data;
-	struct page *page;
 
-	if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
 		return VM_FAULT_SIGBUS;
-	page = gfn_to_page(kvm, vmf->pgoff);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+
+	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
+				NULL);
+	if (unlikely(npages != 1))
 		return VM_FAULT_SIGBUS;
-	}
-	vmf->page = page;
+
+	vmf->page = page[0];
 	return 0;
 }
 

-- 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-11 13:43 [patch 0/2] switch to get_user_pages_fast Marcelo Tosatti
  2008-09-11 13:43 ` [patch 1/2] KVM: opencode gfn_to_page in kvm_vm_fault Marcelo Tosatti
@ 2008-09-11 13:43 ` Marcelo Tosatti
  2008-09-11 14:06   ` Avi Kivity
  2008-09-11 15:04   ` Hollis Blanchard
  1 sibling, 2 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2008-09-11 13:43 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Marcelo Tosatti, Hollis Blanchard

[-- Attachment #1: kvm-use-fast-gup --]
[-- Type: text/plain, Size: 7170 bytes --]

Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
faster on VMX.

Hollis, can you fix kvmppc_mmu_map? gfn_to_page must not be called with
mmap_sem held.

Looks tricky:
/* Must be called with mmap_sem locked for writing. */
static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: Hollis Blanchard <hollisb@us.ibm.com>

Index: kvm.tip/arch/x86/kvm/mmu.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/mmu.c
+++ kvm.tip/arch/x86/kvm/mmu.c
@@ -405,16 +405,19 @@ static int host_largepage_backed(struct 
 {
 	struct vm_area_struct *vma;
 	unsigned long addr;
+	int ret = 0;
 
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
-		return 0;
+		return ret;
 
+	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
 	if (vma && is_vm_hugetlb_page(vma))
-		return 1;
+		ret = 1;
+	up_read(&current->mm->mmap_sem);
 
-	return 0;
+	return ret;
 }
 
 static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
@@ -1136,9 +1139,7 @@ struct page *gva_to_page(struct kvm_vcpu
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	return page;
 }
@@ -1326,16 +1327,14 @@ static int nonpaging_map(struct kvm_vcpu
 	pfn_t pfn;
 	unsigned long mmu_seq;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
@@ -1484,15 +1483,13 @@ static int tdp_page_fault(struct kvm_vcp
 	if (r)
 		return r;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return 1;
@@ -1805,15 +1802,13 @@ static void mmu_guess_page_from_pte_writ
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
Index: kvm.tip/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.tip.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm.tip/arch/x86/kvm/paging_tmpl.h
@@ -102,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct k
 	pt_element_t *table;
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
-	up_read(&current->mm->mmap_sem);
 
 	table = kmap_atomic(page, KM_USER0);
-
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
-
 	kunmap_atomic(table, KM_USER0);
 
 	kvm_release_page_dirty(page);
@@ -418,7 +414,6 @@ static int FNAME(page_fault)(struct kvm_
 		return 0;
 	}
 
-	down_read(&current->mm->mmap_sem);
 	if (walker.level == PT_DIRECTORY_LEVEL) {
 		gfn_t large_gfn;
 		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
@@ -428,9 +423,8 @@ static int FNAME(page_fault)(struct kvm_
 		}
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
Index: kvm.tip/arch/x86/kvm/vmx.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/vmx.c
+++ kvm.tip/arch/x86/kvm/vmx.c
@@ -2010,9 +2010,7 @@ static int alloc_apic_access_page(struct
 	if (r)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
-	up_read(&current->mm->mmap_sem);
 out:
 	up_write(&kvm->slots_lock);
 	return r;
@@ -2034,10 +2032,8 @@ static int alloc_identity_pagetable(stru
 	if (r)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
 			VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 out:
 	up_write(&kvm->slots_lock);
 	return r;
Index: kvm.tip/arch/x86/kvm/x86.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/x86.c
+++ kvm.tip/arch/x86/kvm/x86.c
@@ -932,10 +932,8 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
-		up_read(&current->mm->mmap_sem);
 
 		if (is_error_page(vcpu->arch.time_page)) {
 			kvm_release_page_clean(vcpu->arch.time_page);
@@ -2305,9 +2303,7 @@ static int emulator_cmpxchg_emulated(uns
 
 		val = *(u64 *)new;
 
-		down_read(&current->mm->mmap_sem);
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-		up_read(&current->mm->mmap_sem);
 
 		kaddr = kmap_atomic(page, KM_USER0);
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
@@ -3072,9 +3068,7 @@ static void vapic_enter(struct kvm_vcpu 
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.apic->vapic_page = page;
 }
Index: kvm.tip/virt/kvm/kvm_main.c
===================================================================
--- kvm.tip.orig/virt/kvm/kvm_main.c
+++ kvm.tip/virt/kvm/kvm_main.c
@@ -716,9 +716,6 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-/*
- * Requires current->mm->mmap_sem to be held
- */
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct page *page[1];
@@ -734,20 +731,23 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t 
 		return page_to_pfn(bad_page);
 	}
 
-	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-				NULL);
+	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
+
 		if (vma == NULL || addr < vma->vm_start ||
 		    !(vma->vm_flags & VM_PFNMAP)) {
+			up_read(&current->mm->mmap_sem);
 			get_page(bad_page);
 			return page_to_pfn(bad_page);
 		}
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		up_read(&current->mm->mmap_sem);
 		BUG_ON(!is_mmio_pfn(pfn));
 	} else
 		pfn = page_to_pfn(page[0]);

-- 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-11 13:43 ` [patch 2/2] KVM: switch to get_user_pages_fast Marcelo Tosatti
@ 2008-09-11 14:06   ` Avi Kivity
  2008-09-11 15:04   ` Hollis Blanchard
  1 sibling, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2008-09-11 14:06 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, Hollis Blanchard

Marcelo Tosatti wrote:
> Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
> pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
> faster on VMX.
>
> Hollis, can you fix kvmppc_mmu_map? gfn_to_page must not be called with
> mmap_sem held.
>
> Looks tricky:
> /* Must be called with mmap_sem locked for writing. */
> static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> CC: Hollis Blanchard <hollisb@us.ibm.com>
>
>   

I'll wait until this is resolved, since I don't want to break ppc.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-11 13:43 ` [patch 2/2] KVM: switch to get_user_pages_fast Marcelo Tosatti
  2008-09-11 14:06   ` Avi Kivity
@ 2008-09-11 15:04   ` Hollis Blanchard
  2008-09-11 15:15     ` Avi Kivity
  1 sibling, 1 reply; 8+ messages in thread
From: Hollis Blanchard @ 2008-09-11 15:04 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, kvm

On Thu, 2008-09-11 at 10:43 -0300, Marcelo Tosatti wrote:
> plain text document attachment (kvm-use-fast-gup)
> Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
> pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
> faster on VMX.
> 
> Hollis, can you fix kvmppc_mmu_map? gfn_to_page must not be called with
> mmap_sem held.
> 
> Looks tricky:
> /* Must be called with mmap_sem locked for writing. */
> static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,

Actually the comment is wrong, so it's not that tricky. ;) Marcelo,
after Avi applies the following patch, could you respin and remove the
locking around PPC's gfn_to_pfn() too? Thanks!



kvm: ppc: kvmppc_44x_shadow_release() does not require mmap_sem to be locked

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -110,7 +110,6 @@ static int kvmppc_44x_tlbe_is_writable(s
 	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
 }
 
-/* Must be called with mmap_sem locked for writing. */
 static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
                                       unsigned int index)
 {
@@ -150,17 +149,16 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp
 	/* Get reference to new page. */
 	down_read(&current->mm->mmap_sem);
 	new_page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
 		kvm_release_page_clean(new_page);
-		up_read(&current->mm->mmap_sem);
 		return;
 	}
 	hpaddr = page_to_phys(new_page);
 
 	/* Drop reference to old page. */
 	kvmppc_44x_shadow_release(vcpu, victim);
-	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.shadow_pages[victim] = new_page;
 
@@ -194,7 +192,6 @@ void kvmppc_mmu_invalidate(struct kvm_vc
 	int i;
 
 	/* XXX Replace loop with fancy data structures. */
-	down_write(&current->mm->mmap_sem);
 	for (i = 0; i <= tlb_44x_hwater; i++) {
 		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
 		unsigned int tid;
@@ -219,7 +216,6 @@ void kvmppc_mmu_invalidate(struct kvm_vc
 				stlbe->tid, stlbe->word0, stlbe->word1,
 				stlbe->word2, handler);
 	}
-	up_write(&current->mm->mmap_sem);
 }
 
 /* Invalidate all mappings on the privilege switch after PID has been changed.
@@ -231,7 +227,6 @@ void kvmppc_mmu_priv_switch(struct kvm_v
 
 	if (vcpu->arch.swap_pid) {
 		/* XXX Replace loop with fancy data structures. */
-		down_write(&current->mm->mmap_sem);
 		for (i = 0; i <= tlb_44x_hwater; i++) {
 			struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
 
@@ -243,7 +238,6 @@ void kvmppc_mmu_priv_switch(struct kvm_v
 			            stlbe->tid, stlbe->word0, stlbe->word1,
 			            stlbe->word2, handler);
 		}
-		up_write(&current->mm->mmap_sem);
 		vcpu->arch.swap_pid = 0;
 	}
 


-- 
Hollis Blanchard
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-11 15:04   ` Hollis Blanchard
@ 2008-09-11 15:15     ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2008-09-11 15:15 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: Marcelo Tosatti, kvm

Hollis Blanchard wrote:


> kvm: ppc: kvmppc_44x_shadow_release() does not require mmap_sem to be locked
>
>   

Applied this, thanks.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-16 23:54 [patch 0/2] " Marcelo Tosatti
@ 2008-09-16 23:54 ` Marcelo Tosatti
  2008-09-17 14:42   ` Hollis Blanchard
  0 siblings, 1 reply; 8+ messages in thread
From: Marcelo Tosatti @ 2008-09-16 23:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Marcelo Tosatti, Hollis Blanchard

[-- Attachment #1: kvm-use-fast-gup --]
[-- Type: text/plain, Size: 7493 bytes --]

Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
faster on VMX.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm.tip/arch/x86/kvm/mmu.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/mmu.c
+++ kvm.tip/arch/x86/kvm/mmu.c
@@ -405,16 +405,19 @@ static int host_largepage_backed(struct 
 {
 	struct vm_area_struct *vma;
 	unsigned long addr;
+	int ret = 0;
 
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
-		return 0;
+		return ret;
 
+	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
 	if (vma && is_vm_hugetlb_page(vma))
-		return 1;
+		ret = 1;
+	up_read(&current->mm->mmap_sem);
 
-	return 0;
+	return ret;
 }
 
 static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
@@ -1140,9 +1143,7 @@ struct page *gva_to_page(struct kvm_vcpu
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	return page;
 }
@@ -1330,16 +1331,14 @@ static int nonpaging_map(struct kvm_vcpu
 	pfn_t pfn;
 	unsigned long mmu_seq;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
@@ -1488,15 +1487,13 @@ static int tdp_page_fault(struct kvm_vcp
 	if (r)
 		return r;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
 		return 1;
@@ -1809,15 +1806,13 @@ static void mmu_guess_page_from_pte_writ
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
-	down_read(&current->mm->mmap_sem);
 	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 
 	if (is_error_pfn(pfn)) {
 		kvm_release_pfn_clean(pfn);
Index: kvm.tip/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.tip.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm.tip/arch/x86/kvm/paging_tmpl.h
@@ -102,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct k
 	pt_element_t *table;
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
-	up_read(&current->mm->mmap_sem);
 
 	table = kmap_atomic(page, KM_USER0);
-
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
-
 	kunmap_atomic(table, KM_USER0);
 
 	kvm_release_page_dirty(page);
@@ -418,7 +414,6 @@ static int FNAME(page_fault)(struct kvm_
 		return 0;
 	}
 
-	down_read(&current->mm->mmap_sem);
 	if (walker.level == PT_DIRECTORY_LEVEL) {
 		gfn_t large_gfn;
 		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
@@ -428,9 +423,8 @@ static int FNAME(page_fault)(struct kvm_
 		}
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
Index: kvm.tip/arch/x86/kvm/vmx.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/vmx.c
+++ kvm.tip/arch/x86/kvm/vmx.c
@@ -2010,9 +2010,7 @@ static int alloc_apic_access_page(struct
 	if (r)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
-	up_read(&current->mm->mmap_sem);
 out:
 	up_write(&kvm->slots_lock);
 	return r;
@@ -2034,10 +2032,8 @@ static int alloc_identity_pagetable(stru
 	if (r)
 		goto out;
 
-	down_read(&current->mm->mmap_sem);
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
 			VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 out:
 	up_write(&kvm->slots_lock);
 	return r;
Index: kvm.tip/arch/x86/kvm/x86.c
===================================================================
--- kvm.tip.orig/arch/x86/kvm/x86.c
+++ kvm.tip/arch/x86/kvm/x86.c
@@ -942,10 +942,8 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
-		up_read(&current->mm->mmap_sem);
 
 		if (is_error_page(vcpu->arch.time_page)) {
 			kvm_release_page_clean(vcpu->arch.time_page);
@@ -2318,9 +2316,7 @@ static int emulator_cmpxchg_emulated(uns
 
 		val = *(u64 *)new;
 
-		down_read(&current->mm->mmap_sem);
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-		up_read(&current->mm->mmap_sem);
 
 		kaddr = kmap_atomic(page, KM_USER0);
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
@@ -3085,9 +3081,7 @@ static void vapic_enter(struct kvm_vcpu 
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.apic->vapic_page = page;
 }
Index: kvm.tip/virt/kvm/kvm_main.c
===================================================================
--- kvm.tip.orig/virt/kvm/kvm_main.c
+++ kvm.tip/virt/kvm/kvm_main.c
@@ -723,9 +723,6 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-/*
- * Requires current->mm->mmap_sem to be held
- */
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 {
 	struct page *page[1];
@@ -741,20 +738,23 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t 
 		return page_to_pfn(bad_page);
 	}
 
-	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-				NULL);
+	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
 		struct vm_area_struct *vma;
 
+		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, addr);
+
 		if (vma == NULL || addr < vma->vm_start ||
 		    !(vma->vm_flags & VM_PFNMAP)) {
+			up_read(&current->mm->mmap_sem);
 			get_page(bad_page);
 			return page_to_pfn(bad_page);
 		}
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		up_read(&current->mm->mmap_sem);
 		BUG_ON(!is_mmio_pfn(pfn));
 	} else
 		pfn = page_to_pfn(page[0]);
Index: kvm.tip/arch/powerpc/kvm/44x_tlb.c
===================================================================
--- kvm.tip.orig/arch/powerpc/kvm/44x_tlb.c
+++ kvm.tip/arch/powerpc/kvm/44x_tlb.c
@@ -147,9 +147,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp
 	stlbe = &vcpu->arch.shadow_tlb[victim];
 
 	/* Get reference to new page. */
-	down_read(&current->mm->mmap_sem);
 	new_page = gfn_to_page(vcpu->kvm, gfn);
-	up_read(&current->mm->mmap_sem);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
 		kvm_release_page_clean(new_page);

-- 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/2] KVM: switch to get_user_pages_fast
  2008-09-16 23:54 ` [patch 2/2] KVM: " Marcelo Tosatti
@ 2008-09-17 14:42   ` Hollis Blanchard
  0 siblings, 0 replies; 8+ messages in thread
From: Hollis Blanchard @ 2008-09-17 14:42 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, kvm

On Tue, 2008-09-16 at 20:54 -0300, Marcelo Tosatti wrote:
> plain text document attachment (kvm-use-fast-gup)
> Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
> pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
> faster on VMX.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Acked-by: Hollis Blanchard <hollisb@us.ibm.com>

-- 
Hollis Blanchard
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-09-17 14:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-11 13:43 [patch 0/2] switch to get_user_pages_fast Marcelo Tosatti
2008-09-11 13:43 ` [patch 1/2] KVM: opencode gfn_to_page in kvm_vm_fault Marcelo Tosatti
2008-09-11 13:43 ` [patch 2/2] KVM: switch to get_user_pages_fast Marcelo Tosatti
2008-09-11 14:06   ` Avi Kivity
2008-09-11 15:04   ` Hollis Blanchard
2008-09-11 15:15     ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2008-09-16 23:54 [patch 0/2] " Marcelo Tosatti
2008-09-16 23:54 ` [patch 2/2] KVM: " Marcelo Tosatti
2008-09-17 14:42   ` Hollis Blanchard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox