[patch 0/3] allow read-only memory mappings

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [patch 0/3] allow read-only memory mappings
@ 2010-10-05 11:54 Marcelo Tosatti
  2010-10-05 11:54 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-05 11:54 UTC (permalink / raw)
  To: avi, aarcange, gleb; +Cc: kvm

This should probably exit as MMIO instead of using a custom exit code, for IO_MEM_ROM
slots. But then, I am unsure whether IO_MEM_ROM areas should be mprotected (meaning QEMU
has to handle SIGBUS for its own accesses), or the attribute set in a slot's flags.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT
  2010-10-05 11:54 [patch 0/3] allow read-only memory mappings Marcelo Tosatti
@ 2010-10-05 11:54 ` Marcelo Tosatti
  2010-10-05 11:55 ` [patch 2/3] KVM: dont require read-only host ptes Marcelo Tosatti
  2010-10-05 11:55 ` [patch 3/3] KVM: handle " Marcelo Tosatti
  2 siblings, 0 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-05 11:54 UTC (permalink / raw)
  To: avi, aarcange, gleb; +Cc: kvm, Marcelo Tosatti

[-- Attachment #1: vmx-remove-base-ptes --]
[-- Type: text/plain, Size: 1145 bytes --]

The EPT present/writable bits use the same position as normal
pagetable bits. 

Since direct_map passes ACC_ALL to mmu_set_spte, thus always setting
the writable bit on sptes, use the generic PT_PRESENT shadow_base_pte.

Also pass present/writable error code information from EPT violation
to generic pagefault handler.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx.c
+++ kvm/arch/x86/kvm/vmx.c
@@ -3483,7 +3483,7 @@ static int handle_ept_violation(struct k
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
-	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
+	return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3);
 }
 
 static u64 ept_rsvd_mask(u64 spte, int level)
@@ -4408,8 +4408,6 @@ static int __init vmx_init(void)
 
 	if (enable_ept) {
 		bypass_guest_pf = 0;
-		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 2/3] KVM: dont require read-only host ptes
  2010-10-05 11:54 [patch 0/3] allow read-only memory mappings Marcelo Tosatti
  2010-10-05 11:54 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
@ 2010-10-05 11:55 ` Marcelo Tosatti
  2010-10-05 13:43   ` Gleb Natapov
  2010-10-05 11:55 ` [patch 3/3] KVM: handle " Marcelo Tosatti
  2 siblings, 1 reply; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-05 11:55 UTC (permalink / raw)
  To: avi, aarcange, gleb; +Cc: kvm, Marcelo Tosatti

[-- Attachment #1: gfn-to-pfn-ronly --]
[-- Type: text/plain, Size: 7088 bytes --]

gfn_to_pfn requires a writable host pte, failing otherwise.

Change it to fall back to read-only "acquisition", informing the callers.

Hopefully the ptes are cache-hot so the overhead is minimal.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm/arch/ia64/kvm/kvm-ia64.c
@@ -1589,7 +1589,7 @@ int kvm_arch_prepare_memory_region(struc
 		return -ENOMEM;
 
 	for (i = 0; i < npages; i++) {
-		pfn = gfn_to_pfn(kvm, base_gfn + i);
+		pfn = gfn_to_pfn(kvm, base_gfn + i, NULL);
 		if (!kvm_is_mmio_pfn(pfn)) {
 			kvm_set_pmt_entry(kvm, base_gfn + i,
 					pfn << PAGE_SHIFT,
Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu
 {
 	int r;
 	int level;
+	int writable;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 
@@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn))
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
@@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp
 	pfn_t pfn;
 	int r;
 	int level;
+	int writable;
+	int write = error_code & PFERR_WRITE_MASK;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 
@@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
-	if (is_error_pfn(pfn))
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 level, gfn, pfn);
+	r = __direct_map(vcpu, gpa, write, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -3043,6 +3045,7 @@ static void mmu_guess_page_from_pte_writ
 {
 	gfn_t gfn;
 	pfn_t pfn;
+	int writable;
 
 	if (!is_present_gpte(gpte))
 		return;
@@ -3050,9 +3053,9 @@ static void mmu_guess_page_from_pte_writ
 
 	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
-	if (is_error_pfn(pfn)) {
+	if (is_error_pfn(pfn) || !writable) {
 		kvm_release_pfn_clean(pfn);
 		return;
 	}
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -536,6 +536,7 @@ static int FNAME(page_fault)(struct kvm_
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
 	int fetch_fault = error_code & PFERR_FETCH_MASK;
+	int writable;
 	struct guest_walker walker;
 	u64 *sptep;
 	int write_pt = 0;
@@ -573,10 +574,10 @@ static int FNAME(page_fault)(struct kvm_
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn))
+	if (is_error_pfn(pfn) || !writable)
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -302,7 +302,7 @@ void kvm_set_page_accessed(struct page *
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
 int memslot_id(struct kvm *kvm, gfn_t gfn);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -948,17 +948,28 @@ unsigned long gfn_to_hva(struct kvm *kvm
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
+			int *writable)
 {
 	struct page *page[1];
 	int npages;
 	pfn_t pfn;
 
+	if (writable)
+		*writable = 1;
+
 	if (atomic)
 		npages = __get_user_pages_fast(addr, 1, 1, page);
 	else {
 		might_sleep();
 		npages = get_user_pages_fast(addr, 1, 1, page);
+
+		/* attempt to map read-only */
+		if (unlikely(npages != 1) && writable) {
+			npages = get_user_pages_fast(addr, 1, 0, page);
+			if (npages == 1)
+				*writable = 0;
+		}
 	}
 
 	if (unlikely(npages != 1)) {
@@ -995,11 +1006,11 @@ return_fault_page:
 
 pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true);
+	return hva_to_pfn(kvm, addr, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic)
+static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, int *writable)
 {
 	unsigned long addr;
 
@@ -1009,18 +1020,18 @@ static pfn_t __gfn_to_pfn(struct kvm *kv
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic);
+	return hva_to_pfn(kvm, addr, atomic, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
 {
-	return __gfn_to_pfn(kvm, gfn, true);
+	return __gfn_to_pfn(kvm, gfn, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable)
 {
-	return __gfn_to_pfn(kvm, gfn, false);
+	return __gfn_to_pfn(kvm, gfn, false, writable);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
@@ -1028,7 +1039,7 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm
 			 struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false);
+	return hva_to_pfn(kvm, addr, false, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1052,7 +1063,7 @@ struct page *gfn_to_page(struct kvm *kvm
 {
 	pfn_t pfn;
 
-	pfn = gfn_to_pfn(kvm, gfn);
+	pfn = gfn_to_pfn(kvm, gfn, NULL);
 	if (!kvm_is_mmio_pfn(pfn))
 		return pfn_to_page(pfn);
 
Index: kvm/arch/powerpc/kvm/book3s.c
===================================================================
--- kvm.orig/arch/powerpc/kvm/book3s.c
+++ kvm/arch/powerpc/kvm/book3s.c
@@ -468,7 +468,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu 
 		return pfn;
 	}
 
-	return gfn_to_pfn(vcpu->kvm, gfn);
+	return gfn_to_pfn(vcpu->kvm, gfn, NULL);
 }
 
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/3] KVM: dont require read-only host ptes
  2010-10-05 11:55 ` [patch 2/3] KVM: dont require read-only host ptes Marcelo Tosatti
@ 2010-10-05 13:43   ` Gleb Natapov
  2010-10-05 13:47     ` Marcelo Tosatti
  0 siblings, 1 reply; 8+ messages in thread
From: Gleb Natapov @ 2010-10-05 13:43 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: avi, aarcange, kvm

On Tue, Oct 05, 2010 at 08:55:00AM -0300, Marcelo Tosatti wrote:
> gfn_to_pfn requires a writable host pte, failing otherwise.
> 
> Change it to fallback to read-only "acquision', informing the callers. 
> 
> Hopefully the ptes are cache-hot so the overhead is minimal.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Index: kvm/arch/ia64/kvm/kvm-ia64.c
> ===================================================================
> --- kvm.orig/arch/ia64/kvm/kvm-ia64.c
> +++ kvm/arch/ia64/kvm/kvm-ia64.c
> @@ -1589,7 +1589,7 @@ int kvm_arch_prepare_memory_region(struc
>  		return -ENOMEM;
>  
>  	for (i = 0; i < npages; i++) {
> -		pfn = gfn_to_pfn(kvm, base_gfn + i);
> +		pfn = gfn_to_pfn(kvm, base_gfn + i, NULL);
>  		if (!kvm_is_mmio_pfn(pfn)) {
>  			kvm_set_pmt_entry(kvm, base_gfn + i,
>  					pfn << PAGE_SHIFT,
> Index: kvm/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/mmu.c
> +++ kvm/arch/x86/kvm/mmu.c
> @@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu
>  {
>  	int r;
>  	int level;
> +	int writable;
>  	pfn_t pfn;
>  	unsigned long mmu_seq;
>  
> @@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu
>  
>  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
>  	smp_rmb();
> -	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
>  
>  	/* mmio */
> -	if (is_error_pfn(pfn))
> +	if (is_error_pfn(pfn) || !writable)
>  		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
>  
>  	spin_lock(&vcpu->kvm->mmu_lock);
> @@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp
>  	pfn_t pfn;
>  	int r;
>  	int level;
> +	int writable;
> +	int write = error_code & PFERR_WRITE_MASK;
>  	gfn_t gfn = gpa >> PAGE_SHIFT;
>  	unsigned long mmu_seq;
>  
> @@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp
>  
>  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
>  	smp_rmb();
> -	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> -	if (is_error_pfn(pfn))
> +	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
> +	if (is_error_pfn(pfn) || !writable)
Why would we fail a read-only access to read-only memory? Shouldn't we
check the access type here?

--
			Gleb.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 2/3] KVM: dont require read-only host ptes
  2010-10-05 13:43   ` Gleb Natapov
@ 2010-10-05 13:47     ` Marcelo Tosatti
  0 siblings, 0 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-05 13:47 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, aarcange, kvm

On Tue, Oct 05, 2010 at 03:43:01PM +0200, Gleb Natapov wrote:
> > --- kvm.orig/arch/x86/kvm/mmu.c
> > +++ kvm/arch/x86/kvm/mmu.c
> > @@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu
> >  {
> >  	int r;
> >  	int level;
> > +	int writable;
> >  	pfn_t pfn;
> >  	unsigned long mmu_seq;
> >  
> > @@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu
> >  
> >  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
> >  	smp_rmb();
> > -	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> > +	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
> >  
> >  	/* mmio */
> > -	if (is_error_pfn(pfn))
> > +	if (is_error_pfn(pfn) || !writable)
> >  		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
> >  
> >  	spin_lock(&vcpu->kvm->mmu_lock);
> > @@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp
> >  	pfn_t pfn;
> >  	int r;
> >  	int level;
> > +	int writable;
> > +	int write = error_code & PFERR_WRITE_MASK;
> >  	gfn_t gfn = gpa >> PAGE_SHIFT;
> >  	unsigned long mmu_seq;
> >  
> > @@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp
> >  
> >  	mmu_seq = vcpu->kvm->mmu_notifier_seq;
> >  	smp_rmb();
> > -	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> > -	if (is_error_pfn(pfn))
> > +	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
> > +	if (is_error_pfn(pfn) || !writable)
> Why would we fail read only access to read only memory? Shouldn't we
> check access type here?
> 
> --
> 			Gleb.

Next patch does that.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 3/3] KVM: handle read-only host ptes
  2010-10-05 11:54 [patch 0/3] allow read-only memory mappings Marcelo Tosatti
  2010-10-05 11:54 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
  2010-10-05 11:55 ` [patch 2/3] KVM: dont require read-only host ptes Marcelo Tosatti
@ 2010-10-05 11:55 ` Marcelo Tosatti
  2 siblings, 0 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-05 11:55 UTC (permalink / raw)
  To: avi, aarcange, gleb; +Cc: kvm, Marcelo Tosatti

[-- Attachment #1: unallowed-write-infra --]
[-- Type: text/plain, Size: 6153 bytes --]

Instantiate a read-only spte if the host pte is read-only, and exit to
userspace if the guest attempts to write.

With this in place userspace can mprotect(PROT_READ) guest memory 
and handle write attempts.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2205,7 +2205,14 @@ static void direct_pte_prefetch(struct k
 	__direct_pte_prefetch(vcpu, sp, sptep);
 }
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+static int kvm_report_unallowed_write(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+	vcpu->run->exit_reason = KVM_EXIT_UNALLOWED_WRITE;
+	vcpu->run->unallowed_write.gpa = gpa;
+	return -EPERM;
+}
+
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int dirty,
 			int level, gfn_t gfn, pfn_t pfn)
 {
 	struct kvm_shadow_walk_iterator iterator;
@@ -2216,7 +2223,7 @@ static int __direct_map(struct kvm_vcpu 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write,
+				     0, write, dirty, &pt_write,
 				     level, gfn, pfn, false, true);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
@@ -2269,13 +2276,15 @@ static int kvm_handle_bad_page(struct kv
 	return 1;
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write)
 {
 	int r;
 	int level;
 	int writable;
+	int dirty = 1;
 	pfn_t pfn;
 	unsigned long mmu_seq;
+	gfn_t gfn = v >> PAGE_SHIFT;
 
 	level = mapping_level(vcpu, gfn);
 
@@ -2293,14 +2302,22 @@ static int nonpaging_map(struct kvm_vcpu
 	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
+	if (!writable) {
+		if (write) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, v);
+		}
+		/* instantiate read-only spte */
+		dirty = 0;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, level, gfn, pfn);
+	r = __direct_map(vcpu, v, write, dirty, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2559,7 +2576,6 @@ static gpa_t nonpaging_gva_to_gpa_nested
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 				u32 error_code)
 {
-	gfn_t gfn;
 	int r;
 
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
@@ -2570,10 +2586,7 @@ static int nonpaging_page_fault(struct k
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	gfn = gva >> PAGE_SHIFT;
-
-	return nonpaging_map(vcpu, gva & PAGE_MASK,
-			     error_code & PFERR_WRITE_MASK, gfn);
+	return nonpaging_map(vcpu, gva, error_code & PFERR_WRITE_MASK);
 }
 
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
@@ -2584,6 +2597,7 @@ static int tdp_page_fault(struct kvm_vcp
 	int level;
 	int writable;
 	int write = error_code & PFERR_WRITE_MASK;
+	int dirty = 1;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 
@@ -2601,13 +2615,22 @@ static int tdp_page_fault(struct kvm_vcp
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, gfn, &writable);
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
+	if (!writable) {
+		if (write) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, gpa);
+		}
+		/* instantiate read-only spte */
+		dirty = 0;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, write, level, gfn, pfn);
+	r = __direct_map(vcpu, gpa, write, dirty, level, gfn, pfn);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -3261,8 +3284,11 @@ int kvm_mmu_page_fault(struct kvm_vcpu *
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
-	if (r < 0)
+	if (r < 0) {
+		if (r == -EPERM) /* unallowed write */
+			r = 0;
 		goto out;
+	}
 
 	if (!r) {
 		r = 1;
Index: kvm/include/linux/kvm.h
===================================================================
--- kvm.orig/include/linux/kvm.h
+++ kvm/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_UNALLOWED_WRITE  19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -264,6 +265,10 @@ struct kvm_run {
 		struct {
 			__u64 gprs[32];
 		} osi;
+		/* KVM_EXIT_UNALLOWED_WRITE */
+		struct {
+			__u64 gpa;
+		} unallowed_write;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -542,6 +542,7 @@ static int FNAME(page_fault)(struct kvm_
 	int write_pt = 0;
 	int r;
 	pfn_t pfn;
+	gpa_t gpa;
 	int level = PT_PAGE_TABLE_LEVEL;
 	unsigned long mmu_seq;
 
@@ -567,6 +568,9 @@ static int FNAME(page_fault)(struct kvm_
 		return 0;
 	}
 
+	gpa = gfn_to_gpa(walker.gfn);
+	gpa |= addr & ~PAGE_MASK;
+
 	if (walker.level >= PT_DIRECTORY_LEVEL) {
 		level = min(walker.level, mapping_level(vcpu, walker.gfn));
 		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
@@ -577,8 +581,15 @@ static int FNAME(page_fault)(struct kvm_
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &writable);
 
 	/* mmio */
-	if (is_error_pfn(pfn) || !writable)
+	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);
+	if (!writable) {
+		if (write_fault) {
+			kvm_release_pfn_clean(pfn);
+			return kvm_report_unallowed_write(vcpu, gpa);
+		}
+		walker.pte_access &= ~ACC_WRITE_MASK;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 0/3] [RFC] support read-only mappings
@ 2010-10-19 16:26 Marcelo Tosatti
  2010-10-19 16:26 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
  0 siblings, 1 reply; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 16:26 UTC (permalink / raw)
  To: kvm; +Cc: avi, aarcange




^ permalink raw reply	[flat|nested] 8+ messages in thread

* [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT
  2010-10-19 16:26 [patch 0/3] [RFC] support read-only mappings Marcelo Tosatti
@ 2010-10-19 16:26 ` Marcelo Tosatti
  2010-10-20 10:24   ` Avi Kivity
  0 siblings, 1 reply; 8+ messages in thread
From: Marcelo Tosatti @ 2010-10-19 16:26 UTC (permalink / raw)
  To: kvm; +Cc: avi, aarcange, Marcelo Tosatti

[-- Attachment #1: vmx-remove-base-ptes --]
[-- Type: text/plain, Size: 1145 bytes --]

The EPT present/writable bits use the same position as normal
pagetable bits. 

Since direct_map passes ACC_ALL to mmu_set_spte, thus always setting
the writable bit on sptes, use the generic PT_PRESENT shadow_base_pte.

Also pass present/writable error code information from EPT violation
to generic pagefault handler.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/vmx.c
===================================================================
--- kvm.orig/arch/x86/kvm/vmx.c
+++ kvm/arch/x86/kvm/vmx.c
@@ -3483,7 +3483,7 @@ static int handle_ept_violation(struct k
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
-	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
+	return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3);
 }
 
 static u64 ept_rsvd_mask(u64 spte, int level)
@@ -4408,8 +4408,6 @@ static int __init vmx_init(void)
 
 	if (enable_ept) {
 		bypass_guest_pf = 0;
-		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT
  2010-10-19 16:26 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
@ 2010-10-20 10:24   ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2010-10-20 10:24 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, aarcange

  On 10/19/2010 06:26 PM, Marcelo Tosatti wrote:
> The EPT present/writable bits use the same position as normal
> pagetable bits.
>
> Since direct_map passes ACC_ALL to mmu_set_spte, thus always setting
> the writable bit on sptes, use the generic PT_PRESENT shadow_base_pte.
>
> Also pass present/writable error code information from EPT violation
> to generic pagefault handler.
>
> Signed-off-by: Marcelo Tosatti<mtosatti@redhat.com>
>
> Index: kvm/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/vmx.c
> +++ kvm/arch/x86/kvm/vmx.c
> @@ -3483,7 +3483,7 @@ static int handle_ept_violation(struct k
>
>   	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
>   	trace_kvm_page_fault(gpa, exit_qualification);
> -	return kvm_mmu_page_fault(vcpu, gpa&  PAGE_MASK, 0);
> +	return kvm_mmu_page_fault(vcpu, gpa, exit_qualification&  0x3);
>   }

Why in the same patch?  Seems unrelated.

Ah, it's actually not unrelated, it won't work without it.


>
>   static u64 ept_rsvd_mask(u64 spte, int level)
> @@ -4408,8 +4408,6 @@ static int __init vmx_init(void)
>
>   	if (enable_ept) {
>   		bypass_guest_pf = 0;
> -		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
> -			VMX_EPT_WRITABLE_MASK);
>   		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
>   				VMX_EPT_EXECUTABLE_MASK);
>   		kvm_enable_tdp();
>

Only caller gone, please remove callee.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-10-20 10:24 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-05 11:54 [patch 0/3] allow read-only memory mappings Marcelo Tosatti
2010-10-05 11:54 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-05 11:55 ` [patch 2/3] KVM: dont require read-only host ptes Marcelo Tosatti
2010-10-05 13:43   ` Gleb Natapov
2010-10-05 13:47     ` Marcelo Tosatti
2010-10-05 11:55 ` [patch 3/3] KVM: handle " Marcelo Tosatti
  -- strict thread matches above, loose matches on Subject: below --
2010-10-19 16:26 [patch 0/3] [RFC] support read-only mappings Marcelo Tosatti
2010-10-19 16:26 ` [patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT Marcelo Tosatti
2010-10-20 10:24   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox