From mboxrd@z Thu Jan  1 00:00:00 1970
From: Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 07/13] KVM: MMU: mode specific sync_page
Date: Sat, 06 Sep 2008 15:48:29 -0300
Message-ID: <20080906192431.043506161@localhost.localdomain>
References: <20080906184822.560099087@localhost.localdomain>
Cc: kvm@vger.kernel.org
To: Avi Kivity <avi@qumranet.com>
Return-path: <kvm-owner@vger.kernel.org>
Received: from mx1.redhat.com ([66.187.233.31]:60558 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752492AbYIFT1Y (ORCPT <rfc822;kvm@vger.kernel.org>);
	Sat, 6 Sep 2008 15:27:24 -0400
Content-Disposition: inline; filename=kvm-oos-sync-page
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>

Examine the guest page table and bring the shadow page back in sync with
it. At the moment sync_page is simplistic: it only keeps shadow-present
entries whose gfn remains unchanged and drops the others.

It might be worthwhile to prepopulate the shadow in advance.

FIXME: the RW->RO transition needs a local TLB flush.

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -868,6 +868,14 @@ static void nonpaging_prefetch_page(stru
 		sp->spt[i] = shadow_trap_nonpresent_pte;
 }
 
+static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
+			       struct kvm_mmu_page *sp)
+{
+	/* should never happen: only nonpaging and TDP contexts install this */
+	WARN_ON(1);
+	return 1;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -888,6 +896,43 @@ static struct kvm_mmu_page *kvm_mmu_look
 	return NULL;
 }
 
+/* Sync *spte's writable bit with gpte_rw; a shadowed gfn is kept read-only. */
+static void kvm_sync_writeble(struct kvm_vcpu *vcpu, u64 *spte, int gpte_rw,
+			      gfn_t gfn)
+{
+	if (is_writeble_pte(*spte) == gpte_rw)
+		return;
+	if (is_writeble_pte(*spte)) {
+		*spte &= ~PT_WRITABLE_MASK;
+		return;
+	}
+	if (!kvm_mmu_lookup_page(vcpu->kvm, gfn)) {
+		*spte |= PT_WRITABLE_MASK;
+		return;
+	}
+	pgprintk("%s: found shadow page for %lx, keeping ro\n", __func__, gfn);
+}
+
+static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+/* Write-protect sp->gfn, resync sp; zap it unless sync_page() returns > 0. */
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	int synced;
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	synced = vcpu->arch.mmu.sync_page(vcpu, sp);
+	if (synced > 0) {
+		kvm_clear_pg_unsync(sp);
+		return synced;
+	}
+	/* possible optimization: unprotect all mappings
+	 * (only originally writeble ones of course).
+	 */
+	kvm_mmu_zap_page(vcpu->kvm, sp);
+	return synced;
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1536,6 +1581,7 @@ static int nonpaging_init_context(struct
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
+	context->sync_page = nonpaging_sync_page;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
@@ -1583,6 +1629,7 @@ static int paging64_init_context_common(
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->prefetch_page = paging64_prefetch_page;
+	context->sync_page = paging64_sync_page;
 	context->free = paging_free;
 	context->root_level = level;
 	context->shadow_root_level = level;
@@ -1604,6 +1651,7 @@ static int paging32_init_context(struct 
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->free = paging_free;
 	context->prefetch_page = paging32_prefetch_page;
+	context->sync_page = paging32_sync_page;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
@@ -1623,6 +1671,7 @@ static int init_kvm_tdp_mmu(struct kvm_v
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
+	context->sync_page = nonpaging_sync_page;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 	context->root_hpa = INVALID_PAGE;
 
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -510,6 +510,85 @@ static void FNAME(prefetch_page)(struct 
 	kvm_release_page_clean(page);
 }
 
+/*
+ * Resync @sp with the guest page table at sp->gfn.  Shadow-present entries
+ * whose gpte went away or maps another pfn are dropped; the rest get their
+ * permission and accessed bits refreshed.  Returns the number of entries
+ * left present, or -EFAULT if the guest page table page is unavailable.
+ */
+static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	int i, nr_present = 0;
+	struct page *pt_page, *page;
+	pt_element_t *pt;
+	void *gpte_kaddr;
+	unsigned pte_access;
+	u64 spte;
+
+	pt_page = gfn_to_page_atomic(vcpu->kvm, sp->gfn);
+	if (is_error_page(pt_page)) {
+		kvm_release_page_clean(pt_page);
+		return -EFAULT;
+	}
+
+	gpte_kaddr = pt = kmap_atomic(pt_page, KM_USER0);
+	if (PTTYPE == 32)
+		pt += sp->role.quadrant << PT64_LEVEL_BITS;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; i++, pt++) {
+		if (!is_shadow_present_pte(sp->spt[i]))
+			continue;
+
+		if (!is_present_pte(*pt)) {
+			rmap_remove(vcpu->kvm, &sp->spt[i]);
+			sp->spt[i] = shadow_notrap_nonpresent_pte;
+			continue;
+		}
+
+		page = gfn_to_page_atomic(vcpu->kvm, gpte_to_gfn(*pt));
+		if (is_error_page(page) ||
+		    spte_to_pfn(sp->spt[i]) != page_to_pfn(page)) {
+			rmap_remove(vcpu->kvm, &sp->spt[i]);
+			sp->spt[i] = shadow_trap_nonpresent_pte;
+			kvm_release_page_clean(page);
+			continue;
+		}
+		kvm_release_page_clean(page);
+		nr_present++;
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, *pt);
+		/* rebuild user/exec bits so revoked permissions are dropped */
+		spte = sp->spt[i] & ~(shadow_user_mask | shadow_x_mask |
+				      shadow_nx_mask);
+		if (pte_access & ACC_USER_MASK)
+			spte |= shadow_user_mask;
+		if (pte_access & ACC_EXEC_MASK)
+			spte |= shadow_x_mask;
+		else
+			spte |= shadow_nx_mask;
+		/* writeble: follow the gpte; shadowed gfns stay read-only */
+		kvm_sync_writeble(vcpu, &spte, is_writeble_pte(*pt),
+				  gpte_to_gfn(*pt));
+		/* clear writable to catch dirtyness */
+		if (!is_dirty_pte(*pt))
+			spte &= ~PT_WRITABLE_MASK;
+		/* guest->shadow accessed sync */
+		if (!(*pt & PT_ACCESSED_MASK))
+			spte &= ~PT_ACCESSED_MASK;
+		/* shadow->guest accessed sync */
+		if (spte & PT_ACCESSED_MASK)
+			set_bit(PT_ACCESSED_SHIFT, (unsigned long *)pt);
+		/* global: any non-global gpte clears the page's global flag */
+		if (!(*pt & PT_GLOBAL_MASK))
+			kvm_clear_pg_global(sp);
+		set_shadow_pte(&sp->spt[i], spte);
+	}
+
+	kunmap_atomic(gpte_kaddr, KM_USER0);
+	kvm_release_page_dirty(pt_page);
+
+	return nr_present;
+}
+
 #undef pt_element_t
 #undef guest_walker
 #undef shadow_walker
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -221,6 +221,8 @@ struct kvm_mmu {
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
 	void (*prefetch_page)(struct kvm_vcpu *vcpu,
 			      struct kvm_mmu_page *page);
+	int (*sync_page)(struct kvm_vcpu *vcpu,
+			  struct kvm_mmu_page *sp);
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
@@ -760,6 +762,7 @@ int kvm_age_hva(struct kvm *kvm, unsigne
 
 enum kvm_page_flags {
 	KVM_PG_global,
+	KVM_PG_unsync,	/* cleared by kvm_sync_page() on success */
 };
 
 #define KVMPGFLAG(name)							\
@@ -773,5 +776,6 @@ static inline int kvm_test_clear_pg_##na
 	{ return test_and_clear_bit(KVM_PG_##name, &sp->flags); }
 
 KVMPGFLAG(global);
+KVMPGFLAG(unsync);
 
 #endif

--