From mboxrd@z Thu Jan 1 00:00:00 1970 From: Izik Eidus Subject: [PATCH 3/4] (resend) Swapping Date: Sun, 14 Oct 2007 01:20:44 +0200 Message-ID: <471152CC.4080007@qumranet.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020105060907040905070100" To: kvm-devel Return-path: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org Errors-To: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org List-Id: kvm.vger.kernel.org This is a multi-part message in MIME format. --------------020105060907040905070100 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit ok, i changed some things as Anthony pointed out to me, but the most important thing that i changed here is setting the dirty bit after using the page (i forgot to do it before) --------------020105060907040905070100 Content-Type: text/x-patch; name="0009-make-the-guest-non-shadowed-memory-swappable.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename*0="0009-make-the-guest-non-shadowed-memory-swappable.patch" >>From 9088fb6a35517bdafe7a7066f60fab56f98c9127 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Sun, 14 Oct 2007 01:12:05 +0200 Subject: [PATCH] make the guest non shadowed memory swappable Signed-off-by: Izik Eidus --- drivers/kvm/kvm.h | 2 + drivers/kvm/kvm_main.c | 81 +++++++++++++++++++++++++-------------------- drivers/kvm/mmu.c | 23 ++++++++++++- drivers/kvm/paging_tmpl.h | 27 +++++++++++++-- 4 files changed, 93 insertions(+), 40 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index a155c2b..74b427f 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -409,6 +409,7 @@ struct kvm_memory_slot { unsigned long *rmap; unsigned long *dirty_bitmap; int user_alloc; /* user allocated memory */ + unsigned long userspace_addr; }; struct kvm { @@ -570,6 +571,7 @@ extern struct page *bad_page; int
is_error_page(struct page *page); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); +void kvm_release_page(struct page *page); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index bfa201c..f58d49b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -319,19 +319,6 @@ static struct kvm *kvm_create_vm(void) return kvm; } -static void kvm_free_userspace_physmem(struct kvm_memory_slot *free) -{ - int i; - - for (i = 0; i < free->npages; ++i) { - if (free->phys_mem[i]) { - if (!PageReserved(free->phys_mem[i])) - SetPageDirty(free->phys_mem[i]); - page_cache_release(free->phys_mem[i]); - } - } -} - static void kvm_free_kernel_physmem(struct kvm_memory_slot *free) { int i; @@ -349,9 +336,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, { if (!dont || free->phys_mem != dont->phys_mem) if (free->phys_mem) { - if (free->user_alloc) - kvm_free_userspace_physmem(free); - else + if (!free->user_alloc) kvm_free_kernel_physmem(free); vfree(free->phys_mem); } @@ -771,19 +756,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, memset(new.phys_mem, 0, npages * sizeof(struct page *)); memset(new.rmap, 0, npages * sizeof(*new.rmap)); if (user_alloc) { - unsigned long pages_num; - new.user_alloc = 1; - down_read(&current->mm->mmap_sem); - - pages_num = get_user_pages(current, current->mm, - mem->userspace_addr, - npages, 1, 0, new.phys_mem, - NULL); - - up_read(&current->mm->mmap_sem); - if (pages_num != npages) - goto out_unlock; + new.userspace_addr = mem->userspace_addr; } else { for (i = 0; i < npages; ++i) { new.phys_mem[i] = alloc_page(GFP_HIGHUSER @@ -1058,12 +1032,39 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) gfn = unalias_gfn(kvm, gfn); slot = __gfn_to_memslot(kvm, gfn); - if (!slot) + if (!slot) { +
get_page(bad_page); return bad_page; + } + if (slot->user_alloc) { + struct page *page[1]; + int npages; + + down_read(&current->mm->mmap_sem); + npages = get_user_pages(current, current->mm, + slot->userspace_addr + + (gfn - slot->base_gfn) * PAGE_SIZE, 1, + 1, 0, page, NULL); + up_read(&current->mm->mmap_sem); + if (npages != 1) { + get_page(bad_page); + return bad_page; + } + return page[0]; + } + get_page(slot->phys_mem[gfn - slot->base_gfn]); return slot->phys_mem[gfn - slot->base_gfn]; } EXPORT_SYMBOL_GPL(gfn_to_page); +void kvm_release_page(struct page *page) +{ + if (!PageReserved(page)) + SetPageDirty(page); + put_page(page); +} +EXPORT_SYMBOL_GPL(kvm_release_page); + static int next_segment(unsigned long len, int offset) { if (len > PAGE_SIZE - offset) @@ -1079,13 +1080,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, struct page *page; page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) + if (is_error_page(page)) { + kvm_release_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memcpy(data, page_virt + offset, len); kunmap_atomic(page_virt, KM_USER0); + kvm_release_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_read_guest_page); @@ -1117,14 +1121,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, struct page *page; page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) + if (is_error_page(page)) { + kvm_release_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memcpy(page_virt + offset, data, len); kunmap_atomic(page_virt, KM_USER0); mark_page_dirty(kvm, gfn); + kvm_release_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_write_guest_page); @@ -1155,13 +1162,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) struct page *page; page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) + if (is_error_page(page)) { + kvm_release_page(page); return -EFAULT; + } page_virt = kmap_atomic(page, KM_USER0); memset(page_virt + offset, 0, len);
kunmap_atomic(page_virt, KM_USER0); + kvm_release_page(page); return 0; } EXPORT_SYMBOL_GPL(kvm_clear_guest_page); @@ -2090,8 +2100,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, for (i = 0; i < nr_pages; ++i) { mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); - if (page) - get_page(page); vcpu->pio.guest_pages[i] = page; mutex_unlock(&vcpu->kvm->lock); if (!page) { @@ -3081,9 +3089,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = gfn_to_page(kvm, pgoff); - if (is_error_page(page)) + if (is_error_page(page)) { + kvm_release_page(page); return NOPAGE_SIGBUS; - get_page(page); + } if (type != NULL) *type = VM_FAULT_MINOR; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index e6a9b4a..4c91c84 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -425,6 +425,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) if (!is_rmap_pte(*spte)) return; page = page_header(__pa(spte)); + kvm_release_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> + PAGE_SHIFT)); rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); @@ -907,6 +909,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; rmap_add(vcpu, &table[index], v >> PAGE_SHIFT); + if (!is_rmap_pte(table[index])) { + struct page *page; + + page = pfn_to_page((v & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT); + kvm_release_page(page); + } return 0; } @@ -920,7 +929,12 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) v, level - 1, 1, 3, &table[index]); if (!new_table) { + struct page *page; + pgprintk("nonpaging_map: ENOMEM\n"); + page = pfn_to_page((v & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT); + kvm_release_page(page); return -ENOMEM; } @@ -1035,8 +1049,11 @@ static int 
nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK); - if (is_error_hpa(paddr)) + if (is_error_hpa(paddr)) { + kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return 1; + } return nonpaging_map(vcpu, addr & PAGE_MASK, paddr); } @@ -1503,6 +1520,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, } else { gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va); hpa_t hpa = gpa_to_hpa(vcpu, gpa); + struct page *page; if (is_shadow_present_pte(ent) && (ent & PT64_BASE_ADDR_MASK) != hpa) @@ -1515,6 +1533,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, && !is_error_hpa(hpa)) printk(KERN_ERR "audit: (%s) notrap shadow," " valid guest gva %lx\n", audit_msg, va); + page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT); + kvm_release_page(page); } } diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 58fd35a..600b1cc 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -175,6 +175,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->inherited_ar &= walker->table[index]; table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; kunmap_atomic(walker->table, KM_USER0); + kvm_release_page(walker->page); walker->page = gfn_to_page(vcpu->kvm, table_gfn); walker->table = kmap_atomic(walker->page, KM_USER0); --walker->level; @@ -183,8 +184,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->level - 1, table_gfn); } walker->pte = *ptep; - if (walker->page) + if (walker->page) { walker->ptep = NULL; + kvm_release_page(walker->page); + } if (walker->table) kunmap_atomic(walker->table, KM_USER0); pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); @@ -206,6 +209,8 @@ err: walker->error_code |= PFERR_FETCH_MASK; if (walker->table) kunmap_atomic(walker->table, KM_USER0); + if (walker->page) + kvm_release_page(walker->page); return 0; } @@ -249,6 +254,8 @@ static void 
FNAME(set_pte_common)(struct kvm_vcpu *vcpu, if (is_error_hpa(paddr)) { set_shadow_pte(shadow_pte, shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK); + kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); return; } @@ -286,9 +293,20 @@ unshadowed: pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte); set_shadow_pte(shadow_pte, spte); page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); - if (!was_rmapped) + if (!was_rmapped) { rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + if (!is_rmap_pte(*shadow_pte)) { + struct page *page; + + page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT); + kvm_release_page(page); + } + } + else + kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK) + >> PAGE_SHIFT)); if (!ptwrite || !*ptwrite) vcpu->last_pte_updated = shadow_pte; } @@ -512,19 +530,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, { int i; pt_element_t *gpt; + struct page *page; if (sp->role.metaphysical || PTTYPE == 32) { nonpaging_prefetch_page(vcpu, sp); return; } - gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0); + page = gfn_to_page(vcpu->kvm, sp->gfn); + gpt = kmap_atomic(page, KM_USER0); for (i = 0; i < PT64_ENT_PER_PAGE; ++i) if (is_present_pte(gpt[i])) sp->spt[i] = shadow_trap_nonpresent_pte; else sp->spt[i] = shadow_notrap_nonpresent_pte; kunmap_atomic(gpt, KM_USER0); + kvm_release_page(page); } #undef pt_element_t -- 1.5.2.4 --------------020105060907040905070100 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. 
Download your FREE copy of Splunk now >> http://get.splunk.com/ --------------020105060907040905070100 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ kvm-devel mailing list kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org https://lists.sourceforge.net/lists/listinfo/kvm-devel --------------020105060907040905070100--