From: Jaeyong Yoo <jaeyong.yoo@samsung.com>
To: xen-devel@lists.xen.org
Cc: Jaeyong Yoo <jaeyong.yoo@samsung.com>
Subject: [PATCH v3 09/10] xen/arm: Implement hypercall for dirty page tracing (shadow op)
Date: Thu, 01 Aug 2013 21:57:52 +0900
Message-ID: <1375361873-32145-10-git-send-email-jaeyong.yoo@samsung.com>
In-Reply-To: <1375361873-32145-1-git-send-email-jaeyong.yoo@samsung.com>
Add hypercall support (shadow op: enable/disable and clean/peek the dirtied page bitmap).
To generate the dirty bitmap, we loop over the Xen page table entries mapped to the guest
p2m, so we do not need to map/unmap domain pages for the guest p2m.
To unmap the guest p2m slotted into Xen's page table once live migration has finished,
we introduce add_mapped_vaddr() to record the write-faulting addresses; the actual
unmapping happens in destroy_all_mapped_vaddrs().
Signed-off-by: Jaeyong Yoo <jaeyong.yoo@samsung.com>
---
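Not part of the patch itself, just a reviewer aid: a minimal sketch of how a dom0
toolstack could drive the new shadow ops. The dirty_pass() helper is hypothetical, and
the libxc calls (xc_shadow_control() and the hypercall-buffer helpers) are assumed to
keep their usual xenctrl.h signatures; please check your tree before reusing this.

/* Hedged sketch: enable log-dirty tracing, then do one clean pass. */
#include <xenctrl.h>

static int dirty_pass(xc_interface *xch, uint32_t domid, unsigned long nr_pfns)
{
    DECLARE_HYPERCALL_BUFFER(uint8_t, dirty_bitmap);
    xc_shadow_op_stats_t stats;
    int rc;

    /* one bit per guest pfn, rounded up to whole bytes */
    dirty_bitmap = xc_hypercall_buffer_alloc(xch, dirty_bitmap,
                                             (nr_pfns + 7) / 8);
    if ( dirty_bitmap == NULL )
        return -1;

    /* switch the guest p2m to read-only and start tracing */
    rc = xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0, NULL, 0, NULL);
    if ( rc < 0 )
        goto out;

    /* fetch the dirty bitmap and reset it for the next round */
    rc = xc_shadow_control(xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                           HYPERCALL_BUFFER(dirty_bitmap), nr_pfns,
                           NULL, 0, &stats);
out:
    xc_hypercall_buffer_free(xch, dirty_bitmap);
    return rc;
}

The real caller lives in the toolstack patch later in this series; the above only
illustrates the intended call sequence.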
xen/arch/arm/domain.c | 7 ++
xen/arch/arm/domctl.c | 13 ++
xen/arch/arm/mm.c | 95 ++++++++++++++
xen/arch/arm/p2m.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++
xen/include/asm-arm/mm.h | 1 +
xen/include/asm-arm/p2m.h | 4 +
6 files changed, 427 insertions(+)
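One more note for reviewers: get_dirty_bitmap() below splits a dirty-bit number into a
bitmap page index and a bit offset within that page using a PAGE_SHIFT + 3 shift. The
same arithmetic restated standalone (illustrative only; dirty_bit_to_slot() is a
hypothetical name and 4K pages are assumed):

#include <stdint.h>

#define PAGE_SHIFT 12                                   /* 4K pages assumed */
#define BITS_PER_BITMAP_PAGE (1ul << (PAGE_SHIFT + 3))  /* PAGE_SIZE * 8 */

/* map a dirty-bit number onto (bitmap page, bit within that page) */
static inline void dirty_bit_to_slot(unsigned long bit_offset,
                                     unsigned int *page_index,
                                     unsigned int *bit_in_page)
{
    *page_index  = bit_offset >> (PAGE_SHIFT + 3);
    *bit_in_page = bit_offset & (BITS_PER_BITMAP_PAGE - 1);
}

With 4K pages each bitmap page holds 32768 bits and therefore tracks 128MB of guest RAM,
which is why the bitmap[256] array in log_dirty_op() is enough for 32GB.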
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index e9cfc81..b629988 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -512,6 +512,13 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
spin_lock_init(&d->arch.map_lock);
d->arch.map_domain.nr_banks = 0;
+ /* init for dirty-page tracing */
+ d->arch.dirty.count = 0;
+ d->arch.dirty.gmfn_guest_start = 0;
+ d->arch.dirty.vlpt_start = NULL;
+ d->arch.dirty.vlpt_end = NULL;
+ d->arch.dirty.head = NULL;
+
clear_page(d->shared_info);
share_xen_page_with_guest(
virt_to_page(d->shared_info), d, XENSHARE_writable);
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 9cfb48a..87c5184 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -93,6 +93,19 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
xfree(c.data);
}
break;
+ case XEN_DOMCTL_shadow_op:
+ {
+ domain_pause(d);
+ ret = dirty_mode_op(d, &domctl->u.shadow_op);
+ domain_unpause(d);
+
+ if ( domctl->u.shadow_op.op == XEN_DOMCTL_SHADOW_OP_CLEAN ||
+ domctl->u.shadow_op.op == XEN_DOMCTL_SHADOW_OP_PEEK )
+ {
+ copyback = 1;
+ }
+ }
+ break;
default:
return -EINVAL;
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index a24afe6..cd7bdff 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1304,6 +1304,9 @@ int handle_page_fault(struct domain *d, paddr_t addr)
pte.pt.table = 1; /* 4k mappings always have this bit set */
write_pte(&xen_third[xen_third_table], pte);
flush_xen_data_tlb_range_va(va, PAGE_SIZE);
+
+ /* record the address so this mapping can be removed when tracing is torn down */
+ add_mapped_vaddr(d, va);
}
/* at this point, xen third level pt has valid entry: means we can access
@@ -1322,6 +1325,98 @@ out:
return rc;
}
+int get_dirty_bitmap(struct domain *d, uint8_t *bitmap[], int peek, int clean)
+{
+ vaddr_t vlpt_start = (vaddr_t)d->arch.dirty.vlpt_start;
+ vaddr_t vlpt_end = (vaddr_t)d->arch.dirty.vlpt_end;
+ int xen_second_linear_start, xen_second_linear_end;
+ int xen_third_table_start, xen_third_table_end;
+ int i1, i2, i3;
+
+ xen_second_linear_start = second_linear_offset((unsigned long)vlpt_start);
+ xen_second_linear_end = second_linear_offset((unsigned long)vlpt_end) + 1;
+
+ for ( i1 = xen_second_linear_start; i1 < xen_second_linear_end; i1++ )
+ {
+ vaddr_t xen_second_start_va;
+ int i1_offset = 0;
+ lpae_t *xen_third;
+
+ /* if the xen_second entry is not valid, the corresponding
+ * region has not been dirtied, so there is nothing to do */
+ if ( !xen_second[i1].pt.valid )
+ continue;
+
+ xen_second_start_va = i1 << (LPAE_SHIFT + PAGE_SHIFT);
+
+ /* since the VLPT only partially overlaps this xen_second entry,
+ we need to find the starting index of the third-level table */
+ if ( vlpt_start > xen_second_start_va )
+ {
+ xen_third_table_start = third_table_offset(vlpt_start);
+ i1_offset = (vlpt_start - xen_second_start_va) / sizeof(lpae_t);
+ }
+ else
+ xen_third_table_start = 0;
+
+ if ( vlpt_end < xen_second_start_va +
+ (1ul << (LPAE_SHIFT + PAGE_SHIFT)) )
+ xen_third_table_end = third_table_offset(vlpt_end) + 1;
+ else
+ xen_third_table_end = LPAE_ENTRIES;
+
+ xen_third = __va(pfn_to_paddr(xen_second[i1].pt.base));
+
+ for ( i2 = xen_third_table_start; i2 < xen_third_table_end; i2 ++ )
+ {
+ lpae_t *guest_third;
+ if ( !xen_third[i2].pt.valid )
+ continue;
+
+ guest_third = (lpae_t *)((i1 << (LPAE_SHIFT+PAGE_SHIFT))
+ + (i2 << PAGE_SHIFT));
+ for ( i3 = 0; i3 < LPAE_ENTRIES; i3++ )
+ {
+ lpae_t pte;
+ lpae_walk_t third_pte = guest_third[i3].walk;
+ int write = 0;
+ int bit_offset;
+ if ( !third_pte.valid )
+ return -EINVAL;
+
+ pte = guest_third[i3];
+ if ( peek && pte.p2m.avail )
+ {
+ int bitmap_index;
+ int bitmap_offset;
+ bit_offset = (i1 - xen_second_linear_start) *
+ LPAE_ENTRIES * LPAE_ENTRIES +
+ i2 * LPAE_ENTRIES +
+ i3 -
+ i1_offset;
+
+ bitmap_index = bit_offset >> (PAGE_SHIFT + 3);
+ bitmap_offset = bit_offset & ((1ul << (PAGE_SHIFT + 3)) -
+ 1);
+ __test_and_set_bit(bitmap_offset, bitmap[bitmap_index]);
+ write = 1;
+ }
+ if ( clean && pte.p2m.write )
+ {
+ pte.p2m.write = 0;
+ pte.p2m.avail = 0;
+ write = 1;
+ }
+ if ( write )
+ write_pte(&guest_third[i3], pte);
+ }
+ }
+ }
+
+ flush_tlb_all_local();
+ return 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 307c6d4..c62a383 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -5,6 +5,9 @@
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/gic.h>
+#include <asm/vlpt.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
void dump_p2m_lookup(struct domain *d, paddr_t addr)
{
@@ -345,6 +348,310 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
return p >> PAGE_SHIFT;
}
+static int alloc_vlpt_for_p2m(struct domain *d)
+{
+ unsigned long gmfn_start = 0, gmfn_end = 0, gmfns, pgts_3rd;
+ void *vlpt_start, *vlpt_end;
+ int nr_banks;
+
+ spin_lock(&d->arch.map_lock);
+ /* The guest memory map must be ordered by start addr */
+ nr_banks = d->arch.map_domain.nr_banks;
+ if ( nr_banks )
+ {
+ gmfn_start = d->arch.map_domain.bank[0].start >> PAGE_SHIFT;
+ gmfn_end = (d->arch.map_domain.bank[nr_banks - 1].start +
+ d->arch.map_domain.bank[nr_banks - 1].size) >> PAGE_SHIFT;
+ }
+ spin_unlock(&d->arch.map_lock);
+ gmfns = gmfn_end - gmfn_start;
+ pgts_3rd = (gmfns + LPAE_ENTRIES - 1) >> LPAE_SHIFT;
+
+ vlpt_start = vlpt_alloc(pgts_3rd, 1);
+
+ if ( !vlpt_start )
+ {
+ printk("Out of memory for allocating VLPT mapping\n");
+ goto out;
+ }
+
+ vlpt_end = vlpt_start + pgts_3rd*PAGE_SIZE;
+
+ d->arch.dirty.vlpt_start = vlpt_start;
+ d->arch.dirty.vlpt_end = vlpt_end;
+
+ d->arch.dirty.head = NULL;
+
+ return 0;
+out:
+ if ( vlpt_start ) vlpt_free(vlpt_start);
+ return -ENOMEM;
+}
+
+#define MAX_VA_PER_NODE ((PAGE_SIZE - sizeof(struct page_info *) -\
+ sizeof(int)) / sizeof(unsigned long))
+
+/* an array-based linked list storing virtual addresses whose
+ * third-level mappings should be destroyed after
+ * live migration completes */
+struct mapped_va_node
+{
+ struct page_info *next;
+ int items;
+ unsigned long vaddrs[MAX_VA_PER_NODE];
+};
+
+int add_mapped_vaddr(struct domain *d, unsigned long va)
+{
+ struct page_info *head_page = d->arch.dirty.head;
+ struct mapped_va_node *mvn = NULL;
+
+ if ( !head_page )
+ {
+ head_page = alloc_domheap_page(NULL, 0);
+ if ( !head_page )
+ return -ENOMEM;
+
+ mvn = __map_domain_page(head_page);
+ mvn->items = 0;
+ mvn->next = NULL;
+ d->arch.dirty.head = head_page;
+ }
+
+ if ( !mvn )
+ mvn = __map_domain_page(head_page);
+
+ if ( mvn->items == MAX_VA_PER_NODE )
+ {
+ struct page_info *page;
+ unmap_domain_page(mvn);
+
+ page = alloc_domheap_page(NULL, 0);
+ if ( !page )
+ return -ENOMEM;
+
+ mvn = __map_domain_page(page);
+ mvn->items = 0;
+ mvn->next = head_page;
+
+ d->arch.dirty.head = page;
+ }
+
+ mvn->vaddrs[mvn->items] = va;
+ mvn->items ++;
+
+ unmap_domain_page(mvn);
+ return 0;
+}
+
+static void destroy_all_mapped_vaddrs(struct domain *d)
+{
+ struct page_info *head_page = d->arch.dirty.head;
+ struct mapped_va_node *mvn = NULL;
+
+ while ( head_page )
+ {
+ int i;
+ mvn = __map_domain_page(head_page);
+ head_page = mvn->next;
+
+ for ( i = 0; i < mvn->items; ++i )
+ destroy_xen_mappings(mvn->vaddrs[i], mvn->vaddrs[i] + PAGE_SIZE);
+
+ unmap_domain_page(mvn);
+ }
+
+ d->arch.dirty.head = NULL;
+}
+
+static void free_vlpt_for_p2m(struct domain *d)
+{
+ destroy_all_mapped_vaddrs(d);
+
+ vlpt_free(d->arch.dirty.vlpt_start);
+ d->arch.dirty.vlpt_start = NULL;
+ d->arch.dirty.vlpt_end = NULL;
+ d->arch.dirty.head = NULL;
+}
+
+/* Change types across all p2m entries in a domain */
+static void p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{
+ struct p2m_domain *p2m = &d->arch.p2m;
+ uint64_t ram_base = 0;
+ int i1, i2, i3;
+ int first_index, second_index, third_index;
+ lpae_t *first = __map_domain_page(p2m->first_level);
+ lpae_t pte, *second = NULL, *third = NULL;
+
+ spin_lock(&d->arch.map_lock);
+ /* Assume the first memory bank's base is the guest's RAM base */
+ if ( d->arch.map_domain.nr_banks )
+ ram_base = d->arch.map_domain.bank[0].start;
+ spin_unlock(&d->arch.map_lock);
+ first_index = first_table_offset(ram_base);
+ second_index = second_table_offset(ram_base);
+ third_index = third_table_offset(ram_base);
+
+ BUG_ON( !ram_base && "RAM base is undefined" );
+ BUG_ON( !first && "Can't map first level p2m." );
+
+ spin_lock(&p2m->lock);
+
+ for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 )
+ {
+ lpae_walk_t first_pte = first[i1].walk;
+ if ( !first_pte.valid || !first_pte.table )
+ goto out;
+
+ second = map_domain_page(first_pte.base);
+ BUG_ON( !second && "Can't map second level p2m.");
+ for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 )
+ {
+ lpae_walk_t second_pte = second[i2].walk;
+ if ( !second_pte.valid || !second_pte.table )
+ goto out;
+
+ third = map_domain_page(second_pte.base);
+ BUG_ON( !third && "Can't map third level p2m.");
+
+ for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 )
+ {
+ lpae_walk_t third_pte = third[i3].walk;
+ int write = 0;
+ if ( !third_pte.valid )
+ goto out;
+
+ pte = third[i3];
+ if ( pte.p2m.write == 1 && nt == mg_ro )
+ {
+ pte.p2m.write = 0;
+ write = 1;
+ }
+ else if ( pte.p2m.write == 0 && nt == mg_rw )
+ {
+ pte.p2m.write = 1;
+ write = 1;
+ }
+ if ( write )
+ write_pte(&third[i3], pte);
+ }
+ unmap_domain_page(third);
+
+ third = NULL;
+ third_index = 0;
+ }
+ unmap_domain_page(second);
+
+ second = NULL;
+ second_index = 0;
+ third_index = 0;
+ }
+
+out:
+ flush_tlb_all_local();
+ if ( third ) unmap_domain_page(third);
+ if ( second ) unmap_domain_page(second);
+ if ( first ) unmap_domain_page(first);
+
+ spin_unlock(&p2m->lock);
+}
+
+/* Read a domain's log-dirty bitmap and stats.
+ * If the operation is a CLEAN, clear the bitmap and stats. */
+int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+ unsigned long gmfn_start;
+ unsigned long gmfn_end;
+ unsigned long gmfns;
+ unsigned int bitmap_pages;
+ int rc = 0, clean = 0, peek = 1;
+ uint8_t *bitmap[256]; /* bitmap[256] covers 32GB ram */
+ int i;
+
+ BUG_ON( !d->arch.map_domain.nr_banks );
+
+ gmfn_start = d->arch.map_domain.bank[0].start >> PAGE_SHIFT;
+ gmfn_end = domain_get_maximum_gpfn(d);
+ gmfns = gmfn_end - gmfn_start;
+ bitmap_pages = PFN_UP((gmfns + 7) / 8);
+
+ if ( guest_handle_is_null(sc->dirty_bitmap) )
+ {
+ peek = 0;
+ }
+ else
+ {
+ /* prepare a mapping to the bitmap from guest param */
+ vaddr_t to = (vaddr_t)sc->dirty_bitmap.p; /* TODO: use macro */
+
+ BUG_ON( to & ~PAGE_MASK && "offset not aligned to PAGE SIZE");
+
+ for ( i = 0; i < bitmap_pages; ++i )
+ {
+ paddr_t g;
+ rc = gvirt_to_maddr(to, &g);
+ if ( rc )
+ return rc;
+ bitmap[i] = map_domain_page(g>>PAGE_SHIFT);
+ memset(bitmap[i], 0x00, PAGE_SIZE);
+ to += PAGE_SIZE;
+ }
+ }
+
+ clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+ sc->stats.dirty_count = d->arch.dirty.count;
+
+ spin_lock(&d->arch.dirty.lock);
+
+ get_dirty_bitmap(d, bitmap, peek, clean);
+
+ if ( peek )
+ {
+ for ( i = 0; i < bitmap_pages; ++i )
+ {
+ unmap_domain_page(bitmap[i]);
+ }
+ }
+ spin_unlock(&d->arch.dirty.lock);
+
+ return 0;
+}
+
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+ long ret = 0;
+ switch (sc->op)
+ {
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ {
+ enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
+
+ d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
+ p2m_change_entry_type_global(d, nt);
+
+ if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
+ free_vlpt_for_p2m(d);
+ else
+ ret = alloc_vlpt_for_p2m(d);
+ }
+ break;
+
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+ {
+ ret = log_dirty_op(d, sc);
+ }
+ break;
+
+ default:
+ return -ENOSYS;
+ }
+ return ret;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index fd976e3..be67349 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -332,6 +332,7 @@ enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
/* routine for dirty-page tracing */
int handle_page_fault(struct domain *d, paddr_t addr);
+int get_dirty_bitmap(struct domain *d, uint8_t *bitmap[], int peek, int clean);
#endif /* __ARCH_ARM_MM__ */
/*
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index a00069b..fe33360 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -2,6 +2,7 @@
#define _XEN_P2M_H
#include <xen/mm.h>
+#include <public/domctl.h>
struct domain;
@@ -107,6 +108,9 @@ static inline int get_page_and_type(struct page_info *page,
return rc;
}
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
+int add_mapped_vaddr(struct domain *d, unsigned long va);
+
#endif /* _XEN_P2M_H */
/*
--
1.8.1.2