From: Wen Congyang <wency@cn.fujitsu.com>
To: Dong Eddie <eddie.dong@intel.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
xen-devel <xen-devel@lists.xen.org>,
Shriram Rajagopalan <rshriram@cs.ubc.ca>
Cc: Jiang Yunhong <yunhong.jiang@intel.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Ye Wei <wei.ye1987@gmail.com>, Xu Yao <xuyao.xu@huawei.com>,
Hong Tao <bobby.hong@huawei.com>
Subject: [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory
Date: Thu, 11 Jul 2013 16:35:40 +0800
Message-ID: <1373531748-12547-9-git-send-email-wency@cn.fujitsu.com>
In-Reply-To: <1373531748-12547-1-git-send-email-wency@cn.fujitsu.com>
This patch implements the restore callbacks for colo:
1. flush_memory():
We update the memory as follows (a sketch of the sequence appears below):
a. pin the non-dirty L1 pagetables
b. unpin all pagetables except the non-dirty L1 tables
c. update the memory
d. re-pin the page tables
e. unpin the non-dirty L1 pagetables
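
A minimal sketch of this sequence, mirroring colo_flush_memory() in the
patch below; the rationale in the comments is an interpretation of why
the ordering matters, not text taken from the code itself:

    int flush_memory_sketch(struct restore_data *c,
                            struct restore_colo_data *d)
    {
        /* a. Pin the non-dirty L1 tables first: once the upper-level
         *    tables are unpinned in step b, these pins keep the
         *    unchanged L1 pages validated as pagetables, so step d
         *    does not have to re-validate them. */
        if (pin_l1(c, d))
            return -1;

        /* b. Unpin everything else (dirty L1s and all L2/L3/L4
         *    tables): a page cannot be mapped writable while Xen
         *    still holds a pagetable type reference on it. */
        if (unpin_pagetable(c, d))
            return -1;

        /* c. Copy the master's dirty pages over the slaver's memory
         *    and queue the matching machphys updates. */
        if (update_memory(c, d))
            return -1;

        /* d. Re-pin the tables the master has pinned, then
         * e. drop the temporary pins taken in step a. */
        if (pin_pagetable(c, d) || unpin_l1(c, d))
            return -1;

        return 0;
    }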
Signed-off-by: Ye Wei <wei.ye1987@gmail.com>
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
tools/libxc/xc_domain_restore_colo.c | 372 ++++++++++++++++++++++++++++++++++
tools/libxc/xc_save_restore_colo.h | 1 +
2 files changed, 373 insertions(+), 0 deletions(-)
diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 77b63b6..50009fa 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -152,3 +152,375 @@ char* colo_get_page(struct restore_data *comm_data, void *data,
set_bit(pfn, colo_data->dirty_pages);
return colo_data->pagebase + pfn * PAGE_SIZE;
}
+
+/* Step 1:
+ *
+ * Pin the non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ * mL1: L1 pages on the master side
+ * sL1: L1 pages on the slaver side
+ */
+static int pin_l1(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* don't pin what is already pinned */
+                continue;
+
+ if (test_bit(i, dirty_pages))
+ /* don't pin dirty */
+ continue;
+
+            /* At this point the page must also be an L1 table on the
+             * slaver side, otherwise it would be dirty. (add test
+             * code?)
+             */
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 2:
+ *
+ * Unpin all pagetables except the non-dirty L1 tables:
+ *     sL2 + sL3 + sL4 + (dirty_pages & sL1)
+ * sL1: L1 pages on the slaver side
+ * sL2: L2 pages on the slaver side
+ * sL3: L3 pages on the slaver side
+ * sL4: L4 pages on the slaver side
+ */
+static int unpin_pagetable(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ if ( (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+ if (!test_bit(i, dirty_pages))
+ /* it is in (~dirty_pages & mL1), keep it */
+ continue;
+ /* fallthrough */
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to unpin batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to unpin batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 3:
+ *
+ * We have unpinned all pagetables except the non-dirty L1 tables, so it
+ * is now safe to map the dirty memory writable and update it.
+ */
+static int update_memory(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned long pfn;
+ unsigned long max_mem_pfn = colo_data->max_mem_pfn;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ unsigned long pagetype;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ struct xc_mmu *mmu = comm_data->mmu;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+ char *pagebase = colo_data->pagebase;
+ int pfn_err = 0;
+ char *region_base_slaver;
+ xen_pfn_t region_mfn_slaver;
+ unsigned long mfn;
+ char *pagebuff;
+
+ for (pfn = 0; pfn < max_mem_pfn; pfn++) {
+ if (!test_bit(pfn, dirty_pages))
+ continue;
+
+ pagetype = pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+ if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
+ /* a bogus/unmapped page: skip it */
+ continue;
+
+ mfn = comm_data->p2m[pfn];
+ region_mfn_slaver = mfn;
+ region_base_slaver = xc_map_foreign_bulk(xch, dom,
+ PROT_WRITE,
+                                                 &region_mfn_slaver,
+ &pfn_err, 1);
+ if (!region_base_slaver || pfn_err) {
+ PERROR("update_memory: xc_map_foreign_bulk failed");
+ return 1;
+ }
+
+ pagebuff = (char *)(pagebase + pfn * PAGE_SIZE);
+ memcpy(region_base_slaver, pagebuff, PAGE_SIZE);
+ munmap(region_base_slaver, PAGE_SIZE);
+
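+        /* Re-assert the M2P (machine-to-physical) entry for this page;
+         * the updates are queued in the mmu buffer and flushed in one
+         * batch after the loop. */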
+ if (xc_add_mmu_update(xch, mmu, (((uint64_t)mfn) << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE, pfn) )
+ {
+            PERROR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
+ return 1;
+ }
+ }
+
+    /*
+     * Flush all queued machphys updates before the page tables are
+     * re-pinned below.
+     */
+ if (xc_flush_mmu_updates(xch, mmu))
+ {
+ PERROR("Error doing flush_mmu_updates()");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 4:
+ *
+ * Pin the page tables that the master has pinned. Do this after writing
+ * to them, as Xen would otherwise fail the type-checking.
+ */
+static int pin_pagetable(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for ( i = 0; i < dinfo->p2m_size; i++ )
+ {
+ if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+ continue;
+
+ switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1) (= ~dirty_pages & sL1),
+                 * already pinned in step 1
+                 */
+                continue;
+
+ pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+ break;
+
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+ PERROR("Failed to pin batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Step 5:
+ *
+ * Unpin the no-longer-needed non-dirty L1 pagetables pinned in step 1:
+ * ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ */
+static int unpin_l1(struct restore_data *comm_data,
+ struct restore_colo_data *colo_data)
+{
+ unsigned int nr_pins = 0;
+ unsigned long i;
+ struct mmuext_op pin[MAX_PIN_BATCH];
+ struct domain_info_context *dinfo = comm_data->dinfo;
+ unsigned long *pfn_type = comm_data->pfn_type;
+ uint32_t dom = comm_data->dom;
+ xc_interface *xch = comm_data->xch;
+ unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+ unsigned long *dirty_pages = colo_data->dirty_pages;
+
+ for (i = 0; i < dinfo->p2m_size; i++)
+ {
+ switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+ {
+ case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* still pinned on the master side: keep it */
+                continue;
+            if (test_bit(i, dirty_pages))
+                /* dirty, so not pinned by step 1 */
+                continue;
+
+ pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+ break;
+
+ case XEN_DOMCTL_PFINFO_L2TAB:
+ case XEN_DOMCTL_PFINFO_L3TAB:
+ case XEN_DOMCTL_PFINFO_L4TAB:
+ default:
+ continue;
+ }
+
+ pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+ nr_pins++;
+
+ /* Batch full? Then flush. */
+ if (nr_pins == MAX_PIN_BATCH)
+ {
+ if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+ {
+                PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+ nr_pins = 0;
+ }
+ }
+
+ /* Flush final partial batch. */
+ if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+ {
+        PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+ return 1;
+ }
+
+ return 0;
+}
+
+int colo_flush_memory(struct restore_data *comm_data, void *data)
+{
+ struct restore_colo_data *colo_data = data;
+ xc_interface *xch = comm_data->xch;
+ uint32_t dom = comm_data->dom;
+ DECLARE_HYPERCALL;
+
+ if (!colo_data->first_time)
+ {
+        /* Reset the vcpu before loading the new checkpoint; the
+         * reset_vcpu hypercall is introduced in patch 01 of this
+         * series. */
+ hypercall.op = __HYPERVISOR_reset_vcpu_op;
+ hypercall.arg[0] = (unsigned long)dom;
+ do_xen_hypercall(xch, &hypercall);
+ }
+
+ if (pin_l1(comm_data, colo_data) != 0)
+ return -1;
+ if (unpin_pagetable(comm_data, colo_data) != 0)
+ return -1;
+
+ if (update_memory(comm_data, colo_data) != 0)
+ return -1;
+
+ if (pin_pagetable(comm_data, colo_data) != 0)
+ return -1;
+ if (unpin_l1(comm_data, colo_data) != 0)
+ return -1;
+
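+    /* Record the master's page types as the slaver's new baseline, so
+     * the next round of flush_memory() computes its pin/unpin sets
+     * against the state just established. */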
+ memcpy(colo_data->pfn_type_slaver, comm_data->pfn_type,
+ comm_data->dinfo->p2m_size * sizeof(xen_pfn_t));
+
+ return 0;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 67c567c..8af75b4 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -7,5 +7,6 @@
extern int colo_init(struct restore_data *, void **);
extern void colo_free(struct restore_data *, void *);
extern char *colo_get_page(struct restore_data *, void *, unsigned long);
+extern int colo_flush_memory(struct restore_data *, void *);
#endif
--
1.7.4
Thread overview: 30+ messages
2013-07-11 8:35 [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 01/16] xen: introduce new hypercall to reset vcpu Wen Congyang
2013-07-11 9:44 ` Andrew Cooper
2013-07-11 9:58 ` Wen Congyang
2013-07-11 10:01 ` Ian Campbell
2013-08-01 11:48 ` Tim Deegan
2013-08-06 6:47 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 02/16] block-remus: introduce colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 03/16] block-remus: introduce a interface to allow the user specify which mode the backup end uses Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 04/16] dominfo.completeRestore() will be called more than once in colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 06/16] colo: implement restore_callbacks init()/free() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 07/16] colo: implement restore_callbacks get_page() Wen Congyang
2013-07-11 8:35 ` Wen Congyang [this message]
2013-07-11 8:35 ` [RFC Patch v2 09/16] colo: implement restore_callbacks update_p2m() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore() Wen Congyang
2013-07-11 9:40 ` Ian Campbell
2013-07-11 9:54 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 11/16] xc_restore: implement for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 12/16] XendCheckpoint: implement colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 13/16] xc_domain_save: flush cache before calling callbacks->postcopy() Wen Congyang
2013-07-11 13:43 ` Andrew Cooper
2013-07-12 1:36 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 14/16] add callback to configure network for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 15/16] xc_domain_save: implement save_callbacks " Wen Congyang
2013-07-11 13:52 ` Andrew Cooper
2013-07-11 8:35 ` [RFC Patch v2 16/16] remus: implement colo mode Wen Congyang
2013-07-11 9:37 ` [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Andrew Cooper
2013-07-11 9:40 ` Ian Campbell
2013-07-14 14:33 ` Shriram Rajagopalan