From: Wen Congyang <wency@cn.fujitsu.com>
To: Dong Eddie <eddie.dong@intel.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>,
	xen-devel <xen-devel@lists.xen.org>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>
Cc: Jiang Yunhong <yunhong.jiang@intel.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ye Wei <wei.ye1987@gmail.com>, Xu Yao <xuyao.xu@huawei.com>,
	Hong Tao <bobby.hong@huawei.com>
Subject: [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory
Date: Thu, 11 Jul 2013 16:35:40 +0800	[thread overview]
Message-ID: <1373531748-12547-9-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1373531748-12547-1-git-send-email-wency@cn.fujitsu.com>

This patch implements the flush_memory() restore callback for colo:
1. flush_memory():
        We update the memory as follows (see the sketch after this list):
        a. pin the non-dirty L1 pagetables
        b. unpin all pagetables except the non-dirty L1 tables
        c. update the memory
        d. re-pin the page tables
        e. unpin the unneeded non-dirty L1 pagetables
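
A simplified sketch of the resulting call sequence (error handling and
the vcpu reset performed on non-first rounds are omitted; the real
implementation is colo_flush_memory() in the diff below):

    pin_l1(comm_data, colo_data);          /* a: pin non-dirty L1 tables  */
    unpin_pagetable(comm_data, colo_data); /* b: unpin all but those L1s  */
    update_memory(comm_data, colo_data);   /* c: copy in the dirty pages  */
    pin_pagetable(comm_data, colo_data);   /* d: re-pin the page tables   */
    unpin_l1(comm_data, colo_data);        /* e: unpin unneeded L1 tables */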

Signed-off-by: Ye Wei <wei.ye1987@gmail.com>
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxc/xc_domain_restore_colo.c |  372 ++++++++++++++++++++++++++++++++++
 tools/libxc/xc_save_restore_colo.h   |    1 +
 2 files changed, 373 insertions(+), 0 deletions(-)
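
Note: all five steps share the same batching pattern around
xc_mmuext_op(). A minimal sketch of that pattern, for illustration only
(the helper name flush_pin_batch() is hypothetical; the patch
open-codes the equivalent logic in each step):

    /* Hypothetical helper, for illustration only: submit any queued
     * MMUEXT pin/unpin operations and reset the batch counter. */
    static int flush_pin_batch(xc_interface *xch, uint32_t dom,
                               struct mmuext_op *pin, unsigned int *nr_pins)
    {
        if (*nr_pins == 0)
            return 0;
        if (xc_mmuext_op(xch, pin, *nr_pins, dom) < 0)
            return 1; /* caller prints a PERROR() and bails out */
        *nr_pins = 0;
        return 0;
    }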

diff --git a/tools/libxc/xc_domain_restore_colo.c b/tools/libxc/xc_domain_restore_colo.c
index 77b63b6..50009fa 100644
--- a/tools/libxc/xc_domain_restore_colo.c
+++ b/tools/libxc/xc_domain_restore_colo.c
@@ -152,3 +152,375 @@ char* colo_get_page(struct restore_data *comm_data, void *data,
     set_bit(pfn, colo_data->dirty_pages);
     return colo_data->pagebase + pfn * PAGE_SIZE;
 }
+
+/* Step 1:
+ *
+ * pin the non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ *  mL1: L1 pages on the master side
+ *  sL1: L1 pages on the slaver side
+ */
+static int pin_l1(struct restore_data *comm_data,
+                  struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB)
+                /* skip tables that are already pinned */
+                continue;
+
+            if (test_bit(i, dirty_pages))
+                /* don't pin dirty tables */
+                continue;
+
+            /* Here the page must also be an L1 table on the slaver side;
+             * otherwise it would be dirty. (TODO: add a check?)
+             */
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 2:
+ *
+ * unpin all pagetables except the non-dirty L1 tables: sL2 + sL3 + sL4 + (dirty_pages & sL1)
+ *  sL1: L1 pages on slaver side
+ *  sL2: L2 pages on slaver side
+ *  sL3: L3 pages on slaver side
+ *  sL4: L4 pages on slaver side
+ */
+static int unpin_pagetable(struct restore_data *comm_data,
+                           struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        if ( (pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1), keep it */
+                continue;
+            /* fallthrough */
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to unpin batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to unpin batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 3: we have unpinned all pagetables except the non-dirty L1 tables,
+ * so it is now safe to map the dirty memory and update it.
+ */
+static int update_memory(struct restore_data *comm_data,
+                         struct restore_colo_data *colo_data)
+{
+    unsigned long pfn;
+    unsigned long max_mem_pfn = colo_data->max_mem_pfn;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    unsigned long pagetype;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    struct xc_mmu *mmu = comm_data->mmu;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+    char *pagebase = colo_data->pagebase;
+    int pfn_err = 0;
+    char *region_base_slaver;
+    xen_pfn_t region_mfn_slaver;
+    unsigned long mfn;
+    char *pagebuff;
+
+    for (pfn = 0; pfn < max_mem_pfn; pfn++) {
+        if (!test_bit(pfn, dirty_pages))
+            continue;
+
+        pagetype = pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTAB_MASK;
+        if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
+            /* a bogus/unmapped page: skip it */
+            continue;
+
+        mfn = comm_data->p2m[pfn];
+        region_mfn_slaver = mfn;
+        region_base_slaver = xc_map_foreign_bulk(xch, dom,
+                                                 PROT_WRITE,
+                                                 &region_mfn_slaver,
+                                                 &pfn_err, 1);
+        if (!region_base_slaver || pfn_err) {
+            PERROR("update_memory: xc_map_foreign_bulk failed");
+            return 1;
+        }
+
+        pagebuff = (char *)(pagebase + pfn * PAGE_SIZE);
+        memcpy(region_base_slaver, pagebuff, PAGE_SIZE);
+        munmap(region_base_slaver, PAGE_SIZE);
+
+        if (xc_add_mmu_update(xch, mmu, (((uint64_t)mfn) << PAGE_SHIFT)
+                              | MMU_MACHPHYS_UPDATE, pfn) )
+        {
+            PERROR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
+            return 1;
+        }
+    }
+
+    /*
+     * Ensure we flush all machphys updates before potential PAE-specific
+     * reallocations below.
+     */
+    if (xc_flush_mmu_updates(xch, mmu))
+    {
+        PERROR("Error doing flush_mmu_updates()");
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 4: pin the master's page tables.
+ * Pin the page tables. Do this after writing to them as otherwise Xen
+ * will barf when doing the type-checking.
+ */
+static int pin_pagetable(struct restore_data *comm_data,
+                         struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for ( i = 0; i < dinfo->p2m_size; i++ )
+    {
+        if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (!test_bit(i, dirty_pages))
+                /* it is in (~dirty_pages & mL1) (= ~dirty_pages & sL1),
+                 * so it is already pinned
+                 */
+                continue;
+
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+            break;
+
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to pin batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to pin batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Step 5:
+ * unpin the unneeded non-dirty L1 pagetables: ~dirty_pages & mL1 (= ~dirty_pages & sL1)
+ */
+static int unpin_l1(struct restore_data *comm_data,
+                    struct restore_colo_data *colo_data)
+{
+    unsigned int nr_pins = 0;
+    unsigned long i;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+    struct domain_info_context *dinfo = comm_data->dinfo;
+    unsigned long *pfn_type = comm_data->pfn_type;
+    uint32_t dom = comm_data->dom;
+    xc_interface *xch = comm_data->xch;
+    unsigned long *pfn_type_slaver = colo_data->pfn_type_slaver;
+    unsigned long *dirty_pages = colo_data->dirty_pages;
+
+    for (i = 0; i < dinfo->p2m_size; i++)
+    {
+        switch ( pfn_type_slaver[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            if (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) /* still needed */
+                continue;
+            if (test_bit(i, dirty_pages)) /* not pinned by step 1 */
+                continue;
+
+            pin[nr_pins].cmd = MMUEXT_UNPIN_TABLE;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = comm_data->p2m[i];
+        nr_pins++;
+
+        /* Batch full? Then flush. */
+        if (nr_pins == MAX_PIN_BATCH)
+        {
+            if (xc_mmuext_op(xch, pin, nr_pins, dom) < 0)
+            {
+                PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+                return 1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush final partial batch. */
+    if ((nr_pins != 0) && (xc_mmuext_op(xch, pin, nr_pins, dom) < 0))
+    {
+        PERROR("Failed to unpin L1 batch of %d page tables", nr_pins);
+        return 1;
+    }
+
+    return 0;
+}
+
+int colo_flush_memory(struct restore_data *comm_data, void *data)
+{
+    struct restore_colo_data *colo_data = data;
+    xc_interface *xch = comm_data->xch;
+    uint32_t dom = comm_data->dom;
+    DECLARE_HYPERCALL;
+
+    if (!colo_data->first_time)
+    {
+        /* reset the vcpu state */
+        hypercall.op = __HYPERVISOR_reset_vcpu_op;
+        hypercall.arg[0] = (unsigned long)dom;
+        do_xen_hypercall(xch, &hypercall);
+    }
+
+    if (pin_l1(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+
+    if (update_memory(comm_data, colo_data) != 0)
+        return -1;
+
+    if (pin_pagetable(comm_data, colo_data) != 0)
+        return -1;
+    if (unpin_l1(comm_data, colo_data) != 0)
+        return -1;
+
+    memcpy(colo_data->pfn_type_slaver, comm_data->pfn_type,
+           comm_data->dinfo->p2m_size * sizeof(xen_pfn_t));
+
+    return 0;
+}
diff --git a/tools/libxc/xc_save_restore_colo.h b/tools/libxc/xc_save_restore_colo.h
index 67c567c..8af75b4 100644
--- a/tools/libxc/xc_save_restore_colo.h
+++ b/tools/libxc/xc_save_restore_colo.h
@@ -7,5 +7,6 @@
 extern int colo_init(struct restore_data *, void **);
 extern void colo_free(struct restore_data *, void *);
 extern char *colo_get_page(struct restore_data *, void *, unsigned long);
+extern int colo_flush_memory(struct restore_data *, void *);
 
 #endif
-- 
1.7.4
