From: Wen Congyang <wency@cn.fujitsu.com>
To: Dong Eddie <eddie.dong@intel.com>,
Lai Jiangshan <laijs@cn.fujitsu.com>,
xen-devl <xen-devel@lists.xen.org>,
Shriram Rajagopalan <rshriram@cs.ubc.ca>
Cc: Jiang Yunhong <yunhong.jiang@intel.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Ye Wei <wei.ye1987@gmail.com>, Xu Yao <xuyao.xu@huawei.com>,
Hong Tao <bobby.hong@huawei.com>
Subject: [RFC Patch v2 12/16] XendCheckpoint: implement colo
Date: Thu, 11 Jul 2013 16:35:44 +0800 [thread overview]
Message-ID: <1373531748-12547-13-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1373531748-12547-1-git-send-email-wency@cn.fujitsu.com>
In colo mode, XendCheckpoint.py will communicate with both the master and
xc_restore. This patch implements this communication. In colo mode,
the signature is "GuestColoRestore".
Signed-off-by: Ye Wei <wei.ye1987@gmail.com>
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
tools/python/xen/xend/XendCheckpoint.py | 127 +++++++++++++++++++++---------
1 files changed, 89 insertions(+), 38 deletions(-)
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
index fa09757..ed71690 100644
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -23,8 +23,11 @@ from xen.xend.XendLogging import log
from xen.xend.XendConfig import XendConfig
from xen.xend.XendConstants import *
from xen.xend import XendNode
+from xen.xend.xenstore.xsutil import ResumeDomain
+from xen.remus import util
SIGNATURE = "LinuxGuestRecord"
+COLO_SIGNATURE = "GuestColoRestore"
QEMU_SIGNATURE = "QemuDeviceModelRecord"
dm_batch = 512
XC_SAVE = "xc_save"
@@ -203,10 +206,15 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False):
signature = read_exact(fd, len(SIGNATURE),
"not a valid guest state file: signature read")
- if signature != SIGNATURE:
+ if signature != SIGNATURE and signature != COLO_SIGNATURE:
raise XendError("not a valid guest state file: found '%s'" %
signature)
+ if signature == COLO_SIGNATURE:
+ colo = True
+ else:
+ colo = False
+
l = read_exact(fd, sizeof_int,
"not a valid guest state file: config size read")
vmconfig_size = unpack("!i", l)[0]
@@ -301,12 +309,15 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False):
cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
fd, dominfo.getDomid(),
- store_port, console_port, int(is_hvm), pae, apic, superpages])
+ store_port, console_port, int(is_hvm), pae, apic,
+ superpages, int(colo)])
log.debug("[xc_restore]: %s", string.join(cmd))
- handler = RestoreInputHandler()
+ inputHandler = RestoreInputHandler()
+ restoreHandler = RestoreHandler(fd, colo, dominfo, inputHandler,
+ restore_image)
- forkHelper(cmd, fd, handler.handler, True)
+ forkHelper(cmd, fd, inputHandler.handler, not colo, restoreHandler)
# We don't want to pass this fd to any other children -- we
# might need to recover the disk space that backs it.
@@ -321,42 +332,74 @@ def restore(xd, fd, dominfo = None, paused = False, relocating = False):
raise XendError('Could not read store MFN')
if not is_hvm and handler.console_mfn is None:
- raise XendError('Could not read console MFN')
+ raise XendError('Could not read console MFN')
+
+ restoreHandler.resume(True, paused, None)
+
+ return dominfo
+ except Exception, exn:
+ dominfo.destroy()
+ log.exception(exn)
+ raise exn
+
+
+class RestoreHandler:
+ def __init__(self, fd, colo, dominfo, inputHandler, restore_image):
+ self.fd = fd
+ self.colo = colo
+ self.firsttime = True
+ self.inputHandler = inputHandler
+ self.dominfo = dominfo
+ self.restore_image = restore_image
+ self.store_port = dominfo.store_port
+ self.console_port = dominfo.console_port
+
+ def resume(self, finish, paused, child):
+ fd = self.fd
+ dominfo = self.dominfo
+ handler = self.inputHandler
+ restore_image = self.restore_image
restore_image.setCpuid()
+ dominfo.completeRestore(handler.store_mfn, handler.console_mfn,
+ self.firsttime)
- # xc_restore will wait for source to close connection
-
- dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
+ if self.colo and not finish:
+ # notify master that checkpoint finishes
+ write_exact(fd, "finish", "failed to write finish done")
+ buf = read_exact(fd, 6, "failed to read resume flag")
+ if buf != "resume":
+ return False
- #
- # We shouldn't hold the domains_lock over a waitForDevices
- # As this function sometime gets called holding this lock,
- # we must release it and re-acquire it appropriately
- #
from xen.xend import XendDomain
- lock = True;
- try:
- XendDomain.instance().domains_lock.release()
- except:
- lock = False;
-
- try:
- dominfo.waitForDevices() # Wait for backends to set up
- finally:
- if lock:
- XendDomain.instance().domains_lock.acquire()
+ if self.firsttime:
+ lock = True
+ try:
+ XendDomain.instance().domains_lock.release()
+ except:
+ lock = False
+
+ try:
+ dominfo.waitForDevices() # Wait for backends to set up
+ finally:
+ if lock:
+ XendDomain.instance().domains_lock.acquire()
+ if not paused:
+ dominfo.unpause()
+ else:
+ # colo
+ xc.domain_resume(dominfo.domid, 0)
+ ResumeDomain(dominfo.domid)
- if not paused:
- dominfo.unpause()
+ if self.colo and not finish:
+ child.tochild.write("resume")
+ child.tochild.flush()
- return dominfo
- except Exception, exn:
- dominfo.destroy()
- log.exception(exn)
- raise exn
+ dominfo.store_port = self.store_port
+ dominfo.console_port = self.console_port
+ self.firsttime = False
class RestoreInputHandler:
def __init__(self):
@@ -364,17 +407,25 @@ class RestoreInputHandler:
self.console_mfn = None
- def handler(self, line, _):
+ def handler(self, line, child, restoreHandler):
+ if line == "finish":
+ # colo
+ return restoreHandler.resume(False, False, child)
+
m = re.match(r"^(store-mfn) (\d+)$", line)
if m:
self.store_mfn = int(m.group(2))
- else:
- m = re.match(r"^(console-mfn) (\d+)$", line)
- if m:
- self.console_mfn = int(m.group(2))
+ return True
+
+ m = re.match(r"^(console-mfn) (\d+)$", line)
+ if m:
+ self.console_mfn = int(m.group(2))
+ return True
+
+ return False
-def forkHelper(cmd, fd, inputHandler, closeToChild):
+def forkHelper(cmd, fd, inputHandler, closeToChild, restoreHandler):
child = xPopen3(cmd, True, -1, [fd])
if closeToChild:
@@ -392,7 +443,7 @@ def forkHelper(cmd, fd, inputHandler, closeToChild):
else:
line = line.rstrip()
log.debug('%s', line)
- inputHandler(line, child.tochild)
+ inputHandler(line, child, restoreHandler)
except IOError, exn:
raise XendError('Error reading from child process for %s: %s' %
--
1.7.4
next prev parent reply other threads:[~2013-07-11 8:35 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-11 8:35 [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 01/16] xen: introduce new hypercall to reset vcpu Wen Congyang
2013-07-11 9:44 ` Andrew Cooper
2013-07-11 9:58 ` Wen Congyang
2013-07-11 10:01 ` Ian Campbell
2013-08-01 11:48 ` Tim Deegan
2013-08-06 6:47 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 02/16] block-remus: introduce colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 03/16] block-remus: introduce a interface to allow the user specify which mode the backup end uses Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 04/16] dominfo.completeRestore() will be called more than once in colo mode Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 05/16] xc_domain_restore: introduce restore_callbacks for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 06/16] colo: implement restore_callbacks init()/free() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 07/16] colo: implement restore_callbacks get_page() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 08/16] colo: implement restore_callbacks flush_memory Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 09/16] colo: implement restore_callbacks update_p2m() Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 10/16] colo: implement restore_callbacks finish_restore() Wen Congyang
2013-07-11 9:40 ` Ian Campbell
2013-07-11 9:54 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 11/16] xc_restore: implement for colo Wen Congyang
2013-07-11 8:35 ` Wen Congyang [this message]
2013-07-11 8:35 ` [RFC Patch v2 13/16] xc_domain_save: flush cache before calling callbacks->postcopy() Wen Congyang
2013-07-11 13:43 ` Andrew Cooper
2013-07-12 1:36 ` Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 14/16] add callback to configure network for colo Wen Congyang
2013-07-11 8:35 ` [RFC Patch v2 15/16] xc_domain_save: implement save_callbacks " Wen Congyang
2013-07-11 13:52 ` Andrew Cooper
2013-07-11 8:35 ` [RFC Patch v2 16/16] remus: implement colo mode Wen Congyang
2013-07-11 9:37 ` [RFC Patch v2 00/16] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Andrew Cooper
2013-07-11 9:40 ` Ian Campbell
2013-07-14 14:33 ` Shriram Rajagopalan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1373531748-12547-13-git-send-email-wency@cn.fujitsu.com \
--to=wency@cn.fujitsu.com \
--cc=bobby.hong@huawei.com \
--cc=eddie.dong@intel.com \
--cc=laijs@cn.fujitsu.com \
--cc=rshriram@cs.ubc.ca \
--cc=wei.ye1987@gmail.com \
--cc=xen-devel@lists.xen.org \
--cc=xuyao.xu@huawei.com \
--cc=yunhong.jiang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).