diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendConstants.py --- a/tools/python/xen/xend/XendConstants.py Wed Sep 12 09:43:33 2007 +0100 +++ b/tools/python/xen/xend/XendConstants.py Thu Sep 27 09:40:19 2007 +0900 @@ -120,3 +120,19 @@ VTPM_DELETE_SCRIPT = '/etc/xen/scripts/v XS_VMROOT = "/vm/" + +# +# Dumpcore constants +# + +DUMPCORE_REASON_CRASH = "crash" +DUMPCORE_REASON_XMCOM = "xm dump-core" + +# basepath = /local/domain/DOMID/XSPATH_DUMPCORE_BASE/ +XSPATH_DUMPCORE_BASE = "dump_core_lock" + +# basepath/XSPATH_DUMPCORE_THREAD_ID +XSPATH_DUMPCORE_THREAD_ID = "thread_id" + +# basepath/XSPATH_DUMPCORE_REASON +XSPATH_DUMPCORE_REASON = "reason" diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Wed Sep 12 09:43:33 2007 +0100 +++ b/tools/python/xen/xend/XendDomain.py Thu Sep 27 09:40:19 2007 +0900 @@ -44,6 +44,7 @@ from xen.xend.XendConstants import DOM_S from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN from xen.xend.XendConstants import TRIGGER_TYPE +from xen.xend.XendConstants import DUMPCORE_REASON_CRASH, DUMPCORE_REASON_XMCOM from xen.xend.XendDevices import XendDevices from xen.xend.XendAPIConstants import * @@ -1223,7 +1224,7 @@ class XendDomain: log.info("Domain core dump requested for domain %s (%d) " "live=%d crash=%d.", dominfo.getName(), dominfo.getDomid(), live, crash) - return dominfo.dumpCore(filename) + return dominfo.dumpCore(filename, DUMPCORE_REASON_XMCOM, live, crash) except Exception, ex: raise XendError(str(ex)) diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Wed Sep 12 09:43:33 2007 +0100 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Sep 27 09:40:19 2007 +0900 @@ -53,6 +53,10 @@ from xen.xend.XendAPIConstants import * from xen.xend.XendAPIConstants import * from xen.xend.XendVMMetrics import
XendVMMetrics + +from xen.xend import XendDumpLock +import thread +import xen.util.auxbin MIGRATE_TIMEOUT = 30.0 BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' @@ -1169,11 +1173,19 @@ class XendDomainInfo: if xoptions.get_enable_dump(): try: - self.dumpCore() + self.dumpCore(None, DUMPCORE_REASON_CRASH) + restart_reason = 'crash' + except XendDumpLock.DuplicateDumpError: + # Don't call _maybeRestart method -- other thread is + # dumping now. + restart_reason = None except XendError: # This error has been logged -- there's nothing more # we can do in this context. - pass + restart_reason = 'crash' + + if restart_reason != None: + self._stateSet(DOM_STATE_HALTED) restart_reason = 'crash' self._stateSet(DOM_STATE_HALTED) @@ -1365,23 +1377,28 @@ class XendDomainInfo: # Debugging .. # - def dumpCore(self, corefile = None): + def dumpCore(self, corefile = None, reason = DUMPCORE_REASON_XMCOM, live = False, crash = False): """Create a core dump for this domain. @raise: XendError if core dumping failed.
""" - + + if not (reason == DUMPCORE_REASON_XMCOM or reason == DUMPCORE_REASON_CRASH): + log.error("Unknown reason for dumpCore(): '%s'" % reason) + return False + try: if not corefile: this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime()) corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time, - self.info['name_label'], self.domid) + self.info['name_label'], self.domid) if os.path.isdir(corefile): raise XendError("Cannot dump core in a directory: %s" % corefile) - - xc.domain_dumpcore(self.domid, corefile) + status = self.innerDumpCore(corefile, reason, live, crash) + if status == 1: + raise XendDumpLock.DuplicateDumpError("Other thread is already dumping core") except RuntimeError, ex: corefile_incomp = corefile+'-incomplete' os.rename(corefile, corefile_incomp) @@ -1389,6 +1406,61 @@ class XendDomainInfo: self.domid, self.info['name_label']) raise XendError("Failed to dump core: %s" % str(ex)) + def innerDumpCore(self, corefile, reason, live, crash): + """ + @return value is as follows: + 0: normal end + -1: critical error end + 1: cannot dump, since another thread is dumping a core of the same domain. + It means that this thread mustn't restart the domain ( see XendDomainInfo.py@refreshShutdown() ). + """ + try: + is_locked = False + return_val = -1 + lockobj = XendDumpLock.DumpLock(self.domid) + + while(True): + is_locked = lockobj.lock(reason) + if is_locked: break + + info = XendDumpLock.get_lockinfo(self.domid) + if info == None: break + if not (info['reason'] == DUMPCORE_REASON_XMCOM and reason == DUMPCORE_REASON_CRASH): + break + + time.sleep(1) + + log.debug("thread(%s) have a lock?
-> %s" % (thread.get_ident(), is_locked)) + if is_locked: + if reason == DUMPCORE_REASON_XMCOM and not live: + log.info("pause for dump-core domid=%s" % self.domid) + self.pause() + + program_path = xen.util.auxbin.pathTo("xc_dumpcore") + cmd = "%s %d %s" % (program_path, self.domid, corefile) + log.info("create dumpcore process: cmd='%s'" % cmd) + + exitstatus = os.system(cmd) + if exitstatus == 0: + return_val = 0 + else: + return_val = -1 + + if reason == DUMPCORE_REASON_XMCOM and not live: + log.info("unpause for dump-core domid=%s" % self.domid) + self.unpause() + + if reason == DUMPCORE_REASON_XMCOM and crash: + log.info("Destroying domain: %s ..." % self.domid) + self.destroy() + else: + return_val = 1 + log.error("Other thread is already dumping core") + finally: + if is_locked: lockobj.unlock() + + return return_val + # # Device creation/deletion functions # diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDumpLock.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/XendDumpLock.py Thu Sep 27 09:40:19 2007 +0900 @@ -0,0 +1,217 @@ +#=========================================================================== +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (C) 2007 Rikiya Ayukawa +#============================================================================ + + +from xen.xend.xenstore import xsutil + +import logging +import thread +import threading +import time +import re +import copy +import os +import traceback +from threading import Thread +from xen.xend.xenstore.xstransact import xstransact +from xen.xend.XendConstants import * + +log = logging.getLogger("xend.XendDumpLock") + +class DumpLock: + """Lock of dump-core for each domain + + @cvar locking_id2thread_dict: dict of thread objs which have a dump-core lock. dict is indexed by thread id + @type locking_id2thread_dict: dict of thread objs (index type is int) + """ + + locking_id2thread_dict = {} + inner_lock_unlock_cond = threading.Condition() + + def __init__(self, dom_id): + """Constructor for a dump lock + """ + self.path_domain = "/dump_core_lock" + self.xs = xsutil.xshandle() + + self.dom_id = dom_id + self.is_locked = False + + def lock(self, reason): + try: + DumpLock.inner_lock_unlock_cond.acquire() + return self._inner_lock(reason) + finally: + DumpLock.inner_lock_unlock_cond.release() + + def unlock(self): + try: + DumpLock.inner_lock_unlock_cond.acquire() + return self._inner_unlock() + finally: + DumpLock.inner_lock_unlock_cond.release() + + def _inner_lock(self, reason): + """ + Get a lock for dump-core. + You can get only one lock for each guest domain. + + Each pair of lock() and unlock() should be called by same thread. 
+ """ + success_lock = False + locking_thid = thread.get_ident() + + if not _check_domain_exist(self.dom_id): + log.error("Not exist domain: id %d" % self.dom_id) + raise RuntimeError("Not exist domain: id %d" % self.dom_id) + + #get the xenstore path for a lock + path = _basepath(self.dom_id) + + th = self.xs.transaction_start() + + stored_reason = self.xs.read(th, path + XSPATH_DUMPCORE_REASON) + stored_thread_id = self.xs.read(th, path + XSPATH_DUMPCORE_THREAD_ID) + + if stored_thread_id != None: + stored_thread_id = int(stored_thread_id) + + if stored_thread_id == None: #no threads have a lock for dom_id + self._acquire_lock(th, reason, locking_thid) + success_lock = True + elif not self._is_alive_and_locked(stored_thread_id): + #force to get a lock + self._acquire_lock(th, reason, locking_thid) + success_lock = True + log.warning("Force to get a lock of domain id %s" % self.dom_id) + else: #failed to lock + success_lock = False + + self.xs.transaction_end(th) + + return success_lock + + def _inner_unlock(self): + """ + """ + if not self.is_locked: + return False + + th = self.xs.transaction_start() + self._release_lock(th) + self.xs.transaction_end(th) + + return True + + def _acquire_lock(self, th, reason, thread_id): + path = _basepath(self.dom_id) + + self.xs.write(th, path + XSPATH_DUMPCORE_THREAD_ID, str(thread_id)) + self.xs.write(th, path + XSPATH_DUMPCORE_REASON, str(reason)) + self.is_locked = True + DumpLock.locking_id2thread_dict[thread_id] = threading.currentThread() + def _release_lock(self, th): + path = _basepath(self.dom_id) + + self.xs.rm(th, path.rstrip("/")) + self.is_locked = False + del DumpLock.locking_id2thread_dict[thread.get_ident()] + def _is_alive_and_locked(self, thread_id): + """ + @param thread_id The thread id, which may have a lock now + @return Returns True if thread_id is an alive thread's id + """ + if not DumpLock.locking_id2thread_dict.has_key(thread_id): + return False + + th_obj = 
DumpLock.locking_id2thread_dict[thread_id] + alive_th_list = threading.enumerate() + return th_obj in alive_th_list + +class DuplicateDumpError(Exception): + pass + +def _check_domain_exist(dom_id): + rl = xstransact.Read('/local/domain', str(dom_id)) + return rl != None + +def _basepath(dom_id): + xs = xsutil.xshandle() + path = xs.get_domain_path(dom_id) + return path + "/" + XSPATH_DUMPCORE_BASE + "/" + +def get_lockinfo(dom_id): + """Get lock information + @todo raise Exception if domain doesn't exist + @return dict with 'thread_id' and 'reason' if a lock is stored for dom_id; None if the domain doesn't exist or no lock is stored + """ + xs = xsutil.xshandle() + info = {} + + if not _check_domain_exist(dom_id): + return None + + path = _basepath(dom_id) + + th = xs.transaction_start() + info['thread_id'] = xs.read(th, path + XSPATH_DUMPCORE_THREAD_ID) + info['reason'] = xs.read(th, path + XSPATH_DUMPCORE_REASON) + xs.transaction_end(th) + + if info['thread_id'] == None: + return None + + return info + +if __name__ == '__main__': + logging.basicConfig() + #unit tests + def test_cannot_duplock_for_1domain(): + lock = DumpLock(0) + lock2 = DumpLock(0) + + lock.lock(DUMPCORE_REASON_XMCOM) + can_lock = lock2.lock(DUMPCORE_REASON_XMCOM) + + assert not can_lock + lock.unlock() + def test_get_lockinfo(): + assert get_lockinfo(-1) == None + lock = DumpLock(0) + rl = lock.lock(DUMPCORE_REASON_XMCOM) + assert rl == True + info = get_lockinfo(0) + assert info != None + assert info['reason'] == DUMPCORE_REASON_XMCOM + assert info['thread_id'] == str(thread.get_ident()) + lock.unlock() + def test_duplicate_exception(): + try: + raise DuplicateDumpError("test") + except DuplicateDumpError, ex: + pass + + try: + raise DuplicateDumpError("test") + except Exception, ex: + pass + + def main(): + test_cannot_duplock_for_1domain() + test_get_lockinfo() + test_duplicate_exception() + main() diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Wed Sep 12 09:43:33 2007 +0100 +++ b/tools/python/xen/xm/main.py Thu Sep 27
09:40:19 2007 +0900 @@ -1286,19 +1286,8 @@ def xm_dump_core(args): else: filename = None - if not live: - server.xend.domain.pause(dom) - - try: - print "Dumping core of domain: %s ..." % str(dom) - server.xend.domain.dump(dom, filename, live, crash) - finally: - if not live: - server.xend.domain.unpause(dom) - - if crash: - print "Destroying domain: %s ..." % str(dom) - server.xend.domain.destroy(dom) + print "Dumping core of domain: %s ..." % str(dom) + server.xend.domain.dump(dom, filename, live, crash) def xm_rename(args): arg_check(args, "rename", 2) diff -r a00cc97b392a -r b7c1cfb4969a tools/xcutils/Makefile --- a/tools/xcutils/Makefile Wed Sep 12 09:43:33 2007 +0100 +++ b/tools/xcutils/Makefile Thu Sep 27 09:40:19 2007 +0900 @@ -22,7 +22,7 @@ CFLAGS += -Wp,-MD,.$(@F).d CFLAGS += -Wp,-MD,.$(@F).d PROG_DEP = .*.d -PROGRAMS = xc_restore xc_save readnotes +PROGRAMS = xc_restore xc_save readnotes xc_dumpcore LDLIBS = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl -lxenstore diff -r a00cc97b392a -r b7c1cfb4969a tools/xcutils/xc_dumpcore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/xcutils/xc_dumpcore.c Thu Sep 27 09:40:19 2007 +0900 @@ -0,0 +1,45 @@ +/* + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of + * this archive for more details. + * + * Copyright (C) 2007 by Rikiya Ayukawa + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +int +main(int argc, char **argv) +{ + unsigned int xc_fd, domid; + int ret; + + if (argc != 3) + errx(1, "usage: %s domid corename", argv[0]); + + xc_fd = xc_interface_open(); + if (xc_fd < 0) + errx(1, "failed to open control interface"); + + domid = atoi(argv[1]); + ret = xc_domain_dumpcore(xc_fd, domid, argv[2]); + + xc_interface_close(xc_fd); + + return ret; +}