Hi,
xm has a subcommand "dump-core", which dumps the core image of
the domainU to the file.
This subcommand is useful, but it has the problem that xend
can't deal with other xm commands while dealing with "dump-core".
The attached patch fixes this problem. ( for cset#15880:a00cc97b392a )
Details of the attached patch are as follows:
- Add xc_dumpcore program. This program only calls xc_domain_dump()
in libxc to dump the core image of a domainU.
- Change xend to spawn an xc_dumpcore process when it deals with "dump-core".
The xend thread which spawns the process waits for the process to
finish dumping the core image.
- Add XendDumpLock to prevent xend from dumping the same domainU more than
once at the same time. XendDumpLock reads/writes "dump-core" lock information
from/to xenstore.
- dump-core has --live and --crash options. These options send requests to
pause/unpause or destroy the domainU. I moved that code into the dumpcore thread
because xm cannot handle the options correctly for a second dump-core request.
TODO:
- Xend should be fixed not to destroy the domainU that it is dumping via
"xm destroy." There are some other xm commands that need this kind of
special care, but I have not implemented them at this moment yet.
So, for example, if you run "xm dump-core <domid>" and run "xm destroy <same
domid>"
immediately, I don't know what happens (the system may crash).
I would like you to give me comments.
Thank you.
Signed-off-by: Rikiya Ayukawa <ayukawa.rikiya@xxxxxxxxxxxxxxxxxx>
Rikiya Ayukawa
diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendConstants.py
--- a/tools/python/xen/xend/XendConstants.py Wed Sep 12 09:43:33 2007 +0100
+++ b/tools/python/xen/xend/XendConstants.py Thu Sep 27 09:40:19 2007 +0900
@@ -120,3 +120,19 @@ VTPM_DELETE_SCRIPT = '/etc/xen/scripts/v
XS_VMROOT = "/vm/"
+
+#
+# Dumpcore constants
+#
+
+DUMPCORE_REASON_CRASH = "crash"
+DUMPCORE_REASON_XMCOM = "xm dump-core"
+
+# basepath = /local/domain/<domid>/XSPATH_DUMPCORE_BASE/
+XSPATH_DUMPCORE_BASE = "dump_core_lock"
+
+# basepath/XSPATH_DUMPCORE_THREAD_ID
+XSPATH_DUMPCORE_THREAD_ID = "thread_id"
+
+# basepath/XSPATH_DUMPCORE_REASON
+XSPATH_DUMPCORE_REASON = "reason"
diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Wed Sep 12 09:43:33 2007 +0100
+++ b/tools/python/xen/xend/XendDomain.py Thu Sep 27 09:40:19 2007 +0900
@@ -44,6 +44,7 @@ from xen.xend.XendConstants import DOM_S
from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED
from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN
from xen.xend.XendConstants import TRIGGER_TYPE
+from xen.xend.XendConstants import DUMPCORE_REASON_CRASH, DUMPCORE_REASON_XMCOM
from xen.xend.XendDevices import XendDevices
from xen.xend.XendAPIConstants import *
@@ -1223,7 +1224,7 @@ class XendDomain:
log.info("Domain core dump requested for domain %s (%d) "
"live=%d crash=%d.",
dominfo.getName(), dominfo.getDomid(), live, crash)
- return dominfo.dumpCore(filename)
+ return dominfo.dumpCore(filename, DUMPCORE_REASON_XMCOM, live,
crash)
except Exception, ex:
raise XendError(str(ex))
diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Sep 12 09:43:33 2007 +0100
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Sep 27 09:40:19 2007 +0900
@@ -53,6 +53,10 @@ from xen.xend.XendAPIConstants import *
from xen.xend.XendAPIConstants import *
from xen.xend.XendVMMetrics import XendVMMetrics
+
+from xen.xend import XendDumpLock
+import thread
+import xen.util.auxbin
MIGRATE_TIMEOUT = 30.0
BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
@@ -1169,11 +1173,19 @@ class XendDomainInfo:
if xoptions.get_enable_dump():
try:
- self.dumpCore()
+ self.dumpCore(None, DUMPCORE_REASON_CRASH)
+ restart_reason = 'crash'
+ except XendDumpLock.DuplicateDumpError:
+ # Don't call _maybeRestart method -- other thread is
+ # dumping now.
+ restart_reason = None
except XendError:
# This error has been logged -- there's nothing more
# we can do in this context.
- pass
+ restart_reason = 'crash'
+
+ if restart_reason != None:
+ self._stateSet(DOM_STATE_HALTED)
restart_reason = 'crash'
self._stateSet(DOM_STATE_HALTED)
@@ -1365,23 +1377,28 @@ class XendDomainInfo:
# Debugging ..
#
- def dumpCore(self, corefile = None):
+ def dumpCore(self, corefile = None, reason = DUMPCORE_REASON_XMCOM, live =
False, crash = False):
"""Create a core dump for this domain.
@raise: XendError if core dumping failed.
"""
-
+
+ if not (reason == DUMPCORE_REASON_XMCOM or reason ==
DUMPCORE_REASON_CRASH):
+ log.error("Unknown reason for dumpCore(): '%s'" % reason)
+ return False
+
try:
if not corefile:
this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
- self.info['name_label'], self.domid)
+
self.info['name_label'], self.domid)
if os.path.isdir(corefile):
raise XendError("Cannot dump core in a directory: %s" %
corefile)
-
- xc.domain_dumpcore(self.domid, corefile)
+ status = self.innerDumpCore(corefile, reason, live, crash)
+ if status == 1:
+ raise XendDumpLock.DuplicateDumpError("Other thread is already
dumping core")
except RuntimeError, ex:
corefile_incomp = corefile+'-incomplete'
os.rename(corefile, corefile_incomp)
@@ -1389,6 +1406,61 @@ class XendDomainInfo:
self.domid, self.info['name_label'])
raise XendError("Failed to dump core: %s" % str(ex))
+ def innerDumpCore(self, corefile, reason, live, crash):
+ """
+ @return value is as follow:
+ 0: normal end
+ -1: critical error end
+ 1: cannot dump end, since other thread is dumping a core of same
domain.
+ It meens that this thread mustn't restart the domain ( see
XendDomainInfo.py@refreshShutdown() ).
+ """
+ try:
+ is_locked = False
+ return_val = -1
+ lockobj = XendDumpLock.DumpLock(self.domid)
+
+ while(True):
+ is_locked = lockobj.lock(reason)
+ if is_locked: break
+
+ info = XendDumpLock.get_lockinfo(self.domid)
+ if info == None: break
+ if not (info['reason'] == DUMPCORE_REASON_XMCOM and reason ==
DUMPCORE_REASON_CRASH):
+ break
+
+ time.sleep(1)
+
+ log.debug("thread(%s) have a lock? -> %s" % (thread.get_ident(),
is_locked))
+ if is_locked:
+ if reason == DUMPCORE_REASON_XMCOM and not live:
+ log.info("pause for dump-core domid=%s" % self.domid)
+ self.pause()
+
+ program_path = xen.util.auxbin.pathTo("xc_dumpcore")
+ cmd = "%s %d %s" % (program_path, self.domid, corefile)
+ log.info("create dumpcore process: cmd='%s'" % cmd)
+
+ exitstatus = os.system(cmd)
+ if exitstatus == 0:
+ return_val = 0
+ else:
+ return_val = -1
+
+ if reason == DUMPCORE_REASON_XMCOM and not live:
+ log.info("unpause for dump-core domid=%s" % self.domid)
+ self.unpause()
+
+ if reason == DUMPCORE_REASON_XMCOM and crash:
+ log.info("Destroying domain: %s ..." % self.domid)
+ self.destroy()
+ else:
+ return_val = 1
+ log.error("Other thread is already dumping core")
+ finally:
+ if is_locked: lockobj.unlock()
+
+ return return_val
+
#
# Device creation/deletion functions
#
diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xend/XendDumpLock.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendDumpLock.py Thu Sep 27 09:40:19 2007 +0900
@@ -0,0 +1,217 @@
+#===========================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 Rikiya Ayukawa <ayukawa.rikiya@xxxxxxxxxxxxxxxxxx>
+#============================================================================
+
+
+from xen.xend.xenstore import xsutil
+
+import logging
+import thread
+import threading
+import time
+import re
+import copy
+import os
+import traceback
+from threading import Thread
+from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.XendConstants import *
+
+log = logging.getLogger("xend.XendDumpLock")
+
+class DumpLock:
+ """Lock of dump-core for each domain
+
+ @cvar locking_id2thread_dict: dict of thread objs which have a dump-core
lock. dict is indexed by thread id
+ @type locking_id2thread_dict: dict of thread objs (index type is int)
+ """
+
+ locking_id2thread_dict = {}
+ inner_lock_unlock_cond = threading.Condition()
+
+ def __init__(self, dom_id):
+ """Constructor for a dump lock
+ """
+ self.path_domain = "/dump_core_lock"
+ self.xs = xsutil.xshandle()
+
+ self.dom_id = dom_id
+ self.is_locked = False
+
+ def lock(self, reason):
+ try:
+ DumpLock.inner_lock_unlock_cond.acquire()
+ return self._inner_lock(reason)
+ finally:
+ DumpLock.inner_lock_unlock_cond.release()
+
+ def unlock(self):
+ try:
+ DumpLock.inner_lock_unlock_cond.acquire()
+ return self._inner_unlock()
+ finally:
+ DumpLock.inner_lock_unlock_cond.release()
+
+ def _inner_lock(self, reason):
+ """
+ Get a lock for dump-core.
+ You can get only one lock for each guest domain.
+
+ Each pair of lock() and unlock() should be called by same thread.
+ """
+ success_lock = False
+ locking_thid = thread.get_ident()
+
+ if not _check_domain_exist(self.dom_id):
+ log.error("Not exist domain: id %d" % self.dom_id)
+ raise RuntimeError("Not exist domain: id %d" % self.dom_id)
+
+ #get the xenstore path for a lock
+ path = _basepath(self.dom_id)
+
+ th = self.xs.transaction_start()
+
+ stored_reason = self.xs.read(th, path + XSPATH_DUMPCORE_REASON)
+ stored_thread_id = self.xs.read(th, path + XSPATH_DUMPCORE_THREAD_ID)
+
+ if stored_thread_id != None:
+ stored_thread_id = int(stored_thread_id)
+
+ if stored_thread_id == None: #no threads have a lock for dom_id
+ self._acquire_lock(th, reason, locking_thid)
+ success_lock = True
+ elif not self._is_alive_and_locked(stored_thread_id):
+ #force to get a lock
+ self._acquire_lock(th, reason, locking_thid)
+ success_lock = True
+ log.warning("Force to get a lock of domain id %s" % self.dom_id)
+ else: #failed to lock
+ success_lock = False
+
+ self.xs.transaction_end(th)
+
+ return success_lock
+
+ def _inner_unlock(self):
+ """
+ """
+ if not self.is_locked:
+ return False
+
+ th = self.xs.transaction_start()
+ self._release_lock(th)
+ self.xs.transaction_end(th)
+
+ return True
+
+ def _acquire_lock(self, th, reason, thread_id):
+ path = _basepath(self.dom_id)
+
+ self.xs.write(th, path + XSPATH_DUMPCORE_THREAD_ID, str(thread_id))
+ self.xs.write(th, path + XSPATH_DUMPCORE_REASON, str(reason))
+ self.is_locked = True
+ DumpLock.locking_id2thread_dict[thread_id] = threading.currentThread()
+ def _release_lock(self, th):
+ path = _basepath(self.dom_id)
+
+ self.xs.rm(th, path.rstrip("/"))
+ self.is_locked = False
+ del DumpLock.locking_id2thread_dict[thread.get_ident()]
+ def _is_alive_and_locked(self, thread_id):
+ """
+ @param thread_id The thread id, which may have a lock now
+ @return Returns True if thread_id is an alive thread's id
+ """
+ if not DumpLock.locking_id2thread_dict.has_key(thread_id):
+ return False
+
+ th_obj = DumpLock.locking_id2thread_dict[thread_id]
+ alive_th_list = threading.enumerate()
+ return th_obj in alive_th_list
+
+class DuplicateDumpError(Exception):
+ pass
+
+def _check_domain_exist(dom_id):
+ rl = xstransact.Read('/local/domain', str(dom_id))
+ return rl != None
+
+def _basepath(dom_id):
+ xs = xsutil.xshandle()
+ path = xs.get_domain_path(dom_id)
+ return path + "/" + XSPATH_DUMPCORE_BASE + "/"
+
+def get_lockinfo(dom_id):
+ """Get lock information
+ @todo raise Exception if domain doesn't exist
+ @return dict of 'thread_id' and 'reason' if a lock exists, otherwise None
+ """
+ xs = xsutil.xshandle()
+ info = {}
+
+ if not _check_domain_exist(dom_id):
+ return None
+
+ path = _basepath(dom_id)
+
+ th = xs.transaction_start()
+ info['thread_id'] = xs.read(th, path + XSPATH_DUMPCORE_THREAD_ID)
+ info['reason'] = xs.read(th, path + XSPATH_DUMPCORE_REASON)
+ xs.transaction_end(th)
+
+ if info['thread_id'] == None:
+ return None
+
+ return info
+
+if __name__ == '__main__':
+ logging.basicConfig()
+ #unit tests
+ def test_cannot_duplock_for_1domain():
+ lock = DumpLock(0)
+ lock2 = DumpLock(0)
+
+ lock.lock(DUMPCORE_REASON_XMCOM)
+ can_lock = lock2.lock(DUMPCORE_REASON_XMCOM)
+
+ assert not can_lock
+ lock.unlock()
+ def test_get_lockinfo():
+ assert get_lockinfo(-1) == None
+ lock = DumpLock(0)
+ rl = lock.lock(DUMPCORE_REASON_XMCOM)
+ assert rl == True
+ info = get_lockinfo(0)
+ assert info != None
+ assert info['reason'] == DUMPCORE_REASON_XMCOM
+ assert info['thread_id'] == str(thread.get_ident())
+ lock.unlock()
+ def test_duplicate_exception():
+ try:
+ raise DuplicateDumpError("test")
+ except DuplicateDumpError, ex:
+ pass
+
+ try:
+ raise DuplicateDumpError("test")
+ except Exception, ex:
+ pass
+
+ def main():
+ test_cannot_duplock_for_1domain()
+ test_get_lockinfo()
+ test_duplicate_exception()
+ main()
diff -r a00cc97b392a -r b7c1cfb4969a tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Wed Sep 12 09:43:33 2007 +0100
+++ b/tools/python/xen/xm/main.py Thu Sep 27 09:40:19 2007 +0900
@@ -1286,19 +1286,8 @@ def xm_dump_core(args):
else:
filename = None
- if not live:
- server.xend.domain.pause(dom)
-
- try:
- print "Dumping core of domain: %s ..." % str(dom)
- server.xend.domain.dump(dom, filename, live, crash)
- finally:
- if not live:
- server.xend.domain.unpause(dom)
-
- if crash:
- print "Destroying domain: %s ..." % str(dom)
- server.xend.domain.destroy(dom)
+ print "Dumping core of domain: %s ..." % str(dom)
+ server.xend.domain.dump(dom, filename, live, crash)
def xm_rename(args):
arg_check(args, "rename", 2)
diff -r a00cc97b392a -r b7c1cfb4969a tools/xcutils/Makefile
--- a/tools/xcutils/Makefile Wed Sep 12 09:43:33 2007 +0100
+++ b/tools/xcutils/Makefile Thu Sep 27 09:40:19 2007 +0900
@@ -22,7 +22,7 @@ CFLAGS += -Wp,-MD,.$(@F).d
CFLAGS += -Wp,-MD,.$(@F).d
PROG_DEP = .*.d
-PROGRAMS = xc_restore xc_save readnotes
+PROGRAMS = xc_restore xc_save readnotes xc_dumpcore
LDLIBS = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl -lxenstore
diff -r a00cc97b392a -r b7c1cfb4969a tools/xcutils/xc_dumpcore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xcutils/xc_dumpcore.c Thu Sep 27 09:40:19 2007 +0900
@@ -0,0 +1,45 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of
+ * this archive for more details.
+ *
+ * Copyright (C) 2007 by Rikiya Ayukawa <ayukawa.rikiya@xxxxxxxxxxxxxxxxxx>
+ *
+ */
+
+#include <err.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <xs.h>
+#include <xenctrl.h>
+#include <xenguest.h>
+
+
+int
+main(int argc, char **argv)
+{
+ int xc_fd, domid; /* signed: xc_interface_open() returns -1 on failure */
+ int ret;
+
+ if (argc != 3)
+ errx(1, "usage: %s domid corename", argv[0]);
+
+ xc_fd = xc_interface_open();
+ if (xc_fd < 0)
+ errx(1, "failed to open control interface");
+
+ domid = atoi(argv[1]);
+ ret = xc_domain_dumpcore(xc_fd, domid, argv[2]);
+
+ xc_interface_close(xc_fd);
+
+ return ret;
+}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|