This week I worked on kdump support with SLES11SP1 as dom0/domU.
I came up with the patch below which works ok in my testing. There is
also a kernel patch, which is not finished yet (proper crashkernel
detection missing).
During my testing and "fine-tuning" I came across an issue that I haven't
figured out yet:
In the added function _handleCrashDumpWatch(), a new watch on
${backend}/state is supposed to be registered. For some reason this watch
never triggers, even though the values do change. Is registering a watch
from within a watch callback supposed to work? I would like to send an event
from _handleCrashDumpWatchCallback() when the backend switches state, to avoid
a hardcoded sleep, similar to what the hotplug scripts do.
Any ideas what is wrong with my attempt?
Olaf
tools/python/xen/xend/XendDomainInfo.py | 107 ++++++++++++++
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -35,6 +35,7 @@ import stat
import shutil
import traceback
from types import StringTypes
+from threading import Event
import xen.lowlevel.xc
from xen.util import asserts, auxbin, mkdir
@@ -2391,6 +2392,96 @@ class XendDomainInfo:
return self.getDeviceController(deviceClass).reconfigureDevice(
devid, devconfig)
+ def _handleCrashDumpWatchCallback(self, arg, ev):
+ log.debug("_handleCrashDumpWatchCallback called with '%s'" % arg)
+ v = None
+ try:
+ v = xstransact.Read(arg)
+ except:
+ log.exception("_handleCrashDumpWatchCallback exception")
+ ev.set()
+ return False
+ if not int(v) == 4:
+ ev.set()
+ return True
+
+ # reset all devices where frontend and backend are in state XenbusStateConnected
+ # protocol:
+ # initial value is 0
+ # crashed guest writes 1, this function resets all devices
+ # this function writes 2, to notify the guest about the finished reset process
+ def _handleCrashDumpWatch(self, arg):
+ log.debug("_handleCrashDumpWatch called with '%s'" % arg)
+ v = None
+ delay = 3.0
+ try:
+ v = xstransact.Read(arg)
+ except:
+ log.exception("_handleCrashDumpWatch exception")
+ log.debug("_handleCrashDumpWatch trigger value %s" % v)
+ if not int(v) == 1:
+ return True
+ try:
+ ev = Event()
+ t = xstransact("%s/device" % self.vmpath)
+ try:
+ for devclass in XendDevices.valid_devices():
+ for dev in t.list(devclass):
+ self.crashWatchCallback = backend = frontend = f_state
= b_state = None
+ try:
+ log.debug("Reading dev %s", dev)
+ frontend = xstransact.Read("%s/device/%s" %
(self.vmpath, dev), "frontend")
+ f_state = xstransact.Read(frontend, "state")
+ backend = xstransact.Read("%s/device/%s" %
(self.vmpath, dev), "backend")
+ b_state = xstransact.Read(backend, "state")
+ log.debug('backend %s(%s) frontend %s(%s)',
backend, b_state, frontend, f_state)
+ except:
+ log.exception("Reading frontend/backend state
failed: %s; %s; %s",
+ self.info['name_label'],
+ devclass, dev)
+ pass
+ try:
+ # XenbusStateConnected
+ if b_state == "4" and f_state == "4":
+ ev.clear()
+ self.crashWatchCallback = xswatch(backend +
'/state', self._handleCrashDumpWatchCallback, ev)
+ # XenbusStateClosing
+ log.debug("Set %s to XenbusStateClosing",
frontend)
+ xstransact.Write(frontend, "state", "5")
+ ev.wait(delay)
+ b_state = xstransact.Read(backend, "state")
+ log.debug('backend %s/state == %s', backend,
b_state)
+ # XenbusStateClosed
+ log.debug("Set %s to XenbusStateClosed",
frontend)
+ xstransact.Write(frontend, "state", "6")
+ ev.wait(delay)
+ b_state = xstransact.Read(backend, "state")
+ log.debug('backend %s/state == %s', backend,
b_state)
+ # XenbusStateInitialising
+ log.debug("Set %s to XenbusStateInitialising",
frontend)
+ xstransact.Write(frontend, "state", "1")
+ ev.wait(delay)
+ b_state = xstransact.Read(backend, "state")
+ log.debug('backend %s/state == %s', backend,
b_state)
+ if self.crashWatchCallback:
+ try:
+ self.crashWatchCallback.unwatch()
+ except:
+ pass
+ except:
+ log.debug("state write failed for %s" % frontend)
+ pass
+ finally:
+ t.abort()
+ log.debug("_handleCrashDumpWatch notify guest")
+ xstransact.Write(self.dompath, 'device-reset-trigger', "2")
+ except:
+ log.debug("_handleCrashDumpWatch exception")
+ pass
+ log.debug("_handleCrashDumpWatch done")
+ # release this watch
+ return False
+
def _createDevices(self, resume = False):
"""Create the devices for a vm.
@@ -2439,6 +2530,12 @@ class XendDomainInfo:
self.info['devices'][dev_uuid][1]['devid'] = devid
+ xstransact.Write(self.dompath, 'device-reset-trigger', "0")
+ xstransact.SetPermissions(self.dompath + '/device-reset-trigger',
+ { 'dom': self.getDomid(), 'read': True,
'write': True })
+ self.crashWatch = xswatch(self.dompath + '/device-reset-trigger',
+ self._handleCrashDumpWatch)
+
if self.image:
self.image.createDeviceModel(resume)
self.image.createXenPaging()
@@ -2479,6 +2576,16 @@ class XendDomainInfo:
finally:
t.abort()
+ try:
+ if self.crashWatch:
+ try:
+ self.crashWatch.unwatch()
+ except:
+ pass
+ finally:
+ self.crashWatch = None
+
+
def getDeviceController(self, name):
"""Get the device controller for this domain, and if it
doesn't exist, create it.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|