# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1210253611 -3600
# Node ID c99a88623eda83d8e02f4b6d7c32bc4c6d298d8a
# Parent 8bd776540ab319d73b8e55656ad2c342c178a5b1
xend: Fix and improve error handling for failed suspend/migrate
This has been broken since cset 16964:5d84464dc1fc.
Also deal better with very early errors (close the sender-side socket).
Signed-off-by: Steven Hand <steven.hand@xxxxxxxxxxxx>
---
tools/python/xen/xend/XendCheckpoint.py | 14 ++------------
tools/python/xen/xend/XendDomain.py | 6 ++++--
tools/python/xen/xend/XendDomainInfo.py | 13 ++++++++++++-
3 files changed, 18 insertions(+), 15 deletions(-)
diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Thu May 08 14:32:11 2008 +0100
+++ b/tools/python/xen/xend/XendCheckpoint.py Thu May 08 14:33:31 2008 +0100
@@ -81,8 +81,6 @@ def save(fd, dominfo, network, live, dst
# thing is useful for debugging.
dominfo.setName('migrating-' + domain_name)
- done_suspend = 0
-
try:
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
@@ -110,7 +108,6 @@ def save(fd, dominfo, network, live, dst
log.debug("Suspending %d ...", dominfo.getDomid())
dominfo.shutdown('suspend')
dominfo.waitForShutdown()
- done_suspend = 1
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
domain_name)
log.info("Domain %d suspended.", dominfo.getDomid())
@@ -154,16 +151,9 @@ def save(fd, dominfo, network, live, dst
pass
except Exception, exn:
- log.exception("Save failed on domain %s (%s).", domain_name,
+ log.exception("Save failed on domain %s (%s) - resuming.", domain_name,
dominfo.getDomid())
-
- # If we didn't get as far as suspending the domain (for
- # example, we couldn't balloon enough memory for the new
- # domain), then we don't want to re-plumb the devices, as the
- # domU will not be expecting it.
- if done_suspend:
- log.debug("XendCheckpoint.save: resumeDomain")
- dominfo.resumeDomain()
+ dominfo.resumeDomain()
try:
dominfo.setName(domain_name)
diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Thu May 08 14:32:11 2008 +0100
+++ b/tools/python/xen/xend/XendDomain.py Thu May 08 14:33:31 2008 +0100
@@ -1308,8 +1308,10 @@ class XendDomain:
sock.send("receive\n")
sock.recv(80)
- XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node)
- sock.close()
+ try:
+ XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst,
node=node)
+ finally:
+ sock.close()
def domain_save(self, domid, dst, checkpoint=False):
"""Start saving a domain to file.
diff -r 8bd776540ab3 -r c99a88623eda tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Thu May 08 14:32:11 2008 +0100
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu May 08 14:33:31 2008 +0100
@@ -2378,8 +2378,19 @@ class XendDomainInfo:
def resumeDomain(self):
log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid))
- if self.domid is None:
+ # resume a suspended domain (e.g. after live checkpoint, or after
+ # a later error during save or migrate); checks that the domain
+ # is currently suspended first so safe to call from anywhere
+
+ xeninfo = dom_get(self.domid)
+ if xeninfo is None:
return
+ if not xeninfo['shutdown']:
+ return
+ reason = shutdown_reason(xeninfo['shutdown_reason'])
+ if reason != 'suspend':
+ return
+
try:
# could also fetch a parsed note from xenstore
fast = self.info.get_notes().get('SUSPEND_CANCEL') and 1 or 0
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|