WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Detangle the restart/reboot/halt/save/restore code. Ther

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Detangle the restart/reboot/halt/save/restore code. There is now one point
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 27 Sep 2005 13:02:11 +0000
Delivery-date: Tue, 27 Sep 2005 12:59:49 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User emellor@ewan
# Node ID f9296342d9b213c519baf1b614d511b779679a11
# Parent  299c2d5b5dafdc4c6b44a941875f79cde72833a6
Detangle the restart/reboot/halt/save/restore code.  There is now one point
for management of shutdown state, XendDomainInfo.refreshShutdown, which is
able to cope whatever the current state of the domain.  This fixes bug #124:
running reboot within a domU doesn't bring the domain back up after shutdown,
and bug #256: "xm reboot" could not make domU reboot.

Fix the refreshing inside XendDomain to ensure that the values returned by xm
list reflect reality correctly.  We were removing XendDomainInfo instances,
but not creating them on start-up, so if xend were restarted, domain
information was being lost.

Merge XendDomain.dom0_unknown and initial_refresh into
XendDomainInfo.recreate.

Catch exceptions inside callInfo, and reraise them as XendErrors.

Remove unused XendDomain.close.

Further improvements are needed to XendDomainInfo to ensure that it avoids all
race conditions on shutdown, and can cope correctly with xend restarting
during a shutdown.

Signed-off-by: Ewan Mellor <ewan@xxxxxxxxxxxxx>

diff -r 299c2d5b5daf -r f9296342d9b2 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue Sep 27 11:30:13 2005
+++ b/tools/python/xen/xend/XendDomain.py       Tue Sep 27 12:53:06 2005
@@ -34,10 +34,8 @@
 from xen.xend.XendLogging import log
 from xen.xend import scheduler
 from xen.xend.server import relocate
-from xen.xend.uuid import getUuid
 from xen.xend.xenstore import XenNode, DBMap
 from xen.xend.xenstore.xstransact import xstransact
-from xen.xend.xenstore.xsutil import GetDomainPath
 
 
 xc = xen.lowlevel.xc.new()
@@ -47,14 +45,7 @@
 
 __all__ = [ "XendDomain" ]
 
-SHUTDOWN_TIMEOUT = 30
-PRIV_DOMAIN      =  0
-
-def is_dead(dom):
-    return dom['crashed'] or dom['shutdown'] or (
-        dom['dying'] and not(dom['running'] or dom['paused'] or
-                             dom['blocked']))
-
+PRIV_DOMAIN = 0
 
 class XendDomainDict(dict):
     def get_by_name(self, name):
@@ -77,11 +68,10 @@
         # So we stuff the XendDomain instance (self) into xroot's components.
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = XendDomainDict()
-        self.domroot = "/domain"
         self.vmroot = "/domain"
         self.dbmap = DBMap(db=XenNode(self.vmroot))
         self.watchReleaseDomain()
-        self.initial_refresh()
+        self.refresh()
         self.dom0_setup()
 
     def list(self):
@@ -110,9 +100,7 @@
         return map(lambda x: x.getName(), doms)
 
     def onReleaseDomain(self):
-        self.reap()
         self.refresh()
-        self.domain_restarts()
 
     def watchReleaseDomain(self):
         from xen.xend.xenstore.xswatch import xswatch
@@ -141,43 +129,22 @@
             dominfo = dominfo[0]
         return dominfo
 
-    def initial_refresh(self):
-        """Refresh initial domain info from db.
-        """
-        doms = self.xen_domains()
-        self.dbmap.readDB()             # XXX only needed for "xend"
-        for dom in doms.values():
-            domid = dom['dom']
-            dompath = GetDomainPath(domid)
-            if not dompath:
-                continue
-            vmpath = xstransact.Read(dompath, "vm")
-            if not vmpath:
-                continue
-            uuid = xstransact.Read(vmpath, "uuid")
-            if not uuid:
-                continue
-            log.info("recreating domain %d, uuid %s" % (domid, uuid))
-            dompath = "/".join(dompath.split("/")[0:-1])
-            try:
-                dominfo = XendDomainInfo.recreate(uuid, dompath, domid, dom)
-            except Exception, ex:
-                log.exception("Error recreating domain info: id=%d", domid)
-                continue
-            self._add_domain(dominfo)
-        self.reap()
-        self.refresh()
-        self.domain_restarts()
+
+    def recreate_domain(self, xeninfo):
+        """Refresh initial domain info from db."""
+
+        dominfo = XendDomainInfo.recreate(xeninfo)
+        self._add_domain(dominfo)
+        return dominfo
+
 
     def dom0_setup(self):
         dom0 = self.domain_lookup(PRIV_DOMAIN)
         if not dom0:
-            dom0 = self.dom0_unknown()
-        dom0.dom0_init_store()    
+            dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN))
+        dom0.dom0_init_store()
         dom0.dom0_enforce_vcpus()
 
-    def close(self):
-        pass
 
     def _add_domain(self, info, notify=True):
         """Add a domain entry to the tables.
@@ -219,44 +186,30 @@
             if (domid is None) or (domid == id):
                 domdb.delete()
 
-    def reap(self):
-        """Look for domains that have crashed or stopped.
-        Tidy them up.
-        """
-        doms = self.xen_domains()
-        for d in doms.values():
-            if not is_dead(d):
-                continue
-            domid = d['dom']
-            dominfo = self.domains.get(domid)
-            if not dominfo or dominfo.is_terminated():
-                continue
-            log.debug('domain died name=%s domid=%d', dominfo.getName(), domid)
-            if d['crashed'] and xroot.get_enable_dump():
-                self.domain_dumpcore(domid)
-            if d['shutdown']:
-                reason = shutdown_reason(d['shutdown_reason'])
-                log.debug('shutdown name=%s id=%d reason=%s',
-                          dominfo.getName(), domid, reason)
-                if reason == 'suspend':
-                    dominfo.state_set("suspended")
-                    continue
-                if reason in ['poweroff', 'reboot']:
-                    self.domain_restart_schedule(domid, reason)
-            dominfo.destroy()
 
     def refresh(self):
         """Refresh domain list from Xen.
         """
         doms = self.xen_domains()
-        # Remove entries for domains that no longer exist.
-        # Update entries for existing domains.
         for d in self.domains.values():
             info = doms.get(d.getDomid())
             if info:
                 d.update(info)
-            elif not d.restart_pending():
+            else:
                 self._delete_domain(d.getDomid())
+        for d in doms:
+            if d not in self.domains:
+                try:
+                    self.recreate_domain(doms[d])
+                except:
+                    log.exception(
+                        "Failed to recreate information for domain %d.  "
+                        "Destroying it in the hope of recovery.", d)
+                    try:
+                        xc.domain_destroy(dom = d)
+                    except:
+                        log.exception('Destruction of %d failed.', d)
+
 
     def update_domain(self, id):
         """Update information for a single domain.
@@ -279,30 +232,6 @@
         """
         dominfo = XendDomainInfo.create(self.dbmap.getPath(), config)
         self._add_domain(dominfo)
-        return dominfo
-
-    def domain_restart(self, dominfo):
-        """Restart a domain.
-
-        @param dominfo: domain object
-        """
-        log.info("Restarting domain: name=%s id=%s", dominfo.getName(),
-                 dominfo.getDomid())
-        eserver.inject("xend.domain.restart",
-                       [dominfo.getName(), dominfo.getDomid(), "begin"])
-        try:
-            dominfo.restart()
-            log.info('Restarted domain name=%s id=%s', dominfo.getName(),
-                     dominfo.getDomid())
-            eserver.inject("xend.domain.restart",
-                           [dominfo.getName(), dominfo.getDomid(),
-                            "success"])
-            self.domain_unpause(dominfo.getDomid())
-        except Exception, ex:
-            log.exception("Exception restarting domain: name=%s id=%s",
-                          dominfo.getName(), dominfo.getDomid())
-            eserver.inject("xend.domain.restart",
-                           [dominfo.getName(), dominfo.getDomid(), "fail"])
         return dominfo
 
     def domain_configure(self, config):
@@ -345,33 +274,7 @@
         self.update_domain(id)
         return self.domains.get(id)
 
-    def dom0_unknown(self):
-        dom0 = PRIV_DOMAIN
-        uuid = None
-        info = self.xen_domain(dom0)
-        dompath = GetDomainPath(dom0)
-        if dompath:
-            vmpath = xstransact.Read(dompath, "vm")
-            if vmpath:
-                uuid = xstransact.Read(vmpath, "uuid")
-            if not uuid:
-                uuid = dompath.split("/")[-1]
-            dompath = "/".join(dompath.split("/")[0:-1])
-        if not uuid:
-            uuid = getUuid()
-            dompath = self.domroot
-        log.info("Creating entry for unknown xend domain: id=%d uuid=%s",
-                 dom0, uuid)
-        try:
-            dominfo = XendDomainInfo.recreate(uuid, dompath, dom0, info)
-            self._add_domain(dominfo)
-            return dominfo
-        except Exception, exn:
-            log.exception(exn)
-            raise XendError("Error recreating xend domain info: id=%d: %s" %
-                            (dom0, str(exn)))
-
-        
+
     def domain_lookup(self, id):
         return self.domains.get(id)
 
@@ -410,8 +313,9 @@
             return xc.domain_pause(dom=dominfo.getDomid())
         except Exception, ex:
             raise XendError(str(ex))
-    
-    def domain_shutdown(self, id, reason='poweroff'):
+
+
+    def domain_shutdown(self, domid, reason='poweroff'):
         """Shutdown domain (nicely).
          - poweroff: restart according to exit code and restart mode
          - reboot:   restart on exit
@@ -422,89 +326,13 @@
         @param id:     domain id
         @param reason: shutdown type: poweroff, reboot, suspend, halt
         """
-        dominfo = self.domain_lookup(id)
-        self.domain_restart_schedule(dominfo.getDomid(), reason, force=True)
-        eserver.inject('xend.domain.shutdown', [dominfo.getName(),
-                                                dominfo.getDomid(), reason])
-        if reason == 'halt':
-            reason = 'poweroff'
-        val = dominfo.shutdown(reason)
-        if not reason in ['suspend']:
-            self.domain_shutdowns()
-        return val
-
-
-    def domain_sysrq(self, id, key):
+        self.callInfo(domid, XendDomainInfo.shutdown, reason)
+
+
+    def domain_sysrq(self, domid, key):
         """Send a SysRq to the specified domain."""
-        return self.callInfo(id, XendDomainInfo.send_sysrq, key)
-
-
-    def domain_shutdowns(self):
-        """Process pending domain shutdowns.
-        Destroys domains whose shutdowns have timed out.
-        """
-        timeout = SHUTDOWN_TIMEOUT + 1
-        for dominfo in self.domains.values():
-            if not dominfo.shutdown_pending:
-                # domain doesn't need shutdown
-                continue
-            id = dominfo.getDomid()
-            left = dominfo.shutdown_time_left(SHUTDOWN_TIMEOUT)
-            if left <= 0:
-                # Shutdown expired - destroy domain.
-                try:
-                    log.info("Domain shutdown timeout expired: name=%s id=%s",
-                             dominfo.getName(), id)
-                    self.domain_destroy(id, reason=
-                                        dominfo.shutdown_pending['reason'])
-                except Exception:
-                    pass
-            else:
-                # Shutdown still pending.
-                timeout = min(timeout, left)
-        if timeout <= SHUTDOWN_TIMEOUT:
-            # Pending shutdowns remain - reschedule.
-            scheduler.later(timeout, self.domain_shutdowns)
-
-    def domain_restart_schedule(self, id, reason, force=False):
-        """Schedule a restart for a domain if it needs one.
-
-        @param id:     domain id
-        @param reason: shutdown reason
-        """
-        log.debug('domain_restart_schedule> %d %s %d', id, reason, force)
-        dominfo = self.domain_lookup(id)
-        if not dominfo:
-            return
-        restart = (force and reason == 'reboot') or 
dominfo.restart_needed(reason)
-        if restart:
-            log.info('Scheduling restart for domain: name=%s id=%s',
-                     dominfo.getName(), dominfo.getDomid())
-            eserver.inject("xend.domain.restart",
-                           [dominfo.getName(), dominfo.getDomid(),
-                            "schedule"])
-            dominfo.restarting()
-        else:
-            log.info('Cancelling restart for domain: name=%s id=%s',
-                     dominfo.getName(), dominfo.getDomid())
-            eserver.inject("xend.domain.restart",
-                           [dominfo.getName(), dominfo.getDomid(), "cancel"])
-            dominfo.restart_cancel()
-
-    def domain_restarts(self):
-        """Execute any scheduled domain restarts for domains that have gone.
-        """
-        doms = self.xen_domains()
-        for dominfo in self.domains.values():
-            if not dominfo.restart_pending():
-                continue
-            info = doms.get(dominfo.getDomid())
-            if info:
-                # Don't execute restart for domains still running.
-                continue
-            # Remove it from the restarts.
-            log.info('restarting: %s' % dominfo.getName())
-            self.domain_restart(dominfo)
+        return self.callInfo(domid, XendDomainInfo.send_sysrq, key)
+
 
     def domain_destroy(self, domid, reason='halt'):
         """Terminate domain immediately.
@@ -517,7 +345,6 @@
         if domid == PRIV_DOMAIN:
             raise XendError("Cannot destroy privileged domain %i" % domid)
         
-        self.domain_restart_schedule(domid, reason, force=True)
         dominfo = self.domain_lookup(domid)
         if dominfo:
             val = dominfo.destroy()
@@ -730,10 +557,15 @@
     ## private:
 
     def callInfo(self, domid, fn, *args, **kwargs):
-        self.refresh()
-        dominfo = self.domains.get(domid)
-        if dominfo:
-            return fn(dominfo, *args, **kwargs)
+        try:
+            self.refresh()
+            dominfo = self.domains.get(domid)
+            if dominfo:
+                return fn(dominfo, *args, **kwargs)
+        except XendError:
+            raise
+        except Exception, exn:
+            raise XendError(str(exn))
 
 
 def instance():
diff -r 299c2d5b5daf -r f9296342d9b2 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue Sep 27 11:30:13 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue Sep 27 12:53:06 2005
@@ -35,7 +35,9 @@
 from xen.xend.server.channel import EventChannel
 
 from xen.xend import image
+from xen.xend import scheduler
 from xen.xend import sxp
+from xen.xend import XendRoot
 from xen.xend.XendBootloader import bootloader
 from xen.xend.XendLogging import log
 from xen.xend.XendError import XendError, VmError
@@ -43,7 +45,7 @@
 
 from xen.xend.uuid import getUuid
 from xen.xend.xenstore.xstransact import xstransact
-from xen.xend.xenstore.xsutil import IntroduceDomain
+from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain
 
 """Shutdown code for poweroff."""
 DOMAIN_POWEROFF = 0
@@ -75,9 +77,6 @@
     RESTART_NEVER,
     ]
 
-STATE_RESTART_PENDING = 'pending'
-STATE_RESTART_BOOTING = 'booting'
-
 STATE_VM_OK         = "ok"
 STATE_VM_TERMINATED = "terminated"
 STATE_VM_SUSPENDED  = "suspended"
@@ -92,7 +91,29 @@
 SIF_TPM_BE_DOMAIN = (1<<7)
 
 
+SHUTDOWN_TIMEOUT = 30
+
+
+DOMROOT = '/domain'
+VMROOT  = '/domain'
+
+
 xc = xen.lowlevel.xc.new()
+xroot = XendRoot.instance()
+
+
+## Configuration entries that we expect to round-trip -- be read from the
+# config file or xc, written to save-files (i.e. through sxpr), and reused as
+# config on restart or restore, all without munging.  Some configuration
+# entries are munged for backwards compatibility reasons, or because they
+# don't come out of xc in the same form as they are specified in the config
+# file, so those are handled separately.
+ROUNDTRIPPING_CONFIG_ENTRIES = [
+        ('name',         str),
+        ('ssidref',      int),
+        ('cpu_weight',   float),
+        ('bootloader',   str)
+    ]
 
 
 def domain_exists(name):
@@ -145,22 +166,43 @@
         
         vm = cls(getUuid(), dompath, cls.parseConfig(config))
         vm.construct()
+        vm.refreshShutdown()
         return vm
 
     create = classmethod(create)
 
 
-    def recreate(cls, uuid, dompath, domid, info):
-        """Create the VM object for an existing domain.
-
-        @param dompath:   The path to all domain information
-        @param info:      domain info from xc
-        """
-
-        log.debug("XendDomainInfo.recreate(%s, %s, %s, %s)", uuid, dompath,
-                  domid, info)
-
-        return cls(uuid, dompath, info, domid, True)
+    def recreate(cls, xeninfo):
+        """Create the VM object for an existing domain."""
+
+        log.debug("XendDomainInfo.recreate(%s)", xeninfo)
+
+        domid = xeninfo['dom']
+        try:
+            dompath = GetDomainPath(domid)
+            if not dompath:
+                raise XendError(
+                    'No domain path in store for existing domain %d' % domid)
+            vmpath = xstransact.Read(dompath, "vm")
+            if not vmpath:
+                raise XendError(
+                    'No vm path in store for existing domain %d' % domid)
+            uuid = xstransact.Read(vmpath, "uuid")
+            if not uuid:
+                raise XendError(
+                    'No vm/uuid path in store for existing domain %d' % domid)
+
+            dompath = "/".join(dompath.split("/")[0:-1])
+        except Exception, exn:
+            log.warn(str(exn))
+            dompath = DOMROOT
+            uuid = getUuid()
+
+        log.info("Recreating domain %d, uuid %s", domid, uuid)
+
+        vm = cls(uuid, dompath, xeninfo, domid, True)
+        vm.refreshShutdown(xeninfo)
+        return vm
 
     recreate = classmethod(recreate)
 
@@ -183,14 +225,12 @@
         except TypeError, exn:
             raise VmError('Invalid ssidref in config: %s' % exn)
 
-        log.debug('restoring with ssidref = %d' % ssidref)
-
         vm = cls(uuid, dompath, cls.parseConfig(config),
                  xc.domain_create(ssidref = ssidref))
-        vm.clear_shutdown()
         vm.create_channel()
         vm.configure()
         vm.exportToDB()
+        vm.refreshShutdown()
         return vm
 
     restore = classmethod(restore)
@@ -214,33 +254,28 @@
         log.debug("parseConfig: config is %s" % str(config))
 
         result = {}
-        imagecfg = "()"
-
-        result['name']         = get_cfg('name')
-        result['ssidref']      = get_cfg('ssidref',    int)
+
+        for e in ROUNDTRIPPING_CONFIG_ENTRIES:
+            result[e[0]] = get_cfg(e[0], e[1])
+
         result['memory']       = get_cfg('memory',     int)
         result['mem_kb']       = get_cfg('mem_kb',     int)
         result['maxmem']       = get_cfg('maxmem',     int)
         result['maxmem_kb']    = get_cfg('maxmem_kb',  int)
         result['cpu']          = get_cfg('cpu',        int)
-        result['cpu_weight']   = get_cfg('cpu_weight', float)
-        result['bootloader']   = get_cfg('bootloader')
         result['restart_mode'] = get_cfg('restart')
+        result['image']        = get_cfg('image')
 
         try:
-            imagecfg = get_cfg('image')
-
-            if imagecfg:
-                result['image'] = imagecfg
-                result['vcpus'] = int(sxp.child_value(imagecfg, 'vcpus',
-                                                      1))
+            if result['image']:
+                result['vcpus'] = int(sxp.child_value(result['image'],
+                                                      'vcpus', 1))
             else:
                 result['vcpus'] = 1
         except TypeError, exn:
             raise VmError(
                 'Invalid configuration setting: vcpus = %s: %s' %
-                (sxp.child_value(imagecfg, 'vcpus', 1),
-                 str(exn)))
+                (sxp.child_value(result['image'], 'vcpus', 1), str(exn)))
 
         result['backend'] = []
         for c in sxp.children(config, 'backend'):
@@ -283,26 +318,26 @@
         self.store_mfn = None
         self.console_channel = None
         self.console_mfn = None
-        
-        #todo: state: running, suspended
+
         self.state = STATE_VM_OK
         self.state_updated = threading.Condition()
-        self.shutdown_pending = None
-
-        self.restart_state = None
-        self.restart_time = None
-        self.restart_count = 0
-        
+
         self.writeVm("uuid", self.uuid)
         self.storeDom("vm", self.path)
 
 
     def augmentInfo(self):
+        """Augment self.info, as given to us through {@link #recreate}, with
+        values taken from the store.  This recovers those values known to xend
+        but not to the hypervisor.
+        """
         def useIfNeeded(name, val):
             if not self.infoIsSet(name) and val is not None:
                 self.info[name] = val
 
         params = (("name", str),
+                  ("restart-mode", str),
+                  ("image",        str),
                   ("start-time", float))
 
         from_store = self.gatherVm(*params)
@@ -322,12 +357,17 @@
             defaultInfo('name',         lambda: "Domain-%d" % self.domid)
             defaultInfo('ssidref',      lambda: 0)
             defaultInfo('restart_mode', lambda: RESTART_ONREBOOT)
+            defaultInfo('cpu',          lambda: None)
             defaultInfo('cpu_weight',   lambda: 1.0)
             defaultInfo('bootloader',   lambda: None)
             defaultInfo('backend',      lambda: [])
             defaultInfo('device',       lambda: [])
+            defaultInfo('image',        lambda: None)
 
             self.check_name(self.info['name'])
+
+            if isinstance(self.info['image'], str):
+                self.info['image'] = sxp.from_string(self.info['image'])
 
             # Internally, we keep only maxmem_KiB, and not maxmem or maxmem_kb
             # (which come from outside, and are in MiB and KiB respectively).
@@ -451,17 +491,16 @@
             'domid':              str(self.domid),
             'uuid':               self.uuid,
 
-            'restart_time':       str(self.restart_time),
-
-            'xend/state':         self.state,
-            'xend/restart_count': str(self.restart_count),
             'xend/restart_mode':  str(self.info['restart_mode']),
 
             'memory/target':      str(self.info['memory_KiB'])
             }
 
         for (k, v) in self.info.items():
-            to_store[k] = str(v)
+            if v:
+                to_store[k] = str(v)
+
+        to_store['image'] = sxp.to_string(self.info['image'])
 
         log.debug("Storing %s" % str(to_store))
 
@@ -511,6 +550,88 @@
     def getBackendFlags(self):
         return reduce(lambda x, y: x | backendFlags[y],
                       self.info['backend'], 0)
+
+
+    def refreshShutdown(self, xeninfo = None):
+        if xeninfo is None:
+            xeninfo = dom_get(self.domid)
+            if xeninfo is None:
+                # The domain no longer exists.  This will occur if we have
+                # scheduled a timer to check for shutdown timeouts and the
+                # shutdown succeeded.
+                return
+
+        if xeninfo['dying']:
+            # Dying means that a domain has been destroyed, but has not yet
+            # been cleaned up by Xen.  This could persist indefinitely if,
+            # for example, another domain has some of its pages mapped.
+            # We might like to diagnose this problem in the future, but for
+            # now all we can sensibly do is ignore it.
+            pass
+
+        elif xeninfo['crashed']:
+            log.warn('Domain has crashed: name=%s id=%d.',
+                     self.info['name'], self.domid)
+
+            if xroot.get_enable_dump():
+                self.dumpCore()
+
+            self.maybeRestart('crashed')
+
+        elif xeninfo['shutdown']:
+            reason = shutdown_reason(xeninfo['shutdown_reason'])
+
+            log.info('Domain has shutdown: name=%s id=%d reason=%s.',
+                     self.info['name'], self.domid, reason)
+
+            self.clearRestart()
+
+            if reason == 'suspend':
+                self.state_set(STATE_VM_SUSPENDED)
+                # Don't destroy the domain.  XendCheckpoint will do this once
+                # it has finished.
+            elif reason in ['poweroff', 'reboot']:
+                self.maybeRestart(reason)
+            else:
+                self.destroy()
+
+        else:
+            # Domain is alive.  If we are shutting it down, then check
+            # the timeout on that, and destroy it if necessary.
+
+            sst = self.readVm('xend/shutdown_start_time')
+            if sst:
+                sst = float(sst)
+                timeout = SHUTDOWN_TIMEOUT - time.time() + sst
+                if timeout < 0:
+                    log.info(
+                        "Domain shutdown timeout expired: name=%s id=%s",
+                        self.info['name'], self.domid)
+                    self.destroy()
+                else:
+                    log.debug(
+                        "Scheduling refreshShutdown on domain %d in %ds.",
+                        self.domid, timeout)
+                    scheduler.later(timeout, self.refreshShutdown)
+
+
+    def shutdown(self, reason):
+        if not reason in shutdown_reasons.values():
+            raise XendError('invalid reason:' + reason)
+        self.storeVm("control/shutdown", reason)
+        if not reason in ['suspend']:
+            self.storeVm('xend/shutdown_start_time', time.time())
+
+
+    def clearRestart(self):
+        self.removeVm("xend/shutdown_start_time")
+
+
+    def maybeRestart(self, reason):
+        if self.restart_needed(reason):
+            self.restart()
+        else:
+            self.destroy()
 
 
     def dumpCore(self):
@@ -526,18 +647,32 @@
                       self.domid, self.info['name'], str(exn))
 
 
+    def closeChannel(self, channel, entry):
+        """Close the given channel, if set, and remove the given entry in the
+        store.  Nothrow guarantee."""
+        
+        try:
+            try:
+                if channel:
+                    channel.close()
+            finally:
+                self.removeDom(entry)
+        except Exception, exn:
+            log.exception(exn)
+        
+
     def closeStoreChannel(self):
         """Close the store channel, if any.  Nothrow guarantee."""
-        
-        try:
-            if self.store_channel:
-                try:
-                    self.store_channel.close()
-                    self.removeDom("store/port")
-                finally:
-                    self.store_channel = None
-        except Exception, exn:
-            log.exception(exn)
+
+        self.closeChannel(self.store_channel, "store/port")
+        self.store_channel = None
+
+
+    def closeConsoleChannel(self):
+        """Close the console channel, if any.  Nothrow guarantee."""
+
+        self.closeChannel(self.console_channel, "console/port")
+        self.console_channel = None
 
 
     def setConsoleRef(self, ref):
@@ -566,10 +701,13 @@
             
         self.info.update(info)
         self.validateInfo()
+        self.refreshShutdown(info)
 
         log.debug("XendDomainInfo.update done on domain %d: %s", self.domid,
                   self.info)
 
+
+    ## private:
 
     def state_set(self, state):
         self.state_updated.acquire()
@@ -577,13 +715,16 @@
             self.state = state
             self.state_updated.notifyAll()
         self.state_updated.release()
-        self.exportToDB()
+
+
+    ## public:
 
     def state_wait(self, state):
         self.state_updated.acquire()
         while self.state != state:
             self.state_updated.wait()
         self.state_updated.release()
+
 
     def __str__(self):
         s = "<domain"
@@ -619,47 +760,49 @@
 
     def sxpr(self):
         sxpr = ['domain',
-                ['domid', self.domid],
-                ['name', self.info['name']],
-                ['memory', self.info['memory_KiB'] / 1024],
-                ['ssidref', self.info['ssidref']]]
-        if self.uuid:
-            sxpr.append(['uuid', self.uuid])
-        if self.info:
-            sxpr.append(['maxmem', self.info['maxmem_KiB'] / 1024])
-
-            if self.infoIsSet('device'):
-                for (_, c) in self.info['device']:
-                    sxpr.append(['device', c])
-
-            def stateChar(name):
-                if name in self.info:
-                    if self.info[name]:
-                        return name[0]
-                    else:
-                        return '-'
+                ['domid',   self.domid],
+                ['uuid',    self.uuid],
+                ['memory',  self.info['memory_KiB'] / 1024]]
+
+        for e in ROUNDTRIPPING_CONFIG_ENTRIES:
+            if self.infoIsSet(e[0]):
+                sxpr.append([e[0], self.info[e[0]]])
+        
+        sxpr.append(['maxmem', self.info['maxmem_KiB'] / 1024])
+
+        if self.infoIsSet('image'):
+            sxpr.append(['image', self.info['image']])
+
+        if self.infoIsSet('device'):
+            for (_, c) in self.info['device']:
+                sxpr.append(['device', c])
+
+        def stateChar(name):
+            if name in self.info:
+                if self.info[name]:
+                    return name[0]
                 else:
-                    return '?'
-
-            state = reduce(
-                lambda x, y: x + y,
-                map(stateChar,
-                    ['running', 'blocked', 'paused', 'shutdown', 'crashed']))
-
-            sxpr.append(['state', state])
-            if self.infoIsSet('shutdown'):
-                reason = shutdown_reason(self.info['shutdown_reason'])
-                sxpr.append(['shutdown_reason', reason])
-            if self.infoIsSet('cpu_time'):
-                sxpr.append(['cpu_time', self.info['cpu_time']/1e9])    
-            sxpr.append(['vcpus', self.info['vcpus']])
-            sxpr.append(['cpumap', self.info['cpumap']])
-            if self.infoIsSet('vcpu_to_cpu'):
-                sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
-                # build a string, using '|' to separate items, show only up
-                # to number of vcpus in domain, and trim the trailing '|'
-                sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x)+'|',
-                            
self.info['vcpu_to_cpu'][0:self.info['vcpus']]))[:-1]])
+                    return '-'
+            else:
+                return '?'
+
+        state = reduce(
+            lambda x, y: x + y,
+            map(stateChar,
+                ['running', 'blocked', 'paused', 'shutdown', 'crashed',
+                 'dying']))
+
+        sxpr.append(['state', state])
+        if self.infoIsSet('shutdown'):
+            reason = shutdown_reason(self.info['shutdown_reason'])
+            sxpr.append(['shutdown_reason', reason])
+        if self.infoIsSet('cpu_time'):
+            sxpr.append(['cpu_time', self.info['cpu_time']/1e9])
+        sxpr.append(['vcpus', self.info['vcpus']])
+        sxpr.append(['cpumap', self.info['cpumap']])
+        if self.infoIsSet('vcpu_to_cpu'):
+            sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
+            sxpr.append(['vcpu_to_cpu', self.prettyVCpuMap()])
             
         if self.infoIsSet('start_time'):
             up_time =  time.time() - self.info['start_time']
@@ -674,13 +817,16 @@
             sxpr.append(['console_channel', self.console_channel.sxpr()])
         if self.console_mfn:
             sxpr.append(['console_mfn', self.console_mfn])
-        if self.restart_count:
-            sxpr.append(['restart_count', self.restart_count])
-        if self.restart_state:
-            sxpr.append(['restart_state', self.restart_state])
-        if self.restart_time:
-            sxpr.append(['restart_time', str(self.restart_time)])
+
         return sxpr
+
+
+    ## private:
+
+    def prettyVCpuMap(self):
+        return '|'.join(map(str,
+                            self.info['vcpu_to_cpu'][0:self.info['vcpus']]))
+
 
     def check_name(self, name):
         """Check if a vm name is valid. Valid names contain alphabetic 
characters,
@@ -719,11 +865,19 @@
         @param config: configuration
         @raise: VmError on error
         """
-        # todo - add support for scheduling params?
+
+        log.debug('XendDomainInfo.construct: %s %s',
+                  str(self.domid),
+                  str(self.info['ssidref']))
+
+        self.domid = xc.domain_create(dom = 0, ssidref = self.info['ssidref'])
+
+        if self.domid <= 0:
+            raise VmError('Creating domain failed: name=%s' %
+                          self.info['name'])
+
         try:
             self.initDomain()
-
-            # Create domain devices.
             self.construct_image()
             self.configure()
             self.exportToDB()
@@ -737,30 +891,23 @@
 
 
     def initDomain(self):
-        log.debug('XendDomainInfo.initDomain: %s %s %s %s)',
+        log.debug('XendDomainInfo.initDomain: %s %s %s',
                   str(self.domid),
                   str(self.info['memory_KiB']),
-                  str(self.info['ssidref']),
                   str(self.info['cpu_weight']))
 
-        self.domid = xc.domain_create(dom = self.domid or 0,
-                                      ssidref = self.info['ssidref'])
-
-        if 'image' not in self.info:
+        if not self.infoIsSet('image'):
             raise VmError('Missing image in configuration')
 
         self.image = image.create(self,
                                   self.info['image'],
                                   self.info['device'])
 
-        if self.domid <= 0:
-            raise VmError('Creating domain failed: name=%s' %
-                          self.info['name'])
-
         if self.info['bootloader']:
             self.image.handleBootloading()
 
         xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
+        # XXX Merge with configure_maxmem?
         m = self.image.getDomainMemory(self.info['memory_KiB'])
         xc.domain_setmaxmem(self.domid, m)
         xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
@@ -794,6 +941,8 @@
         self.configure_vcpus(self.info['vcpus'])
 
 
+    ## public:
+
     def delete(self):
         """Delete the vm's db.
         """
@@ -803,48 +952,36 @@
             log.warning("error in domain db delete: %s", ex)
 
 
-    def destroy_domain(self):
-        """Destroy the vm's domain.
-        The domain will not finally go away unless all vm
-        devices have been released.
-        """
-        if self.domid is None:
-            return
-        try:
-            xc.domain_destroy(dom=self.domid)
-        except Exception, err:
-            log.exception("Domain destroy failed: %s", self.info['name'])
-
     def cleanup(self):
-        """Cleanup vm resources: release devices.
-        """
-        self.state = STATE_VM_TERMINATED
+        """Cleanup vm resources: release devices.  Nothrow guarantee."""
+
+        self.state_set(STATE_VM_TERMINATED)
         self.release_devices()
         self.closeStoreChannel()
-        if self.console_channel:
-            # notify processes using this console?
-            try:
-                self.console_channel.close()
-                self.console_channel = None
-            except:
-                pass
+        self.closeConsoleChannel()
+
         if self.image:
             try:
                 self.image.destroy()
-                self.image = None
             except:
-                pass
+                log.exception(
+                    "XendDomainInfo.cleanup: image.destroy() failed.")
+            self.image = None
+
 
     def destroy(self):
-        """Cleanup vm and destroy domain.
-        """
-
-        log.debug("XendDomainInfo.destroy")
-
-        self.destroy_domain()
+        """Cleanup vm and destroy domain.  Nothrow guarantee."""
+
+        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
+
         self.cleanup()
-        self.exportToDB()
-        return 0
+
+        try:
+            if self.domid is not None:
+                xc.domain_destroy(dom=self.domid)
+        except Exception, exn:
+            log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.")
+
 
     def is_terminated(self):
         """Check if a domain has been terminated.
@@ -852,8 +989,7 @@
         return self.state == STATE_VM_TERMINATED
 
     def release_devices(self):
-        """Release all vm devices.
-        """
+        """Release all vm devices.  Nothrow guarantee."""
 
         while True:
             t = xstransact("%s/device" % self.path)
@@ -865,8 +1001,8 @@
                         # Log and swallow any exceptions in removal --
                         # there's nothing more we can do.
                         log.exception(
-                           "Device release failed: %s; %s; %s; %s" %
-                            (self.info['name'], n, d, str(ex)))
+                           "Device release failed: %s; %s; %s",
+                           self.info['name'], n, d)
             if t.commit():
                 break
 
@@ -902,8 +1038,7 @@
 
         @raise: VmError for invalid devices
         """
-        if not self.rebooting():
-            self.create_configured_devices()
+        self.create_configured_devices()
         if self.image:
             self.image.createDeviceModel()
 
@@ -942,23 +1077,6 @@
             return reason == 'reboot'
         return False
 
-    def restart_cancel(self):
-        """Cancel a vm restart.
-        """
-        self.restart_state = None
-
-    def restarting(self):
-        """Put the vm into restart mode.
-        """
-        self.restart_state = STATE_RESTART_PENDING
-
-    def restart_pending(self):
-        """Test if the vm has a pending restart.
-        """
-        return self.restart_state == STATE_RESTART_PENDING
-
-    def rebooting(self):
-        return self.restart_state == STATE_RESTART_BOOTING
 
     def restart_check(self):
         """Check if domain restart is OK.
@@ -976,23 +1094,37 @@
         self.restart_time = tnow
         self.restart_count += 1
 
+
     def restart(self):
-        """Restart the domain after it has exited.
-        Reuses the domain id
-
-        """
+        """Restart the domain after it has exited. """
+
+        #            self.restart_check()
+        self.cleanup()
+
+        config = self.sxpr()
+
+        if self.readVm('xend/restart_in_progress'):
+            log.error('Xend failed during restart of domain %d.  '
+                      'Refusing to restart to avoid loops.',
+                      self.domid)
+            self.destroy()
+            return
+
+        self.writeVm('xend/restart_in_progress', 'True')
+
         try:
-            self.clear_shutdown()
-            self.state = STATE_VM_OK
-            self.shutdown_pending = None
-            self.restart_check()
-            self.exportToDB()
-            self.restart_state = STATE_RESTART_BOOTING
-            self.configure_bootloader()
-            self.construct()
-            self.exportToDB()
+            self.destroy()
+            try:
+                xd = get_component('xen.xend.XendDomain')
+                xd.domain_unpause(xd.domain_create(config).getDomid())
+            except Exception, exn:
+                log.exception('Failed to restart domain %d.', self.domid)
         finally:
-            self.restart_state = None
+            self.removeVm('xend/restart_in_progress')
+            
+        # self.configure_bootloader()
+        #        self.exportToDB()
+
 
     def configure_bootloader(self):
         if not self.info['bootloader']:
@@ -1006,7 +1138,8 @@
         if dev:
             disk = sxp.child_value(dev, "uname")
             fn = blkdev_uname_to_file(disk)
-            blcfg = bootloader(self.info['bootloader'], fn, 1, 
self.info['vcpus'])
+            blcfg = bootloader(self.info['bootloader'], fn, 1,
+                               self.info['vcpus'])
         if blcfg is None:
             msg = "Had a bootloader specified, but can't find disk"
             log.error(msg)
@@ -1023,7 +1156,9 @@
 
 
     def configure_maxmem(self):
-        xc.domain_setmaxmem(self.domid, maxmem_kb = self.info['maxmem_KiB'])
+        if self.image:
+            m = self.image.getDomainMemory(self.info['memory_KiB'])
+            xc.domain_setmaxmem(self.domid, maxmem_kb = m)
 
 
     def vcpu_hotplug(self, vcpu, state):
@@ -1038,23 +1173,8 @@
             availability = "online"
         self.storeVm("cpu/%d/availability" % vcpu, availability)
 
-    def shutdown(self, reason):
-        if not reason in shutdown_reasons.values():
-            raise XendError('invalid reason:' + reason)
-        self.storeVm("control/shutdown", reason)
-        if not reason in ['suspend']:
-            self.shutdown_pending = {'start':time.time(), 'reason':reason}
-
-    def clear_shutdown(self):
-        self.removeVm("control/shutdown")
-
     def send_sysrq(self, key=0):
         self.storeVm("control/sysrq", '%c' % key)
-
-    def shutdown_time_left(self, timeout):
-        if not self.shutdown_pending:
-            return 0
-        return timeout - (time.time() - self.shutdown_pending['start'])
 
     def dom0_init_store(self):
         if not self.store_channel:
@@ -1078,8 +1198,6 @@
     def dom0_enforce_vcpus(self):
         dom = 0
         # get max number of vcpus to use for dom0 from config
-        from xen.xend import XendRoot
-        xroot = XendRoot.instance()
         target = int(xroot.get_dom0_vcpus())
         log.debug("number of vcpus to use is %d" % (target))
    

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] Detangle the restart/reboot/halt/save/restore code. There is now one point, Xen patchbot -unstable <=