Hi John,
Please don't reset a domain after the domain has been unpaused.
Could you move the reset into xend?
e.g.
try:
log.info("Domain core dump requested for domain %s (%d) "
"live=%d crash=%d.",
dominfo.getName(), dominfo.getDomid(), live, crash)
dominfo.dumpCore(filename)
if crash:
self.domain_destroy(domid)
+ elif reset:
+ self.domain_reset(domid)
Best regards,
Kan
Wed, 11 Mar 2009 18:45:49 -0700, John Levon wrote:
># HG changeset patch
># User John Levon <john.levon@xxxxxxx>
># Date 1236822336 25200
># Node ID 88b3a560b0fb2a5adca969d9b192220d64dfd105
># Parent e92a56f3581975496d5d9f250823e46493e58548
>Domain core-dumping fixes
>
>The code was attempting to use the domain's current number of pages
>(info.nr_pages) as a maximum index. We then walk the memory map and can
>easily over-write past the end of the nr_pages-sized array, if the
>domain has more pages mapped in than earlier (live dump). Restrict
>ourselves to the current number of pages.
>
>Also fix the dump core method in xend to actually implement the crash
>and live options. In particular this means that xend clients other than
>xm now get non-live dumps by default.
>
>Signed-off-by: John Levon <john.levon@xxxxxxx>
>
>diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c
>--- a/tools/libxc/xc_core.c
>+++ b/tools/libxc/xc_core.c
>@@ -518,7 +518,17 @@ xc_domain_dumpcore_via_callback(int xc_h
> if ( sts != 0 )
> goto out;
>
>+ /*
>+ * Note: this is the *current* number of pages and may change under
>+ * a live dump-core. We'll just take this value, and if more pages
>+ * exist, we'll skip them. If there's less, then we'll just not use
>+ * all the array...
>+ *
>+ * We don't want to use the total potential size of the memory map
>+ * since that is usually much higher than info.nr_pages.
>+ */
> nr_pages = info.nr_pages;
>+
> if ( !auto_translated_physmap )
> {
> /* obtain p2m table */
>@@ -770,7 +780,7 @@ xc_domain_dumpcore_via_callback(int xc_h
>
> pfn_start = memory_map[map_idx].addr >> PAGE_SHIFT;
> pfn_end = pfn_start + (memory_map[map_idx].size >> PAGE_SHIFT);
>- for ( i = pfn_start; i < pfn_end; i++ )
>+ for ( i = pfn_start; i < pfn_end && j < nr_pages; i++ )
> {
> uint64_t gmfn;
> void *vaddr;
>diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
>--- a/tools/libxc/xenctrl.h
>+++ b/tools/libxc/xenctrl.h
>@@ -158,7 +158,7 @@ typedef struct xc_dominfo {
> paused:1, blocked:1, running:1,
> hvm:1, debugged:1;
> unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
>- unsigned long nr_pages;
>+ unsigned long nr_pages; /* current number, not maximum */
> unsigned long shared_info_frame;
> uint64_t cpu_time;
> unsigned long max_memkb;
>diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py
>--- a/tools/python/xen/xend/XendDomain.py
>+++ b/tools/python/xen/xend/XendDomain.py
>@@ -1252,13 +1252,23 @@ class XendDomain:
> POWER_STATE_NAMES[DOM_STATE_PAUSED],
> POWER_STATE_NAMES[dominfo._stateGet()])
>
>- try:
>- log.info("Domain core dump requested for domain %s (%d) "
>- "live=%d crash=%d.",
>- dominfo.getName(), dominfo.getDomid(), live, crash)
>- return dominfo.dumpCore(filename)
>- except Exception, ex:
>- raise XendError(str(ex))
>+ dopause = (not live and dominfo._stateGet() == DOM_STATE_RUNNING)
>+ if dopause:
>+ dominfo.pause()
>+
>+ try:
>+ try:
>+ log.info("Domain core dump requested for domain %s (%d) "
>+ "live=%d crash=%d.",
>+ dominfo.getName(), dominfo.getDomid(), live, crash)
>+ dominfo.dumpCore(filename)
>+ if crash:
>+ self.domain_destroy(domid)
>+ except Exception, ex:
>+ raise XendError(str(ex))
>+ finally:
>+ if dopause and not crash:
>+ dominfo.unpause()
>
> def domain_destroy(self, domid):
> """Terminate domain immediately.
>diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
>--- a/tools/python/xen/xend/XendDomainInfo.py
>+++ b/tools/python/xen/xend/XendDomainInfo.py
>@@ -2036,26 +2036,31 @@ class XendDomainInfo:
> @raise: XendError if core dumping failed.
> """
>
>+ if not corefile:
>+ this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
>+ corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
>+ self.info['name_label'], self.domid)
>+
>+ if os.path.isdir(corefile):
>+ raise XendError("Cannot dump core in a directory: %s" %
>+ corefile)
>+
> try:
>- if not corefile:
>- this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
>- corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
>- self.info['name_label'], self.domid)
>-
>- if os.path.isdir(corefile):
>- raise XendError("Cannot dump core in a directory: %s" %
>- corefile)
>-
>- self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
>- xc.domain_dumpcore(self.domid, corefile)
>+ try:
>+ self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
>+ xc.domain_dumpcore(self.domid, corefile)
>+ except RuntimeError, ex:
>+ corefile_incomp = corefile+'-incomplete'
>+ try:
>+ os.rename(corefile, corefile_incomp)
>+ except:
>+ pass
>+
>+ log.error("core dump failed: id = %s name = %s: %s",
>+ self.domid, self.info['name_label'], str(ex))
>+ raise XendError("Failed to dump core: %s" % str(ex))
>+ finally:
> self._removeVm(DUMPCORE_IN_PROGRESS)
>- except RuntimeError, ex:
>- corefile_incomp = corefile+'-incomplete'
>- os.rename(corefile, corefile_incomp)
>- self._removeVm(DUMPCORE_IN_PROGRESS)
>- log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
>- self.domid, self.info['name_label'])
>- raise XendError("Failed to dump core: %s" % str(ex))
>
> #
> # Device creation/deletion functions
>diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
>--- a/tools/python/xen/xm/main.py
>+++ b/tools/python/xen/xm/main.py
>@@ -1351,22 +1351,10 @@ def xm_dump_core(args):
> else:
> filename = None
>
>- if not live:
>- ds = server.xend.domain.pause(dom, True)
>-
>- try:
>- print "Dumping core of domain: %s ..." % str(dom)
>- server.xend.domain.dump(dom, filename, live, crash)
>-
>- if crash:
>- print "Destroying domain: %s ..." % str(dom)
>- server.xend.domain.destroy(dom)
>- elif reset:
>- print "Resetting domain: %s ..." % str(dom)
>- server.xend.domain.reset(dom)
>- finally:
>- if not live and not crash and not reset and ds == DOM_STATE_RUNNING:
>- server.xend.domain.unpause(dom)
>+ print "Dumping core of domain: %s ..." % str(dom)
>+ server.xend.domain.dump(dom, filename, live, crash)
>+ if reset:
>+ server.xend.domain.reset(dom)
>
> def xm_rename(args):
> arg_check(args, "rename", 2)
>
>_______________________________________________
>Xen-devel mailing list
>Xen-devel@xxxxxxxxxxxxxxxxxxx
>http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|