WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Watchdog timers for domains

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Watchdog timers for domains
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 04 Jun 2010 03:45:36 -0700
Delivery-date: Fri, 04 Jun 2010 03:48:25 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1275647723 -3600
# Node ID 497bda800505b740c9aea42647031fc27abd2e8c
# Parent  d4a91417a0231ff4a434d1a5b46749acda1dfe13
Watchdog timers for domains

Each domain is allowed to set, reset and disable its timers; when any
timer runs out the domain is killed.

Patch from Christian Limpach <Christian.Limpach@xxxxxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
 .hgignore                  |    1 
 tools/libxc/xc_domain.c    |   28 +++++++++++++
 tools/libxc/xenctrl.h      |    4 +
 tools/misc/Makefile        |    8 ++-
 tools/misc/xen-watchdog    |   59 +++++++++++++++++++++++++++
 tools/misc/xenwatchdogd.c  |   96 +++++++++++++++++++++++++++++++++++++++++++++
 xen/common/domain.c        |   11 ++++-
 xen/common/keyhandler.c    |    5 ++
 xen/common/schedule.c      |   85 +++++++++++++++++++++++++++++++++++++++
 xen/common/shutdown.c      |    9 ++++
 xen/include/public/sched.h |   17 +++++++
 xen/include/xen/sched.h    |   11 ++++-
 12 files changed, 328 insertions(+), 6 deletions(-)

diff -r d4a91417a023 -r 497bda800505 .hgignore
--- a/.hgignore Fri Jun 04 10:46:32 2010 +0100
+++ b/.hgignore Fri Jun 04 11:35:23 2010 +0100
@@ -237,6 +237,7 @@
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
 ^tools/xcutils/readnotes$
+^tools/misc/xenwatchdogd$
 ^tools/xenfb/sdlfb$
 ^tools/xenfb/vncfb$
 ^tools/xenmon/xentrace_setmask$
diff -r d4a91417a023 -r 497bda800505 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/libxc/xc_domain.c   Fri Jun 04 11:35:23 2010 +0100
@@ -364,6 +364,34 @@ int xc_vcpu_getcontext(xc_interface *xch
     unlock_pages(ctxt, sz);
 
     return rc;
+}
+
+int xc_watchdog(xc_interface *xch,
+                uint32_t id,
+                uint32_t timeout)
+{
+    int ret = -1;
+    sched_watchdog_t arg;
+    DECLARE_HYPERCALL;
+
+    hypercall.op     = __HYPERVISOR_sched_op;
+    hypercall.arg[0] = (unsigned long)SCHEDOP_watchdog;
+    hypercall.arg[1] = (unsigned long)&arg;
+    arg.id = id;
+    arg.timeout = timeout;
+
+    if ( lock_pages(&arg, sizeof(arg)) != 0 )
+    {
+        PERROR("Could not lock memory for Xen hypercall");
+        goto out1;
+    }
+
+    ret = do_xen_hypercall(xch, &hypercall);
+
+    unlock_pages(&arg, sizeof(arg));
+
+ out1:
+    return ret;
 }
 
 
diff -r d4a91417a023 -r 497bda800505 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/libxc/xenctrl.h     Fri Jun 04 11:35:23 2010 +0100
@@ -331,6 +331,10 @@ int xc_domain_shutdown(xc_interface *xch
 int xc_domain_shutdown(xc_interface *xch,
                        uint32_t domid,
                        int reason);
+
+int xc_watchdog(xc_interface *xch,
+               uint32_t id,
+               uint32_t timeout);
 
 int xc_vcpu_setaffinity(xc_interface *xch,
                         uint32_t domid,
diff -r d4a91417a023 -r 497bda800505 tools/misc/Makefile
--- a/tools/misc/Makefile       Fri Jun 04 10:46:32 2010 +0100
+++ b/tools/misc/Makefile       Fri Jun 04 11:35:23 2010 +0100
@@ -10,7 +10,7 @@ CFLAGS   += $(INCLUDES)
 
 HDRS     = $(wildcard *.h)
 
-TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
 TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx
 TARGETS := $(TARGETS-y)
 
@@ -22,7 +22,7 @@ INSTALL_BIN-$(CONFIG_X86) += xen-detect
 INSTALL_BIN-$(CONFIG_X86) += xen-detect
 INSTALL_BIN := $(INSTALL_BIN-y)
 
-INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool
+INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd
 INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx
 INSTALL_SBIN := $(INSTALL_SBIN-y)
 
@@ -37,8 +37,10 @@ install: build
 install: build
        $(INSTALL_DIR) $(DESTDIR)$(BINDIR)
        $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+       $(INSTALL_DIR) $(DESTDIR)$(CONFIG_DIR)/init.d
        $(INSTALL_PYTHON_PROG) $(INSTALL_BIN) $(DESTDIR)$(BINDIR)
        $(INSTALL_PYTHON_PROG) $(INSTALL_SBIN) $(DESTDIR)$(SBINDIR)
+       $(INSTALL_PROG) xen-watchdog $(DESTDIR)$(CONFIG_DIR)/init.d
        set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d install-recurse; done
 
 .PHONY: clean
@@ -49,7 +51,7 @@ clean:
 %.o: %.c $(HDRS) Makefile
        $(CC) -c $(CFLAGS) -o $@ $<
 
-xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool: %: %.o Makefile
+xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool xenwatchdogd: %: %.o Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
 
 gtraceview: %: %.o Makefile
diff -r d4a91417a023 -r 497bda800505 tools/misc/xen-watchdog
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/misc/xen-watchdog   Fri Jun 04 11:35:23 2010 +0100
@@ -0,0 +1,59 @@
+#! /bin/bash
+#
+# xen-watchdog
+#
+# chkconfig: 2345 21 79
+# description: Run domain watchdog daemon
+#
+
+# Source function library.
+. /etc/init.d/functions
+
+start() {
+       local r
+       base="watchdogd"
+       echo -n $"Starting domain watchdog daemon: "
+
+       /usr/sbin/xenwatchdogd 30 15
+       r=$?
+       [ "$r" -eq 0 ] && success $"$base startup" || failure $"$base startup"
+       echo
+
+       return $r
+}
+
+stop() {
+       local r
+       base="watchdogd"
+       echo -n $"Stopping domain watchdog daemon: "
+
+       killall -USR1 watchdogd 2>/dev/null
+       r=$?
+       [ "$r" -eq 0 ] && success $"$base stop" || failure $"$base stop"
+       echo
+
+       return $r
+}
+
+case "$1" in
+  start)
+       start
+       ;;
+  stop)
+       stop
+       ;;
+  restart)
+       stop
+       start
+       ;;
+  status)
+       ;;
+  condrestart)
+       stop
+       start
+       ;;
+  *)
+       echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+       exit 1
+esac
+
diff -r d4a91417a023 -r 497bda800505 tools/misc/xenwatchdogd.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/misc/xenwatchdogd.c Fri Jun 04 11:35:23 2010 +0100
@@ -0,0 +1,96 @@
+
+#include <err.h>
+#include <limits.h>
+#include "xenctrl.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+
+xc_interface *h;
+int id = 0;
+
+void daemonize(void)
+{
+    switch (fork()) {
+    case -1:
+       err(1, "fork");
+    case 0:
+       break;
+    default:
+       exit(0);
+    }
+    umask(0);
+    if (setsid() < 0)
+       err(1, "setsid");
+    if (chdir("/") < 0)
+       err(1, "chdir /");
+    freopen("/dev/null", "r", stdin);
+    freopen("/dev/null", "w", stdout);
+    freopen("/dev/null", "w", stderr);
+}
+
+void catch_exit(int sig)
+{
+    if (id)
+        xc_watchdog(h, id, 300);
+    exit(0);
+}
+
+void catch_usr1(int sig)
+{
+    if (id)
+        xc_watchdog(h, id, 0);
+    exit(0);
+}
+
+int main(int argc, char **argv)
+{
+    int t, s;
+    int ret;
+
+    if (argc < 2)
+       errx(1, "usage: %s <timeout> <sleep>", argv[0]);
+
+    daemonize();
+
+    h = xc_interface_open(NULL, NULL, 0);
+    if (h == NULL)
+       err(1, "xc_interface_open");
+
+    t = strtoul(argv[1], NULL, 0);
+    if (t == ULONG_MAX)
+       err(1, "strtoul");
+
+    s = t / 2;
+    if (argc == 3) {
+       s = strtoul(argv[2], NULL, 0);
+       if (s == ULONG_MAX)
+           err(1, "strtoul");
+    }
+
+    if (signal(SIGHUP, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGINT, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGQUIT, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGTERM, &catch_exit) == SIG_ERR)
+       err(1, "signal");
+    if (signal(SIGUSR1, &catch_usr1) == SIG_ERR)
+       err(1, "signal");
+
+    id = xc_watchdog(h, 0, t);
+    if (id <= 0)
+        err(1, "xc_watchdog setup");
+
+    for (;;) {
+        sleep(s);
+        ret = xc_watchdog(h, id, t);
+        if (ret != 0)
+            err(1, "xc_watchdog");
+    }
+}
diff -r d4a91417a023 -r 497bda800505 xen/common/domain.c
--- a/xen/common/domain.c       Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/domain.c       Fri Jun 04 11:35:23 2010 +0100
@@ -209,8 +209,8 @@ struct domain *domain_create(
     domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
 {
     struct domain *d, **pd;
-    enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
-           INIT_gnttab = 1u<<3, INIT_arch = 1u<<4 };
+    enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
+           INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
     int init_status = 0;
     int poolid = CPUPOOLID_NONE;
 
@@ -224,6 +224,9 @@ struct domain *domain_create(
     if ( xsm_alloc_security_domain(d) != 0 )
         goto fail;
     init_status |= INIT_xsm;
+
+    watchdog_domain_init(d);
+    init_status |= INIT_watchdog;
 
     atomic_set(&d->refcnt, 1);
     spin_lock_init_prof(d, domain_lock);
@@ -327,6 +330,8 @@ struct domain *domain_create(
     }
     if ( init_status & INIT_rangeset )
         rangeset_domain_destroy(d);
+    if ( init_status & INIT_watchdog )
+        watchdog_domain_destroy(d);
     if ( init_status & INIT_xsm )
         xsm_free_security_domain(d);
     xfree(d->pirq_mask);
@@ -604,6 +609,8 @@ static void complete_domain_destroy(stru
 
     arch_domain_destroy(d);
 
+    watchdog_domain_destroy(d);
+
     rangeset_domain_destroy(d);
 
     cpupool_rm_domain(d);
diff -r d4a91417a023 -r 497bda800505 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/keyhandler.c   Fri Jun 04 11:35:23 2010 +0100
@@ -241,6 +241,7 @@ static void dump_domains(unsigned char k
 
     for_each_domain ( d )
     {
+        unsigned int i;
         printk("General information for domain %u:\n", d->domain_id);
         cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
         printk("    refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
@@ -254,6 +255,10 @@ static void dump_domains(unsigned char k
                d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
                d->handle[12], d->handle[13], d->handle[14], d->handle[15],
                d->vm_assist);
+        for (i = 0 ; i < NR_DOMAIN_WATCHDOG_TIMERS; i++)
+            if ( test_bit(i, &d->watchdog_inuse_map) )
+                printk("    watchdog %d expires in %d seconds\n",
+                       i, (u32)((d->watchdog_timer[i].expires - NOW()) >> 30));
 
         arch_dump_domain_info(d);
 
diff -r d4a91417a023 -r 497bda800505 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/schedule.c     Fri Jun 04 11:35:23 2010 +0100
@@ -632,6 +632,78 @@ static long do_yield(void)
     return 0;
 }
 
+static void domain_watchdog_timeout(void *data)
+{
+    struct domain *d = data;
+
+    if ( d->is_shutting_down || d->is_dying )
+        return;
+
+    printk("Watchdog timer fired for domain %u\n", d->domain_id);
+    domain_shutdown(d, SHUTDOWN_watchdog);
+}
+
+static long domain_watchdog(struct domain *d, uint32_t id, uint32_t timeout)
+{
+    if ( id > NR_DOMAIN_WATCHDOG_TIMERS )
+        return -EINVAL;
+
+    spin_lock(&d->watchdog_lock);
+
+    if ( id == 0 )
+    {
+        for ( id = 0; id < NR_DOMAIN_WATCHDOG_TIMERS; id++ )
+        {
+            if ( test_and_set_bit(id, &d->watchdog_inuse_map) )
+                continue;
+            set_timer(&d->watchdog_timer[id], NOW() + SECONDS(timeout));
+            break;
+        }
+        spin_unlock(&d->watchdog_lock);
+        return id == NR_DOMAIN_WATCHDOG_TIMERS ? -EEXIST : id + 1;
+    }
+
+    id -= 1;
+    if ( !test_bit(id, &d->watchdog_inuse_map) )
+    {
+        spin_unlock(&d->watchdog_lock);
+        return -EEXIST;
+    }
+
+    if ( timeout == 0 )
+    {
+        stop_timer(&d->watchdog_timer[id]);
+        clear_bit(id, &d->watchdog_inuse_map);
+    }
+    else
+    {
+        set_timer(&d->watchdog_timer[id], NOW() + SECONDS(timeout));
+    }
+
+    spin_unlock(&d->watchdog_lock);
+    return 0;
+}
+
+void watchdog_domain_init(struct domain *d)
+{
+    unsigned int i;
+
+    spin_lock_init(&d->watchdog_lock);
+
+    d->watchdog_inuse_map = 0;
+
+    for ( i = 0; i < NR_DOMAIN_WATCHDOG_TIMERS; i++ )
+        init_timer(&d->watchdog_timer[i], domain_watchdog_timeout, d, 0);
+}
+
+void watchdog_domain_destroy(struct domain *d)
+{
+    unsigned int i;
+
+    for ( i = 0; i < NR_DOMAIN_WATCHDOG_TIMERS; i++ )
+        kill_timer(&d->watchdog_timer[i]);
+}
+
 long do_sched_op_compat(int cmd, unsigned long arg)
 {
     long ret = 0;
@@ -770,6 +842,19 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN
         rcu_unlock_domain(d);
         ret = 0;
 
+        break;
+    }
+
+    case SCHEDOP_watchdog:
+    {
+        struct sched_watchdog sched_watchdog;
+
+        ret = -EFAULT;
+        if ( copy_from_guest(&sched_watchdog, arg, 1) )
+            break;
+
+        ret = domain_watchdog(
+            current->domain, sched_watchdog.id, sched_watchdog.timeout);
         break;
     }
 
diff -r d4a91417a023 -r 497bda800505 xen/common/shutdown.c
--- a/xen/common/shutdown.c     Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/common/shutdown.c     Fri Jun 04 11:35:23 2010 +0100
@@ -5,6 +5,7 @@
 #include <xen/domain.h>
 #include <xen/delay.h>
 #include <xen/shutdown.h>
+#include <xen/console.h>
 #include <asm/debugger.h>
 #include <public/sched.h>
 
@@ -53,6 +54,14 @@ void dom0_shutdown(u8 reason)
         break; /* not reached */
     }
 
+    case SHUTDOWN_watchdog:
+    {
+        printk("Domain 0 shutdown: watchdog rebooting machine.\n");
+        kexec_crash();
+        machine_restart(0);
+        break; /* not reached */
+    }
+
     default:
     {
         printk("Domain 0 shutdown (unknown reason %u): ", reason);
diff -r d4a91417a023 -r 497bda800505 xen/include/public/sched.h
--- a/xen/include/public/sched.h        Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/include/public/sched.h        Fri Jun 04 11:35:23 2010 +0100
@@ -106,6 +106,22 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
 #define SCHEDOP_shutdown_code 5
 
 /*
+ * Setup, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
+ *               after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog    6
+struct sched_watchdog {
+    uint32_t id;                /* watchdog ID */
+    uint32_t timeout;           /* timeout */
+};
+typedef struct sched_watchdog sched_watchdog_t;
+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
+
+/*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
  * software to determine the appropriate action. For the most part, Xen does
  * not care about the shutdown code.
@@ -114,6 +130,7 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
 #define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
 #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
 #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
 
 #endif /* __XEN_PUBLIC_SCHED_H__ */
 
diff -r d4a91417a023 -r 497bda800505 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Jun 04 10:46:32 2010 +0100
+++ b/xen/include/xen/sched.h   Fri Jun 04 11:35:23 2010 +0100
@@ -191,7 +191,7 @@ struct mem_event_domain
     /* event channel port (vcpu0 only) */
     int xen_port;
 };
- 
+
 struct domain
 {
     domid_t          domain_id;
@@ -294,6 +294,12 @@ struct domain
     /* OProfile support. */
     struct xenoprof *xenoprof;
     int32_t time_offset_seconds;
+
+    /* Domain watchdog. */
+#define NR_DOMAIN_WATCHDOG_TIMERS 2
+    spinlock_t watchdog_lock;
+    uint32_t watchdog_inuse_map;
+    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];
 
     struct rcu_head rcu;
 
@@ -598,6 +604,9 @@ uint64_t get_cpu_idle_time(unsigned int 
      cpu_online(cpu) &&                         \
      !per_cpu(tasklet_work_to_do, cpu))
 
+void watchdog_domain_init(struct domain *d);
+void watchdog_domain_destroy(struct domain *d);
+
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Watchdog timers for domains, Xen patchbot-unstable <=