WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 11/11] xl: Migration support

Implement "xl migrate".

ssh is used as the transport by default, although this can be
overridden by specifying a different sshcommand.  This is a very
standard approach nowadays and avoids the need for daemons at the
target host in the default configuration, while providing flexibility
to admins.  (In the future it might be nice to support plain
unencrypted migration over TCP, which we do not rule out now, although
it is not currently implemented.)

Properties of the migration protocol:
  * The domain on the target machine is named "<domname>--incoming"
    while it is being transferred.
  * The domain on the source machine is renamed "<domname>--migratedaway"
    before we give the target permission to rename and unpause.
  * The locking in libxl_domain_rename ensures that of two
    simultaneous migration attempts no more than one will succeed.
  * We go to some considerable effort to avoid leaving the domain in
    a bad state if something goes wrong with one of the ends or the
    network, although there is still (inevitably) a possibility of an
    unresolvable state (in case of very badly timed network failure)
    which is probably best resolved by destroying the domain at both
    ends.

Incidental changes:
  create_domain now returns a libxl error code rather than exiting on error.
  domain_qualifier_to_domid takes an argument for saving the supplied name.
  Various common information (e.g. the domid we are operating on) is
   now in static variables rather than locals.
  New ERROR_BADFAIL error code for reporting unpleasant failures.

Signed-off-by: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>

migration fixes
Migration fixup error reporting

migration - resume, not unpause

Error handling fixes; initialise common_ctx

migration fixes
---
 tools/libxl/libxl.h |    1 +
 tools/libxl/xl.c    |  675 ++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 590 insertions(+), 86 deletions(-)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index d57dd10..62b6cff 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -241,6 +241,7 @@ enum {
     ERROR_NI = -3,
     ERROR_NOMEM = -4,
     ERROR_INVAL = -5,
+    ERROR_BADFAIL = -6,
 };
 
 #define LIBXL_VERSION 0
diff --git a/tools/libxl/xl.c b/tools/libxl/xl.c
index 7d35db1..53be99c 100644
--- a/tools/libxl/xl.c
+++ b/tools/libxl/xl.c
@@ -17,6 +17,7 @@
 #include "libxl_osdeps.h"
 
 #include <stdio.h>
+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -42,9 +43,33 @@ int logfile = 2;
 
 static int suspend_old_xl_format = 0;
 
+static struct libxl_ctx common_ctx;
+static uint32_t common_domid;
+static const char *common_domname;
+static uint8_t *common_config_data;
+static int common_config_len;
+
+static char *migration_domname;
+
 static const char savefileheader_magic[32]=
     "Xen saved domain, xl format\n \0 \r";
 
+static const char migrate_receiver_banner[]=
+    "xl migration receiver ready, send binary domain data.\n";
+static const char migrate_receiver_ready[]=
+    "domain received, ready to unpause";
+static const char migrate_permission_to_go[]=
+    "domain is yours, you are cleared to unpause";
+static const char migrate_report[]=
+    "my copy unpause results are as follows";
+  /* followed by one byte:
+   *     0: everything went well, domain is running
+   *            next thing is we all exit
+   * non-0: things went badly
+   *            next thing should be a migrate_permission_to_go
+   *            from target to source
+   */
+
 typedef struct {
     char magic[32]; /* savefileheader_magic */
     /* All uint32_ts are in domain's byte order. */
@@ -69,7 +94,7 @@ void log_callback(void *userdata, int loglevel, const char 
*file, int line, cons
     write(logfile, str, strlen(str));
 }
 
-static int domain_qualifier_to_domid(struct libxl_ctx *ctx, char *p, uint32_t 
*domid)
+static int domain_qualifier_to_domid(struct libxl_ctx *ctx, char *p, uint32_t 
*domid, const char **expected_name_r)
 {
     int i, alldigit;
 
@@ -83,10 +108,12 @@ static int domain_qualifier_to_domid(struct libxl_ctx 
*ctx, char *p, uint32_t *d
 
     if (i > 0 && alldigit) {
         *domid = strtoul(p, NULL, 10);
+        if (expected_name_r) *expected_name_r = 0;
         return 0;
     } else {
         /* check here if it's a uuid and do proper conversion */
     }
+    if (expected_name_r) *expected_name_r = p;
     return libxl_name_to_domid(ctx, p, domid);
 }
 
@@ -726,7 +753,7 @@ static void *xrealloc(void *ptr, size_t sz) {
     return r;
 }
 
-static void create_domain(int debug, int daemonize, const char *config_file, 
const char *restore_file, int paused)
+static int create_domain(int debug, int daemonize, const char *config_file, 
const char *restore_file, int paused, int migrate_fd /* -1 means none */)
 {
     struct libxl_ctx ctx;
     uint32_t domid;
@@ -755,7 +782,7 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
 
     if (libxl_ctx_init(&ctx, LIBXL_VERSION)) {
         fprintf(stderr, "cannot init xl context\n");
-        exit(1);
+        return ERROR_FAIL;
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
@@ -763,8 +790,9 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
         uint8_t *optdata_begin = 0;
         const uint8_t *optdata_here = 0;
         union { uint32_t u32; char b[4]; } u32buf;
-            
-        restore_fd = open(restore_file, O_RDONLY);
+
+        restore_fd = migrate_fd >= 0 ? migrate_fd :
+            open(restore_file, O_RDONLY);
 
         if (suspend_old_xl_format) {
             memset(&hdr,0,sizeof(hdr));
@@ -776,11 +804,11 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
             if (memcmp(hdr.magic, savefileheader_magic, sizeof(hdr.magic))) {
                 fprintf(stderr, "File has wrong magic number -"
                         " corrupt or needs -O?\n");
-                exit(2);
+                return ERROR_INVAL;
             }
             if (hdr.byteorder != SAVEFILE_BYTEORDER_VALUE) {
                 fprintf(stderr, "File has wrong byte order\n");
-                exit(2);
+                return ERROR_INVAL;
             }
             fprintf(stderr, "Loading new save file %s"
                     " (new xl fmt info"
@@ -793,7 +821,7 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
                 fprintf(stderr, "Savefile has mandatory flag(s) 0x%"PRIx32" "
                         "which are not supported; need newer xl\n",
                         badflags);
-                exit(2);
+                return ERROR_INVAL;
             }
         }
         if (hdr.optional_data_len) {
@@ -803,12 +831,13 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
         }
 
 #define OPTDATA_LEFT  (hdr.optional_data_len - (optdata_here - optdata_begin))
-#define WITH_OPTDATA(amt, body)                                         \
-            if (OPTDATA_LEFT < (amt)) {                                 \
-                fprintf(stderr, "Savefile truncated.\n"); exit(2);      \
-            } else {                                                    \
-                body;                                                   \
-                optdata_here += (amt);                                  \
+#define WITH_OPTDATA(amt, body)                                 \
+            if (OPTDATA_LEFT < (amt)) {                         \
+                fprintf(stderr, "Savefile truncated.\n");       \
+                return ERROR_INVAL;                             \
+            } else {                                            \
+                body;                                           \
+                optdata_here += (amt);                          \
             }
 
         optdata_here = optdata_begin;
@@ -832,12 +861,12 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
         ret = libxl_read_file_contents(&ctx, config_file,
                                        &config_data, &config_len);
         if (ret) { fprintf(stderr, "Failed to read config file: %s: %s\n",
-                           config_file, strerror(errno)); exit(1); }
+                           config_file, strerror(errno)); return ERROR_FAIL; }
     } else {
         if (!config_data) {
             fprintf(stderr, "Config file not specified and"
                     " none in save file\n");
-            exit(1);
+            return ERROR_INVAL;
         }
         config_file = "<saved>";
     }
@@ -846,6 +875,17 @@ static void create_domain(int debug, int daemonize, const 
char *config_file, con
 
     parse_config_data(config_file, config_data, config_len, &info1, &info2, 
&disks, &num_disks, &vifs, &num_vifs, &pcidevs, &num_pcidevs, &vfbs, &num_vfbs, 
&vkbs, &num_vkbs, &dm_info);
 
+    if (migrate_fd >= 0) {
+        if (info1.name) {
+            /* when we receive a domain we get its name from the config
+             * file; and we receive it to a temporary name */
+            assert(!common_domname);
+            common_domname = info1.name;
+            asprintf(&migration_domname, "%s--incoming", info1.name);
+            info1.name = migration_domname;
+        }
+    }
+
     if (debug)
         printf_info(&info1, &info2, disks, num_disks, vifs, num_vifs, pcidevs, 
num_pcidevs, vfbs, num_vfbs, vkbs, num_vkbs, &dm_info);
 
@@ -855,14 +895,15 @@ start:
     ret = libxl_domain_make(&ctx, &info1, &domid);
     if (ret) {
         fprintf(stderr, "cannot make domain: %d\n", ret);
-        exit(1);
+        return ERROR_FAIL;
     }
+    common_domid = domid;
 
     ret = libxl_userdata_store(&ctx, domid, "xl",
                                     config_data, config_len);
     if (ret) {
         perror("cannot save config file");
-        exit(1);
+        return ERROR_FAIL;
     }
 
     if (!restore_file || !need_daemon) {
@@ -873,12 +914,11 @@ start:
         ret = libxl_domain_build(&ctx, &info2, domid, &state);
     } else {
         ret = libxl_domain_restore(&ctx, &info2, domid, restore_fd, &state, 
&dm_info);
-        close(restore_fd);
     }
 
     if (ret) {
         fprintf(stderr, "cannot (re-)build domain: %d\n", ret);
-        exit(1);
+        return ERROR_FAIL;
     }
 
     for (i = 0; i < num_disks; i++) {
@@ -886,7 +926,7 @@ start:
         ret = libxl_device_disk_add(&ctx, domid, &disks[i]);
         if (ret) {
             fprintf(stderr, "cannot add disk %d to domain: %d\n", i, ret);
-            exit(1);
+            return ERROR_FAIL;
         }
     }
     for (i = 0; i < num_vifs; i++) {
@@ -894,7 +934,7 @@ start:
         ret = libxl_device_nic_add(&ctx, domid, &vifs[i]);
         if (ret) {
             fprintf(stderr, "cannot add nic %d to domain: %d\n", i, ret);
-            exit(1);
+            return ERROR_FAIL;
         }
     }
     if (info1.hvm) {
@@ -926,7 +966,7 @@ start:
         libxl_domain_unpause(&ctx, domid);
 
     if (!daemonize)
-        exit(0);
+        return 0; /* caller gets success in parent */
 
     if (need_daemon) {
         char *fullname, *name;
@@ -1023,12 +1063,7 @@ start:
     }
 
     close(logfile);
-    free(disks);
-    free(vifs);
-    free(vfbs);
-    free(vkbs);
-    free(pcidevs);
-    free(config_data);
+    exit(0);
 }
 
 static void help(char *command)
@@ -1091,6 +1126,25 @@ static void help(char *command)
         printf("-O                     Old (configless) xl save format.\n");
         printf("-p                     Do not unpause domain after restoring 
it.\n");
         printf("-e                     Do not wait in the background for the 
death of the domain.\n");
+        printf("-d                     Enable debug messages.\n");
+    } else if(!strcmp(command, "migrate")) {
+        printf("Usage: xl migrate [options] <Domain> <host>\n\n");
+        printf("Save a domain state to restore later.\n\n");
+        printf("Options:\n\n");
+        printf("-h                     Print this help.\n");
+        printf("-C <config>            Send <config> instead of config file 
from creation.\n");
+        printf("-s <sshcommand>        Use <sshcommand> instead of ssh.  
String will be passed to sh.  If empty, run <host> instead of ssh <host> xl 
migrate-receive [-d -e]\n");
+        printf("-e                     Do not wait in the background (on 
<host>) for the death of the domain.\n");
+    } else if(!strcmp(command, "migrate-receive")) {
+        printf("Usage: xl migrate-receive  - for internal use only");
+    } else if(!strcmp(command, "restore")) {
+        printf("Usage: xl restore [options] [<ConfigFile>] 
<CheckpointFile>\n\n");
+        printf("Restore a domain from a saved state.\n\n");
+        printf("Options:\n\n");
+        printf("-h                     Print this help.\n");
+        printf("-O                     Old (configless) xl save format.\n");
+        printf("-p                     Do not unpause domain after restoring 
it.\n");
+        printf("-e                     Do not wait in the background for the 
death of the domain.\n");
     } else if(!strcmp(command, "destroy")) {
         printf("Usage: xl destroy <Domain>\n\n");
         printf("Terminate a domain immediately.\n\n");
@@ -1126,7 +1180,7 @@ void set_memory_target(char *p, char *mem)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1177,7 +1231,7 @@ void console(char *p, int cons_num)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1197,7 +1251,7 @@ void cd_insert(char *dom, char *virtdev, char *phys)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, dom, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, dom, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", dom);
         exit(2);
     }
@@ -1333,7 +1387,7 @@ void pcilist(char *dom)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, dom, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, dom, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", dom);
         exit(2);
     }
@@ -1386,7 +1440,7 @@ void pcidetach(char *dom, char *bdf)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, dom, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, dom, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", dom);
         exit(2);
     }
@@ -1435,7 +1489,7 @@ void pciattach(char *dom, char *bdf, char *vs)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, dom, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, dom, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", dom);
         exit(2);
     }
@@ -1486,7 +1540,7 @@ void pause_domain(char *p)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1504,7 +1558,7 @@ void unpause_domain(char *p)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1522,7 +1576,7 @@ void destroy_domain(char *p)
     }
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1592,33 +1646,45 @@ void list_vm(void)
     free(info);
 }
 
-int save_domain(char *p, char *filename, int checkpoint,
-                const char *override_config_file)
+static void save_domain_core_begin(char *domain_spec,
+                                   const char *override_config_file)
 {
-    struct libxl_ctx ctx;
-    uint32_t domid;
-    int fd, rc;
-
-    if (libxl_ctx_init(&ctx, LIBXL_VERSION)) {
+    /* fixme clone and hack of this init should be eliminated */
+    int rc;
+    
+    if (libxl_ctx_init(&common_ctx, LIBXL_VERSION)) {
         fprintf(stderr, "cannot init xl context\n");
         exit(2);
     }
-    libxl_ctx_set_log(&ctx, log_callback, NULL);
+    libxl_ctx_set_log(&common_ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
-        fprintf(stderr, "%s is an invalid domain identifier\n", p);
+    if (domain_qualifier_to_domid(&common_ctx, domain_spec, &common_domid, 
&common_domname)<0) {
+        fprintf(stderr, "%s is an invalid domain identifier\n", domain_spec);
         exit(2);
     }
-    fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644);
-    if (fd < 0) {
-        fprintf(stderr, "Failed to open temp file %s for writing\n", filename);
-        exit(2);
+    if (!suspend_old_xl_format) {
+        /* configuration file in optional data: */
+        
+        if (override_config_file) {
+            void *config_v = 0;
+            rc = libxl_read_file_contents(&common_ctx, override_config_file,
+                                          &config_v, &common_config_len);
+            common_config_data = config_v;
+        } else {
+            rc = libxl_userdata_retrieve(&common_ctx, common_domid, "xl",
+                                              &common_config_data, 
&common_config_len);
+        }
+        if (rc) {
+            fputs("Unable to get config file\n",stderr);
+            exit(2);
+        }
     }
+}
 
+void save_domain_core_writeconfig(int fd, const char *filename)
+{
     if (!suspend_old_xl_format) {
         SaveFileHeader hdr;
-        uint8_t *config_data = 0;
-        int config_len;
         uint8_t *optdata_begin;
         union { uint32_t u32; char b[4]; } u32buf;
 
@@ -1637,34 +1703,15 @@ int save_domain(char *p, char *filename, int checkpoint,
         }                                                                   \
                               }) 
 
-        /* configuration file in optional data: */
-        
-        if (override_config_file) {
-            void *config_v = 0;
-            rc = libxl_read_file_contents(&ctx, override_config_file,
-                                          &config_v, &config_len);
-            config_data = config_v;
-        } else {
-            rc = libxl_userdata_retrieve(&ctx, domid, "xl",
-                                              &config_data, &config_len);
-        }
-        if (rc) {
-            fputs("Unable to get config file\n",stderr);
-            exit(2);
-        }
-        if (!config_len) {
-            fputs(" Savefile will not contain xl domain config\n", stderr);
-        }
-
-        u32buf.u32 = config_len;
+        u32buf.u32 = common_config_len;
         ADD_OPTDATA(u32buf.b,    4);
-        ADD_OPTDATA(config_data, config_len);
+        ADD_OPTDATA(common_config_data, common_config_len);
 
         /* that's the optional data */
 
-        CHK_ERRNO( libxl_write_exactly(&ctx, fd,
+        CHK_ERRNO( libxl_write_exactly(&common_ctx, fd,
             &hdr, sizeof(hdr), filename, "header") );
-        CHK_ERRNO( libxl_write_exactly(&ctx, fd,
+        CHK_ERRNO( libxl_write_exactly(&common_ctx, fd,
             optdata_begin, hdr.optional_data_len, filename, "header") );
 
         fprintf(stderr, "Saving to %s new xl format (info"
@@ -1672,14 +1719,371 @@ int save_domain(char *p, char *filename, int 
checkpoint,
                 filename, hdr.mandatory_flags, hdr.optional_flags,
                 hdr.optional_data_len);
     }
+}
+
+int save_domain(char *p, char *filename, int checkpoint,
+                const char *override_config_file)
+{
+    int fd;
+
+    save_domain_core_begin(p, override_config_file);
+
+    if (!suspend_old_xl_format && !common_config_len) {
+        fputs(" Savefile will not contain xl domain config\n", stderr);
+    }
+
+    fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644);
+    if (fd < 0) {
+        fprintf(stderr, "Failed to open temp file %s for writing\n", filename);
+        exit(2);
+    }
+
+    save_domain_core_writeconfig(fd, filename);
 
-    libxl_domain_suspend(&ctx, NULL, domid, fd);
+    libxl_domain_suspend(&common_ctx, NULL, common_domid, fd);
     close(fd);
 
     if (checkpoint)
-        libxl_domain_unpause(&ctx, domid);
+        libxl_domain_unpause(&common_ctx, common_domid);
     else
-        libxl_domain_destroy(&ctx, domid, 0);
+        libxl_domain_destroy(&common_ctx, common_domid, 0);
+
+    exit(0);
+}
+
+static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
+                                     const char *what, const char *rune) {
+    char buf[msgsz];
+    const char *stream;
+    int rc;
+
+    stream = rune ? "migration receiver stream" : "migration stream";
+    rc = libxl_read_exactly(&common_ctx, fd, buf, msgsz, stream, what);
+    if (rc) return ERROR_FAIL;
+
+    if (memcmp(buf, msg, msgsz)) {
+        fprintf(stderr, "%s contained unexpected data instead of %s\n",
+                stream, what);
+        if (rune)
+            fprintf(stderr, "(command run was: %s )\n", rune);
+        return ERROR_FAIL;
+    }
+    return 0;
+}
+
+static void migration_child_report(pid_t migration_child, int recv_fd) {
+    pid_t child;
+    int status, sr;
+    struct timeval now, waituntil, timeout;
+    static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
+
+    if (!migration_child) return;
+
+    CHK_ERRNO( gettimeofday(&waituntil, 0) );
+    waituntil.tv_sec += 2;
+    
+    for (;;) {
+        child = waitpid(migration_child, &status, WNOHANG);
+        
+        if (child == migration_child) {
+            if (status)
+                libxl_report_child_exitstatus(&common_ctx, XL_LOG_INFO,
+                                              "migration target process",
+                                              migration_child, status);
+            break;
+        }
+        if (child == -1) {
+            if (errno == EINTR) continue;
+            fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
+                    (long)migration_child, strerror(errno));
+            break;
+        }
+        assert(child == 0);
+
+        CHK_ERRNO( gettimeofday(&now, 0) );
+        if (timercmp(&now, &waituntil, >)) {
+            fprintf(stderr, "migration child [%ld] not exiting, no longer"
+                    " waiting (exit status will be unreported)\n",
+                    (long)migration_child);
+            break;
+        }
+        timersub(&waituntil, &now, &timeout);
+
+        if (recv_fd >= 0) {
+            fd_set readfds, exceptfds;
+            FD_ZERO(&readfds);
+            FD_ZERO(&exceptfds);
+            FD_SET(recv_fd, &readfds);
+            FD_SET(recv_fd, &exceptfds);
+            sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout);
+        } else {
+            if (timercmp(&timeout, &pollinterval, >))
+                timeout = pollinterval;
+            sr = select(0,0,0,0, &timeout);
+        }
+        if (sr > 0) {
+            recv_fd = -1;
+        } else if (sr == 0) {
+        } else if (sr == -1) {
+            if (errno != EINTR) {
+                fprintf(stderr, "migration child [%ld] exit wait select"
+                        " failed unexpectedly: %s\n",
+                        (long)migration_child, strerror(errno));
+                break;
+            }
+        }
+    }
+    migration_child = 0;
+}
+
+static void migrate_domain(char *domain_spec, const char *rune,
+                           const char *override_config_file)
+{
+    pid_t child = -1;
+    int rc;
+    int sendpipe[2], recvpipe[2];
+    int send_fd, recv_fd;
+    libxl_domain_suspend_info suspinfo;
+    char *away_domname;
+    char rc_buf;
+    
+    save_domain_core_begin(domain_spec, override_config_file);
+
+    if (!common_domname) {
+        common_domname = libxl_domid_to_name(&common_ctx, common_domid);
+        /* libxl_domid_to_name fails ?  don't bother with names then */
+    }
+
+    if (!common_config_len) {
+        fprintf(stderr, "No config file stored for running domain and "
+                "none supplied - cannot migrate.\n");
+        exit(1);
+    }
+    
+    MUST( libxl_pipe(&common_ctx, sendpipe) );
+    MUST( libxl_pipe(&common_ctx, recvpipe) );
+    
+    child = libxl_fork(&common_ctx);
+    if (child==-1) exit(1);
+
+    if (!child) {
+        dup2(sendpipe[0], 0);
+        dup2(recvpipe[1], 1);
+        close(sendpipe[0]); close(sendpipe[1]);
+        close(recvpipe[0]); close(recvpipe[1]);
+        execlp("sh","sh","-c",rune,(char*)0);
+        perror("failed to exec sh");
+        exit(-1);
+    }
+
+    close(sendpipe[0]);
+    close(recvpipe[1]);
+    send_fd = sendpipe[1];
+    recv_fd = recvpipe[0];
+
+    signal(SIGPIPE, SIG_IGN);
+    /* if receiver dies, we get an error and can clean up
+       rather than just dying */
+
+    rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
+                                   sizeof(migrate_receiver_banner)-1,
+                                   "banner", rune);
+    if (rc) {
+        close(send_fd);
+        migration_child_report(child, recv_fd);
+        exit(-rc);
+    }
+
+    save_domain_core_writeconfig(send_fd, "migration stream");
+
+    memset(&suspinfo, 0, sizeof(suspinfo));
+    suspinfo.flags |= XL_SUSPEND_LIVE;
+    rc = libxl_domain_suspend(&common_ctx, &suspinfo, common_domid, send_fd);
+    if (rc) {
+        fprintf(stderr, "migration sender: libxl_domain_suspend failed"
+                " (rc=%d)\n", rc);
+        goto failed_resume;
+    }
+
+    fprintf(stderr, "migration sender: Transfer complete.\n");
+
+    rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
+                                   sizeof(migrate_receiver_ready),
+                                   "ready message", rune);
+    if (rc) goto failed_resume;
+
+    /* right, at this point we are about give the destination
+     * permission to rename and resume, so we must first rename the
+     * domain away ourselves */
+
+    fprintf(stderr, "migration sender: Target has acknowledged transfer.\n");
+
+    if (common_domname) {
+        asprintf(&away_domname, "%s--migratedaway", common_domname);
+        rc = libxl_domain_rename(&common_ctx, common_domid,
+                                 common_domname, away_domname, 0);
+        if (rc) goto failed_resume;
+    }
+
+    /* point of no return - as soon as we have tried to say
+     * "go" to the receiver, it's not safe to carry on.  We leave
+     * the domain renamed to %s--migratedaway in case that's helpful.
+     */
+
+    fprintf(stderr, "migration sender: Giving target permission to start.\n");
+
+    rc = libxl_write_exactly(&common_ctx, send_fd,
+                             migrate_permission_to_go,
+                             sizeof(migrate_permission_to_go),
+                             "migration stream", "GO message");
+    if (rc) goto failed_badly;
+
+    rc = migrate_read_fixedmessage(recv_fd, migrate_report,
+                                   sizeof(migrate_report),
+                                   "success/failure report message", rune);
+    if (rc) goto failed_badly;
+
+    rc = libxl_read_exactly(&common_ctx, recv_fd,
+                            &rc_buf, 1,
+                            "migration ack stream", "success/failure status");
+    if (rc) goto failed_badly;
+
+    if (rc_buf) {
+        fprintf(stderr, "migration sender: Target reports startup failure"
+                " (status code %d).\n", rc_buf);
+
+        rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
+                                       sizeof(migrate_permission_to_go),
+                                       "permission for sender to resume",
+                                       rune);
+        if (rc) goto failed_badly;
+
+        fprintf(stderr, "migration sender: Trying to resume at our end.\n");
+
+        if (common_domname) {
+            libxl_domain_rename(&common_ctx, common_domid,
+                                away_domname, common_domname, 0);
+        }
+        rc = libxl_domain_resume(&common_ctx, common_domid);
+        if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
+
+        fprintf(stderr, "Migration failed due to problems at target.\n");
+        exit(-ERROR_FAIL);
+    }
+
+    fprintf(stderr, "migration sender: Target reports successful startup.\n");
+    libxl_domain_destroy(&common_ctx, common_domid, 1); /* bang! */
+    fprintf(stderr, "Migration successful.\n");
+    exit(0);
+
+ failed_resume:
+    close(send_fd);
+    migration_child_report(child, recv_fd);
+    fprintf(stderr, "Migration failed, resuming at sender.\n");
+    libxl_domain_resume(&common_ctx, common_domid);
+    exit(-ERROR_FAIL);
+
+ failed_badly:
+    fprintf(stderr,
+ "** Migration failed during final handshake **\n"
+ "Domain state is now undefined !\n"
+ "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
+ " resuming at most one instance.  Two simultaneous instances of the domain\n"
+ " would probably result in SEVERE DATA LOSS and it is now your\n"
+ " responsibility to avoid that.  Sorry.\n");
+
+    close(send_fd);
+    migration_child_report(child, recv_fd);
+    exit(-ERROR_BADFAIL);
+}
+
+static void migrate_receive(int debug, int daemonize)
+{
+    int rc, rc2;
+    char rc_buf;
+
+    signal(SIGPIPE, SIG_IGN);
+    /* if we get SIGPIPE we'd rather just have it as an error */
+
+    fprintf(stderr, "migration target: Ready to receive domain.\n");
+
+    CHK_ERRNO( libxl_write_exactly(&common_ctx, 1,
+                                   migrate_receiver_banner,
+                                   sizeof(migrate_receiver_banner)-1,
+                                   "migration ack stream",
+                                   "banner") );
+
+    rc = create_domain(debug, daemonize,
+                       0 /* no config file, use incoming */,
+                       "incoming migration stream", 1, 0);
+    if (rc) {
+        fprintf(stderr, "migration target: Domain creation failed"
+                " (code %d).\n", rc);
+        exit(-rc);
+    }
+
+    fprintf(stderr, "migration target: Transfer complete,"
+            " requesting permission to start domain.\n");
+
+    rc = libxl_write_exactly(&common_ctx, 1,
+                             migrate_receiver_ready,
+                             sizeof(migrate_receiver_ready),
+                             "migration ack stream", "ready message");
+    if (rc) exit(-rc);
+
+    rc = migrate_read_fixedmessage(0, migrate_permission_to_go,
+                                   sizeof(migrate_permission_to_go),
+                                   "GO message", 0);
+    if (rc) goto perhaps_destroy_notify_rc;
+
+    fprintf(stderr, "migration target: Got permission, starting domain.\n");
+
+    if (migration_domname) {
+        rc = libxl_domain_rename(&common_ctx, common_domid,
+                                 migration_domname, common_domname, 0);
+        if (rc) goto perhaps_destroy_notify_rc;
+    }
+
+    rc = libxl_domain_unpause(&common_ctx, common_domid);
+    if (rc) goto perhaps_destroy_notify_rc;
+
+    fprintf(stderr, "migration target: Domain started successsfully.\n");
+    rc = 0;
+    
+ perhaps_destroy_notify_rc:
+    rc2 = libxl_write_exactly(&common_ctx, 1,
+                              migrate_report, sizeof(migrate_report),
+                              "migration ack stream",
+                              "success/failure report");
+    if (rc2) exit(-ERROR_BADFAIL);
+
+    rc_buf = -rc;
+    assert(!!rc_buf == !!rc);
+    rc2 = libxl_write_exactly(&common_ctx, 1, &rc_buf, 1,
+                              "migration ack stream",
+                              "success/failure code");
+    if (rc2) exit(-ERROR_BADFAIL);
+
+    if (rc) {
+        fprintf(stderr, "migration target: Failure, destroying our copy.\n");
+
+        rc2 = libxl_domain_destroy(&common_ctx, common_domid, 1);
+        if (rc2) {
+            fprintf(stderr, "migration target: Failed to destroy our copy"
+                    " (code %d).\n", rc2);
+            exit(-ERROR_BADFAIL);
+        }
+
+        fprintf(stderr, "migration target: Cleanup OK, granting sender"
+                " permission to resume.\n");
+
+        rc2 = libxl_write_exactly(&common_ctx, 1,
+                                  migrate_permission_to_go,
+                                  sizeof(migrate_permission_to_go),
+                                  "migration ack stream",
+                                  "permission to sender to have domain back");
+        if (rc2) exit(-ERROR_BADFAIL);
+    }
 
     exit(0);
 }
@@ -1689,7 +2093,7 @@ int main_restore(int argc, char **argv)
     char *checkpoint_file = NULL;
     char *config_file = NULL;
     int paused = 0, debug = 0, daemonize = 1;
-    int opt;
+    int opt, rc;
 
     while ((opt = getopt(argc, argv, "hpdeO")) != -1) {
         switch (opt) {
@@ -1723,7 +2127,39 @@ int main_restore(int argc, char **argv)
         help("restore");
         exit(2);
     }
-    create_domain(debug, daemonize, config_file, checkpoint_file, paused);
+    rc = create_domain(debug, daemonize, config_file,
+                       checkpoint_file, paused, -1);
+    exit(-rc);
+}
+
+int main_migrate_receive(int argc, char **argv) /* "xl migrate-receive" entry */
+{
+    int debug = 0, daemonize = 1; /* defaults; changed by -d and -e below */
+    int opt;
+
+    while ((opt = getopt(argc, argv, "hed")) != -1) {
+        switch (opt) {
+        case 'h':
+            help("restore"); /* NOTE(review): "restore" topic, not a migrate one - confirm */
+            exit(2);
+            break; /* not reached: exit() does not return */
+        case 'e':
+            daemonize = 0;
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        default:
+            fprintf(stderr, "option not supported\n"); /* warn only; parsing goes on */
+            break;
+        }
+    }
+
+    if (argc-optind != 0) { /* no positional arguments are accepted */
+        help("restore"); /* NOTE(review): same "restore" topic as -h above */
+        exit(2);
+    }
+    migrate_receive(debug, daemonize); /* both flags are passed straight through */
     exit(0);
 }
 
@@ -1763,6 +2199,79 @@ int main_save(int argc, char **argv)
     exit(0);
 }
 
+/*
+ * xl migrate [-C <config>] [-s <sshcommand>] [-e] [-d] <domain> <host>
+ *
+ * Parses the command line, builds the command ("rune") that starts the
+ * receiving end, and hands both to migrate_domain().
+ *
+ *   -h  print the "migrate" help text and exit with status 2
+ *   -C  config file name, passed through to migrate_domain()
+ *   -s  command used in place of "ssh"; if it is the empty string,
+ *       <host> is used verbatim as the whole rune
+ *   -e  appended to the generated rune as " -e"
+ *   -d  appended to the generated rune as " -d"
+ *
+ * Never returns: exits 2 on a usage error, 1 if the rune cannot be
+ * allocated, and 0 after migrate_domain() returns.
+ */
+int main_migrate(int argc, char **argv)
+{
+    const char *config_filename = NULL;
+    const char *ssh_command = "ssh";
+    char *rune = NULL;
+    char *p;
+    char *host;
+    int opt, daemonize = 1, debug = 0;
+
+    while ((opt = getopt(argc, argv, "hC:s:ed")) != -1) {
+        switch (opt) {
+        case 'h':
+            help("migrate");
+            exit(2);
+        case 'C':
+            config_filename = optarg;
+            break;
+        case 's':
+            ssh_command = optarg;
+            break;
+        case 'e':
+            daemonize = 0;
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        default:
+            fprintf(stderr, "option not supported\n");
+            break;
+        }
+    }
+
+    /* Exactly two positional arguments: the domain and the target host. */
+    if (argc - optind != 2) {
+        help("migrate");
+        exit(2);
+    }
+
+    p = argv[optind];
+    host = argv[optind + 1];
+
+    if (!ssh_command[0]) {
+        /* Empty -s argument: <host> is the complete command to run. */
+        rune = host;
+    } else if (asprintf(&rune, "exec %s %s xl migrate-receive%s%s",
+                        ssh_command, host,
+                        daemonize ? "" : " -e",
+                        debug ? " -d" : "") < 0) {
+        /* asprintf() leaves rune undefined on failure; do not use it. */
+        fprintf(stderr, "failed to allocate migration command\n");
+        exit(1);
+    }
+
+    migrate_domain(p, rune, config_filename);
+    exit(0);
+}
+
 int main_pause(int argc, char **argv)
 {
     int opt;
@@ -1885,7 +2375,7 @@ int main_create(int argc, char **argv)
 {
     char *filename = NULL;
     int debug = 0, daemonize = 1;
-    int opt;
+    int opt, rc;
 
     while ((opt = getopt(argc, argv, "hde")) != -1) {
         switch (opt) {
@@ -1910,8 +2400,8 @@ int main_create(int argc, char **argv)
     }
 
     filename = argv[optind];
-    create_domain(debug, daemonize, filename, NULL, 0);
-    exit(0);
+    rc = create_domain(debug, daemonize, filename, NULL, 0, -1);
+    exit(-rc);
 }
 
 void button_press(char *p, char *b)
@@ -1923,7 +2413,7 @@ void button_press(char *p, char *b)
     libxl_ctx_init(&ctx, LIBXL_VERSION);
     libxl_ctx_set_log(&ctx, log_callback, NULL);
 
-    if (domain_qualifier_to_domid(&ctx, p, &domid) < 0) {
+    if (domain_qualifier_to_domid(&ctx, p, &domid, 0) < 0) {
         fprintf(stderr, "%s is an invalid domain identifier\n", p);
         exit(2);
     }
@@ -1977,6 +2467,15 @@ int main(int argc, char **argv)
 
     srand(time(0));
 
+    if (libxl_ctx_init(&common_ctx, LIBXL_VERSION)) {
+        fprintf(stderr, "cannot init xl common context\n");
+        exit(-ERROR_FAIL);
+    }
+    if (libxl_ctx_set_log(&common_ctx, log_callback, NULL)) {
+        fprintf(stderr, "cannot set xl log callback\n");
+        exit(-ERROR_FAIL);
+    }
+
     if (!strcmp(argv[1], "create")) {
         main_create(argc - 1, argv + 1);
     } else if (!strcmp(argv[1], "list")) {
@@ -1999,8 +2498,12 @@ int main(int argc, char **argv)
         main_console(argc - 1, argv + 1);
     } else if (!strcmp(argv[1], "save")) {
         main_save(argc - 1, argv + 1);
+    } else if (!strcmp(argv[1], "migrate")) {
+        main_migrate(argc - 1, argv + 1);
     } else if (!strcmp(argv[1], "restore")) {
         main_restore(argc - 1, argv + 1);
+    } else if (!strcmp(argv[1], "migrate-receive")) {
+        main_migrate_receive(argc - 1, argv + 1);
     } else if (!strcmp(argv[1], "cd-insert")) {
         main_cd_insert(argc - 1, argv + 1);
     } else if (!strcmp(argv[1], "cd-eject")) {
-- 
1.5.6.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel