WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3 of 9] Initiate failover if a packet is not received

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 3 of 9] Initiate failover if a packet is not received every 500ms
From: Brendan Cully <brendan@xxxxxxxxx>
Date: Wed, 13 May 2009 17:19:31 -0700
Cc: andy@xxxxxxxxx
Delivery-date: Wed, 13 May 2009 17:23:23 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <patchbomb.1242260368@xxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <patchbomb.1242260368@xxxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mercurial-patchbomb/1f0f01bc86a5
# HG changeset patch
# User Brendan Cully <brendan@xxxxxxxxx>
# Date 1240355510 25200
# Node ID b51238ea926948383500b94cd227321eb40a82dd
# Parent  f5c0d3208d8ae9183391398d52c9be5969da24ec
Initiate failover if a packet is not received every 500ms.
This breaks checkpoints at lower frequencies, and should be made
configurable.

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -59,6 +59,51 @@
 /* Address size of the guest, in bytes */
 unsigned int guest_width;
 
+/* set when a consistent image is available */
+static int completed = 0;
+
+#define HEARTBEAT_MS 500
+
+# ifndef __MINIOS__
+static ssize_t read_exact_timed(int fd, void* buf, size_t size)
+{ /* Reads exactly size bytes from fd into buf; returns 0 on success, -1 on failure. */
+  size_t offset = 0;
+  ssize_t len;
+  struct timeval tv;
+  fd_set rfds;
+
+  while ( offset < size )
+  {
+    if (completed) { /* the timeout is only enforced once completed is set */
+      /* expect a heartbeat every HEARTBEAT_MS ms maximum */
+      tv.tv_sec = 0;
+      tv.tv_usec = HEARTBEAT_MS * 1000;
+
+      FD_ZERO(&rfds);
+      FD_SET(fd, &rfds);
+      len = select(fd + 1, &rfds, NULL, NULL, &tv);
+      if ( !FD_ISSET(fd, &rfds) ) { /* fd did not become readable within the timeout */
+       fprintf(stderr, "read_exact_timed failed (select returned %zd)\n", len);
+       return -1;
+      }
+    }
+
+    len = read(fd, buf + offset, size - offset);
+    if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
+      continue; /* interrupted or would block: retry from the top of the loop */
+    if ( len <= 0 )
+      return -1; /* end of file (0) or any other read error */
+    offset += len;
+  }
+
+  return 0;
+}
+
+#define read_exact read_exact_timed
+
+#else
+#define read_exact_timed read_exact
+#endif
 /*
 ** In the state file (or during transfer), all page-table pages are
 ** converted into a 'canonical' form where references to actual mfns
@@ -413,7 +458,9 @@
   // DPRINTF("reading batch of %d pages\n", count);
 
   if (!count) {
+    /*
     DPRINTF("Last batch read\n");
+    */
     return 0;
   } else if (count == -1) {
     DPRINTF("Entering page verify mode\n");
@@ -704,7 +751,8 @@
 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae)
+                      unsigned int hvm, unsigned int pae,
+                     int (*resume)(void*), void* resumedata)
 {
     DECLARE_DOMCTL;
     int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
@@ -752,7 +800,6 @@
     /* Buffer for holding HVM context */
     uint8_t *hvm_buf = NULL;
 
-    int completed = 0;
     pagebuf_t pagebuf;
     tailbuf_t tailbuf, tmptail;
     void* vcpup;
@@ -946,7 +993,9 @@
         goto out;
     }
 
+    /*
     DPRINTF("Received all pages (%d races)\n", nraces);
+    */
 
     if ( hvm ) 
     {
@@ -1021,28 +1070,40 @@
     /* Non-HVM guests only from here on */
 
     if (!completed) {
+      int flags = 0;
+
       if ( buffer_tail(&tailbuf, io_fd, max_vcpu_id, vcpumap,
                       ext_vcpucontext) < 0 ) {
        ERROR ("error buffering image tail");
        goto out;
       }
+
       completed = 1;
+      /* shift into nonblocking mode for the remainder */
+      if ((flags = fcntl(io_fd, F_GETFL, 0)) < 0)
+       flags = 0;
+      fcntl(io_fd, F_SETFL, flags | O_NONBLOCK);
     }
-    
+
+    /*
     DPRINTF("Buffered checkpoint\n");
-    if (pagebuf_get(&pagebuf, io_fd)) {
-         ERROR("error when buffering batch, finishing\n");
-         goto finish;
+    */
+    if (!resume(resumedata)) {
+      if (pagebuf_get(&pagebuf, io_fd)) {
+       ERROR("error when buffering batch, finishing\n");
+       goto finish;
+      }
+      memset(&tmptail, 0, sizeof(tmptail));
+      if ( buffer_tail(&tmptail, io_fd, max_vcpu_id, vcpumap,
+                      ext_vcpucontext) < 0 ) {
+       ERROR ("error buffering image tail, finishing");
+       goto finish;
+      }
+      tailbuf_free(&tailbuf);
+      memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
+
+      goto loadpages;
     }
-    if ( buffer_tail(&tmptail, io_fd, max_vcpu_id, vcpumap,
-                    ext_vcpucontext) < 0 ) {
-      ERROR ("error buffering image tail, finishing");
-         goto finish;
-    }
-    tailbuf_free(&tailbuf);
-    memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
-
-    goto loadpages;
 
   finish:
 
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -54,12 +54,16 @@
  * @parm store_mfn returned with the mfn of the store page
  * @parm hvm non-zero if this is a HVM restore
  * @parm pae non-zero if this HVM domain has PAE support enabled
+ * @parm resume a function returning non-zero to resume or 0 to expect
+ *       another checkpoint
+ * @parm resumedata a void pointer to pass back to the resume function
  * @return 0 on success, -1 on failure
  */
 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn,
-                      unsigned int hvm, unsigned int pae);
+                      unsigned int hvm, unsigned int pae,
+                     int (*resume)(void*), void* resumedata);
 
 /**
  * This function will create a domain for a paravirtualized Linux
diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py
+++ b/tools/python/xen/xend/XendCheckpoint.py
@@ -318,7 +318,8 @@
             restore_image.setCpuid()
 
 
-        os.read(fd, 1)           # Wait for source to close connection
+        #os.read(fd, 1)           # Wait for source to close connection
+        # ^^ breaks failover, and I don't know why it's needed.
         
         dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
 
diff --git a/tools/xcutils/xc_restore.c b/tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c
+++ b/tools/xcutils/xc_restore.c
@@ -11,10 +11,59 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
+#include <sys/select.h>
 
 #include <xenctrl.h>
 #include <xenguest.h>
 
+typedef struct {
+  int fd; /* descriptor that resume() polls with select() */
+} resume_t;
+
+static int resume(void* resumedata)
+{ /* Waits up to 500ms for rd->fd to become readable: returns 0 if it does, -1 otherwise. */
+  fd_set rfds;
+  struct timeval tv;
+//  char buf[64];  (only needed by the disabled #if 0 handshake below)
+  int rc;
+  resume_t* rd = resumedata;
+
+  FD_ZERO(&rfds);
+
+  do {
+    /* expect a heartbeat every 500ms maximum */
+    tv.tv_sec = 0;
+    tv.tv_usec = 500000;
+
+    FD_SET(rd->fd, &rfds);
+    rc = select(rd->fd + 1, &rfds, NULL, NULL, &tv);
+    if (!FD_ISSET(rd->fd, &rfds)) {
+      fprintf(stderr, "resume: heartbeat failed (select returned %d)\n", rc);
+      return -1; /* non-zero: the `if (!resume(resumedata))` caller skips buffering another checkpoint */
+    }
+#if 0 /* disabled "done"/"wait" handshake; never compiled */
+    rc = read(rd->fd, buf, 4);
+    if (rc == 4 && !strncmp(buf, "done", 4)) {
+      /*
+      fprintf(stderr, "resume: received 'done'\n");
+      */
+      return 0;
+    }
+    if (rc < 4 || strncmp(buf, "wait", 4)) {
+      if (rc >= 0)
+        buf[rc] = '\0';
+      else
+        buf[0] = '\0';
+      fprintf(stderr, "bad heartbeat response: %d, %s\n", rc, buf);
+      return -1;
+    }
+#endif
+  } while(0); /* body runs exactly once */
+
+  return 0; /* fd is readable: 0 tells the caller to buffer another checkpoint */
+}
+
 int
 main(int argc, char **argv)
 {
@@ -22,6 +71,7 @@
     unsigned int hvm, pae, apic;
     int xc_fd, io_fd, ret;
     unsigned long store_mfn, console_mfn;
+    resume_t rdata;
 
     if ( argc != 8 )
         errx(1, "usage: %s iofd domid store_evtchn "
@@ -39,8 +89,11 @@
     pae  = atoi(argv[6]);
     apic = atoi(argv[7]);
 
+    rdata.fd = io_fd;
+
     ret = xc_domain_restore(xc_fd, io_fd, domid, store_evtchn, &store_mfn,
-                            console_evtchn, &console_mfn, hvm, pae);
+                            console_evtchn, &console_mfn, hvm, pae,
+                           resume, &rdata);
 
     if ( ret == 0 )
     {

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel