# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1174045190 0
# Node ID 8e76e1b95b127c2bfca94cb3cb660c54bcced8b7
# Parent 422a61ebac541a40d60eee66e5ddf87d4855201e
[HVM][QEMU] Save/restore: enable HVM live migration
by getting page-dirtying bitmaps from qemu-dm as well as from xen.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
tools/ioemu/target-i386-dm/exec-dm.c | 16 ++++
tools/ioemu/xenstore.c | 127 ++++++++++++++++++++++++++++++++++
tools/libxc/Makefile | 2
tools/libxc/xc_hvm_save.c | 39 +++++++++-
tools/libxc/xenguest.h | 6 +
tools/libxc/xg_private.c | 4 -
tools/xcutils/Makefile | 6 -
tools/xcutils/xc_save.c | 129 ++++++++++++++++++++++++++++++++++-
8 files changed, 314 insertions(+), 15 deletions(-)
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Fri Mar 16 11:39:50 2007 +0000
@@ -450,6 +450,9 @@ static inline int paddr_is_ram(target_ph
#define phys_ram_addr(x) (phys_ram_base + (x))
#endif
+extern unsigned long *logdirty_bitmap;
+extern unsigned long logdirty_bitmap_size;
+
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
int len, int is_write)
{
@@ -485,9 +488,20 @@ void cpu_physical_memory_rw(target_phys_
l = 1;
}
} else if (paddr_is_ram(addr)) {
- /* Reading from RAM */
+ /* Writing to RAM */
ptr = phys_ram_addr(addr);
memcpy(ptr, buf, l);
+ if (logdirty_bitmap != NULL) {
+ /* Record that we have dirtied this frame */
+ unsigned long pfn = addr >> TARGET_PAGE_BITS;
+ if (pfn / 8 >= logdirty_bitmap_size) {
+ fprintf(logfile, "dirtying pfn %x >= bitmap size %x\n",
+ pfn, logdirty_bitmap_size * 8);
+ } else {
+ logdirty_bitmap[pfn / HOST_LONG_BITS]
+ |= 1UL << pfn % HOST_LONG_BITS;
+ }
+ }
#ifdef __ia64__
sync_icache(ptr, l);
#endif
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/ioemu/xenstore.c Fri Mar 16 11:39:50 2007 +0000
@@ -11,6 +11,11 @@
#include "vl.h"
#include "block_int.h"
#include <unistd.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
static struct xs_handle *xsh = NULL;
static char *hd_filename[MAX_DISKS];
@@ -183,6 +188,13 @@ void xenstore_parse_domain_config(int do
}
}
+ /* Set a watch for log-dirty requests from the migration tools */
+ if (pasprintf(&buf, "%s/logdirty/next-active", path) != -1) {
+ xs_watch(xsh, buf, "logdirty");
+ fprintf(logfile, "Watching %s\n", buf);
+ }
+
+
out:
free(type);
free(params);
@@ -201,6 +213,116 @@ int xenstore_fd(void)
return -1;
}
+unsigned long *logdirty_bitmap = NULL;
+unsigned long logdirty_bitmap_size;
+extern int vga_ram_size, bios_size;
+
+/*
+ * Handle a "logdirty" watch event: switch qemu's active log-dirty bitmap.
+ *
+ * On the first event for which the store holds a segment key, attach the
+ * shared-memory segment named by <domain path>/logdirty/key, compute the
+ * bitmap size we expect, and check it against the size stored in the
+ * first 32 bits of the segment.  On every event, read the requested
+ * buffer number ('0' or '1') from .../logdirty/next-active, point
+ * logdirty_bitmap at that half of the segment, and acknowledge by
+ * writing the same value to .../logdirty/active.
+ *
+ * Most failures are fatal (exit(1)).  A missing key or a bitmap size
+ * mismatch just returns with logdirty_bitmap untouched and nothing left
+ * mapped, so that a later event starts the setup again from scratch.
+ */
+void xenstore_process_logdirty_event(void)
+{
+    char *act;
+    static char *active_path = NULL;
+    static char *next_active_path = NULL;
+    static char *seg = NULL;
+    unsigned int len;
+    int i;
+
+    fprintf(logfile, "Triggered log-dirty buffer switch\n");
+
+    if (!seg) {
+        char *path, *p, *key_ascii;
+        /* 16 hex digits plus a terminating NUL (a char buffer, not an
+         * array of pointers) */
+        char key_terminated[17] = {0,};
+        key_t key;
+        int shmid;
+
+        /* Find and map the shared memory segment for log-dirty bitmaps */
+        if (!(path = xs_get_domain_path(xsh, domid))) {
+            fprintf(logfile, "Log-dirty: can't get domain path in store\n");
+            exit(1);
+        }
+        /* Room for the longest suffix we will ever append */
+        if (!(path = realloc(path, strlen(path)
+                             + strlen("/logdirty/next-active") + 1))) {
+            fprintf(logfile, "Log-dirty: out of memory\n");
+            exit(1);
+        }
+        strcat(path, "/logdirty/");
+        p = path + strlen(path);
+        strcpy(p, "key");
+
+        key_ascii = xs_read(xsh, XBT_NULL, path, &len);
+        if (!key_ascii) {
+            /* No key yet: wait for the next watch */
+            free(path);
+            return;
+        }
+        /* key_terminated was zero-filled, so copying at most 16 bytes
+         * always leaves it NUL-terminated */
+        strncpy(key_terminated, key_ascii, 16);
+        free(key_ascii);
+        key = (key_t) strtoull(key_terminated, NULL, 16);
+
+        /* Figure out how big the log-dirty bitmaps are */
+        logdirty_bitmap_size = ((phys_ram_size + 0x20
+                                 - (vga_ram_size + bios_size))
+                                >> (TARGET_PAGE_BITS)); /* nr of bits in map */
+        if (logdirty_bitmap_size > HVM_BELOW_4G_MMIO_START >> TARGET_PAGE_BITS)
+            logdirty_bitmap_size +=
+                HVM_BELOW_4G_MMIO_LENGTH >> TARGET_PAGE_BITS; /* still bits */
+        logdirty_bitmap_size = ((logdirty_bitmap_size + HOST_LONG_BITS - 1)
+                                / HOST_LONG_BITS); /* longs */
+        logdirty_bitmap_size *= sizeof (unsigned long); /* bytes */
+
+        /* Map the shared-memory segment (it holds two bitmaps) */
+        if ((shmid = shmget(key,
+                            2 * logdirty_bitmap_size,
+                            S_IRUSR|S_IWUSR)) == -1
+            || (seg = shmat(shmid, NULL, 0)) == (void *)-1) {
+            fprintf(logfile, "Log-dirty: can't map segment %16.16llx (%s)\n",
+                    (unsigned long long) key, strerror(errno));
+            exit(1);
+        }
+
+        fprintf(logfile, "Log-dirty: mapped segment at %p\n", seg);
+
+        /* Double-check that the bitmaps are the size we expect */
+        if (logdirty_bitmap_size != *(uint32_t *)seg) {
+            fprintf(logfile, "Log-dirty: got %lu, calc %lu\n",
+                    (unsigned long) *(uint32_t *)seg, logdirty_bitmap_size);
+            /* Undo the mapping and forget it.  Leaving seg set here would
+             * make the next event skip this setup and read the store
+             * with a NULL next_active_path. */
+            shmdt(seg);
+            seg = NULL;
+            free(path);
+            return;
+        }
+
+        /* Remember the paths for the next-active and active entries */
+        strcpy(p, "active");
+        if (!(active_path = strdup(path))) {
+            fprintf(logfile, "Log-dirty: out of memory\n");
+            exit(1);
+        }
+        strcpy(p, "next-active");
+        if (!(next_active_path = strdup(path))) {
+            fprintf(logfile, "Log-dirty: out of memory\n");
+            exit(1);
+        }
+        free(path);
+    }
+
+    /* Read the required active buffer from the store */
+    act = xs_read(xsh, XBT_NULL, next_active_path, &len);
+    if (!act) {
+        fprintf(logfile, "Log-dirty: can't read next-active\n");
+        exit(1);
+    }
+
+    /* Switch buffers */
+    i = act[0] - '0';
+    if (i != 0 && i != 1) {
+        fprintf(logfile, "Log-dirty: bad next-active entry: %s\n", act);
+        exit(1);
+    }
+    logdirty_bitmap = seg + i * logdirty_bitmap_size;
+
+    /* Ack that we've switched */
+    if (!xs_write(xsh, XBT_NULL, active_path, act, len))
+        fprintf(logfile, "Log-dirty: can't write active buffer ack\n");
+    free(act);
+}
+
+
+
void xenstore_process_event(void *opaque)
{
char **vec, *image = NULL;
@@ -209,6 +331,11 @@ void xenstore_process_event(void *opaque
vec = xs_read_watch(xsh, &num);
if (!vec)
return;
+
+ if (!strcmp(vec[XS_WATCH_TOKEN], "logdirty")) {
+ xenstore_process_logdirty_event();
+ goto out;
+ }
if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
strlen(vec[XS_WATCH_TOKEN]) != 3)
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/Makefile
--- a/tools/libxc/Makefile Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/libxc/Makefile Fri Mar 16 11:39:50 2007 +0000
@@ -57,7 +57,7 @@ GUEST_SRCS-$(CONFIG_IA64) += xc_dom_c
CFLAGS += -Werror -Wmissing-prototypes
CFLAGS += -fno-strict-aliasing
-CFLAGS += $(INCLUDES) -I.
+CFLAGS += $(INCLUDES) -I. -I../xenstore
# Needed for posix_fadvise64() in xc_linux.c
CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/libxc/xc_hvm_save.c Fri Mar 16 11:39:50 2007 +0000
@@ -54,6 +54,11 @@ static unsigned long hvirt_start;
/* #levels of page tables used by the current guest */
static unsigned int pt_levels;
+/* Shared-memory bitmaps for getting log-dirty bits from qemu */
+static unsigned long *qemu_bitmaps[2];
+static int qemu_active;
+static int qemu_non_active;
+
int xc_hvm_drain_io(int handle, domid_t dom)
{
DECLARE_HYPERCALL;
@@ -77,7 +82,8 @@ int xc_hvm_drain_io(int handle, domid_t
*/
#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE ((pfn_array_size + BITS_PER_LONG - 1) / 8)
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define BITMAP_SIZE (BITS_TO_LONGS(pfn_array_size) * sizeof(unsigned long))
#define BITMAP_ENTRY(_nr,_bmap) \
((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
@@ -123,6 +129,7 @@ static inline int permute( int i, int nr
return i;
}
+
static uint64_t tv_to_us(struct timeval *new)
{
@@ -277,7 +284,9 @@ static int suspend_and_state(int (*suspe
}
int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int),
+ void *(*init_qemu_maps)(int, unsigned),
+ void (*qemu_flip_buffer)(int, int))
{
xc_dominfo_t info;
@@ -392,8 +401,6 @@ int xc_hvm_save(int xc_handle, int io_fd
"nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages);
if (live) {
- ERROR("hvm domain doesn't support live migration now.\n");
- goto out;
if (xc_shadow_control(xc_handle, dom,
XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
@@ -453,6 +460,15 @@ int xc_hvm_save(int xc_handle, int io_fd
to_skip = malloc(BITMAP_SIZE);
+ if (live) {
+ /* Get qemu-dm logging dirty pages too */
+ void *seg = init_qemu_maps(dom, BITMAP_SIZE);
+ qemu_bitmaps[0] = seg;
+ qemu_bitmaps[1] = seg + BITMAP_SIZE;
+ qemu_active = 0;
+ qemu_non_active = 1;
+ }
+
hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
if ( hvm_buf_size == -1 )
{
@@ -677,10 +693,23 @@ int xc_hvm_save(int xc_handle, int io_fd
goto out;
}
+ /* Pull in the dirty bits from qemu too */
+ if (!last_iter) {
+ qemu_active = qemu_non_active;
+ qemu_non_active = qemu_active ? 0 : 1;
+ qemu_flip_buffer(dom, qemu_active);
+ for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++) {
+ to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+ qemu_bitmaps[qemu_non_active][j] = 0;
+ }
+ } else {
+ for (j = 0; j < BITMAP_SIZE / sizeof(unsigned long); j++)
+ to_send[j] |= qemu_bitmaps[qemu_active][j];
+ }
+
sent_last_iter = sent_this_iter;
print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
-
}
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/libxc/xenguest.h Fri Mar 16 11:39:50 2007 +0000
@@ -32,8 +32,10 @@ int xc_linux_save(int xc_handle, int io_
* @return 0 on success, -1 on failure
*/
int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
- uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
- int (*suspend)(int domid));
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid),
+ void *(*init_qemu_maps)(int, unsigned),
+ void (*qemu_flip_buffer)(int, int));
/**
* This function will restore a saved domain running Linux.
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/libxc/xg_private.c Fri Mar 16 11:39:50 2007 +0000
@@ -201,7 +201,9 @@ __attribute__((weak))
__attribute__((weak))
int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags,
- int (*suspend)(int domid))
+ int (*suspend)(int domid),
+ void *(*init_qemu_maps)(int, unsigned),
+ void (*qemu_flip_buffer)(int, int))
{
errno = ENOSYS;
return -1;
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/xcutils/Makefile Fri Mar 16 11:39:50 2007 +0000
@@ -13,7 +13,7 @@ include $(XEN_ROOT)/tools/Rules.mk
PROGRAMS_INSTALL_DIR = /usr/$(LIBDIR)/xen/bin
-INCLUDES += -I $(XEN_LIBXC)
+INCLUDES += -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
CFLAGS += -Werror -fno-strict-aliasing
CFLAGS += $(INCLUDES)
@@ -22,9 +22,9 @@ CFLAGS += -Wp,-MD,.$(@F).d
CFLAGS += -Wp,-MD,.$(@F).d
PROG_DEP = .*.d
-PROGRAMS = xc_restore xc_save readnotes
+PROGRAMS = xc_restore xc_save readnotes
-LDLIBS = -L$(XEN_LIBXC) -lxenguest -lxenctrl
+LDLIBS = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl -lxenstore
.PHONY: all
all: build
diff -r 422a61ebac54 -r 8e76e1b95b12 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Fri Mar 16 10:42:25 2007 +0000
+++ b/tools/xcutils/xc_save.c Fri Mar 16 11:39:50 2007 +0000
@@ -12,7 +12,13 @@
#include <stdint.h>
#include <string.h>
#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <xs.h>
#include <xenctrl.h>
#include <xenguest.h>
@@ -29,6 +35,123 @@ static int suspend(int domid)
return (fgets(ans, sizeof(ans), stdin) != NULL &&
!strncmp(ans, "done\n", 5));
+}
+
+/* For HVM guests, there are two sources of dirty pages: the Xen shadow
+ * log-dirty bitmap, which we get with a hypercall, and qemu's version.
+ * The protocol for getting page-dirtying data from qemu uses a
+ * double-buffered shared memory interface directly between xc_save and
+ * qemu-dm.
+ *
+ * xc_save calculates the size of the bitmaps and notifies qemu-dm
+ * through the store that it wants to share the bitmaps. qemu-dm then
+ * starts filling in the 'active' buffer.
+ *
+ * To change the buffers over, xc_save writes the other buffer number to
+ * the store and waits for qemu to acknowledge that it is now writing to
+ * the new active buffer. xc_save can then process and clear the old
+ * active buffer. */
+
+static char *qemu_active_path;
+static char *qemu_next_active_path;
+static struct xs_handle *xs;
+
+/* Get qemu to change buffers.
+ *
+ * Writes the single digit for next_active to the next-active store path,
+ * then waits until the active store path reads back the same digit.
+ * Each wait for a watch event is limited to 5 seconds.  Does not return
+ * on failure: a failed store write or a timed-out wait ends the process
+ * through errx().  domid is not used here. */
+static void qemu_flip_buffer(int domid, int next_active)
+{
+    char digit = '0' + next_active;
+    unsigned int len;
+    char *active_str, **watch;
+    struct timeval tv;
+    fd_set fdset;
+    int switched;
+
+    /* Tell qemu that we want it to start writing log-dirty bits to the
+     * other buffer */
+    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
+        errx(1, "can't write next-active to store path (%s)\n",
+             qemu_next_active_path);
+
+    /* Wait a while for qemu to signal that it has switched to the new
+     * active buffer */
+    do {
+        tv.tv_sec = 5;
+        tv.tv_usec = 0;
+        FD_ZERO(&fdset);
+        FD_SET(xs_fileno(xs), &fdset);
+        if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
+            errx(1, "timed out waiting for qemu to switch buffers\n");
+
+        /* Consume the watch event; only the store value matters */
+        watch = xs_read_watch(xs, &len);
+        free(watch);
+
+        /* The watch may fire before the value is right, so re-check and
+         * release the string we were handed on every pass */
+        active_str = xs_read(xs, XBT_NULL, qemu_active_path, &len);
+        switched = (active_str != NULL
+                    && active_str[0] - '0' == next_active);
+        free(active_str);
+    } while (!switched);
+}
+
+/* Create the shared-memory segment that carries qemu-dm's log-dirty
+ * bitmaps and tell qemu-dm about it through the store.
+ *
+ * The segment holds two bitmaps of bitmap_size bytes each.  It is
+ * zeroed, and bitmap_size is then written into its first 32 bits.  The
+ * segment key is published at <domain path>/logdirty/key, a watch is
+ * set on .../logdirty/active, and qemu is asked to start with buffer 0.
+ *
+ * Returns the address at which the segment is attached.  Does not
+ * return on failure: every error ends the process via err()/errx(). */
+static void * init_qemu_maps(int domid, unsigned int bitmap_size)
+{
+    key_t key;
+    char key_ascii[17] = {0,};
+    int shmid = -1;
+    void *seg;
+    char *path, *p;
+
+    /* Make a shared-memory segment */
+    while (shmid == -1)
+    {
+        key = rand(); /* No security, just a sequence of numbers */
+        if (key == IPC_PRIVATE)
+            continue; /* would create a segment nobody can look up by key */
+        shmid = shmget(key, 2 * bitmap_size,
+                       IPC_CREAT|IPC_EXCL|S_IRUSR|S_IWUSR);
+        if (shmid == -1 && errno != EEXIST)
+            err(1, "can't get shmem to talk to qemu-dm");
+    }
+
+    /* Map it into our address space */
+    seg = shmat(shmid, NULL, 0);
+    if (seg == (void *) -1)
+        err(1, "can't map shmem to talk to qemu-dm");
+    memset(seg, 0, 2 * bitmap_size);
+
+    /* Write the size of it into the first 32 bits */
+    *(uint32_t *)seg = bitmap_size;
+
+    /* Tell qemu about it */
+    if ((xs = xs_daemon_open()) == NULL)
+        errx(1, "Couldn't contact xenstore");
+    if (!(path = xs_get_domain_path(xs, domid)))
+        errx(1, "can't get domain path in store");
+    /* Room for the longest suffix we will ever append */
+    if (!(path = realloc(path, strlen(path)
+                         + strlen("/logdirty/next-active") + 1)))
+        errx(1, "no memory for constructing xenstore path");
+    strcat(path, "/logdirty/");
+    p = path + strlen(path);
+
+    strcpy(p, "key");
+    snprintf(key_ascii, 17, "%16.16llx", (unsigned long long) key);
+    if (!xs_write(xs, XBT_NULL, path, key_ascii, 16))
+        errx(1, "can't write key (%s) to store path (%s)\n", key_ascii, path);
+
+    /* Watch for qemu's indication of the active buffer, and request it
+     * to start writing to buffer 0 */
+    strcpy(p, "active");
+    if (!xs_watch(xs, path, "qemu-active-buffer"))
+        errx(1, "can't set watch in store (%s)\n", path);
+    if (!(qemu_active_path = strdup(path)))
+        errx(1, "no memory for copying xenstore path");
+
+    strcpy(p, "next-active");
+    if (!(qemu_next_active_path = strdup(path)))
+        errx(1, "no memory for copying xenstore path");
+
+    qemu_flip_buffer(domid, 0);
+
+    /* qemu_flip_buffer() only returns once the active path reads back
+     * buffer 0, which qemu-dm writes after attaching the segment.  Mark
+     * the segment for removal now so that it does not outlive the two
+     * processes.  NOTE(review): this relies on the usual System V
+     * behaviour of deferring destruction until the last detach, and on
+     * the active path not holding a stale value -- confirm. */
+    if (shmctl(shmid, IPC_RMID, NULL) == -1)
+        warn("can't mark shmem segment for removal");
+
+    free(path);
+    return seg;
}
@@ -52,9 +175,11 @@ main(int argc, char **argv)
flags = atoi(argv[5]);
if (flags & XCFLAGS_HVM)
- ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+ ret = xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags,
+ &suspend, &init_qemu_maps, &qemu_flip_buffer);
else
- ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags,
&suspend);
+ ret = xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags,
+ &suspend);
xc_interface_close(xc_fd);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|