# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxx>
# Date 1283535491 -3600
# Node ID 9aa2e9cc1b24bc516dcff13181dc4bd2d9e51922
# Parent 4e98698360910f68571a8d82b06681aa5c06d96c
libxc: document save/restore protocol
Reverse engineered from the code; it likely contains inaccuracies, but I
think it provides a base to work from.
Add symbolic names for the minus-flags.
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
---
tools/libxc/xc_domain_restore.c | 16 ++---
tools/libxc/xc_domain_save.c | 20 +++----
tools/libxc/xg_save_restore.h | 113 +++++++++++++++++++++++++++++++++++++++-
3 files changed, 130 insertions(+), 19 deletions(-)
diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xc_domain_restore.c Fri Sep 03 18:38:11 2010 +0100
@@ -683,11 +683,11 @@ static int pagebuf_get_one(xc_interface
if (!count) {
// DPRINTF("Last batch read\n");
return 0;
- } else if (count == -1) {
+ } else if (count == XC_SAVE_ID_ENABLE_VERIFY_MODE) {
DPRINTF("Entering page verify mode\n");
buf->verify = 1;
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if (count == -2) {
+ } else if (count == XC_SAVE_ID_VCPU_INFO) {
buf->new_ctxt_format = 1;
if ( RDEXACT(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
buf->max_vcpu_id >= 64 || RDEXACT(fd, &buf->vcpumap,
@@ -697,7 +697,7 @@ static int pagebuf_get_one(xc_interface
}
// DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id,
buf->vcpumap);
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if (count == -3) {
+ } else if (count == XC_SAVE_ID_HVM_IDENT_PT) {
/* Skip padding 4 bytes then read the EPT identity PT location. */
if ( RDEXACT(fd, &buf->identpt, sizeof(uint32_t)) ||
RDEXACT(fd, &buf->identpt, sizeof(uint64_t)) )
@@ -707,7 +707,7 @@ static int pagebuf_get_one(xc_interface
}
// DPRINTF("EPT identity map address: %llx\n", buf->identpt);
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if ( count == -4 ) {
+ } else if ( count == XC_SAVE_ID_HVM_VM86_TSS ) {
/* Skip padding 4 bytes then read the vm86 TSS location. */
if ( RDEXACT(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
RDEXACT(fd, &buf->vm86_tss, sizeof(uint64_t)) )
@@ -717,7 +717,7 @@ static int pagebuf_get_one(xc_interface
}
// DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if ( count == -5 ) {
+ } else if ( count == XC_SAVE_ID_TMEM ) {
DPRINTF("xc_domain_restore start tmem\n");
if ( xc_tmem_restore(xch, dom, fd) ) {
PERROR("error reading/restoring tmem");
@@ -725,13 +725,13 @@ static int pagebuf_get_one(xc_interface
}
return pagebuf_get_one(xch, ctx, buf, fd, dom);
}
- else if ( count == -6 ) {
+ else if ( count == XC_SAVE_ID_TMEM_EXTRA ) {
if ( xc_tmem_restore_extra(xch, dom, fd) ) {
PERROR("error reading/restoring tmem extra");
return -1;
}
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if ( count == -7 ) {
+ } else if ( count == XC_SAVE_ID_TSC_INFO ) {
uint32_t tsc_mode, khz, incarn;
uint64_t nsec;
if ( RDEXACT(fd, &tsc_mode, sizeof(uint32_t)) ||
@@ -743,7 +743,7 @@ static int pagebuf_get_one(xc_interface
return -1;
}
return pagebuf_get_one(xch, ctx, buf, fd, dom);
- } else if (count == -8 ) {
+ } else if (count == XC_SAVE_ID_HVM_CONSOLE_PFN ) {
/* Skip padding 4 bytes then read the console pfn location. */
if ( RDEXACT(fd, &buf->console_pfn, sizeof(uint32_t)) ||
RDEXACT(fd, &buf->console_pfn, sizeof(uint64_t)) )
diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xc_domain_save.c Fri Sep 03 18:38:11 2010 +0100
@@ -861,7 +861,7 @@ static xen_pfn_t *map_and_save_p2m_table
/* must be done AFTER suspend_and_state() */
static int save_tsc_info(xc_interface *xch, uint32_t dom, int io_fd)
{
- int marker = -7;
+ int marker = XC_SAVE_ID_TSC_INFO;
uint32_t tsc_mode, khz, incarn;
uint64_t nsec;
@@ -1142,7 +1142,7 @@ int xc_domain_save(xc_interface *xch, in
print_stats(xch, dom, 0, &stats, 0);
- tmem_saved = xc_tmem_save(xch, dom, io_fd, live, -5);
+ tmem_saved = xc_tmem_save(xch, dom, io_fd, live, XC_SAVE_ID_TMEM);
if ( tmem_saved == -1 )
{
PERROR("Error when writing to state file (tmem)");
@@ -1474,13 +1474,13 @@ int xc_domain_save(xc_interface *xch, in
if ( last_iter && debug )
{
- int minusone = -1;
+ int id = XC_SAVE_ID_ENABLE_VERIFY_MODE;
memset(to_send, 0xff, BITMAP_SIZE);
debug = 0;
DPRINTF("Entering debug resend-all mode\n");
/* send "-1" to put receiver into debug mode */
- if ( wrexact(io_fd, &minusone, sizeof(int)) )
+ if ( wrexact(io_fd, &id, sizeof(int)) )
{
PERROR("Error when writing to state file (6)");
goto out;
@@ -1511,7 +1511,7 @@ int xc_domain_save(xc_interface *xch, in
DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
if ( (tmem_saved > 0) &&
- (xc_tmem_save_extra(xch,dom,io_fd,-6) == -1) )
+ (xc_tmem_save_extra(xch,dom,io_fd,XC_SAVE_ID_TMEM_EXTRA)
== -1) )
{
PERROR("Error when writing to state file (tmem)");
goto out;
@@ -1545,10 +1545,10 @@ int xc_domain_save(xc_interface *xch, in
{
struct {
- int minustwo;
+ int id;
int max_vcpu_id;
uint64_t vcpumap;
- } chunk = { -2, info.max_vcpu_id };
+ } chunk = { XC_SAVE_ID_VCPU_INFO, info.max_vcpu_id };
if ( info.max_vcpu_id >= 64 )
{
@@ -1580,7 +1580,7 @@ int xc_domain_save(xc_interface *xch, in
uint64_t data;
} chunk = { 0, };
- chunk.id = -3;
+ chunk.id = XC_SAVE_ID_HVM_IDENT_PT;
xc_get_hvm_param(xch, dom, HVM_PARAM_IDENT_PT,
(unsigned long *)&chunk.data);
@@ -1591,7 +1591,7 @@ int xc_domain_save(xc_interface *xch, in
goto out;
}
- chunk.id = -4;
+ chunk.id = XC_SAVE_ID_HVM_VM86_TSS;
xc_get_hvm_param(xch, dom, HVM_PARAM_VM86_TSS,
(unsigned long *)&chunk.data);
@@ -1602,7 +1602,7 @@ int xc_domain_save(xc_interface *xch, in
goto out;
}
- chunk.id = -8;
+ chunk.id = XC_SAVE_ID_HVM_CONSOLE_PFN;
xc_get_hvm_param(xch, dom, HVM_PARAM_CONSOLE_PFN,
(unsigned long *)&chunk.data);
diff -r 4e9869836091 -r 9aa2e9cc1b24 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Fri Sep 03 18:22:31 2010 +0100
+++ b/tools/libxc/xg_save_restore.h Fri Sep 03 18:38:11 2010 +0100
@@ -1,5 +1,5 @@
/*
- * Defintions and utilities for save / restore.
+ * Definitions and utilities for save / restore.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -20,6 +20,117 @@
#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
+
+/*
+ * SAVE/RESTORE/MIGRATE PROTOCOL
+ * =============================
+ *
+ * The general form of a stream of chunks is a header followed by a
+ * body consisting of a variable number of chunks (terminated by a
+ * chunk with type 0) followed by a tail.
+ *
+ * For a rolling/checkpoint (e.g. Remus) migration, the body and
+ * tail phases can be repeated until an external event
+ * (e.g. failure) causes the process to terminate and commit to the
+ * most recent complete checkpoint.
+ *
+ * HEADER
+ * ------
+ *
+ * unsigned long : p2m_size
+ *
+ * extended-info (PV-only, optional):
+ *
+ * If first unsigned long == ~0UL then extended info is present,
+ * otherwise unsigned long is part of p2m. Note that p2m_size above
+ * does not include the length of the extended info.
+ *
+ * extended-info:
+ *
+ * unsigned long : signature == ~0UL
+ * uint32_t : number of bytes remaining in extended-info
+ *
+ * 1 or more extended-info blocks of form:
+ * char[4] : block identifier
+ * uint32_t : block data size
+ * bytes : block data
+ *
+ * defined extended-info blocks:
+ * "vcpu" : VCPU context info containing vcpu_guest_context_t.
+ * The precise variant of the context structure
+ * (e.g. 32 vs 64 bit) is distinguished by
+ * the block size.
+ * "extv" : Presence indicates use of extended VCPU context in
+ * tail, data size is 0.
+ *
+ * p2m (PV-only):
+ *
+ * consists of p2m_size bytes comprising an array of xen_pfn_t sized entries.
+ *
+ * BODY PHASE
+ * ----------
+ *
+ * A series of chunks with a common header:
+ * int : chunk type
+ *
+ * If the chunk type is positive then the chunk contains guest memory data,
+ * and the type contains the number of pages in the batch:
+ *
+ * unsigned long[] : PFN array, length == number of pages in batch
+ * Each entry consists of XEN_DOMCTL_PFINFO_*
+ * in bits 31-28 and the PFN number in bits 27-0.
+ * page data : PAGE_SIZE bytes for each page marked present in PFN
+ * array
+ *
+ * If the chunk type is negative then the chunk consists of one of a number
+ * of metadata types. See definitions of XC_SAVE_ID_* below.
+ *
+ * If chunk type is 0 then body phase is complete.
+ *
+ * TAIL PHASE
+ * ----------
+ *
+ * Content differs for PV and HVM guests.
+ *
+ * HVM TAIL:
+ *
+ * "Magic" pages:
+ * uint64_t : I/O req PFN
+ * uint64_t : Buffered I/O req PFN
+ * uint64_t : Store PFN
+ * Xen HVM Context:
+ * uint32_t : Length of context in bytes
+ * bytes : Context data
+ * Qemu context:
+ * char[21] : Signature:
+ * "QemuDeviceModelRecord" : Read Qemu save data until EOF
+ * "RemusDeviceModelState" : uint32_t length field followed by that many
+ * bytes of Qemu save data
+ *
+ * PV TAIL:
+ *
+ * Unmapped PFN list : list of all the PFNs that were not in map at the close
+ * unsigned int : Number of unmapped pages
+ * unsigned long[] : PFNs of unmapped pages
+ *
+ * VCPU context data : A series of VCPU records, one per present VCPU
+ * Maximum and present map supplied in XC_SAVE_ID_VCPU_INFO
+ * bytes : VCPU context structure. Size is determined by size
+ * provided in extended-info header
+ * bytes[128] : Extended VCPU context (present IFF "extv" block
+ * present in extended-info header)
+ *
+ * Shared Info Page : 4096 bytes of shared info page
+ */
+
+#define XC_SAVE_ID_ENABLE_VERIFY_MODE -1 /* Switch to validation phase: receiver enters page verify mode. */
+#define XC_SAVE_ID_VCPU_INFO -2 /* Additional VCPU info: int max_vcpu_id followed by uint64_t vcpumap. */
+#define XC_SAVE_ID_HVM_IDENT_PT -3 /* (HVM-only) EPT identity PT location: 4 bytes padding, then uint64_t. */
+#define XC_SAVE_ID_HVM_VM86_TSS -4 /* (HVM-only) vm86 TSS location: 4 bytes padding, then uint64_t. */
+#define XC_SAVE_ID_TMEM -5 /* Tmem data; written by xc_tmem_save(), consumed by xc_tmem_restore(). */
+#define XC_SAVE_ID_TMEM_EXTRA -6 /* Extra tmem data; written by xc_tmem_save_extra(), consumed by xc_tmem_restore_extra(). */
+#define XC_SAVE_ID_TSC_INFO -7 /* TSC info; carries tsc_mode, nsec, khz and incarn values. */
+#define XC_SAVE_ID_HVM_CONSOLE_PFN -8 /* (HVM-only) console pfn location: 4 bytes padding, then uint64_t. */
/*
** We process save/restore/migrate in batches of pages; the below
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|