Attached is the VMX domain save/restore patch. It works well for different
guest/host combinations without breaking domU save/restore, according to my
testing. Please apply Xiaowei's qemu-dm fix first.
=== known issues ===
* shared-page PFN *
The hypervisor looks up the shared-page PFN in an E820 entry at init time,
but some guests (Windows / EM64T Linux) will reuse that E820 RAM, which
loses the shared-page PFN across restore. So this entry is marked as
"reserved" to prevent guest reuse (in this patch, xc_hvm_build.c). We can
change this if a better solution is found in the future.
* 64-bit host VMX restore Python error *
When restoring a VMX guest on an EM64T host, I get the error "ERROR
(xmlrpclib2:167) int exceeds XML-RPC limits", although it does not block the
restore. Running "xend restart" works around it.
* guest SMP support *
I am currently working on guest SMP support, including APIC/VMCS
save/restore. Until that lands, turning on "apic" in the config file may
cause save/restore to fail.
* guest save/restore across platforms *
E.g. saving a 32-bit guest on a 64-bit host, then restoring it on a 32-bit
host. We cannot support this because save/restore sees a different
vcpu_context format on each host type; a universal format is needed.
=== test report ===
"+" means pass, "-" means fail
32b host:
+ 32/32
+ 32win/32
pae host:
+ 32/pae
+ pae/pae
+ 32win/pae
+ pae_win/pae
em64t host:
+ 32/64
+ pae/64
+ 64/64
+ 32win/64
+ pae_win/64
pae_win/64 is sometimes unstable :(
# HG changeset patch
# User Edwin Zhai <edwin.zhai@xxxxxxxxx>
# Node ID 2abb1c801ab72ee7e88b144871162fe2e47a0970
# Parent 98c3ddf83a59b0cbbdce63bb210adfd0d2ec1aea
vmx save/restore support
Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>
Signed-off-by: Dong Eddie <eddie.dong@xxxxxxxxx>
Signed-off-by: Nakajima Jun <jun.nakajima@xxxxxxxxx>
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/ioemu/hw/cirrus_vga.c Wed Jul 19 16:09:59 2006 +0800
@@ -3010,11 +3010,44 @@ static CPUWriteMemoryFunc *cirrus_mmio_w
cirrus_mmio_writel,
};
+/* Tear down the accelerated LFB mapping before save/suspend so the vram
+ * contents can be serialized from s->vram_ptr; no-op when acceleration
+ * is not currently active (map_addr == 0). */
+void cirrus_stop_acc(CirrusVGAState *s)
+{
+    if (s->map_addr){
+        int error;
+        s->map_addr = 0;
+        error = unset_vram_mapping(s->cirrus_lfb_addr,
+                                   s->cirrus_lfb_end);
+        /* BUG FIX: 'error' was computed but never checked (unused
+         * variable).  Assumes non-zero indicates failure -- TODO confirm
+         * unset_vram_mapping()'s return convention. */
+        if (error)
+            fprintf(stderr, "cirrus_stop_acc: unset_vram_mapping failed.\n");
+        else
+            fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
+
+        munmap(s->vram_ptr, VGA_RAM_SIZE);
+    }
+}
+
+/* Re-establish the accelerated LFB mapping after restore: map vram over
+ * the guest LFB range saved in cirrus_lfb_addr/end, swap it in as the
+ * active vram buffer and free the old one.  No-op if the saved state had
+ * no LFB range recorded. */
+void cirrus_restart_acc(CirrusVGAState *s)
+{
+    if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+        void *vram_pointer, *old_vram;
+        /* NOTE: rejoined the format string that was line-wrapped by the
+         * mailer in the posted patch. */
+        fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx,lfb_end=0x%lx.\n",
+                s->cirrus_lfb_addr, s->cirrus_lfb_end);
+        vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
+        if (!vram_pointer){
+            fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
+        } else {
+            old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                                       VGA_RAM_SIZE);
+            qemu_free(old_vram);
+            s->map_addr = s->cirrus_lfb_addr;
+            s->map_end = s->cirrus_lfb_end;
+        }
+    }
+}
+
/* load/save state */
static void cirrus_vga_save(QEMUFile *f, void *opaque)
{
CirrusVGAState *s = opaque;
+ uint8_t vga_acc;
qemu_put_be32s(f, &s->latch);
qemu_put_8s(f, &s->sr_index);
@@ -3049,11 +3082,20 @@ static void cirrus_vga_save(QEMUFile *f,
qemu_put_be32s(f, &s->hw_cursor_y);
/* XXX: we do not save the bitblt state - we assume we do not save
the state when the blitter is active */
+
+ vga_acc = (!!s->map_addr);
+ qemu_put_8s(f, &vga_acc);
+ qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+ qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
+ if (vga_acc)
+ cirrus_stop_acc(s);
}
static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
{
CirrusVGAState *s = opaque;
+ uint8_t vga_acc = 0;
if (version_id != 1)
return -EINVAL;
@@ -3091,6 +3133,14 @@ static int cirrus_vga_load(QEMUFile *f,
qemu_get_be32s(f, &s->hw_cursor_x);
qemu_get_be32s(f, &s->hw_cursor_y);
+
+ qemu_get_8s(f, &vga_acc);
+ qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
+ qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
+ qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE);
+ if (vga_acc){
+ cirrus_restart_acc(s);
+ }
/* force refresh */
s->graphic_mode = -1;
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/ioemu/target-i386-dm/helper2.c Wed Jul 19 16:09:59 2006 +0800
@@ -457,6 +457,7 @@ int main_loop(void)
{
extern int vm_running;
extern int shutdown_requested;
+ extern int suspend_requested;
CPUState *env = cpu_single_env;
int evtchn_fd = xc_evtchn_fd(xce_handle);
@@ -472,6 +473,10 @@ int main_loop(void)
qemu_system_reset();
reset_requested = 0;
}
+ if (suspend_requested) {
+ fprintf(logfile, "device model received suspend signal!\n");
+ break;
+ }
}
/* Wait up to 10 msec. */
@@ -483,7 +488,15 @@ int main_loop(void)
shared_page->vcpu_iodata[send_vcpu].dm_eport);
}
}
- destroy_hvm_domain();
+    if (!suspend_requested)
+        destroy_hvm_domain();
+    else {
+        /* On suspend, checkpoint device-model state to a per-domain tmp
+         * file for xend to embed in the save image.
+         * BUG FIX: the prefix "/tmp/xen.qemu-dm." is 17 chars, so a
+         * domid of 100+ overflowed the old 20-byte buffer; use a larger
+         * buffer and snprintf. */
+        char qemu_file[64];
+        snprintf(qemu_file, sizeof(qemu_file), "/tmp/xen.qemu-dm.%d", domid);
+        if (qemu_savevm(qemu_file) < 0)
+            fprintf(stderr, "qemu save fail.\n");
+    }
+
return 0;
}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/ioemu/vl.c Wed Jul 19 16:09:59 2006 +0800
@@ -3884,6 +3884,11 @@ int qemu_loadvm(const char *filename)
qemu_fseek(f, cur_pos + record_len, SEEK_SET);
}
fclose(f);
+
+ /* del tmp file */
+ if (unlink(filename) == -1)
+ fprintf(stderr, "delete tmp qemu state file failed.\n");
+
ret = 0;
the_end:
if (saved_vm_running)
@@ -4470,6 +4475,7 @@ static QEMUResetEntry *first_reset_entry
static QEMUResetEntry *first_reset_entry;
int reset_requested;
int shutdown_requested;
+int suspend_requested;
static int powerdown_requested;
void qemu_register_reset(QEMUResetHandler *func, void *opaque)
@@ -5242,6 +5248,14 @@ int set_mm_mapping(int xc_handle, uint32
#endif
return 0;
+}
+
+/* SIGUSR1 handler: xend sends SIGUSR1 to ask the device model to
+ * checkpoint itself; main_loop() polls suspend_requested and breaks out
+ * to save state.  (Rejoined the fprintf that was line-wrapped by the
+ * mailer in the posted patch; fixed the "dismatch" typo in the log.) */
+void suspend(int sig)
+{
+    fprintf(logfile, "suspend sig handler called with requested=%d!\n",
+            suspend_requested);
+    if (sig != SIGUSR1)
+        fprintf(logfile, "suspend signal mismatch, got sig=%d!\n", sig);
+    suspend_requested = 1;
}
int main(int argc, char **argv)
@@ -6010,6 +6024,27 @@ int main(int argc, char **argv)
vm_start();
}
}
+
+ /* register signal for the suspend request when save */
+ {
+ struct sigaction act;
+ sigset_t set;
+ act.sa_handler = suspend;
+ act.sa_flags = SA_RESTART;
+ sigemptyset(&act.sa_mask);
+
+ if (sigaction(SIGUSR1, &act, 0) == -1)
+ fprintf(stderr, "sigaction fail!\n");
+
+ /* control panel mask some signals when spawn qemu, need unmask here*/
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGTERM);
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+ fprintf(stderr, "unblock signal fail!\n");
+
+ }
+
main_loop();
quit_timers();
return 0;
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/Makefile
--- a/tools/libxc/Makefile Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/Makefile Wed Jul 19 16:09:59 2006 +0800
@@ -33,7 +33,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_linux_bui
GUEST_SRCS-$(CONFIG_X86) += xc_linux_build.c
GUEST_SRCS-$(CONFIG_IA64) += xc_ia64_stubs.c xc_linux_build.c
GUEST_SRCS-$(CONFIG_MIGRATE) += xc_linux_restore.c xc_linux_save.c
-GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
+GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c xc_hvm_restore.c xc_hvm_save.c
CFLAGS += -Werror
CFLAGS += -fno-strict-aliasing
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xc_domain.c Wed Jul 19 16:09:59 2006 +0800
@@ -182,6 +182,50 @@ int xc_domain_getinfolist(int xc_handle,
ret = -1;
return ret;
+}
+
+/* get info from hvm guest for save */
+/*
+ * Fetch the HVM domain context from the hypervisor via
+ * DOM0_GETHVMCONTEXT into *hvm_ctxt.  The buffer is mlock()ed around
+ * the dom0 op because the hypervisor writes through the guest handle
+ * directly, and unlocked before returning.  Returns the dom0-op result
+ * (0 on success) or the non-zero mlock() result on failure.
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+ uint32_t domid,
+ hvm_domain_context_t *hvm_ctxt)
+{
+ int rc;
+ DECLARE_DOM0_OP;
+
+ op.cmd = DOM0_GETHVMCONTEXT;
+ op.u.gethvmcontext.domain = (domid_t)domid;
+ set_xen_guest_handle(op.u.gethvmcontext.hvm_ctxt, hvm_ctxt);
+
+ if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+ return rc;
+
+ rc = do_dom0_op(xc_handle, &op);
+
+ safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+ return rc;
+}
+
+/* set info to hvm guest for restore */
+/*
+ * Push a previously-saved HVM domain context back into the hypervisor
+ * via DOM0_SETHVMCONTEXT.  The buffer is mlock()ed around the dom0 op
+ * because the hypervisor reads through the guest handle directly.
+ * Returns the dom0-op result (0 on success) or the non-zero mlock()
+ * result on failure.
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+                             uint32_t domid,
+                             hvm_domain_context_t *hvm_ctxt)
+{
+    int rc;
+    DECLARE_DOM0_OP;
+
+    op.cmd = DOM0_SETHVMCONTEXT;
+    op.u.sethvmcontext.domain = (domid_t)domid;
+    /* BUG FIX: was set_xen_guest_handle(op.u.gethvmcontext.hvm_ctxt, ...)
+     * -- a copy/paste from the getcontext path; the SETHVMCONTEXT op must
+     * carry its handle in u.sethvmcontext. */
+    set_xen_guest_handle(op.u.sethvmcontext.hvm_ctxt, hvm_ctxt);
+
+    if ( (rc = mlock(hvm_ctxt, sizeof(*hvm_ctxt))) != 0 )
+        return rc;
+
+    rc = do_dom0_op(xc_handle, &op);
+
+    safe_munlock(hvm_ctxt, sizeof(*hvm_ctxt));
+
+    return rc;
}
int xc_vcpu_getcontext(int xc_handle,
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xc_hvm_build.c Wed Jul 19 16:09:59 2006 +0800
@@ -60,11 +60,11 @@ static unsigned char build_e820map(void
/* XXX: Doesn't work for > 4GB yet */
e820entry[nr_map].addr = 0x0;
- e820entry[nr_map].size = 0x9F800;
+ e820entry[nr_map].size = 0x90000;
e820entry[nr_map].type = E820_RAM;
nr_map++;
- e820entry[nr_map].addr = 0x9F800;
+ e820entry[nr_map].addr = 0x90000;
e820entry[nr_map].size = 0x800;
e820entry[nr_map].type = E820_RESERVED;
nr_map++;
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xc_linux_save.c Wed Jul 19 16:09:59 2006 +0800
@@ -261,15 +261,6 @@ static int ratewrite(int io_fd, void *bu
#endif
-static inline ssize_t write_exact(int fd, void *buf, size_t count)
-{
- if(write(fd, buf, count) != count)
- return 0;
- return 1;
-}
-
-
-
static int print_stats(int xc_handle, uint32_t domid, int pages_sent,
xc_shadow_control_stats_t *stats, int print)
{
@@ -358,7 +349,7 @@ static int analysis_phase(int xc_handle,
}
-static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
int dom, xc_dominfo_t *info,
vcpu_guest_context_t *ctxt)
{
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xenctrl.h Wed Jul 19 16:09:59 2006 +0800
@@ -286,6 +286,30 @@ int xc_domain_getinfolist(int xc_handle,
xc_domaininfo_t *info);
/**
+ * This function returns information about the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm hvm_ctxt a pointer to a structure to store the execution context of
the
+ * hvm domain
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext(int xc_handle,
+ uint32_t domid,
+ hvm_domain_context_t *hvm_ctxt);
+
+/**
+ * This function will set the context for hvm domain
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to set the hvm domain context for
+ * @parm hvm_ctxt pointer to the the hvm context with the values to set
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_setcontext(int xc_handle,
+ uint32_t domid,
+ hvm_domain_context_t *hvm_ctxt);
+
+/**
* This function returns information about the execution context of a
* particular vcpu of a domain.
*
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xenguest.h Wed Jul 19 16:09:59 2006 +0800
@@ -11,6 +11,7 @@
#define XCFLAGS_LIVE 1
#define XCFLAGS_DEBUG 2
+#define XCFLAGS_HVM 4
/**
@@ -25,6 +26,13 @@ int xc_linux_save(int xc_handle, int io_
uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
int (*suspend)(int domid));
+/**
+ * This function will save a hvm domain running unmodified guest.
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
+ int (*suspend)(int domid));
/**
* This function will restore a saved domain running Linux.
@@ -41,6 +49,17 @@ int xc_linux_restore(int xc_handle, int
unsigned long nr_pfns, unsigned int store_evtchn,
unsigned long *store_mfn, unsigned int console_evtchn,
unsigned long *console_mfn);
+
+/**
+ * This function will restore a saved hvm domain running unmodified guest.
+ *
+ * @parm store_mfn pass mem size & returned with the mfn of the store page
+ * @return 0 on success, -1 on failure
+ */
+int xc_hvm_restore(int xc_handle, int io_fd, uint32_t dom,
+ unsigned long nr_pfns, unsigned int store_evtchn,
+ unsigned long *store_mfn, unsigned int console_evtchn,
+ unsigned long *console_mfn);
/**
* This function will create a domain for a paravirtualized Linux
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/libxc/xg_save_restore.h Wed Jul 19 16:09:59 2006 +0800
@@ -65,6 +65,16 @@ static int get_platform_info(int xc_hand
return 1;
}
+/* Write exactly @count bytes from @buf to @fd.
+ * Returns 1 on success and 0 on failure (boolean semantics despite the
+ * ssize_t return type).  Moved here from xc_linux_save.c so the new HVM
+ * save path can share it.
+ * NOTE(review): a short write (write() returning less than count) is
+ * treated as outright failure rather than retried -- confirm callers
+ * never pass pipes/sockets where partial writes are routine. */
+static inline ssize_t write_exact(int fd, void *buf, size_t count)
+{
+ if(write(fd, buf, count) != count)
+ return 0;
+ return 1;
+}
+
+/* Shared by xc_linux_save.c and xc_hvm_save.c (definition lives in
+ * xc_linux_save.c, no longer static). */
+extern int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+ int dom, xc_dominfo_t *info,
+ vcpu_guest_context_t *ctxt);
/*
** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables.
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Jul 19 16:09:59 2006 +0800
@@ -132,6 +132,20 @@ static PyObject *pyxc_domain_destroy(XcO
static PyObject *pyxc_domain_destroy(XcObject *self, PyObject *args)
{
return dom_op(self, args, xc_domain_destroy);
+}
+
+/* Python binding for xc_domain_shutdown(): parses (dom, reason) ints,
+ * issues the shutdown hypercall for the domain, and returns 0 on
+ * success or raises OSError from errno on failure. */
+static PyObject *pyxc_domain_shutdown(XcObject *self, PyObject *args)
+{
+ uint32_t dom, reason;
+
+ if (!PyArg_ParseTuple(args, "ii", &dom, &reason))
+ return NULL;
+
+ if (xc_domain_shutdown(self->xc_handle, dom, reason) != 0)
+ return PyErr_SetFromErrno(xc_error);
+
+ Py_INCREF(zero);
+ return zero;
}
@@ -966,6 +980,14 @@ static PyMethodDef pyxc_methods[] = {
METH_VARARGS, "\n"
"Destroy a domain.\n"
" dom [int]: Identifier of domain to be destroyed.\n\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
+ { "domain_shutdown",
+ (PyCFunction)pyxc_domain_shutdown,
+ METH_VARARGS, "\n"
+ "Shutdown a domain.\n"
+ " dom [int, 0]: Domain identifier to use.\n"
+ " reason [int, 0]: Reason for shutdown.\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "vcpu_setaffinity",
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/python/xen/xend/XendCheckpoint.py Wed Jul 19 16:09:59 2006 +0800
@@ -25,11 +25,14 @@ from XendDomainInfo import DEV_MIGRATE_S
from XendDomainInfo import DEV_MIGRATE_STEP3
SIGNATURE = "LinuxGuestRecord"
+QEMU_SIGNATURE = "QemuDeviceModelRecord"
+dm_batch = 512
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"
sizeof_int = calcsize("i")
+sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")
@@ -72,6 +75,10 @@ def save(fd, dominfo, network, live, dst
"could not write guest state file: config len")
write_exact(fd, config, "could not write guest state file: config")
+ hvm = 0
+ if dominfo.info['image'][0] == 'hvm':
+ hvm = 1
+ log.info("save hvm domain %d", hvm)
# xc_save takes three customization parameters: maxit, max_f, and
# flags the last controls whether or not save is 'live', while the
# first two further customize behaviour when 'live' save is
@@ -79,7 +86,7 @@ def save(fd, dominfo, network, live, dst
# libxenguest; see the comments and/or code in xc_linux_save() for
# more information.
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(xc.handle()), str(fd),
- str(dominfo.getDomid()), "0", "0", str(int(live)) ]
+ str(dominfo.getDomid()), "0", "0", str(int(live) | int(hvm <<
2)) ]
log.debug("[xc_save]: %s", string.join(cmd))
def saveInputHandler(line, tochild):
@@ -93,11 +100,28 @@ def save(fd, dominfo, network, live, dst
log.info("Domain %d suspended.", dominfo.getDomid())
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
domain_name)
+ #send signal to device model for save
+ if hvm == 1:
+ log.info("release_devices for hvm domain")
+ dominfo.release_devices(True)
tochild.write("done\n")
tochild.flush()
log.debug('Written done')
forkHelper(cmd, fd, saveInputHandler, False)
+
+ # put qemu device model state
+ if hvm:
+ write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
+ qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
os.O_RDONLY)
+ while True:
+ buf = os.read(qemu_fd, dm_batch)
+ if len(buf):
+ write_exact(fd, buf, "could not write device model state")
+ else:
+ break
+ os.close(qemu_fd)
+ os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid())
dominfo.destroyDomain()
@@ -139,10 +163,21 @@ def restore(xd, fd):
assert store_port
assert console_port
- try:
- l = read_exact(fd, sizeof_unsigned_long,
- "not a valid guest state file: pfn count read")
- nr_pfns = unpack("L", l)[0] # native sizeof long
+ #if hvm, pass mem size to calculate the store_mfn
+ hvm = 0
+ if dominfo.info['image'][0] == 'hvm':
+ hvm = dominfo.info['memory']
+ log.info("restore hvm domain %d, mem=%d", dominfo.domid, hvm)
+
+ try:
+ if hvm:
+ l = read_exact(fd, sizeof_unsigned_int,
+ "not a valid hvm guest state file: pfn count read")
+ nr_pfns = unpack("I", l)[0] # native sizeof int
+ else:
+ l = read_exact(fd, sizeof_unsigned_long,
+ "not a valid guest state file: pfn count read")
+ nr_pfns = unpack("L", l)[0] # native sizeof long
if nr_pfns > 16*1024*1024: # XXX
raise XendError(
"not a valid guest state file: pfn count out of range")
@@ -151,7 +186,7 @@ def restore(xd, fd):
cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
xc.handle(), fd, dominfo.getDomid(), nr_pfns,
- store_port, console_port])
+ store_port, console_port, hvm])
log.debug("[xc_restore]: %s", string.join(cmd))
handler = RestoreInputHandler()
@@ -163,6 +198,23 @@ def restore(xd, fd):
dominfo.unpause()
+ # get qemu state and create a tmp file for dm restore
+ if hvm:
+ qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
+ "not a valid device model state: signature read")
+ if qemu_signature != QEMU_SIGNATURE:
+ raise XendError("not a valid device model state: found '%s'" %
+ signature)
+ qemu_fd = os.open("/tmp/xen.qemu-dm.%d" % dominfo.getDomid(),
+ os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+ while True:
+ buf = os.read(fd, dm_batch)
+ if len(buf):
+ write_exact(qemu_fd, buf, "could not write dm state to tmp
file")
+ else:
+ break
+ os.close(qemu_fd)
+
dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
return dominfo
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jul 19 16:09:59 2006 +0800
@@ -668,6 +668,13 @@ class XendDomainInfo:
self.console_mfn = console_mfn
self.introduceDomain()
+ if self.info['image'][0] == 'hvm':
+ self.image = image.create(self,
+ self.info['image'],
+ self.info['device'])
+ if self.image:
+ log.debug("createDevicemodel for hvm domain restore")
+ self.image.createDeviceModel(True)
self.storeDomDetails()
self.registerWatches()
self.refreshShutdown()
@@ -945,6 +952,13 @@ class XendDomainInfo:
raise XendError('Invalid reason: %s' % reason)
self.storeDom("control/shutdown", reason)
+ ## shutdown hypercall for hvm domain desides xenstore write
+ if self.info['image'][0] == 'hvm':
+ for code in shutdown_reasons.keys():
+ if shutdown_reasons[code] == reason:
+ break
+ xc.domain_shutdown(self.domid, code)
+
## private:
@@ -1417,8 +1431,11 @@ class XendDomainInfo:
## private:
- def release_devices(self):
+ def release_devices(self, suspend = False):
"""Release all domain's devices. Nothrow guarantee."""
+ if suspend and self.image:
+ self.image.destroy(suspend)
+ return
while True:
t = xstransact("%s/device" % self.dompath)
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/python/xen/xend/image.py Wed Jul 19 16:09:59 2006 +0800
@@ -157,7 +157,7 @@ class ImageHandler:
"""Build the domain. Define in subclass."""
raise NotImplementedError()
- def createDeviceModel(self):
+ def createDeviceModel(self, restore = False):
"""Create device model for the domain (define in subclass if
needed)."""
pass
@@ -331,7 +331,7 @@ class HVMImageHandler(ImageHandler):
ret = ret + ['-vnc', '%d' % vncdisplay, '-k', 'en-us']
return ret
- def createDeviceModel(self):
+ def createDeviceModel(self, restore = False):
if self.pid:
return
# Execute device model.
@@ -340,6 +340,8 @@ class HVMImageHandler(ImageHandler):
args = args + ([ "-d", "%d" % self.vm.getDomid(),
"-m", "%s" % (self.vm.getMemoryTarget() / 1024)])
args = args + self.dmargs
+ if restore:
+ args = args + ([ "-loadvm", "/tmp/xen.qemu-dm.%d" %
self.vm.getDomid() ])
env = dict(os.environ)
if self.display:
env['DISPLAY'] = self.display
@@ -351,12 +353,16 @@ class HVMImageHandler(ImageHandler):
self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
log.info("device model pid: %d", self.pid)
- def destroy(self):
+ def destroy(self, suspend = False):
self.unregister_shutdown_watch();
import signal
if not self.pid:
return
- os.kill(self.pid, signal.SIGKILL)
+ sig = signal.SIGKILL
+ if suspend:
+ log.info("use sigusr1 to signal qemu %d", self.pid)
+ sig = signal.SIGUSR1
+ os.kill(self.pid, sig)
os.waitpid(self.pid, 0)
self.pid = 0
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/xcutils/xc_restore.c Wed Jul 19 16:09:59 2006 +0800
@@ -18,12 +18,13 @@ main(int argc, char **argv)
main(int argc, char **argv)
{
unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn;
+ unsigned int hvm;
int ret;
unsigned long store_mfn, console_mfn;
- if (argc != 7)
+ if (argc != 8)
errx(1,
- "usage: %s xcfd iofd domid nr_pfns store_evtchn console_evtchn",
+ "usage: %s xcfd iofd domid nr_pfns store_evtchn console_evtchn
is_hvm",
argv[0]);
xc_fd = atoi(argv[1]);
@@ -32,9 +33,17 @@ main(int argc, char **argv)
nr_pfns = atoi(argv[4]);
store_evtchn = atoi(argv[5]);
console_evtchn = atoi(argv[6]);
+ hvm = atoi(argv[7]);
- ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
- &store_mfn, console_evtchn, &console_mfn);
+ if (hvm) {
+ /* pass the memsize to xc_hvm_restore to find the store_mfn */
+ store_mfn = hvm;
+ ret = xc_hvm_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+ &store_mfn, console_evtchn, &console_mfn);
+ } else
+ ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn,
+ &store_mfn, console_evtchn, &console_mfn);
+
if (ret == 0) {
printf("store-mfn %li\n", store_mfn);
printf("console-mfn %li\n", console_mfn);
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Wed Jul 19 13:45:04 2006 +0800
+++ b/tools/xcutils/xc_save.c Wed Jul 19 16:09:59 2006 +0800
@@ -47,5 +47,8 @@ main(int argc, char **argv)
max_f = atoi(argv[5]);
flags = atoi(argv[6]);
- return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+ if (flags & XCFLAGS_HVM)
+ return xc_hvm_save(xc_fd, io_fd, domid, maxit, max_f, flags, &suspend);
+ else
+ return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags,
&suspend);
}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/dom0_ops.c Wed Jul 19 16:09:59 2006 +0800
@@ -454,6 +454,7 @@ void arch_getdomaininfo_ctxt(
if ( hvm_guest(v) )
{
hvm_store_cpu_guest_regs(v, &c->user_regs, c->ctrlreg);
+ hvm_save_cpu_context(v, &c->hvmcpu_ctxt);
}
else
{
@@ -473,6 +474,25 @@ void arch_getdomaininfo_ctxt(
c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
c->vm_assist = v->domain->vm_assist;
+}
+
+/* Serialize HVM platform state into *c for DOM0_GETHVMCONTEXT; no-op
+ * for non-HVM vcpus. */
+void arch_gethvm_ctxt(
+ struct vcpu *v, struct hvm_domain_context *c)
+{
+ if ( !hvm_guest(v) )
+ return;
+
+ hvm_save(v, c);
+
+}
+
+/* Load HVM platform state from *c for DOM0_SETHVMCONTEXT; no-op for
+ * non-HVM vcpus. */
+void arch_sethvm_ctxt(
+ struct vcpu *v, struct hvm_domain_context *c)
+{
+ if ( !hvm_guest(v) )
+ return;
+
+ hvm_load(v, c);
}
/*
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/hvm.c Wed Jul 19 16:09:59 2006 +0800
@@ -182,6 +182,8 @@ static void hvm_get_info(struct domain *
unmap_domain_page(p);
}
+extern void arch_sethvm_ctxt(
+ struct vcpu *, struct hvm_domain_context *);
void hvm_setup_platform(struct domain* d)
{
struct hvm_domain *platform;
@@ -211,6 +213,16 @@ void hvm_setup_platform(struct domain* d
init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v,
v->processor);
pit_init(v, cpu_khz);
+
+ /* restore hvm context including pic/pit/shpage */
+ shpage_init(get_sp(d));
+
+ if (platform->hvm_ctxt) {
+ arch_sethvm_ctxt(current, platform->hvm_ctxt);
+ xfree(platform->hvm_ctxt);
+ platform->hvm_ctxt = NULL;
+ }
+
}
void pic_irq_request(void *data, int level)
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/i8254.c Wed Jul 19 16:09:59 2006 +0800
@@ -357,6 +357,142 @@ static uint32_t pit_ioport_read(void *op
return ret;
}
+/* Debug dump of all three PIT channels (and any attached periodic
+ * timer) to the console; called from pit_save/pit_load when
+ * HVM_DEBUG_SUSPEND is defined.
+ * NOTE(review): every printk uses the same "pit 0x%x." prefix, so the
+ * output is only interpretable by matching line order against this
+ * function -- consider naming each field in the message. */
+void pit_info(PITState *pit)
+{
+ PITChannelState *s;
+ int i;
+
+ for(i = 0; i < 3; i++) {
+ printk("*****pit channel %d's state:*****\n", i);
+ s = &pit->channels[i];
+ printk("pit 0x%x.\n", s->count);
+ printk("pit 0x%x.\n", s->latched_count);
+ printk("pit 0x%x.\n", s->count_latched);
+ printk("pit 0x%x.\n", s->status_latched);
+ printk("pit 0x%x.\n", s->status);
+ printk("pit 0x%x.\n", s->read_state);
+ printk("pit 0x%x.\n", s->write_state);
+ printk("pit 0x%x.\n", s->write_latch);
+ printk("pit 0x%x.\n", s->rw_mode);
+ printk("pit 0x%x.\n", s->mode);
+ printk("pit 0x%x.\n", s->bcd);
+ printk("pit 0x%x.\n", s->gate);
+ printk("pit %"PRId64"\n", s->count_load_time);
+
+ if (s->pt) {
+ struct periodic_time *pt = s->pt;
+ printk("pit channel %d has a periodic timer:\n", i);
+ printk("pt %d.\n", pt->enabled);
+ printk("pt %d.\n", pt->one_shot);
+ printk("pt %d.\n", pt->irq);
+ printk("pt %d.\n", pt->first_injected);
+
+ printk("pt %d.\n", pt->pending_intr_nr);
+ printk("pt %d.\n", pt->period);
+ printk("pt %"PRId64"\n", pt->period_cycles);
+ printk("pt %"PRId64"\n", pt->last_plt_gtime);
+ }
+ }
+
+}
+
+/* Serialize the three PIT channels plus the guest-time bookkeeping of
+ * the first channel that owns a periodic timer.  The record layout
+ * (consumed by pit_load) is: 3x channel fields, then pti(8),
+ * first_injected(8), pending_intr_nr(32), last_plt_gtime(64). */
+static void pit_save(hvm_domain_context_t *h, void *opaque)
+{
+    PITState *pit = opaque;
+    PITChannelState *s;
+    struct periodic_time *pt;
+    int i, pti = -1;
+
+#ifdef HVM_DEBUG_SUSPEND
+    pit_info(pit);
+#endif
+
+    for(i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        hvm_put_32u(h, s->count);
+        hvm_put_16u(h, s->latched_count);
+        hvm_put_8u(h, s->count_latched);
+        hvm_put_8u(h, s->status_latched);
+        hvm_put_8u(h, s->status);
+        hvm_put_8u(h, s->read_state);
+        hvm_put_8u(h, s->write_state);
+        hvm_put_8u(h, s->write_latch);
+        hvm_put_8u(h, s->rw_mode);
+        hvm_put_8u(h, s->mode);
+        hvm_put_8u(h, s->bcd);
+        hvm_put_8u(h, s->gate);
+        hvm_put_64u(h, s->count_load_time);
+
+        if (s->pt && pti == -1)
+            pti = i;
+    }
+
+    /* save guest time.
+     * BUG FIX: if no channel has a periodic timer, pti stays -1 and the
+     * old code dereferenced pit->channels[-1].pt (out of bounds / NULL).
+     * Emit zeroed fields instead so the record layout is unchanged;
+     * pit_load skips timer recreation for an out-of-range pti. */
+    hvm_put_8u(h, pti);
+    if (pti != -1) {
+        pt = pit->channels[pti].pt;
+        hvm_put_8u(h, pt->first_injected);
+        hvm_put_32u(h, pt->pending_intr_nr);
+        hvm_put_64u(h, pt->last_plt_gtime);
+    } else {
+        hvm_put_8u(h, 0);
+        hvm_put_32u(h, 0);
+        hvm_put_64u(h, 0);
+    }
+}
+
+/* Restore the three PIT channels from the stream written by pit_save,
+ * then recreate the periodic timer on channel pti (periodic for mode 2,
+ * one-shot for mode 1) and re-inject its guest-time bookkeeping.
+ * Returns 0 on success, -EINVAL on version mismatch. */
+static int pit_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+    PITState *pit = opaque;
+    PITChannelState *s;
+    int i, pti;
+    u32 period;
+    int first_injected;
+    u32 pending_intr_nr;
+    u64 last_plt_gtime;
+
+    if (version_id != 1)
+        return -EINVAL;
+
+    for(i = 0; i < 3; i++) {
+        s = &pit->channels[i];
+        s->count = hvm_get_32u(h);
+        s->latched_count = hvm_get_16u(h);
+        s->count_latched = hvm_get_8u(h);
+        s->status_latched = hvm_get_8u(h);
+        s->status = hvm_get_8u(h);
+        s->read_state = hvm_get_8u(h);
+        s->write_state = hvm_get_8u(h);
+        s->write_latch = hvm_get_8u(h);
+        s->rw_mode = hvm_get_8u(h);
+        s->mode = hvm_get_8u(h);
+        s->bcd = hvm_get_8u(h);
+        s->gate = hvm_get_8u(h);
+        s->count_load_time = hvm_get_64u(h);
+    }
+
+    /* Always consume all timer fields first so the stream stays in sync
+     * even when no timer can be recreated. */
+    pti = hvm_get_8u(h);
+    first_injected = hvm_get_8u(h);
+    pending_intr_nr = hvm_get_32u(h);
+    last_plt_gtime = hvm_get_64u(h);
+
+    /* BUG FIX: pti is read as an unsigned byte; a save image with no
+     * periodic timer (or a corrupt one) would index channels[] out of
+     * bounds.  Skip timer recreation in that case. */
+    if (pti < 0 || pti > 2)
+        return 0;
+
+    s = &pit->channels[pti];
+    period = DIV_ROUND((s->count * 1000000000ULL), PIT_FREQ);
+
+    printk("recreate periodic timer %d in mode %d, freq=%d.\n", pti, s->mode,
+           period);
+    switch (s->mode) {
+    case 2:
+        /* create periodic time */
+        s->pt = create_periodic_time(s, period, 0, 0);
+        break;
+    case 1:
+        /* create one shot time */
+        s->pt = create_periodic_time(s, period, 0, 1);
+        break;
+    default:
+        break;
+    }
+
+    /* BUG FIX: s->pt is NULL for modes other than 1/2 (default branch
+     * above creates nothing); the old code dereferenced it
+     * unconditionally. */
+    if (s->pt) {
+        s->pt->first_injected = first_injected;
+        s->pt->pending_intr_nr = pending_intr_nr;
+        s->pt->last_plt_gtime = last_plt_gtime;
+    }
+    /*XXX: need set_guest_time here or do this when post_inject? */
+
+#ifdef HVM_DEBUG_SUSPEND
+    pit_info(pit);
+#endif
+
+    return 0;
+}
+
static void pit_reset(void *opaque)
{
PITState *pit = opaque;
@@ -385,6 +521,8 @@ void pit_init(struct vcpu *v, unsigned l
s->vcpu = v;
s++; s->vcpu = v;
s++; s->vcpu = v;
+
+ hvm_register_savevm("xen_hvm_i8254", PIT_BASE, 1, pit_save, pit_load, pit);
register_portio_handler(PIT_BASE, 4, handle_pit_io);
/* register the speaker port */
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/i8259.c
--- a/xen/arch/x86/hvm/i8259.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/i8259.c Wed Jul 19 16:09:59 2006 +0800
@@ -454,12 +454,91 @@ static uint32_t elcr_ioport_read(void *o
return s->elcr;
}
+/* Debug dump of one i8259 PIC's register state to the console; called
+ * from pic_save/pic_load when HVM_DEBUG_SUSPEND is defined.
+ * NOTE(review): all lines share the "pic 0x%x." prefix, so output is
+ * only interpretable by line order -- consider naming each field. */
+void pic_info(PicState *s)
+{
+ printk("*****pic state:*****\n");
+ printk("pic 0x%x.\n", s->last_irr);
+ printk("pic 0x%x.\n", s->irr);
+ printk("pic 0x%x.\n", s->imr);
+ printk("pic 0x%x.\n", s->isr);
+ printk("pic 0x%x.\n", s->priority_add);
+ printk("pic 0x%x.\n", s->irq_base);
+ printk("pic 0x%x.\n", s->read_reg_select);
+ printk("pic 0x%x.\n", s->poll);
+ printk("pic 0x%x.\n", s->special_mask);
+ printk("pic 0x%x.\n", s->init_state);
+ printk("pic 0x%x.\n", s->auto_eoi);
+ printk("pic 0x%x.\n", s->rotate_on_auto_eoi);
+ printk("pic 0x%x.\n", s->special_fully_nested_mode);
+ printk("pic 0x%x.\n", s->init4);
+ printk("pic 0x%x.\n", s->elcr);
+ printk("pic 0x%x.\n", s->elcr_mask);
+}
+
+/* Serialize one i8259 PIC: 16 byte-wide register fields, in the exact
+ * order pic_load consumes them. */
+static void pic_save(hvm_domain_context_t *h, void *opaque)
+{
+ PicState *s = opaque;
+
+#ifdef HVM_DEBUG_SUSPEND
+ pic_info(s);
+#endif
+
+ hvm_put_8u(h, s->last_irr);
+ hvm_put_8u(h, s->irr);
+ hvm_put_8u(h, s->imr);
+ hvm_put_8u(h, s->isr);
+ hvm_put_8u(h, s->priority_add);
+ hvm_put_8u(h, s->irq_base);
+ hvm_put_8u(h, s->read_reg_select);
+ hvm_put_8u(h, s->poll);
+ hvm_put_8u(h, s->special_mask);
+ hvm_put_8u(h, s->init_state);
+ hvm_put_8u(h, s->auto_eoi);
+ hvm_put_8u(h, s->rotate_on_auto_eoi);
+ hvm_put_8u(h, s->special_fully_nested_mode);
+ hvm_put_8u(h, s->init4);
+ hvm_put_8u(h, s->elcr);
+ hvm_put_8u(h, s->elcr_mask);
+}
+
+/* Restore one i8259 PIC from the stream written by pic_save (same 16
+ * fields, same order).  Returns 0 on success, -EINVAL on version
+ * mismatch. */
+static int pic_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ PicState *s = opaque;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ s->last_irr = hvm_get_8u(h);
+ s->irr = hvm_get_8u(h);
+ s->imr = hvm_get_8u(h);
+ s->isr = hvm_get_8u(h);
+ s->priority_add = hvm_get_8u(h);
+ s->irq_base = hvm_get_8u(h);
+ s->read_reg_select= hvm_get_8u(h);
+ s->poll = hvm_get_8u(h);
+ s->special_mask = hvm_get_8u(h);
+ s->init_state = hvm_get_8u(h);
+ s->auto_eoi = hvm_get_8u(h);
+ s->rotate_on_auto_eoi = hvm_get_8u(h);
+ s->special_fully_nested_mode = hvm_get_8u(h);
+ s->init4 = hvm_get_8u(h);
+ s->elcr = hvm_get_8u(h);
+ s->elcr_mask = hvm_get_8u(h);
+
+#ifdef HVM_DEBUG_SUSPEND
+ pic_info(s);
+#endif
+
+ return 0;
+}
+
/* XXX: add generic master/slave system */
/* Caller must hold vpic lock */
static void pic_init1(int io_addr, int elcr_addr, PicState *s)
{
BUG_ON(!spin_is_locked(&s->pics_state->lock));
+ hvm_register_savevm("xen_hvm_i8259", io_addr, 1, pic_save, pic_load, s);
pic_reset(s);
}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/intercept.c Wed Jul 19 16:09:59 2006 +0800
@@ -29,6 +29,8 @@
#include <asm/current.h>
#include <io_ports.h>
#include <xen/event.h>
+#include <xen/compile.h>
+#include <public/version.h>
extern struct hvm_mmio_handler vlapic_mmio_handler;
@@ -303,6 +305,266 @@ void destroy_periodic_time(struct period
}
}
+/* save/restore support */
+#define HVM_FILE_MAGIC 0x54381286
+#define HVM_FILE_VERSION 0x00000001
+
+int hvm_register_savevm(const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque)
+{
+ HVMStateEntry *se, **pse;
+ struct vcpu *v = current;
+
+ if (!hvm_guest(v)) {
+ printk("register savevm only for hvm guest!\n");
+ return -1;
+ }
+
+ if ( (se = xmalloc(struct HVMStateEntry)) == NULL ){
+ printk("allocat hvmstate entry fail.\n");
+ return -1;
+ }
+
+ strncpy(se->idstr, idstr, HVM_SE_IDSTR_LEN);
+
+ se->instance_id = instance_id;
+ se->version_id = version_id;
+ se->save_state = save_state;
+ se->load_state = load_state;
+ se->opaque = opaque;
+ se->next = NULL;
+
+ /* add at the end of list */
+ pse = &v->domain->arch.hvm_domain.first_se;
+ while (*pse != NULL)
+ pse = &(*pse)->next;
+ *pse = se;
+ return 0;
+}
+
+int hvm_save(struct vcpu *v, hvm_domain_context_t *h)
+{
+ uint32_t len, len_pos, cur_pos;
+ uint32_t eax, ebx, ecx, edx;
+ HVMStateEntry *se;
+ char *chgset;
+
+ if (!hvm_guest(v)) {
+ printk("hvm_save only for hvm guest!\n");
+ return -1;
+ }
+
+ memset(h, 0, sizeof(hvm_domain_context_t));
+ hvm_put_32u(h, HVM_FILE_MAGIC);
+ hvm_put_32u(h, HVM_FILE_VERSION);
+
+ /* save xen changeset */
+ chgset = strrchr(XEN_CHANGESET, ' ') + 1;
+
+ len = strlen(chgset);
+ hvm_put_8u(h, len);
+ hvm_put_buffer(h, chgset, len);
+
+ /* save cpuid */
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ hvm_put_32u(h, eax);
+
+ for(se = v->domain->arch.hvm_domain.first_se; se != NULL; se = se->next) {
+ /* ID string */
+ len = strnlen(se->idstr, HVM_SE_IDSTR_LEN);
+ hvm_put_8u(h, len);
+ hvm_put_buffer(h, se->idstr, len);
+
+ hvm_put_32u(h, se->instance_id);
+ hvm_put_32u(h, se->version_id);
+
+ /* record size */
+ len_pos = hvm_ctxt_tell(h);
+ hvm_put_32u(h, 0);
+
+ se->save_state(h, se->opaque);
+
+ cur_pos = hvm_ctxt_tell(h);
+ len = cur_pos - len_pos - 4;
+ hvm_ctxt_seek(h, len_pos);
+ hvm_put_32u(h, len);
+ hvm_ctxt_seek(h, cur_pos);
+
+ }
+
+ h->size = hvm_ctxt_tell(h);
+ hvm_ctxt_seek(h, 0);
+
+ return 0;
+
+}
+
+static HVMStateEntry *find_se(struct domain *d, const char *idstr, int instance_id)
+{
+ HVMStateEntry *se;
+
+ for(se = d->arch.hvm_domain.first_se; se != NULL; se = se->next) {
+ if (!strncmp(se->idstr, idstr, HVM_SE_IDSTR_LEN) &&
+ instance_id == se->instance_id){
+ return se;
+ }
+ }
+ return NULL;
+}
+
+int hvm_load(struct vcpu *v, hvm_domain_context_t *h)
+{
+ uint32_t len, rec_len, rec_pos, magic, instance_id, version_id;
+ uint32_t eax, ebx, ecx, edx;
+ HVMStateEntry *se;
+ char idstr[HVM_SE_IDSTR_LEN];
+ xen_changeset_info_t chgset;
+ char *cur_chgset;
+
+ if (!hvm_guest(v)) {
+ printk("hvm_load only for hvm guest!\n");
+ return -1;
+ }
+
+ hvm_ctxt_seek(h, 0);
+
+ magic = hvm_get_32u(h);
+ if (magic != HVM_FILE_MAGIC) {
+ printk("HVM restore magic dismatch!\n");
+ return -1;
+ }
+
+ magic = hvm_get_32u(h);
+ if (magic != HVM_FILE_VERSION) {
+ printk("HVM restore version dismatch!\n");
+ return -1;
+ }
+
+ /* check xen change set */
+ cur_chgset = strrchr(XEN_CHANGESET, ' ') + 1;
+
+ len = hvm_get_8u(h);
+ hvm_get_buffer(h, chgset, len);
+ chgset[len] = '\0';
+ if (strncmp(cur_chgset, chgset, len + 1))
+ printk("warnings: try to restore hvm guest(%s) on a different changeset %s.\n",
+ chgset, cur_chgset);
+
+ /* check cpuid */
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ ebx = hvm_get_32u(h);
+ /*TODO: need difine how big difference is acceptable */
+ if (ebx != eax)
+ printk("warnings: try to restore hvm guest(0x%"PRIx32") "
+ "on a different type processor(0x%"PRIx32").\n",
+ ebx,
+ eax);
+
+ while(1) {
+ if (hvm_ctxt_end(h)) {
+ break;
+ }
+
+ /* ID string */
+ len = hvm_get_8u(h);
+ if (len > HVM_SE_IDSTR_LEN)
+ printk("HVM save entry idstr len wrong!");
+
+ hvm_get_buffer(h, idstr, len);
+ idstr[len] = '\0';
+
+ instance_id = hvm_get_32u(h);
+ version_id = hvm_get_32u(h);
+
+ rec_len = hvm_get_32u(h);
+ rec_pos = hvm_ctxt_tell(h);
+
+ se = find_se(v->domain, idstr, instance_id);
+ if (se)
+ se->load_state(h, se->opaque, version_id);
+ else
+ printk("warnings: hvm load can't find device %s's instance %d!\n",
+ idstr, version_id);
+
+
+ /* make sure to jump end of record */
+ if ( hvm_ctxt_tell(h) - rec_pos != rec_len) {
+ printk("wrong hvm record size, maybe some dismatch between save&restoreo handler!\n");
+ }
+ hvm_ctxt_seek(h, rec_pos + rec_len);
+ }
+
+ return 0;
+}
+
+void shpage_info(shared_iopage_t *sh)
+{
+
+ vcpu_iodata_t *p = &sh->vcpu_iodata[0];
+ ioreq_t *req = &p->vp_ioreq;
+ printk("*****sharepage_info******!\n");
+ printk("vp_eport=%d,dm_eport=%d\n", p->vp_eport, p->dm_eport);
+ printk("io packet: "
+ "state:%x, pvalid: %x, dir:%x, port: %"PRIx64", "
+ "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
+ req->state, req->pdata_valid, req->dir, req->addr,
+ req->u.data, req->count, req->size);
+ printk("pic_elcr=0x%x, pic_irr=0x%x, pic_last_irr=0x%x, pic_clear_irr=0x%x.\n",
+ sh->sp_global.pic_elcr,
+ sh->sp_global.pic_irr,
+ sh->sp_global.pic_last_irr,
+ sh->sp_global.pic_clear_irr);
+}
+
+static void shpage_save(hvm_domain_context_t *h, void *opaque)
+{
+ struct shared_iopage *s = opaque;
+ /* XXX:smp */
+ struct ioreq *req = &s->vcpu_iodata[0].vp_ioreq;
+
+#ifdef HVM_DEBUG_SUSPEND
+ shpage_info(s);
+#endif
+
+ hvm_put_16u(h, s->sp_global.pic_elcr);
+ hvm_put_16u(h, s->sp_global.pic_irr);
+ hvm_put_16u(h, s->sp_global.pic_last_irr);
+ hvm_put_16u(h, s->sp_global.pic_clear_irr);
+
+ hvm_put_buffer(h, (char*)req, sizeof(struct ioreq));
+}
+
+static int shpage_load(hvm_domain_context_t *h, void *opaque, int version_id)
+{
+ struct shared_iopage *s = opaque;
+ /* XXX:smp */
+ struct ioreq *req = &s->vcpu_iodata[0].vp_ioreq;
+ if (version_id != 1)
+ return -EINVAL;
+
+ s->sp_global.pic_elcr = hvm_get_16u(h);
+ s->sp_global.pic_irr = hvm_get_16u(h);
+ s->sp_global.pic_last_irr = hvm_get_16u(h);
+ s->sp_global.pic_clear_irr = hvm_get_16u(h);
+
+ hvm_get_buffer(h, (char*)req, sizeof(struct ioreq));
+
+#ifdef HVM_DEBUG_SUSPEND
+ shpage_info(s);
+#endif
+
+ return 0;
+}
+
+void shpage_init(shared_iopage_t *sp)
+{
+ hvm_register_savevm("xen_hvm_shpage", 0x10, 1, shpage_save, shpage_load, sp);
+}
+
/*
* Local variables:
* mode: C
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Jul 19 16:09:59 2006 +0800
@@ -763,6 +763,7 @@ static void svm_relinquish_guest_resourc
{
extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
struct vcpu *v;
+ HVMStateEntry *se, *dse;
for_each_vcpu ( d, v )
{
@@ -780,6 +781,13 @@ static void svm_relinquish_guest_resourc
}
kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
+
+ se = d->arch.hvm_domain.first_se;
+ while (se) {
+ dse = se;
+ se = se->next;
+ xfree(dse);
+ }
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jul 19 16:09:59 2006 +0800
@@ -572,6 +572,7 @@ void arch_vmx_do_launch(struct vcpu *v)
}
vmx_do_launch(v);
+ hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
reset_stack_and_jump(vmx_asm_do_vmentry);
}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jul 19 16:09:59 2006 +0800
@@ -126,6 +126,7 @@ static void vmx_relinquish_guest_resourc
static void vmx_relinquish_guest_resources(struct domain *d)
{
struct vcpu *v;
+ HVMStateEntry *se, *dse;
for_each_vcpu ( d, v )
{
@@ -142,6 +143,13 @@ static void vmx_relinquish_guest_resourc
}
kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
+
+ se = d->arch.hvm_domain.first_se;
+ while (se) {
+ dse = se;
+ se = se->next;
+ xfree(dse);
+ }
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
@@ -521,6 +529,337 @@ static void vmx_store_cpu_guest_regs(
}
vmx_vmcs_exit(v);
+}
+
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long inst_len;
+ int error = 0;
+
+ error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+ error |= __vmread(GUEST_RIP, &c->eip);
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n",
+ inst_len, c->eip);
+#endif
+
+ error |= __vmread(GUEST_RSP, &c->esp);
+ error |= __vmread(GUEST_RFLAGS, &c->eflags);
+
+ error |= __vmread(CR0_READ_SHADOW, &c->cr0);
+
+ c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: cr3=0x%"PRIx64".\n", c->cr3);
+#endif
+
+ error |= __vmread(CR4_READ_SHADOW, &c->cr4);
+
+ error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
+ error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
+
+ error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
+ error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
+
+ error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
+ error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
+ error |= __vmread(GUEST_CS_BASE, &c->cs_base);
+ error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes);
+
+ error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
+ error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
+ error |= __vmread(GUEST_DS_BASE, &c->ds_base);
+ error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes);
+
+ error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
+ error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
+ error |= __vmread(GUEST_ES_BASE, &c->es_base);
+ error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes);
+
+ error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
+ error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
+ error |= __vmread(GUEST_SS_BASE, &c->ss_base);
+ error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes);
+
+ error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
+ error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
+ error |= __vmread(GUEST_FS_BASE, &c->fs_base);
+ error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes);
+
+ error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
+ error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
+ error |= __vmread(GUEST_GS_BASE, &c->gs_base);
+ error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes);
+
+ error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
+ error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
+ error |= __vmread(GUEST_TR_BASE, &c->tr_base);
+ error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes);
+
+ error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
+ error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
+ error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
+ error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes);
+
+ error |= __vmread(GUEST_SYSENTER_CS, &c->sysenter_cs);
+ error |= __vmread(GUEST_SYSENTER_ESP, &c->sysenter_esp);
+ error |= __vmread(GUEST_SYSENTER_EIP, &c->sysenter_eip);
+
+ return !error;
+}
+
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long mfn, old_cr4, old_base_mfn;
+ int error = 0;
+
+ error |= __vmwrite(GUEST_RIP, c->eip);
+ error |= __vmwrite(GUEST_RSP, c->esp);
+ error |= __vmwrite(GUEST_RFLAGS, c->eflags);
+
+ error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
+
+ if (!vmx_paging_enabled(v)) {
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
+ __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+ goto skip_cr3;
+ }
+
+ if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+ /*
+ * This is simple TLB flush, implying the guest has
+ * removed some translation or changed page attributes.
+ * We simply invalidate the shadow.
+ */
+ mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
+ if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+ printk("Invalid CR3 value=%"PRIx64"", c->cr3);
+ domain_crash_synchronous();
+ return 0;
+ }
+ shadow_sync_all(v->domain);
+ } else {
+ /*
+ * If different, make a shadow. Check if the PDBR is valid
+ * first.
+ */
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
+ if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
+ printk("Invalid CR3 value=%"PRIx64"", c->cr3);
+ domain_crash_synchronous();
+ return 0;
+ }
+ mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT);
+ if(!get_page(mfn_to_page(mfn), v->domain))
+ return 0;
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if (old_base_mfn)
+ put_page(mfn_to_page(old_base_mfn));
+ /*
+ * arch.shadow_table should now hold the next CR3 for shadow
+ */
+ v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+ update_pagetables(v);
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
+ __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+ }
+
+ skip_cr3:
+
+ error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+ error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
+
+ error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+ error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+ error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+ error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+ error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+ error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+ error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
+ error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+ error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+ error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+ error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
+ error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+ error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+ error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+ error |= __vmwrite(GUEST_ES_BASE, c->es_base);
+ error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+ error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+ error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+ error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
+ error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+ error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+ error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+ error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
+ error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+ error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+ error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+ error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
+ error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+ error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+ error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+ error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
+ error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+ error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+ error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+ error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+ error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+ error |= __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+ error |= __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+ error |= __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+ return !error;
+}
+
+void dump_msr_state(struct vmx_msr_state *m)
+{
+ int i = 0;
+ printk("**** msr state ****\n");
+ printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ printk("0x%lx,", m->msr_items[i]);
+ printk("\n");
+}
+
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ struct vmcs_data *data = &ctxt->data;
+ struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_content;
+ unsigned long guest_flags = guest_state->flags;
+ int i = 0;
+
+ data->shadow_gs = guest_state->shadow_gs;
+ data->cpu_state = v->arch.hvm_vmx.cpu_state;
+ /* save msrs */
+ data->flags = guest_flags;
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ data->msr_items[i] = guest_state->msr_items[i];
+
+#ifdef HVM_DEBUG_SUSPEND
+ dump_msr_state(guest_state);
+ printk("saved cpu_state=0x%"PRIX64"\n", data->cpu_state);
+#endif
+}
+
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ int i = 0;
+ struct vmcs_data *data = &ctxt->data;
+ struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_content;
+
+ /* restore msrs */
+ guest_state->flags = data->flags;
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ guest_state->msr_items[i] = data->msr_items[i];
+
+ guest_state->shadow_gs = data->shadow_gs;
+
+ vmx_restore_msrs(v);
+
+ v->arch.hvm_vmx.cpu_state = data->cpu_state;
+
+#ifdef HVM_DEBUG_SUSPEND
+ dump_msr_state(guest_state);
+ printk("restore cpu_state=0x%lx.\n", v->arch.hvm_vmx.cpu_state);
+
+#endif
+
+#if defined(__x86_64__)
+ if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
+ unsigned long vm_entry_value;
+ if ( test_bit(VMX_CPU_STATE_LMA_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) ) {
+ __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
+ vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
+ __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+
+ if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L4) )
+ {
+ printk("Unsupported guest paging levels\n");
+ domain_crash_synchronous(); /* need to take a clean path */
+ }
+ }
+ }
+ else
+#endif /* __x86_64__ */
+ {
+#if CONFIG_PAGING_LEVELS >= 3
+ /* seems it's a 32-bit or 32-bit PAE guest */
+ if ( test_bit(VMX_CPU_STATE_PAE_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
+ /* The guest enables PAE first and then it enables PG, it is
+ * really a PAE guest */
+ if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
+ {
+ printk("Unsupported guest paging levels\n");
+ domain_crash_synchronous();
+ }
+ }
+ else
+ {
+ if ( !shadow_set_guest_paging_levels(v->domain, PAGING_L2) )
+ {
+ printk("Unsupported guest paging levels\n");
+ domain_crash_synchronous(); /* need to take a clean path */
+ }
+ }
+#endif
+ }
+
+}
+
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ struct vmcs_data *data = &ctxt->data;
+
+ /* set valid flag to recover whole vmcs when restore */
+ ctxt->valid = 1;
+
+ vmx_save_cpu_state(v, ctxt);
+
+ vmx_vmcs_enter(v);
+
+ if (!vmx_vmcs_save(v, data))
+ printk("vmx_vmcs save failed!\n");
+
+ vmx_vmcs_exit(v);
+
+}
+
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ if (!ctxt->valid)
+ return;
+
+ vmx_load_cpu_state(v, ctxt);
+
+ vmx_vmcs_enter(v);
+
+ if (!vmx_vmcs_restore(v, &ctxt->data))
+ printk("vmx_vmcs restore failed!\n");
+
+ /* only load vmcs once */
+ ctxt->valid = 0;
+
+ vmx_vmcs_exit(v);
+
}
/*
@@ -741,6 +1080,9 @@ int start_vmx(void)
hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+ hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+ hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
hvm_funcs.realmode = vmx_realmode;
hvm_funcs.paging_enabled = vmx_paging_enabled;
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/common/dom0_ops.c Wed Jul 19 16:09:59 2006 +0800
@@ -27,6 +27,8 @@ extern long arch_do_dom0_op(
struct dom0_op *op, XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op);
extern void arch_getdomaininfo_ctxt(
struct vcpu *, struct vcpu_guest_context *);
+extern void arch_gethvm_ctxt(
+ struct vcpu *, struct hvm_domain_context *);
static inline int is_free_domid(domid_t dom)
{
@@ -504,6 +506,77 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op
}
break;
+ case DOM0_GETHVMCONTEXT:
+ {
+ struct hvm_domain_context *c;
+ struct domain *d;
+ struct vcpu *v;
+
+ ret = -ESRCH;
+ if ( (d = find_domain_by_id(op->u.gethvmcontext.domain)) == NULL )
+ break;
+
+ ret = -ENOMEM;
+ if ( (c = xmalloc(struct hvm_domain_context)) == NULL )
+ goto gethvmcontext_out;
+
+ v = d->vcpu[0];
+
+ ret = -ENODATA;
+ if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ goto gethvmcontext_out;
+
+ arch_gethvm_ctxt(v, c);
+
+ ret = 0;
+ if ( copy_to_guest(op->u.gethvmcontext.hvm_ctxt, c, 1) )
+ ret = -EFAULT;
+
+ xfree(c);
+
+ if ( copy_to_guest(u_dom0_op, op, 1) )
+ ret = -EFAULT;
+
+ gethvmcontext_out:
+ put_domain(d);
+ }
+ break;
+
+ case DOM0_SETHVMCONTEXT:
+ {
+ struct hvm_domain_context *c;
+ struct domain *d;
+ struct vcpu *v;
+
+ ret = -ESRCH;
+ if ( (d = find_domain_by_id(op->u.sethvmcontext.domain)) == NULL )
+ break;
+
+ ret = -ENOMEM;
+ if ( (c = xmalloc(struct hvm_domain_context)) == NULL )
+ goto sethvmcontext_out;
+
+ /*XXX: need check input vcpu when smp */
+ v = d->vcpu[0];
+
+ ret = -EFAULT;
+ if ( copy_from_guest(c, op->u.sethvmcontext.hvm_ctxt, 1) != 0 )
+ goto sethvmcontext_out;
+
+ /* store the data for future use */
+ d->arch.hvm_domain.hvm_ctxt = c;
+
+ ret = 0;
+
+ if ( copy_to_guest(u_dom0_op, op, 1) )
+ ret = -EFAULT;
+
+ sethvmcontext_out:
+ put_domain(d);
+ }
+ break;
+
+
case DOM0_GETVCPUINFO:
{
struct domain *d;
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/asm-x86/hvm/domain.h Wed Jul 19 16:09:59 2006 +0800
@@ -30,6 +30,20 @@
#define HVM_PBUF_SIZE 80
+typedef void SaveStateHandler(hvm_domain_context_t *h, void *opaque);
+typedef int LoadStateHandler(hvm_domain_context_t *h, void *opaque, int version_id);
+
+#define HVM_SE_IDSTR_LEN 32
+typedef struct HVMStateEntry {
+ char idstr[HVM_SE_IDSTR_LEN];
+ int instance_id;
+ int version_id;
+ SaveStateHandler *save_state;
+ LoadStateHandler *load_state;
+ void *opaque;
+ struct HVMStateEntry *next;
+} HVMStateEntry;
+
struct hvm_domain {
unsigned long shared_page_va;
unsigned int nr_vcpus;
@@ -48,6 +62,8 @@ struct hvm_domain {
int pbuf_index;
char pbuf[HVM_PBUF_SIZE];
+ struct hvm_domain_context *hvm_ctxt;
+ HVMStateEntry *first_se;
};
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h Wed Jul 19 16:09:59 2006 +0800
@@ -47,6 +47,13 @@ struct hvm_function_table {
struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
void (*load_cpu_guest_regs)(
struct vcpu *v, struct cpu_user_regs *r);
+
+ /* save and load hvm guest cpu context for save/restore */
+ void (*save_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+ void (*load_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+
/*
* Examine specifics of the guest state:
* 1) determine whether the guest is in real or vm8086 mode,
@@ -103,6 +110,20 @@ hvm_load_cpu_guest_regs(struct vcpu *v,
hvm_funcs.load_cpu_guest_regs(v, r);
}
+static inline void
+hvm_save_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ hvm_funcs.save_cpu_ctxt(v, ctxt);
+}
+
+static inline void
+hvm_load_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ hvm_funcs.load_cpu_ctxt(v, ctxt);
+}
+
static inline int
hvm_realmode(struct vcpu *v)
{
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/asm-x86/hvm/support.h Wed Jul 19 16:09:59 2006 +0800
@@ -25,6 +25,7 @@
#include <asm/types.h>
#include <asm/regs.h>
#include <asm/processor.h>
+#include <public/dom0_ops.h>
#ifndef NDEBUG
#define HVM_DEBUG 1
@@ -136,6 +137,129 @@ extern unsigned int opt_hvm_debug_level;
domain_crash_synchronous(); \
} while (0)
+/* save/restore support */
+
+//#define HVM_DEBUG_SUSPEND
+
+extern int hvm_register_savevm(const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+
+static inline void hvm_ctxt_seek(hvm_domain_context_t *h, unsigned int pos)
+{
+ h->cur = pos;
+}
+
+static inline uint32_t hvm_ctxt_tell(hvm_domain_context_t *h)
+{
+ return h->cur;
+}
+
+static inline int hvm_ctxt_end(hvm_domain_context_t *h)
+{
+ return (h->cur >= h->size || h->cur >= HVM_CTXT_SIZE);
+}
+
+static inline void hvm_put_byte(hvm_domain_context_t *h, unsigned int i)
+{
+ if (h->cur >= HVM_CTXT_SIZE) {
+ printk("hvm_put_byte overflow.\n");
+ return;
+ }
+ h->data[h->cur++] = (char)i;
+}
+
+static inline void hvm_put_8u(hvm_domain_context_t *h, uint8_t b)
+{
+ hvm_put_byte(h, b);
+}
+
+static inline void hvm_put_16u(hvm_domain_context_t *h, uint16_t b)
+{
+ hvm_put_8u(h, b >> 8);
+ hvm_put_8u(h, b);
+}
+
+static inline void hvm_put_32u(hvm_domain_context_t *h, uint32_t b)
+{
+ hvm_put_16u(h, b >> 16);
+ hvm_put_16u(h, b);
+}
+
+static inline void hvm_put_64u(hvm_domain_context_t *h, uint64_t b)
+{
+ hvm_put_32u(h, b >> 32);
+ hvm_put_32u(h, b);
+}
+
+static inline void hvm_put_buffer(hvm_domain_context_t *h, const char *buf, int len)
+{
+ memcpy(&h->data[h->cur], buf, len);
+ h->cur += len;
+}
+
+
+static inline char hvm_get_byte(hvm_domain_context_t *h)
+{
+ if (h->cur >= HVM_CTXT_SIZE) {
+ printk("hvm_get_byte overflow.\n");
+ return -1;
+ }
+
+ if (h->cur >= h->size) {
+ printk("hvm_get_byte exceed data area.\n");
+ return -1;
+ }
+
+ return h->data[h->cur++];
+}
+
+static inline uint8_t hvm_get_8u(hvm_domain_context_t *h)
+{
+ return hvm_get_byte(h);
+}
+
+static inline uint16_t hvm_get_16u(hvm_domain_context_t *h)
+{
+ uint16_t v;
+ v = hvm_get_8u(h) << 8;
+ v |= hvm_get_8u(h);
+
+ return v;
+}
+
+static inline uint32_t hvm_get_32u(hvm_domain_context_t *h)
+{
+ uint32_t v;
+ v = hvm_get_16u(h) << 16;
+ v |= hvm_get_16u(h);
+
+ return v;
+}
+
+static inline uint64_t hvm_get_64u(hvm_domain_context_t *h)
+{
+ uint64_t v;
+ v = (uint64_t)hvm_get_32u(h) << 32;
+ v |= hvm_get_32u(h);
+
+ return v;
+}
+
+static inline void hvm_get_buffer(hvm_domain_context_t *h, char *buf, int len)
+{
+ memcpy(buf, &h->data[h->cur], len);
+ h->cur += len;
+}
+
+extern int hvm_save(struct vcpu*, hvm_domain_context_t *h);
+extern int hvm_load(struct vcpu*, hvm_domain_context_t *h);
+
+extern void shpage_init(shared_iopage_t *sp);
+
extern int hvm_enabled;
enum { HVM_COPY_IN = 0, HVM_COPY_OUT };
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/public/arch-x86_32.h Wed Jul 19 16:09:59 2006 +0800
@@ -142,6 +142,13 @@ DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t)
DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+#include "vmcs_data.h"
+
+struct hvmcpu_context {
+ uint32_t valid;
+ struct vmcs_data data;
+};
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
@@ -174,6 +181,7 @@ struct vcpu_guest_context {
unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
unsigned long failsafe_callback_eip;
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+ struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/public/arch-x86_64.h Wed Jul 19 16:09:59 2006 +0800
@@ -212,6 +212,13 @@ DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t)
#undef __DECL_REG
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+#include "vmcs_data.h"
+
+struct hvmcpu_context {
+ uint32_t valid;
+ struct vmcs_data data;
+};
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
@@ -249,6 +256,7 @@ struct vcpu_guest_context {
uint64_t fs_base;
uint64_t gs_base_kernel;
uint64_t gs_base_user;
+ struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Wed Jul 19 13:45:04 2006 +0800
+++ b/xen/include/public/dom0_ops.h Wed Jul 19 16:09:59 2006 +0800
@@ -535,6 +535,31 @@ struct dom0_settimeoffset {
};
typedef struct dom0_settimeoffset dom0_settimeoffset_t;
DEFINE_XEN_GUEST_HANDLE(dom0_settimeoffset_t);
+
+#define HVM_CTXT_SIZE 4096
+typedef struct hvm_domain_context {
+ uint32_t cur;
+ uint32_t size;
+ uint8_t data[HVM_CTXT_SIZE];
+} hvm_domain_context_t;
+DEFINE_XEN_GUEST_HANDLE(hvm_domain_context_t);
+#define DOM0_GETHVMCONTEXT 51
+typedef struct dom0_gethvmcontext {
+ /* IN variables. */
+ domid_t domain; /* domain to be affected */
+ /* OUT variables. */
+ XEN_GUEST_HANDLE(hvm_domain_context_t) hvm_ctxt;
+} dom0_gethvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_gethvmcontext_t);
+
+#define DOM0_SETHVMCONTEXT 52
+typedef struct dom0_sethvmcontext {
+ /* IN variables. */
+ domid_t domain; /* domain to be affected */
+ /* OUT variables. */
+ XEN_GUEST_HANDLE(hvm_domain_context_t) hvm_ctxt;
+} dom0_sethvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_sethvmcontext_t);
struct dom0_op {
uint32_t cmd;
@@ -579,6 +604,8 @@ struct dom0_op {
struct dom0_hypercall_init hypercall_init;
struct dom0_domain_setup domain_setup;
struct dom0_settimeoffset settimeoffset;
+ struct dom0_gethvmcontext gethvmcontext;
+ struct dom0_sethvmcontext sethvmcontext;
uint8_t pad[128];
} u;
};
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xc_hvm_restore.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_restore.c Wed Jul 19 16:09:59 2006 +0800
@@ -0,0 +1,264 @@
+/******************************************************************************
+ * xc_hvm_restore.c
+ *
+ * Restore the state of a HVM guest.
+ *
+ * Copyright (c) 2006 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+#include <xen/hvm/ioreq.h>
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* A table mapping each PFN to its new MFN. */
+static xen_pfn_t *p2m = NULL;
+
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+ int r = 0, s;
+ unsigned char *b = buf;
+
+ while (r < count) {
+ s = read(fd, &b[r], count - r);
+ if ((s == -1) && (errno == EINTR))
+ continue;
+ if (s <= 0) {
+ break;
+ }
+ r += s;
+ }
+
+ return (r == count) ? 1 : 0;
+}
+
+int xc_hvm_restore(int xc_handle, int io_fd,
+ uint32_t dom, unsigned long nr_pfns,
+ unsigned int store_evtchn, unsigned long *store_mfn,
+ unsigned int console_evtchn, unsigned long *console_mfn)
+{
+ DECLARE_DOM0_OP;
+
+
+ /* The new domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ char *region_base;
+
+ xc_mmu_t *mmu = NULL;
+
+ xc_dominfo_t info;
+ unsigned int rc = 1, i;
+ uint32_t rec_len;
+ hvm_domain_context_t hvm_ctxt;
+ unsigned int vp_eport;
+ unsigned long count;
+ unsigned long long ptr;
+ unsigned long long v_end, memsize;
+ unsigned long shared_page_frame = 0;
+ shared_iopage_t *sp;
+
+ /* hvm guest mem size (Mb) */
+ memsize = (unsigned long long)*store_mfn;
+ v_end = memsize << 20;
+
+ DPRINTF("xc_hvm_restore:dom=%d, nr_pfns=0x%lx, store_evtchn=%d,
*store_mfn=%ld, console_evtchn=%d, *console_mfn=%ld.\n",
+ dom, nr_pfns, store_evtchn, *store_mfn, console_evtchn,
*console_mfn);
+
+
+ max_pfn = nr_pfns;
+
+ if(!get_platform_info(xc_handle, dom,
+ &max_mfn, &hvirt_start, &pt_levels)) {
+ ERR("Unable to get platform info.");
+ return 1;
+ }
+
+ DPRINTF("xc_hvm_restore start: max_pfn = %lx, max_mfn = %lx,
hvirt_start=%lx, pt_levels=%d\n",
+ max_pfn,
+ max_mfn,
+ hvirt_start,
+ pt_levels);
+
+ if (mlock(&ctxt, sizeof(ctxt))) {
+ /* needed for build dom0 op, but might as well do early */
+ ERR("Unable to mlock ctxt");
+ return 1;
+ }
+
+
+ /* We want zeroed memory so use calloc rather than malloc. */
+ p2m = calloc(max_pfn, sizeof(unsigned long));
+
+ if (p2m == NULL) {
+ ERR("memory alloc failed");
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* Get the domain's shared-info frame. */
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = (domid_t)dom;
+ if (xc_dom0_op(xc_handle, &op) < 0) {
+ ERR("Could not get information on new domain");
+ goto out;
+ }
+ shared_info_frame = op.u.getdomaininfo.shared_info_frame;
+
+ if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if(xc_domain_memory_increase_reservation(
+ xc_handle, dom, max_pfn, 0, 0, NULL) != 0) {
+ ERR("Failed to increase reservation by %lx KB", PFN_TO_KB(max_pfn));
+ errno = ENOMEM;
+ goto out;
+ }
+
+ DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn));
+
+ if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+ ERR("Could not get domain info");
+ return 1;
+ }
+
+ DPRINTF("after increasing domain reservation, nr_pages=0x%lx,
maxmemkb=0x%lx\n", info.nr_pages, info.max_memkb);
+
+ /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
+ if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) {
+ ERR("Did not read correct number of frame numbers for new dom");
+ goto out;
+ }
+
+ if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
+ ERR("Could not initialise for MMU updates");
+ goto out;
+ }
+
+ /* restore memory */
+ if ( (region_base = xc_map_foreign_batch(xc_handle, dom, PROT_READ |
PROT_WRITE, p2m, max_pfn) ) == 0) {
+ ERR("HVM:map page_array failed!\n");
+ goto out;
+ }
+
+ for (i = 0; i < max_pfn; i++) {
+ void *zpage = region_base + i * PAGE_SIZE;
+ if (!read_exact(io_fd, zpage, PAGE_SIZE)) {
+ ERR("HVM:read page %d failed!\n", i);
+ goto out;
+ }
+ }
+
+ /* Write the machine->phys table entries. */
+ for ( count = 0; count < max_pfn; count++ )
+ {
+ ptr = (unsigned long long)p2m[count] << PAGE_SHIFT;
+ if ( xc_add_mmu_update(xc_handle, mmu,
+ ptr | MMU_MACHPHYS_UPDATE, count) )
+ goto out;
+ }
+
+ (void)munmap(region_base, max_pfn*PAGE_SIZE);
+
+ if (xc_finish_mmu_updates(xc_handle, mmu)) {
+ ERR("HVM:Error doing finish_mmu_updates()");
+ goto out;
+ }
+
+ /* realloc a evtchn port on vcpu */
+ vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
+ if ( vp_eport < 0 ) {
+ ERR("Couldn't get unbound port from VMX guest when restore.\n");
+ goto out;
+ }
+
+ /* restore hvm context including pic/pit/shpage */
+ if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+ ERR("error read hvm context size!\n");
+ goto out;
+ }
+ if (rec_len != sizeof(hvm_ctxt)) {
+ ERR("hvm context size dismatch!\n");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt))) {
+ ERR("error read hvm context!\n");
+ goto out;
+ }
+
+ xc_domain_hvm_setcontext(xc_handle, dom, &hvm_ctxt);
+
+ /* Populate the event channel port in the shared page */
+ shared_page_frame = p2m[(v_end >> PAGE_SHIFT) - 1];
+ if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+ xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ shared_page_frame)) == 0 ) {
+ ERR("map share page fail");
+ goto out;
+ }
+
+ /* set new vp_eport */
+ DPRINTF("new vp_eport=%d.\n",
+ vp_eport);
+ /*XXX: smp support */
+ sp->vcpu_iodata[0].vp_eport = vp_eport;
+
+ /* restore vcpu ctxt & vmcs */
+ if (!read_exact(io_fd, &rec_len, sizeof(uint32_t))) {
+ ERR("error read vcpu context size!\n");
+ goto out;
+ }
+ if (rec_len != sizeof(ctxt)) {
+ ERR("vcpu context size dismatch!\n");
+ goto out;
+ }
+
+ if (!read_exact(io_fd, &(ctxt), sizeof(ctxt))) {
+ ERR("error read vcpu context.\n");
+ goto out;
+ }
+
+ if ( (rc = xc_vcpu_setcontext(xc_handle, dom, 0, &ctxt)) ) {
+ ERR("Could not set vcpu context, rc=%d", rc);
+ goto out;
+ }
+
+ /* calculate the store_mfn, wrong val cause hang when introduceDomain */
+ *store_mfn = p2m[(v_end >> PAGE_SHIFT) - 2];
+ DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n",
*store_mfn, v_end);
+
+ rc = 0;
+ goto out;
+
+ out:
+ if ( (rc != 0) && (dom != 0) )
+ xc_domain_destroy(xc_handle, dom);
+ free(mmu);
+ free(p2m);
+
+ DPRINTF("Restore exit with rc=%d\n", rc);
+
+ return rc;
+}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 tools/libxc/xc_hvm_save.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_hvm_save.c Wed Jul 19 16:09:59 2006 +0800
@@ -0,0 +1,207 @@
+/******************************************************************************
+ * xc_hvm_save.c
+ *
+ * Save the state of a running HVM guest.
+ *
+ * Copyright (c) 2006 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "xc_private.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+/* max mfn of the whole machine */
+static unsigned long max_mfn;
+
+/* virtual starting address of the hypervisor */
+static unsigned long hvirt_start;
+
+/* #levels of page tables used by the current guest */
+static unsigned int pt_levels;
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+
+int xc_hvm_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+ uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+ xc_dominfo_t info;
+
+ int rc = 1, i;
+ int live = (flags & XCFLAGS_LIVE);
+ int debug = (flags & XCFLAGS_DEBUG);
+
+ /* The new domain's shared-info frame number. */
+ unsigned long shared_info_frame;
+
+ /* A copy of the CPU context of the guest. */
+ vcpu_guest_context_t ctxt;
+
+ /* A copy of hvm domain context */
+ hvm_domain_context_t hvm_ctxt;
+
+ /* Live mapping of shared info structure */
+ shared_info_t *live_shinfo = NULL;
+
+ /* base of the region in which domain memory is mapped */
+ unsigned char *region_base = NULL;
+
+ uint32_t nr_pfns, rec_size;
+ unsigned long *page_array;
+
+ DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x.\n",
+ dom, max_iters, max_factor, flags);
+
+ /* If no explicit control parameters given, use defaults */
+/* if(!max_iters)*/
+/* max_iters = DEF_MAX_ITERS;*/
+/* if(!max_factor)*/
+/* max_factor = DEF_MAX_FACTOR;*/
+
+/* initialize_mbit_rate();*/
+
+ if(!get_platform_info(xc_handle, dom,
+ &max_mfn, &hvirt_start, &pt_levels)) {
+ ERR("HVM:Unable to get platform info.");
+ return 1;
+ }
+
+ if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+ ERR("HVM:Could not get domain info");
+ return 1;
+ }
+
+ if (mlock(&ctxt, sizeof(ctxt))) {
+ ERR("HVM:Unable to mlock ctxt");
+ return 1;
+ }
+
+ /* Only have to worry about vcpu 0 even for SMP */
+ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+ ERR("HVM:Could not get vcpu context");
+ goto out;
+ }
+ shared_info_frame = info.shared_info_frame;
+
+ /* A cheesy test to see whether the domain contains valid state. */
+ if (ctxt.ctrlreg[3] == 0)
+ {
+ ERR("Domain is not in a valid HVM guest state");
+ goto out;
+ }
+
+ /* cheesy sanity check */
+ if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) {
+ ERR("Invalid HVM state record -- pfn count out of range: %lu",
+ (info.max_memkb >> (PAGE_SHIFT - 10)));
+ goto out;
+ }
+
+ /* Map the shared info frame */
+ if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, shared_info_frame))) {
+ ERR("HVM:Couldn't map live_shinfo");
+ goto out;
+ }
+
+ max_pfn = live_shinfo->arch.max_pfn;
+
+ DPRINTF("saved hvm domain info:max_pfn=0x%lx, max_mfn=0x%lx,
nr_pages=0x%lx\n", max_pfn, max_mfn, info.nr_pages);
+
+ if (live) {
+ ERR("hvm domain doesn't support live migration now.\n");
+ if (debug)
+ ERR("hvm domain debug on.\n");
+ goto out;
+ }
+
+ /* suspend hvm domain */
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info, &ctxt)) {
+ ERR("HVM Domain appears not to have suspended");
+ goto out;
+ }
+
+ nr_pfns = info.nr_pages;
+ DPRINTF("after suspend hvm domain nr_pages=0x%x.\n", nr_pfns);
+
+ /* get all the HVM domain pfns */
+ if ( (page_array = (unsigned long *) malloc (sizeof(unsigned long) *
nr_pfns)) == NULL) {
+ ERR("HVM:malloc fail!\n");
+ goto out;
+ }
+
+ if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pfns) != nr_pfns) {
+ ERR("HVM domain get pfn list fail!\n");
+ goto out;
+ }
+
+ if ( (region_base = xc_map_foreign_batch(xc_handle, dom, PROT_READ |
PROT_WRITE, page_array, nr_pfns) ) == 0) {
+ ERR("HVM domain map pages failed!\n");
+ goto out;
+ }
+
+
+ /* Start writing out the saved-domain record. begin with mem */
+ if (!write_exact(io_fd, &nr_pfns, sizeof(unsigned int))) {
+ ERR("write: nr_pfns");
+ goto out;
+ }
+
+ for (i = 0; i < nr_pfns; i++) {
+ void *zpage = region_base + i * PAGE_SIZE;
+ if (ratewrite(io_fd, zpage, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("HVM:write page %d failed!.\n", i);
+ goto out;
+ }
+ }
+
+ /* save hvm hypervisor state including pic/pit/shpage */
+ if (mlock(&hvm_ctxt, sizeof(hvm_ctxt))) {
+ ERR("Unable to mlock ctxt");
+ return 1;
+ }
+ xc_domain_hvm_getcontext(xc_handle, dom, &hvm_ctxt);
+
+/* ERR("hvm_getcontext get %d, size=%d!\n", hvm_ctxt.size,
sizeof(hvm_ctxt));*/
+ rec_size = sizeof(hvm_ctxt);
+ if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+ ERR("error write hvm ctxt size");
+ goto out;
+ }
+
+ if ( !write_exact(io_fd, &hvm_ctxt, sizeof(hvm_ctxt)) ) {
+ ERR("write HVM info failed!\n");
+ }
+
+
+ /* save vcpu/vmcs context XXX:smp support*/
+ if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+ ERR("HVM:Could not get vcpu context");
+ goto out;
+ }
+
+ rec_size = sizeof(ctxt);
+ if (!write_exact(io_fd, &rec_size, sizeof(uint32_t))) {
+ ERR("error write vcpu ctxt size");
+ goto out;
+ }
+
+ if (!write_exact(io_fd, &(ctxt), sizeof(ctxt)) )
+ ERR("write vmcs failed!\n");
+
+
+
+ /* Success! */
+ rc = 0;
+
+ out:
+ return !!rc;
+}
diff -r 98c3ddf83a59 -r 2abb1c801ab7 xen/include/public/vmcs_data.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/vmcs_data.h Wed Jul 19 16:09:59 2006 +0800
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * vmcs_data.h
+ *
+ * Copyright (c) 2006 Intel Corporation
+ *
+ */
+
+#ifndef __XEN_PUBLIC_VMCS_DATA_H__
+#define __XEN_PUBLIC_VMCS_DATA_H__
+
+/*
+ * World vmcs state
+ */
+struct vmcs_data {
+ uint64_t eip; /* execution pointer */
+ uint64_t esp; /* stack pointer */
+ uint64_t eflags; /* flags register */
+ uint64_t cr0;
+ uint64_t cr3; /* page table directory */
+ uint64_t cr4;
+ uint32_t idtr_limit; /* idt */
+ uint64_t idtr_base;
+ uint32_t gdtr_limit; /* gdt */
+ uint64_t gdtr_base;
+ uint32_t cs_sel; /* cs selector */
+ uint32_t cs_limit;
+ uint64_t cs_base;
+ uint32_t cs_arbytes;
+ uint32_t ds_sel; /* ds selector */
+ uint32_t ds_limit;
+ uint64_t ds_base;
+ uint32_t ds_arbytes;
+ uint32_t es_sel; /* es selector */
+ uint32_t es_limit;
+ uint64_t es_base;
+ uint32_t es_arbytes;
+ uint32_t ss_sel; /* ss selector */
+ uint32_t ss_limit;
+ uint64_t ss_base;
+ uint32_t ss_arbytes;
+ uint32_t fs_sel; /* fs selector */
+ uint32_t fs_limit;
+ uint64_t fs_base;
+ uint32_t fs_arbytes;
+ uint32_t gs_sel; /* gs selector */
+ uint32_t gs_limit;
+ uint64_t gs_base;
+ uint32_t gs_arbytes;
+ uint32_t tr_sel; /* task selector */
+ uint32_t tr_limit;
+ uint64_t tr_base;
+ uint32_t tr_arbytes;
+ uint32_t ldtr_sel; /* ldtr selector */
+ uint32_t ldtr_limit;
+ uint64_t ldtr_base;
+ uint32_t ldtr_arbytes;
+ uint32_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ /* msr for em64t */
+ uint64_t shadow_gs;
+ uint64_t flags;
+ /* same size as VMX_MSR_COUNT */
+ uint64_t msr_items[6];
+ uint64_t cpu_state;
+};
+typedef struct vmcs_data vmcs_data_t;
+#endif
# HG changeset patch
# User Edwin Zhai <edwin.zhai@xxxxxxxxx>
# Node ID 98c3ddf83a59b0cbbdce63bb210adfd0d2ec1aea
# Parent ecb8ff1fcf1fc24561c8bd272a58828592d90806
cirrus&rtl8139 coexist issue fix
diff -r ecb8ff1fcf1f -r 98c3ddf83a59 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Fri Jul 14 18:53:27 2006 +0100
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Wed Jul 19 13:45:04 2006 +0800
@@ -382,7 +382,7 @@ int iomem_index(target_phys_addr_t addr)
start = mmio[i].start;
end = mmio[i].start + mmio[i].size;
- if ((addr >= start) && (addr <= end)){
+ if ((addr >= start) && (addr < end)){
return (mmio[i].io_index >> IO_MEM_SHIFT) &
(IO_MEM_NB_ENTRIES - 1);
}
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|