[PATCH 4/8] HVM save restore: vcpu context support
Save/restore the HVM vcpu context, such as the VMCS contents.

Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx>
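
For illustration, here is a minimal standalone sketch of the valid-cookie handshake this patch introduces (the struct layout and magic value mirror the patch; the harness itself is hypothetical, not Xen code): hvm_save_cpu_context() stamps 0x55885588 into the per-vcpu context, and the load path restores only when that cookie is present, clearing it so the VMCS image is consumed at most once.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HVM_CTXT_VALID 0x55885588u  /* magic used by hvm_save_cpu_context() */

struct vmcs_data_sketch { uint64_t eip; }; /* stand-in for struct vmcs_data */

struct hvmcpu_context_sketch {
    uint32_t valid;                 /* cookie: context carries a VMCS image */
    struct vmcs_data_sketch data;
    uint64_t gtime;                 /* guest time saved next to the VMCS */
};

static void save_ctxt(struct hvmcpu_context_sketch *c)
{
    c->data.eip = 0x1000;           /* stand-in for vmx_vmcs_save() reads */
    c->gtime = 42;                  /* stand-in for hvm_get_guest_time() */
    c->valid = HVM_CTXT_VALID;
}

static void load_ctxt(struct hvmcpu_context_sketch *c)
{
    if (c->valid != HVM_CTXT_VALID)
        return;                     /* ordinary set_info call: nothing to do */
    printf("restore eip=%#llx gtime=%llu\n",
           (unsigned long long)c->data.eip, (unsigned long long)c->gtime);
    c->valid = 0;                   /* "only load vmcs once", as in the patch */
}

int main(void)
{
    struct hvmcpu_context_sketch c;
    memset(&c, 0, sizeof(c));
    load_ctxt(&c);                  /* no-op: cookie absent */
    save_ctxt(&c);
    load_ctxt(&c);                  /* restores exactly once */
    load_ctxt(&c);                  /* no-op again */
    return 0;
}

In the patch itself the cookie is cleared by vmx_load_vmcs_ctxt() rather than by the generic wrapper, but the effect is the same.
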
diff -r ee20d1905bde xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domain.c Thu Jan 11 16:46:59 2007 +0800
@@ -573,6 +573,7 @@ int arch_set_info_guest(
else
{
hvm_load_cpu_guest_regs(v, &v->arch.guest_context.user_regs);
+ hvm_load_cpu_context(v, &v->arch.guest_context.hvmcpu_ctxt);
}
if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
diff -r ee20d1905bde xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/domctl.c Thu Jan 11 16:49:34 2007 +0800
@@ -322,8 +322,10 @@ void arch_get_info_guest(struct vcpu *v,
if ( is_hvm_vcpu(v) )
{
- if ( !IS_COMPAT(v->domain) )
+ if ( !IS_COMPAT(v->domain) ) {
hvm_store_cpu_guest_regs(v, &c.nat->user_regs, c.nat->ctrlreg);
+ hvm_save_cpu_context(v, &c.nat->hvmcpu_ctxt);
+ }
#ifdef CONFIG_COMPAT
else
{
diff -r ee20d1905bde xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Jan 11 16:48:01 2007 +0800
@@ -429,6 +429,299 @@ static void vmx_store_cpu_guest_regs(
vmx_vmcs_exit(v);
}
+static int __get_instruction_length(void);
+int vmx_vmcs_save(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long inst_len;
+
+ inst_len = __get_instruction_length();
+ c->eip = __vmread(GUEST_RIP);
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: inst_len=0x%lx, eip=0x%"PRIx64".\n",
+ inst_len, c->eip);
+#endif
+
+ c->esp = __vmread(GUEST_RSP);
+ c->eflags = __vmread(GUEST_RFLAGS);
+
+ c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
+ c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+ c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4;
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_save: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+ c->cr3,
+ c->cr0,
+ c->cr4);
+#endif
+
+ c->idtr_limit = __vmread(GUEST_IDTR_LIMIT);
+ c->idtr_base = __vmread(GUEST_IDTR_BASE);
+
+ c->gdtr_limit = __vmread(GUEST_GDTR_LIMIT);
+ c->gdtr_base = __vmread(GUEST_GDTR_BASE);
+
+ c->cs_sel = __vmread(GUEST_CS_SELECTOR);
+ c->cs_limit = __vmread(GUEST_CS_LIMIT);
+ c->cs_base = __vmread(GUEST_CS_BASE);
+ c->cs_arbytes = __vmread(GUEST_CS_AR_BYTES);
+
+ c->ds_sel = __vmread(GUEST_DS_SELECTOR);
+ c->ds_limit = __vmread(GUEST_DS_LIMIT);
+ c->ds_base = __vmread(GUEST_DS_BASE);
+ c->ds_arbytes = __vmread(GUEST_DS_AR_BYTES);
+
+ c->es_sel = __vmread(GUEST_ES_SELECTOR);
+ c->es_limit = __vmread(GUEST_ES_LIMIT);
+ c->es_base = __vmread(GUEST_ES_BASE);
+ c->es_arbytes = __vmread(GUEST_ES_AR_BYTES);
+
+ c->ss_sel = __vmread(GUEST_SS_SELECTOR);
+ c->ss_limit = __vmread(GUEST_SS_LIMIT);
+ c->ss_base = __vmread(GUEST_SS_BASE);
+ c->ss_arbytes = __vmread(GUEST_SS_AR_BYTES);
+
+ c->fs_sel = __vmread(GUEST_FS_SELECTOR);
+ c->fs_limit = __vmread(GUEST_FS_LIMIT);
+ c->fs_base = __vmread(GUEST_FS_BASE);
+ c->fs_arbytes = __vmread(GUEST_FS_AR_BYTES);
+
+ c->gs_sel = __vmread(GUEST_GS_SELECTOR);
+ c->gs_limit = __vmread(GUEST_GS_LIMIT);
+ c->gs_base = __vmread(GUEST_GS_BASE);
+ c->gs_arbytes = __vmread(GUEST_GS_AR_BYTES);
+
+ c->tr_sel = __vmread(GUEST_TR_SELECTOR);
+ c->tr_limit = __vmread(GUEST_TR_LIMIT);
+ c->tr_base = __vmread(GUEST_TR_BASE);
+ c->tr_arbytes = __vmread(GUEST_TR_AR_BYTES);
+
+ c->ldtr_sel = __vmread(GUEST_LDTR_SELECTOR);
+ c->ldtr_limit = __vmread(GUEST_LDTR_LIMIT);
+ c->ldtr_base = __vmread(GUEST_LDTR_BASE);
+ c->ldtr_arbytes = __vmread(GUEST_LDTR_AR_BYTES);
+
+ c->sysenter_cs = __vmread(GUEST_SYSENTER_CS);
+ c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
+ c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);
+
+ return 1;
+}
+
+int vmx_vmcs_restore(struct vcpu *v, struct vmcs_data *c)
+{
+ unsigned long mfn, old_base_mfn;
+
+ vmx_vmcs_enter(v);
+
+ __vmwrite(GUEST_RIP, c->eip);
+ __vmwrite(GUEST_RSP, c->esp);
+ __vmwrite(GUEST_RFLAGS, c->eflags);
+
+ v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
+ __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
+
+#ifdef HVM_DEBUG_SUSPEND
+ printk("vmx_vmcs_restore: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
+ c->cr3,
+ c->cr0,
+ c->cr4);
+#endif
+
+ if (!vmx_paging_enabled(v)) {
+ printk("vmx_vmcs_restore: paging not enabled.\n");
+ goto skip_cr3;
+ }
+
+ if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+ /*
+ * This is a simple TLB flush, implying the guest has
+ * removed some translation or changed page attributes.
+ * We simply invalidate the shadow.
+ */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+ goto bad_cr3;
+ }
+ } else {
+ /*
+ * If different, make a shadow. Check if the PDBR is valid
+ * first.
+ */
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
+ /* current!=vcpu as not called by arch_vmx_do_launch */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+ goto bad_cr3;
+ }
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if (old_base_mfn)
+ put_page(mfn_to_page(old_base_mfn));
+ /*
+ * arch.shadow_table should now hold the next CR3 for shadow
+ */
+ v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+ }
+
+ skip_cr3:
+#if defined(__x86_64__)
+ if (vmx_long_mode_enabled(v)) {
+ unsigned long vm_entry_value;
+ vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
+ vm_entry_value |= VM_ENTRY_IA32E_MODE;
+ __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+ }
+#endif
+
+ __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
+ __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
+
+ __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+ __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+ __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+ __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+ __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+ __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+ __vmwrite(GUEST_CS_BASE, c->cs_base);
+ __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes);
+
+ __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+ __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+ __vmwrite(GUEST_DS_BASE, c->ds_base);
+ __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes);
+
+ __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+ __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+ __vmwrite(GUEST_ES_BASE, c->es_base);
+ __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes);
+
+ __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+ __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+ __vmwrite(GUEST_SS_BASE, c->ss_base);
+ __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes);
+
+ __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+ __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+ __vmwrite(GUEST_FS_BASE, c->fs_base);
+ __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes);
+
+ __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+ __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+ __vmwrite(GUEST_GS_BASE, c->gs_base);
+ __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes);
+
+ __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+ __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+ __vmwrite(GUEST_TR_BASE, c->tr_base);
+ __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes);
+
+ __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+ __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+ __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+ __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes);
+
+ __vmwrite(GUEST_SYSENTER_CS, c->sysenter_cs);
+ __vmwrite(GUEST_SYSENTER_ESP, c->sysenter_esp);
+ __vmwrite(GUEST_SYSENTER_EIP, c->sysenter_eip);
+
+ vmx_vmcs_exit(v);
+
+ shadow_update_paging_modes(v);
+ return 0;
+
+ bad_cr3:
+ gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
+ vmx_vmcs_exit(v);
+ return -EINVAL;
+}
+
+#ifdef HVM_DEBUG_SUSPEND
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+ int i = 0;
+ printk("**** msr state ****\n");
+ printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ printk("0x%lx,", m->msrs[i]);
+ printk("\n");
+}
+#else
+static void dump_msr_state(struct vmx_msr_state *m)
+{
+}
+#endif
+
+void vmx_save_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ struct vmcs_data *data = &ctxt->data;
+ struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+ unsigned long guest_flags = guest_state->flags;
+ int i = 0;
+
+ data->shadow_gs = guest_state->shadow_gs;
+ data->vmxassist_enabled = v->arch.hvm_vmx.vmxassist_enabled;
+ /* save msrs */
+ data->flags = guest_flags;
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ data->msr_items[i] = guest_state->msrs[i];
+
+ dump_msr_state(guest_state);
+}
+
+void vmx_load_cpu_state(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ int i = 0;
+ struct vmcs_data *data = &ctxt->data;
+ struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
+
+ /* restore msrs */
+ guest_state->flags = data->flags;
+ for (i = 0; i < VMX_MSR_COUNT; i++)
+ guest_state->msrs[i] = data->msr_items[i];
+
+ guest_state->shadow_gs = data->shadow_gs;
+
+ /* XXX: no need to restore msrs, current!=vcpu as not called by arch_vmx_do_launch */
+/* vmx_restore_guest_msrs(v);*/
+
+ v->arch.hvm_vmx.vmxassist_enabled = data->vmxassist_enabled;
+
+ dump_msr_state(guest_state);
+}
+
+void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ struct vmcs_data *data = &ctxt->data;
+
+ vmx_save_cpu_state(v, ctxt);
+
+ vmx_vmcs_enter(v);
+
+ vmx_vmcs_save(v, data);
+
+ vmx_vmcs_exit(v);
+
+}
+
+void vmx_load_vmcs_ctxt(struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ vmx_load_cpu_state(v, ctxt);
+
+ if (vmx_vmcs_restore(v, &ctxt->data)) {
+ printk("vmx_vmcs_restore failed!\n");
+ domain_crash(v->domain);
+ }
+
+ /* only load vmcs once */
+ ctxt->valid = 0;
+
+}
+
/*
* The VMX spec (section 4.3.1.2, Checks on Guest Segment
* Registers) says that virtual-8086 mode guests' segment
@@ -750,6 +1043,9 @@ static void vmx_setup_hvm_funcs(void)
hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+ hvm_funcs.save_cpu_ctxt = vmx_save_vmcs_ctxt;
+ hvm_funcs.load_cpu_ctxt = vmx_load_vmcs_ctxt;
hvm_funcs.paging_enabled = vmx_paging_enabled;
hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
diff -r ee20d1905bde xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/asm-x86/hvm/hvm.h Thu Jan 11 16:48:01 2007 +0800
@@ -79,6 +79,13 @@ struct hvm_function_table {
struct vcpu *v, struct cpu_user_regs *r, unsigned long *crs);
void (*load_cpu_guest_regs)(
struct vcpu *v, struct cpu_user_regs *r);
+
+ /* save and load hvm guest cpu context for save/restore */
+ void (*save_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+ void (*load_cpu_ctxt)(
+ struct vcpu *v, struct hvmcpu_context *ctxt);
+
/*
* Examine specifics of the guest state:
* 1) determine whether paging is enabled,
@@ -157,6 +164,35 @@ hvm_load_cpu_guest_regs(struct vcpu *v,
hvm_funcs.load_cpu_guest_regs(v, r);
}
+void hvm_set_guest_time(struct vcpu *v, u64 gtime);
+u64 hvm_get_guest_time(struct vcpu *v);
+
+static inline void
+hvm_save_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ hvm_funcs.save_cpu_ctxt(v, ctxt);
+
+ /* save guest time */
+ ctxt->gtime = hvm_get_guest_time(v);
+
+ /* set valid flag so the whole vmcs is recovered on restore */
+ ctxt->valid = 0x55885588;
+}
+
+static inline void
+hvm_load_cpu_context(
+ struct vcpu *v, struct hvmcpu_context *ctxt)
+{
+ if ( ctxt->valid != 0x55885588 )
+ return;
+
+ hvm_funcs.load_cpu_ctxt(v, ctxt);
+
+ /* restore guest time */
+ hvm_set_guest_time(v, ctxt->gtime);
+}
+
static inline int
hvm_paging_enabled(struct vcpu *v)
{
@@ -222,8 +258,6 @@ void hvm_cpuid(unsigned int input, unsig
void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
void hvm_stts(struct vcpu *v);
-void hvm_set_guest_time(struct vcpu *v, u64 gtime);
-u64 hvm_get_guest_time(struct vcpu *v);
void hvm_migrate_timers(struct vcpu *v);
void hvm_do_resume(struct vcpu *v);
diff -r ee20d1905bde xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/public/arch-x86/xen.h Thu Jan 11 16:51:03 2007 +0800
@@ -107,6 +107,70 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
DEFINE_XEN_GUEST_HANDLE(trap_info_t);
typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * World (VMCS) state of an HVM vcpu
+ */
+struct vmcs_data {
+ uint64_t eip; /* execution pointer */
+ uint64_t esp; /* stack pointer */
+ uint64_t eflags; /* flags register */
+ uint64_t cr0;
+ uint64_t cr3; /* page table directory */
+ uint64_t cr4;
+ uint32_t idtr_limit; /* idt */
+ uint64_t idtr_base;
+ uint32_t gdtr_limit; /* gdt */
+ uint64_t gdtr_base;
+ uint32_t cs_sel; /* cs selector */
+ uint32_t cs_limit;
+ uint64_t cs_base;
+ uint32_t cs_arbytes;
+ uint32_t ds_sel; /* ds selector */
+ uint32_t ds_limit;
+ uint64_t ds_base;
+ uint32_t ds_arbytes;
+ uint32_t es_sel; /* es selector */
+ uint32_t es_limit;
+ uint64_t es_base;
+ uint32_t es_arbytes;
+ uint32_t ss_sel; /* ss selector */
+ uint32_t ss_limit;
+ uint64_t ss_base;
+ uint32_t ss_arbytes;
+ uint32_t fs_sel; /* fs selector */
+ uint32_t fs_limit;
+ uint64_t fs_base;
+ uint32_t fs_arbytes;
+ uint32_t gs_sel; /* gs selector */
+ uint32_t gs_limit;
+ uint64_t gs_base;
+ uint32_t gs_arbytes;
+ uint32_t tr_sel; /* task selector */
+ uint32_t tr_limit;
+ uint64_t tr_base;
+ uint32_t tr_arbytes;
+ uint32_t ldtr_sel; /* ldtr selector */
+ uint32_t ldtr_limit;
+ uint64_t ldtr_base;
+ uint32_t ldtr_arbytes;
+ uint32_t sysenter_cs;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ /* msr for em64t */
+ uint64_t shadow_gs;
+ uint64_t flags;
+ /* array length must equal VMX_MSR_COUNT */
+ uint64_t msr_items[6];
+ uint64_t vmxassist_enabled;
+};
+typedef struct vmcs_data vmcs_data_t;
+
+struct hvmcpu_context {
+ uint32_t valid;
+ struct vmcs_data data;
+ uint64_t gtime;
+};
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
@@ -154,6 +218,7 @@ struct vcpu_guest_context {
#endif
#endif
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+ struct hvmcpu_context hvmcpu_ctxt; /* whole vmcs region */
#ifdef __x86_64__
/* Segment base addresses. */
uint64_t fs_base;
diff -r ee20d1905bde xen/include/xlat.lst
--- a/xen/include/xlat.lst Thu Jan 11 16:40:55 2007 +0800
+++ b/xen/include/xlat.lst Thu Jan 11 16:51:35 2007 +0800
@@ -8,6 +8,8 @@
? vcpu_time_info xen.h
! cpu_user_regs arch-x86/xen-@arch@.h
! trap_info arch-x86/xen.h
+! hvmcpu_context arch-x86/xen.h
+! vmcs_data arch-x86/xen.h
! vcpu_guest_context arch-x86/xen.h
? acm_getdecision acm_ops.h
! ctl_cpumap domctl.h
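
Since hvmcpu_ctxt travels inside vcpu_guest_context, no new hypercall is required: the existing vcpu-context domctls carry the VMCS image. Below is a hedged sketch of the intended toolstack usage; the xc_vcpu_getcontext()/xc_vcpu_setcontext() signatures are assumptions based on the libxc of this period, and error handling is trimmed.

#include <xenctrl.h>

/* Hypothetical helper: copy one HVM vcpu's state between two domains. */
int copy_hvm_vcpu(int xc_handle, uint32_t src, uint32_t dst, uint32_t vcpu)
{
    vcpu_guest_context_t ctxt;

    /* arch_get_info_guest() fills ctxt.hvmcpu_ctxt and stamps ->valid */
    if (xc_vcpu_getcontext(xc_handle, src, vcpu, &ctxt) < 0)
        return -1;

    /* arch_set_info_guest() finds the 0x55885588 cookie and reloads the
     * VMCS image plus guest time on the destination vcpu */
    return xc_vcpu_setcontext(xc_handle, dst, vcpu, &ctxt);
}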