# HG changeset patch
# User cegger
# Date 1287134065 -7200
Nested Virtualization core implementation

Signed-off-by: Christoph Egger

diff -r 0c2b398816b1 -r a1355c180672 xen/arch/x86/hvm/Makefile
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -10,6 +10,7 @@ obj-y += intercept.o
 obj-y += io.o
 obj-y += irq.o
 obj-y += mtrr.o
+obj-y += nestedhvm.o
 obj-y += pmtimer.o
 obj-y += quirks.o
 obj-y += rtc.o
diff -r 0c2b398816b1 -r a1355c180672 xen/arch/x86/hvm/nestedhvm.c
--- /dev/null
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -0,0 +1,476 @@
+/*
+ * Nested HVM
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ * Author: Christoph Egger
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+/*
+ * NOTE(review): the header names were lost when this patch was extracted;
+ * they are reconstructed from the trailing comments and the code below.
+ * Confirm against the original changeset.
+ */
+#include <asm/msr.h>
+#include <asm/hvm/support.h> /* for HVM_DELIVER_NO_ERROR_CODE */
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/nestedhvm.h>
+#include <xen/event.h> /* for local_event_delivery_(en|dis)able */
+#include <asm/paging.h> /* for paging_mode_hap() */
+
+
+/*
+ * Native exit codes indexed by generic exit code (enum nestedhvm_intercepts).
+ * nestedhvm_vcpu_vmexit() uses it to translate a forced generic exit code.
+ */
+uint64_t nhvm_exitcode[NESTEDHVM_INTERCEPT_LAST];
+
+
+/* Nested HVM on/off per domain */
+bool_t
+nestedhvm_enabled(struct domain *d)
+{
+    /*
+     * Check the domain type first: the HVM parameters are only meaningful
+     * for HVM domains and must not be read (or BUG()ed on) for others.
+     */
+    if (!is_hvm_domain(d))
+        return 0;
+
+    return !!(d->arch.hvm_domain.params[HVM_PARAM_NESTEDHVM]);
+}
+
+/* Nested VCPU */
+
+/*
+ * Check whether the vcpu may do a nested VM entry with vmaddr.
+ * Returns 0 if so, otherwise the trap number to inject into the guest.
+ */
+int
+nestedhvm_vcpu_state_validate(struct vcpu *v, uint64_t vmaddr)
+{
+    struct segment_register reg;
+
+    if (!nestedhvm_enabled(v->domain))
+        return TRAP_invalid_op;
+
+    /* if CPL != 0 inject #GP */
+    hvm_get_segment_register(v, x86_seg_ss, &reg);
+    if (reg.attr.fields.dpl != 0)
+        return TRAP_gp_fault;
+
+    /* vmaddr must be page aligned */
+    if ((vmaddr & ~PAGE_MASK) != 0)
+        return TRAP_gp_fault;
+
+    if (!nhvm_vmcx_isvalid(v, vmaddr))
+        return TRAP_gp_fault;
+
+    return 0;
+}
+
+/* Tell whether the vcpu currently runs in (nested) guest mode. */
+bool_t
+nestedhvm_vcpu_in_guestmode(struct vcpu *v)
+{
+    return vcpu_nestedhvm(v).nh_guestmode;
+}
+
+/* Reset the generic nested state of the vcpu and put it into host mode. */
+void
+nestedhvm_vcpu_reset(struct vcpu *v)
+{
+    struct nestedhvm *nh = &vcpu_nestedhvm(v);
+
+    nh->nh_hap_enabled = 0;
+    nh->nh_vm_guestcr3 = 0;
+    nh->nh_vm_hostcr3 = 0;
+    nh->nh_guest_asid = 0;
+    nh->nh_flushp2m = 0;
+    nh->nh_p2m = NULL;
+
+    nhvm_vcpu_reset(v);
+
+    /* vcpu is in host mode */
+    nestedhvm_vcpu_exit_guestmode(v);
+}
+
+/*
+ * Set up the nested state of a vcpu.
+ * Returns 0 on success or when nested HVM is disabled for the domain,
+ * otherwise the error code of nhvm_vcpu_initialise().
+ */
+int
+nestedhvm_vcpu_initialise(struct vcpu *v)
+{
+    int rc;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+
+    if (!nestedhvm_enabled(v->domain))
+        return 0;
+
+    memset(hvm, 0x0, sizeof(*hvm));
+
+    /* initialise hostsave, for example */
+    rc = nhvm_vcpu_initialise(v);
+    if (rc) {
+        nhvm_vcpu_destroy(v);
+        return rc;
+    }
+
+    nestedhvm_vcpu_reset(v);
+    return 0;
+}
+
+/* Tear down the nested state of a vcpu; no-op when nested HVM is disabled. */
+int
+nestedhvm_vcpu_destroy(struct vcpu *v)
+{
+    if (!nestedhvm_enabled(v->domain))
+        return 0;
+
+    return nhvm_vcpu_destroy(v);
+}
+
+/*
+ * Emulate a nested VM entry with the virtual VMCB/VMCS at vmaddr.
+ * Returns 0 after switching the vcpu to guest mode. On failure an
+ * exception is injected into the guest and a non-zero value is returned.
+ */
+int
+nestedhvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
+                       uint64_t vmaddr, unsigned int inst_len)
+{
+    int ret;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+
+    hvm->nh_hostflags.fields.vmentry = 1;
+
+    ret = nestedhvm_vcpu_state_validate(v, vmaddr);
+    if (ret) {
+        gdprintk(XENLOG_ERR,
+            "nestedhvm_vcpu_state_validate failed, injecting 0x%x\n",
+            ret);
+        hvm_inject_exception(ret, HVM_DELIVER_NO_ERROR_CODE, 0);
+        goto out;
+    }
+
+    /* Save vmaddr. Needed for VMEXIT */
+    hvm->nh_vmaddr = vmaddr;
+
+    /* get nested vm */
+    ASSERT(hvm->nh_vmcx == NULL);
+    hvm->nh_vmcx = hvm_map_guest_frame_ro(vmaddr >> PAGE_SHIFT);
+    if (hvm->nh_vmcx == NULL) {
+        gdprintk(XENLOG_ERR,
+            "hvm_map_guest_frame_ro failed, injecting #GP\n");
+        hvm_inject_exception(TRAP_gp_fault,
+            HVM_DELIVER_NO_ERROR_CODE, 0);
+        ret = TRAP_gp_fault;
+        goto out;
+    }
+
+    /* save host state */
+    ret = nhvm_vcpu_vmentry(v, regs, inst_len);
+    if (ret) {
+        gdprintk(XENLOG_ERR,
+            "nhvm_vcpu_vmentry failed, injecting #UD\n");
+        hvm_inject_exception(TRAP_invalid_op,
+            HVM_DELIVER_NO_ERROR_CODE, 0);
+    }
+
+    hvm_unmap_guest_frame(hvm->nh_vmcx);
+    hvm->nh_vmcx = NULL;
+
+    /* Switch vcpu to guest mode. */
+    if (ret == 0)
+        nestedhvm_vcpu_enter_guestmode(v);
+
+out:
+    /* The vmentry emulation is over on every path, success or failure. */
+    hvm->nh_hostflags.fields.vmentry = 0;
+    return ret;
+}
+
+/*
+ * Decide what to do with an interrupt while the vcpu is in guest mode.
+ * Returns one of the NESTEDHVM_INTR_* values. For
+ * NESTEDHVM_INTR_FORCEVMEXIT the forced VMEXIT is recorded in
+ * nh_forcevmexit and the forcevmexit flag stays set.
+ */
+int
+nestedhvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack)
+{
+    enum hvm_intblk intr;
+    uint64_t exitcode = NESTEDHVM_INTERCEPT_INTR;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+
+    ASSERT(nestedhvm_vcpu_in_guestmode(v));
+
+    intr = nhvm_interrupt_blocked(v);
+    if (intr != hvm_intblk_none)
+        return NESTEDHVM_INTR_MASKED;
+
+    switch (intack.source) {
+    case hvm_intsrc_pic:
+    case hvm_intsrc_lapic:
+        exitcode = NESTEDHVM_INTERCEPT_INTR;
+        break;
+    case hvm_intsrc_nmi:
+        exitcode = NESTEDHVM_INTERCEPT_NMI;
+        break;
+    case hvm_intsrc_mce:
+        exitcode = NESTEDHVM_INTERCEPT_MCE;
+        break;
+    case hvm_intsrc_none:
+        return NESTEDHVM_INTR_NOTHANDLED;
+    default:
+        BUG();
+    }
+
+    /*
+     * Record the forced VMEXIT before asking whether the l1 guest
+     * intercepts it; the flag is cleared again if it does not.
+     */
+    hvm->nh_hostflags.fields.forcevmexit = 1;
+    hvm->nh_hostflags.fields.use_native_exitcode = 0;
+    hvm->nh_forcevmexit.exitcode = exitcode;
+    hvm->nh_forcevmexit.exitinfo1 = intack.source;
+    hvm->nh_forcevmexit.exitinfo2 = intack.vector;
+    if (nhvm_vmcx_guest_intercepts_exitcode(v,
+        guest_cpu_user_regs(), exitcode))
+        return NESTEDHVM_INTR_FORCEVMEXIT;
+
+    hvm->nh_hostflags.fields.forcevmexit = 0;
+    return NESTEDHVM_INTR_NOTINTERCEPTED;
+}
+
+/*
+ * Classify a VMEXIT taken in guest mode: handle it on the host, pass it
+ * on to the l1 guest, or flag an error. exitcode is in native format.
+ */
+static enum nestedhvm_vmexits
+nestedhvm_vmexit_intercepts(struct vcpu *v, struct cpu_user_regs *regs,
+                            uint64_t exitcode)
+{
+    bool_t is_intercepted;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+
+    is_intercepted = nhvm_vmcx_guest_intercepts_exitcode(v, regs, exitcode);
+
+    if (hvm->nh_hostflags.fields.forcevmexit) {
+        if (is_intercepted)
+            return NESTEDHVM_VMEXIT_INJECT;
+        gdprintk(XENLOG_ERR,
+            "forced VMEXIT can't happen as guest can't "
+            "handle the intercept\n");
+        return NESTEDHVM_VMEXIT_FATALERROR;
+    }
+
+    exitcode = nhvm_vmcx_exitcode_native2generic(v, exitcode);
+
+    switch (exitcode) {
+    case NESTEDHVM_INTERCEPT_INVALID:
+        if (is_intercepted)
+            return NESTEDHVM_VMEXIT_INJECT;
+        return NESTEDHVM_VMEXIT_HOST;
+
+    case NESTEDHVM_INTERCEPT_INTR:
+    case NESTEDHVM_INTERCEPT_NMI:
+        return NESTEDHVM_VMEXIT_HOST;
+    case NESTEDHVM_INTERCEPT_NM:
+        /* Host must handle lazy fpu context switching first.
+         * Then inject the VMEXIT if L1 guest intercepts this.
+         */
+        return NESTEDHVM_VMEXIT_HOST;
+
+    case NESTEDHVM_INTERCEPT_NPF:
+        if (nestedhvm_paging_mode_hap(v)) {
+            if (!is_intercepted)
+                return NESTEDHVM_VMEXIT_FATALERROR;
+            /* host nested paging + guest nested paging */
+            return NESTEDHVM_VMEXIT_HOST;
+        }
+        if (paging_mode_hap(v->domain)) {
+            if (is_intercepted)
+                return NESTEDHVM_VMEXIT_FATALERROR;
+            /* host nested paging + guest shadow paging */
+            return NESTEDHVM_VMEXIT_HOST;
+        }
+        /* host shadow paging + guest shadow paging */
+        /* Can this happen? */
+        BUG();
+        return NESTEDHVM_VMEXIT_FATALERROR;
+    case NESTEDHVM_INTERCEPT_PF:
+        if (nestedhvm_paging_mode_hap(v)) {
+            /* host nested paging + guest nested paging */
+            if (!is_intercepted)
+                /* l1 guest does not intercept #PF */
+                return NESTEDHVM_VMEXIT_HOST;
+            /* l1 guest intercepts #PF */
+            return NESTEDHVM_VMEXIT_INJECT;
+        }
+        if (!paging_mode_hap(v->domain)) {
+            /* host shadow paging + guest shadow paging */
+            return NESTEDHVM_VMEXIT_HOST;
+        }
+        /* host nested paging + guest shadow paging */
+        return NESTEDHVM_VMEXIT_INJECT;
+    case NESTEDHVM_INTERCEPT_VMMCALL:
+        /* Always let the guest handle VMMCALL/VMCALL */
+        return NESTEDHVM_VMEXIT_INJECT;
+    default:
+        break;
+    }
+
+    if (is_intercepted)
+        return NESTEDHVM_VMEXIT_CONTINUE;
+    return NESTEDHVM_VMEXIT_HOST;
+}
+
+/*
+ * Handle a VMEXIT taken in guest mode. Returns NESTEDHVM_VMEXIT_HOST
+ * without leaving guest mode when the host handles the exit; otherwise
+ * the host state is restored and the vcpu is put into host mode.
+ */
+static enum nestedhvm_vmexits
+nestedhvm_vmexit(struct vcpu *v, struct cpu_user_regs *regs, uint64_t exitcode)
+{
+    int rc;
+    enum nestedhvm_vmexits ret;
+
+    ASSERT(nestedhvm_vcpu_in_guestmode(v));
+
+    ret = nestedhvm_vmexit_intercepts(v, regs, exitcode);
+    switch (ret) {
+    case NESTEDHVM_VMEXIT_HOST:
+        return ret;
+    case NESTEDHVM_VMEXIT_ERROR:
+    case NESTEDHVM_VMEXIT_FATALERROR:
+        /* skip the VMEXIT preparation, only restore the host state */
+        goto out;
+    case NESTEDHVM_VMEXIT_CONTINUE:
+    case NESTEDHVM_VMEXIT_INJECT:
+    default:
+        break;
+    }
+
+    rc = nhvm_vmcx_prepare4vmexit(v);
+    if (rc)
+        ret = NESTEDHVM_VMEXIT_ERROR;
+
+out:
+    rc = nhvm_vcpu_hostrestore(v, regs);
+    if (rc)
+        ret = NESTEDHVM_VMEXIT_FATALERROR;
+
+    nestedhvm_vcpu_exit_guestmode(v);
+    return ret;
+}
+
+/* The exitcode is in native SVM/VMX format. The forced exitcode
+ * is in generic format.
+ *
+ * Returns NESTEDHVM_VMEXIT_DONE when the VMEXIT has been emulated,
+ * NESTEDHVM_VMEXIT_HOST when the exit is left to the host, and
+ * NESTEDHVM_VMEXIT_FATALERROR on failure.
+ */
+enum nestedhvm_vmexits
+nestedhvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
+                      uint64_t exitcode)
+{
+    int rc;
+    struct nestedhvm *hvm = &vcpu_nestedhvm(v);
+
+    hvm->nh_hostflags.fields.vmentry = 1;
+
+    ASSERT(hvm->nh_vmcx == NULL);
+    hvm->nh_vmcx = hvm_map_guest_frame_rw(hvm->nh_vmaddr >> PAGE_SHIFT);
+    if (hvm->nh_vmcx == NULL) {
+        gdprintk(XENLOG_ERR,
+            "VMEXIT: hvm_map_guest_frame_rw failed\n");
+        hvm->nh_hostflags.fields.vmentry = 0;
+        return NESTEDHVM_VMEXIT_FATALERROR;
+    }
+
+    if (hvm->nh_hostflags.fields.forcevmexit &&
+        !hvm->nh_hostflags.fields.use_native_exitcode) {
+        /* The generic code indexes nhvm_exitcode[]: reject any index
+         * that is outside of the table, not just the end mark.
+         */
+        BUG_ON(hvm->nh_forcevmexit.exitcode >= NESTEDHVM_INTERCEPT_LAST);
+        exitcode = nhvm_exitcode[hvm->nh_forcevmexit.exitcode];
+        hvm->nh_hostflags.fields.use_native_exitcode = 1;
+    }
+
+    if (nestedhvm_vcpu_in_guestmode(v)) {
+        enum nestedhvm_vmexits ret;
+
+        ret = nestedhvm_vmexit(v, regs, exitcode);
+        switch (ret) {
+        case NESTEDHVM_VMEXIT_FATALERROR:
+            gdprintk(XENLOG_ERR, "VMEXIT: fatal error\n");
+            /* fall through */
+        case NESTEDHVM_VMEXIT_HOST:
+            hvm->nh_hostflags.fields.vmentry = 0;
+            hvm_unmap_guest_frame(hvm->nh_vmcx);
+            hvm->nh_vmcx = NULL;
+            return ret;
+        case NESTEDHVM_VMEXIT_ERROR:
+            hvm->nh_hostflags.fields.forcevmexit = 1;
+            hvm->nh_hostflags.fields.use_native_exitcode = 0;
+            hvm->nh_forcevmexit.exitcode = NESTEDHVM_INTERCEPT_INVALID;
+            hvm->nh_forcevmexit.exitinfo1 = 0;
+            hvm->nh_forcevmexit.exitinfo2 = 0;
+            break;
+        default:
+            ASSERT(!nestedhvm_vcpu_in_guestmode(v));
+            break;
+        }
+
+        /* host state has been restored */
+    }
+
+    ASSERT(!nestedhvm_vcpu_in_guestmode(v));
+
+    /* Prepare for running the l1 guest. Make the actual
+     * modifications to the virtual VMCB/VMCS.
+     */
+    rc = nhvm_vcpu_vmexit(v, regs, exitcode);
+
+    hvm->nh_hostflags.fields.forcevmexit = 0;
+    hvm->nh_hostflags.fields.use_native_exitcode = 0;
+    hvm->nh_hostflags.fields.vmentry = 0;
+    hvm_unmap_guest_frame(hvm->nh_vmcx);
+    hvm->nh_vmcx = NULL;
+
+    if (rc)
+        return NESTEDHVM_VMEXIT_FATALERROR;
+
+    return NESTEDHVM_VMEXIT_DONE;
+}
+
+/* Mark the vcpu as running in guest mode. */
+void
+nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
+{
+    vcpu_nestedhvm(v).nh_guestmode = 1;
+}
+
+/* Mark the vcpu as running in host mode. */
+void
+nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
+{
+    vcpu_nestedhvm(v).nh_guestmode = 0;
+}
diff -r 0c2b398816b1 -r a1355c180672 xen/include/asm-x86/hvm/nestedhvm.h
--- /dev/null
+++ b/xen/include/asm-x86/hvm/nestedhvm.h
@@ -0,0 +1,107 @@
+/*
+ * Nested HVM
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ * Author: Christoph Egger
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef _HVM_NESTEDHVM_H
+#define _HVM_NESTEDHVM_H
+
+/*
+ * NOTE(review): the header names were lost when this patch was extracted;
+ * they are reconstructed from the trailing comments. Confirm against the
+ * original changeset.
+ */
+#include <xen/types.h> /* for uintNN_t */
+#include <xen/sched.h> /* for struct vcpu, struct domain */
+#include <asm/hvm/vcpu.h> /* for vcpu_nestedhvm */
+
+enum nestedhvm_vmexits {
+    NESTEDHVM_VMEXIT_ERROR = 0, /* inject VMEXIT w/ invalid VMCB */
+    NESTEDHVM_VMEXIT_FATALERROR = 1, /* crash first level guest */
+    NESTEDHVM_VMEXIT_HOST = 2, /* exit handled on host level */
+    NESTEDHVM_VMEXIT_CONTINUE = 3, /* further handling */
+    NESTEDHVM_VMEXIT_INJECT = 4, /* inject VMEXIT */
+    NESTEDHVM_VMEXIT_DONE = 5, /* VMEXIT handled */
+};
+
+/* Generic exit codes
+ * Note: This is not a complete list. Only maintain those which are
+ * used in the generic code. All other exit codes are represented
+ * by NESTEDHVM_INTERCEPT_LAST.
+ */
+enum nestedhvm_intercepts {
+    /* exitinfo1 and exitinfo2 undefined */
+    NESTEDHVM_INTERCEPT_INVALID = 0, /* INVALID vmcb/vmcs */
+    NESTEDHVM_INTERCEPT_SHUTDOWN = 1, /* kill guest */
+    NESTEDHVM_INTERCEPT_MCE = 2, /* machine check exception */
+    NESTEDHVM_INTERCEPT_VMMCALL = 3, /* VMMCALL/VMCALL */
+
+    /* exitinfo1 is hvm_intsrc_*, exitinfo2 is the vector */
+    NESTEDHVM_INTERCEPT_INTR = 4, /* interrupt exit code */
+    NESTEDHVM_INTERCEPT_NMI = 5, /* NMI exit code */
+
+    /* exitinfo1 is PF error code, exitinfo2 is PF fault address */
+    NESTEDHVM_INTERCEPT_NPF = 6, /* nested page fault */
+    NESTEDHVM_INTERCEPT_PF = 7, /* page fault */
+
+    /* exceptions: exitinfo1 and exitinfo2 are undefined */
+    NESTEDHVM_INTERCEPT_NM = 8, /* device-not-available */
+
+    /* end mark */
+    NESTEDHVM_INTERCEPT_LAST,
+};
+
+extern uint64_t nhvm_exitcode[NESTEDHVM_INTERCEPT_LAST];
+
+/* Nested HVM on/off per domain */
+bool_t nestedhvm_enabled(struct domain *d);
+int nestedhvm_initialise(struct domain *d);
+
+/* Nested VCPU */
+int nestedhvm_vcpu_initialise(struct vcpu *v);
+int nestedhvm_vcpu_destroy(struct vcpu *v);
+void nestedhvm_vcpu_reset(struct vcpu *v);
+bool_t nestedhvm_vcpu_in_guestmode(struct vcpu *v);
+void nestedhvm_vcpu_enter_guestmode(struct vcpu *v);
+void nestedhvm_vcpu_exit_guestmode(struct vcpu *v);
+int nestedhvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
+    uint64_t vmaddr, unsigned int inst_len);
+enum nestedhvm_vmexits nestedhvm_vcpu_vmexit(struct vcpu *v,
+    struct cpu_user_regs *regs, uint64_t exitcode);
+int nestedhvm_vcpu_state_validate(struct vcpu *v, uint64_t vmaddr);
+
+/* Interrupts */
+#define NESTEDHVM_INTR_NOTHANDLED     3
+#define NESTEDHVM_INTR_NOTINTERCEPTED 2
+#define NESTEDHVM_INTR_FORCEVMEXIT    1
+#define NESTEDHVM_INTR_MASKED         0
+int nestedhvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack);
+
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE   0
+#define NESTEDHVM_PAGEFAULT_INJECT 1
+#define NESTEDHVM_PAGEFAULT_ERROR  2
+int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t L2_gpa);
+
+/* Misc */
+#define nestedhvm_paging_mode_hap(v) (!!vcpu_nestedhvm((v)).nh_hap_enabled)
+#define nestedhvm_vmentry_emulate(v) \
+    (!!vcpu_nestedhvm((v)).nh_hostflags.fields.vmentry)
+#define nestedhvm_vm_flushtlb(d) \
+    flush_tlb_mask(&(d)->arch.hvm_domain.nh_dirty_cpumask)
+
+#endif /* _HVM_NESTEDHVM_H */