This patch adds virtual EPT capability for L1.
It is implemented as a simple per-vCPU, vTLB-like component,
independent of the domain-wide p2m.
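
As a rough illustration of the intended flow, here is a simplified
sketch of vept_load_eptp() in vept.c below, using the declarations
from that file (the name sketch_load_eptp is illustrative only;
allocation failure, page accounting and clearing of the fresh root
page are omitted):

    /* Sketch only, not part of the patch: guest EPTP -> shadow EPT root. */
    static mfn_t sketch_load_eptp(struct vept *vept, u64 geptp)
    {
        struct vept_slot *slot = get_eptp_slot(vept, geptp); /* LRU lookup */

        if ( slot == NULL )                    /* miss: build a new slot */
        {
            slot = get_free_slot(vept);        /* may evict the oldest slot */
            slot->eptp = geptp;
            slot->root = page_to_mfn(page_list_remove_head(&vept->freelist));
        }

        list_add_tail(&slot->list, &vept->used_slots); /* most recently used */
        return slot->root;  /* written into EPT_POINTER of the shadow VMCS */
    }
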
Signed-off-by: Qing He <qing.he@xxxxxxxxx>
---
b/xen/arch/x86/hvm/vmx/vept.c | 574 +++++++++++++++++++++++++++++++++++
b/xen/include/asm-x86/hvm/vmx/vept.h | 10
xen/arch/x86/hvm/vmx/Makefile | 1
xen/arch/x86/hvm/vmx/nest.c | 136 +++++++-
xen/arch/x86/hvm/vmx/vmx.c | 13
xen/include/asm-x86/hvm/vmx/nest.h | 7
6 files changed, 734 insertions(+), 7 deletions(-)
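
A note for reviewers on the EPT-violation path: the shadow table under
each slot is filled lazily.  The sketch below is simplified from
vept_ept_violation()/guest_walk_ept() in vept.c (the name
sketch_ept_violation is illustrative only; superpage handling, the full
L1-table prefetch and allocation failures are left out):

    /* Sketch only: lazy shadow fill on an L2 EPT violation. */
    int sketch_ept_violation(struct vept *vept, u64 geptp, paddr_t l2_gpa)
    {
        ept_walk_t gw;
        p2m_type_t p2mt;
        mfn_t mfn;

        /* 1. Walk L1's EPT (rooted at geptp) for the faulting L2 gpa. */
        if ( !guest_walk_ept(vept->vcpu, &gw, geptp, l2_gpa) )
            return 1;                 /* not mapped by L1: reflect to L1 */

        /* 2. The frame in L1's leaf entry is an L1 gfn; translate it
         *    through the real p2m to obtain the machine frame. */
        mfn = gfn_to_mfn(vept->vcpu->domain, gw.l1e.mfn, &p2mt);

        /* 3. Walk/allocate the shadow EPT in the current slot and write a
         *    leaf with that mfn plus L1's access bits, then let L2 retry
         *    (shadow_ept_next_level()/epte_set_ar_bits() do this below). */
        (void)mfn;
        return 0;
    }
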
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/Makefile Thu Apr 22 22:30:10 2010 +0800
@@ -6,3 +6,4 @@
obj-y += vpmu.o
obj-y += vpmu_core2.o
obj-y += nest.o
+obj-y += vept.o
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/nest.c
--- a/xen/arch/x86/hvm/vmx/nest.c Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/nest.c Thu Apr 22 22:30:10 2010 +0800
@@ -26,6 +26,7 @@
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vvmcs.h>
#include <asm/hvm/vmx/nest.h>
+#include <asm/hvm/vmx/vept.h>
/*
* VMX instructions support functions
@@ -295,6 +296,9 @@
__vmptrld(virt_to_maddr(nest->hvmcs));
v->arch.hvm_vmx.launched = 0;
+ nest->geptp = 0;
+ nest->vept = vept_init(v);
+
vmreturn(regs, VMSUCCEED);
out:
@@ -313,6 +317,9 @@
if ( unlikely(!nest->guest_vmxon_pa) )
goto invalid_op;
+ vept_teardown(nest->vept);
+ nest->vept = 0;
+
nest->guest_vmxon_pa = 0;
__vmpclear(virt_to_maddr(nest->svmcs));
@@ -529,6 +536,67 @@
return vmx_nest_handle_vmresume(regs);
}
+int vmx_nest_handle_invept(struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ struct vmx_inst_decoded decode;
+ struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+ mfn_t mfn;
+ u64 eptp;
+ int type;
+
+ if ( unlikely(!nest->guest_vmxon_pa) )
+ goto invalid_op;
+
+ decode_vmx_inst(regs, &decode);
+
+ hvm_copy_from_guest_virt(&eptp, decode.mem, sizeof(eptp), 0);
+ type = reg_read(regs, decode.reg2);
+
+ /* TODO: physical invept on other cpus */
+ switch ( type )
+ {
+ case 1:
+ mfn = vept_invalidate(nest->vept, eptp);
+ if ( eptp == nest->geptp )
+ nest->geptp = 0;
+
+ if ( __mfn_valid(mfn_x(mfn)) )
+ __invept(1, mfn_x(mfn) << PAGE_SHIFT | (eptp & 0xfff), 0);
+ break;
+ case 2:
+ vept_invalidate_all(nest->vept);
+ nest->geptp = 0;
+ break;
+ default:
+ gdprintk(XENLOG_ERR, "nest: unsupported invept type %d\n", type);
+ break;
+ }
+
+ vmreturn(regs, VMSUCCEED);
+
+ return X86EMUL_OKAY;
+
+invalid_op:
+ hvm_inject_exception(TRAP_invalid_op, 0, 0);
+ return X86EMUL_EXCEPTION;
+}
+
+int vmx_nest_vept(struct vcpu *v)
+{
+ struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+ int r = 0;
+
+ if ( paging_mode_hap(v->domain) &&
+ (__get_vvmcs(nest->vvmcs, CPU_BASED_VM_EXEC_CONTROL) &
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
+ (__get_vvmcs(nest->vvmcs, SECONDARY_VM_EXEC_CONTROL) &
+ SECONDARY_EXEC_ENABLE_EPT) )
+ r = 1;
+
+ return r;
+}
+
/*
* Nested VMX context switch
*/
@@ -739,7 +807,14 @@
vvmcs_to_shadow(nest->vvmcs, CR0_GUEST_HOST_MASK);
vvmcs_to_shadow(nest->vvmcs, CR4_GUEST_HOST_MASK);
- /* TODO: PDPTRs for nested ept */
+ if ( vmx_nest_vept(v) )
+ {
+ vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR0);
+ vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR1);
+ vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR2);
+ vvmcs_to_shadow(nest->vvmcs, GUEST_PDPTR3);
+ }
+
/* TODO: CR3 target control */
}
@@ -787,14 +862,32 @@
}
#endif
+
+ /* loading EPT_POINTER for L2 */
+ if ( vmx_nest_vept(v) )
+ {
+ u64 geptp;
+ mfn_t mfn;
+
+ geptp = __get_vvmcs(nest->vvmcs, EPT_POINTER);
+ if ( geptp != nest->geptp )
+ {
+ mfn = vept_load_eptp(nest->vept, geptp);
+ nest->geptp = geptp;
+
+ __vmwrite(EPT_POINTER, (mfn_x(mfn) << PAGE_SHIFT) | 0x1e);
+#ifdef __i386__
+ __vmwrite(EPT_POINTER_HIGH, (mfn_x(mfn) << PAGE_SHIFT) >> 32);
+#endif
+ }
+ }
+
regs->rip = __get_vvmcs(nest->vvmcs, GUEST_RIP);
regs->rsp = __get_vvmcs(nest->vvmcs, GUEST_RSP);
regs->rflags = __get_vvmcs(nest->vvmcs, GUEST_RFLAGS);
/* updating host cr0 to sync TS bit */
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
-
- /* TODO: EPT_POINTER */
}
static void sync_vvmcs_guest_state(struct vmx_nest_struct *nest)
@@ -1064,8 +1157,26 @@
break;
}
+ case EXIT_REASON_EPT_VIOLATION:
+ {
+ unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+ paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef __i386__
+ gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+ if ( vmx_nest_vept(v) )
+ {
+ if ( !vept_ept_violation(nest->vept, nest->geptp,
+ exit_qualification, gpa) )
+ bypass_l0 = 1;
+ else
+ nest->vmexit_pending = 1;
+ }
+
+ break;
+ }
+
case EXIT_REASON_WBINVD:
- case EXIT_REASON_EPT_VIOLATION:
case EXIT_REASON_EPT_MISCONFIG:
case EXIT_REASON_EXTERNAL_INTERRUPT:
/* pass to L0 handler */
@@ -1229,11 +1340,14 @@
data = (data << 32) | eax;
break;
case MSR_IA32_VMX_PROCBASED_CTLS:
+ mask = paging_mode_hap(current->domain)?
+ 0: CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
rdmsr(regs->ecx, eax, edx);
#define REMOVED_EXEC_CONTROL_CAP (CPU_BASED_TPR_SHADOW \
- | CPU_BASED_ACTIVATE_MSR_BITMAP \
- | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+ | CPU_BASED_ACTIVATE_MSR_BITMAP)
data = edx & ~REMOVED_EXEC_CONTROL_CAP;
+ data &= ~mask;
data = (data << 32) | eax;
break;
case MSR_IA32_VMX_EXIT_CTLS:
@@ -1254,12 +1368,20 @@
data = (data << 32) | eax;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
- mask = 0;
+ mask = paging_mode_hap(current->domain)?
+ SECONDARY_EXEC_ENABLE_EPT : 0;
rdmsr(regs->ecx, eax, edx);
data = edx & mask;
data = (data << 32) | eax;
break;
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ rdmsr(regs->ecx, eax, edx);
+#define REMOVED_EPT_VPID_CAP_HIGH ( 1 | 1<<8 | 1<<9 | 1<<10 | 1<<11 )
+#define REMOVED_EPT_VPID_CAP_LOW ( 1<<16 | 1<<17 | 1<<26 )
+ data = edx & ~REMOVED_EPT_VPID_CAP_HIGH;
+ data = (data << 32) | (eax & ~REMOVED_EPT_VPID_CAP_LOW);
+ break;
/* pass through MSRs */
case IA32_FEATURE_CONTROL_MSR:
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/vept.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vept.c Thu Apr 22 22:30:10 2010 +0800
@@ -0,0 +1,574 @@
+/*
+ * vept.c: virtual EPT for nested virtualization
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Author: Qing He <qing.he@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/paging.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/page.h>
+#include <xen/numa.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vept.h>
+
+#undef mfn_to_page
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef page_to_mfn
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
+
+/*
+ * This virtual EPT implementation is independent of the p2m facility
+ * and has somewhat different characteristics. It works much like a
+ * shadow page table (composing the guest table with the host table),
+ * but is per-vCPU and vTLB-style:
+ * - per vCPU, so no locking is required;
+ * - vTLB-style means all invalidations are honoured and no write
+ * protection is used. Unlike an ordinary page table, EPT updates
+ * and invalidations are rare in a well-written VMM, so the overhead
+ * is minimal.
+ *
+ * The physical root is loaded directly into the L2 sVMCS without
+ * going through any other host control. Multiple `cache slots' are
+ * maintained for multiple guest EPTPs, with simple LRU replacement.
+ *
+ * One limitation so far is that it does not work with the L0
+ * emulation code, so L1 p2m_mmio_direct on top of L0 p2m_mmio_dm
+ * is not supported for now.
+ */
+
+#define VEPT_MAX_SLOTS 8
+#define VEPT_ALLOCATION_SIZE 512
+
+struct vept_slot {
+ u64 eptp; /* guest eptp */
+ mfn_t root; /* root of phys table */
+ struct list_head list;
+
+ struct page_list_head page_list;
+};
+
+struct vept {
+ struct list_head used_slots; /* lru: new->tail, old->head */
+ struct list_head free_slots;
+
+ int total_pages;
+ int free_pages;
+ struct page_list_head freelist;
+
+ struct vcpu *vcpu;
+};
+
+
+static struct vept_slot *__get_eptp_slot(struct vept *vept, u64 geptp)
+{
+ struct vept_slot *slot, *tmp;
+
+ list_for_each_entry_safe( slot, tmp, &vept->used_slots, list )
+ if ( slot->eptp == geptp )
+ return slot;
+
+ return NULL;
+}
+
+static struct vept_slot *get_eptp_slot(struct vept *vept, u64 geptp)
+{
+ struct vept_slot *slot;
+
+ slot = __get_eptp_slot(vept, geptp);
+ if ( slot != NULL )
+ list_del(&slot->list);
+
+ return slot;
+}
+
+static void __clear_slot(struct vept *vept, struct vept_slot *slot)
+{
+ struct page_info *pg;
+
+ slot->eptp = 0;
+
+ while ( !page_list_empty(&slot->page_list) )
+ {
+ pg = page_list_remove_head(&slot->page_list);
+ page_list_add_tail(pg, &vept->freelist);
+
+ vept->free_pages++;
+ }
+}
+
+static struct vept_slot *get_free_slot(struct vept *vept)
+{
+ struct vept_slot *slot = NULL;
+
+ if ( !list_empty(&vept->free_slots) )
+ {
+ slot = list_entry(vept->free_slots.next, struct vept_slot, list);
+ list_del(&slot->list);
+ }
+ else if ( !list_empty(&vept->used_slots) )
+ {
+ slot = list_entry(vept->used_slots.next, struct vept_slot, list);
+ list_del(&slot->list);
+ __clear_slot(vept, slot);
+ }
+
+ return slot;
+}
+
+static void clear_all_slots(struct vept *vept)
+{
+ struct vept_slot *slot, *tmp;
+
+ list_for_each_entry_safe( slot, tmp, &vept->used_slots, list )
+ {
+ list_del(&slot->list);
+ __clear_slot(vept, slot);
+ list_add_tail(&slot->list, &vept->free_slots);
+ }
+}
+
+static int free_some_pages(struct vept *vept, struct vept_slot *curr)
+{
+ struct vept_slot *slot;
+ int r = 0;
+
+ if ( !list_empty(&vept->used_slots) )
+ {
+ slot = list_entry(vept->used_slots.next, struct vept_slot, list);
+ if ( slot != curr )
+ {
+ list_del(&slot->list);
+ __clear_slot(vept, slot);
+ list_add_tail(&slot->list, &vept->free_slots);
+
+ r = 1;
+ }
+ }
+
+ return r;
+}
+
+struct vept *vept_init(struct vcpu *v)
+{
+ struct vept *vept;
+ struct vept_slot *slot;
+ struct page_info *pg;
+ int i;
+
+ vept = xmalloc(struct vept);
+ if ( vept == NULL )
+ goto out;
+
+ memset(vept, 0, sizeof(*vept));
+ vept->vcpu = v;
+
+ INIT_PAGE_LIST_HEAD(&vept->freelist);
+ INIT_LIST_HEAD(&vept->used_slots);
+ INIT_LIST_HEAD(&vept->free_slots);
+
+ for ( i = 0; i < VEPT_MAX_SLOTS; i++ )
+ {
+ slot = xmalloc(struct vept_slot);
+ if ( slot == NULL )
+ break;
+
+ memset(slot, 0, sizeof(*slot));
+
+ INIT_LIST_HEAD(&slot->list);
+ INIT_PAGE_LIST_HEAD(&slot->page_list);
+
+ list_add(&slot->list, &vept->free_slots);
+ }
+
+ for ( i = 0; i < VEPT_ALLOCATION_SIZE; i++ )
+ {
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(v->domain)));
+ if ( pg == NULL )
+ break;
+
+ page_list_add_tail(pg, &vept->freelist);
+ vept->total_pages++;
+ vept->free_pages++;
+ }
+
+ out:
+ return vept;
+}
+
+void vept_teardown(struct vept *vept)
+{
+ struct page_info *pg;
+ struct vept_slot *slot, *tmp;
+
+ clear_all_slots(vept);
+
+ while ( !page_list_empty(&vept->freelist) )
+ {
+ pg = page_list_remove_head(&vept->freelist);
+ free_domheap_page(pg);
+ vept->free_pages--;
+ vept->total_pages--;
+ }
+
+ list_for_each_entry_safe( slot, tmp, &vept->free_slots, list )
+ xfree(slot);
+
+ xfree(vept);
+}
+
+mfn_t vept_load_eptp(struct vept *vept, u64 geptp)
+{
+ struct page_info *pg;
+ struct vept_slot *slot;
+ mfn_t mfn = _mfn(INVALID_MFN);
+ void *addr;
+
+ ASSERT(vept->vcpu == current);
+
+ slot = get_eptp_slot(vept, geptp);
+ if ( slot == NULL )
+ {
+ slot = get_free_slot(vept);
+ if ( unlikely(slot == NULL) )
+ {
+ gdprintk(XENLOG_ERR, "nest: can't get free slot\n");
+ return mfn;
+ }
+
+ while ( !vept->free_pages )
+ if ( !free_some_pages(vept, slot) )
+ {
+ slot->eptp = 0;
+ list_add_tail(&slot->list, &vept->free_slots);
+ gdprintk(XENLOG_ERR, "nest: vept no free pages\n");
+
+ return mfn;
+ }
+
+ vept->free_pages--;
+ pg = page_list_remove_head(&vept->freelist);
+
+ mfn = page_to_mfn(pg);
+ addr = map_domain_page(mfn_x(mfn));
+ clear_page(addr);
+ unmap_domain_page(addr);
+ page_list_add_tail(pg, &slot->page_list);
+ slot->eptp = geptp;
+ slot->root = mfn;
+ }
+
+ mfn = slot->root;
+ list_add_tail(&slot->list, &vept->used_slots);
+
+ return mfn;
+}
+
+mfn_t vept_invalidate(struct vept *vept, u64 geptp)
+{
+ struct vept_slot *slot;
+ mfn_t mfn = _mfn(INVALID_MFN);
+
+ ASSERT(vept->vcpu == current);
+
+ slot = get_eptp_slot(vept, geptp);
+ if ( slot != NULL )
+ {
+ mfn = slot->root;
+ __clear_slot(vept, slot);
+ list_add_tail(&slot->list, &vept->free_slots);
+ }
+
+ return mfn;
+}
+
+void vept_invalidate_all(struct vept *vept)
+{
+ ASSERT(vept->vcpu == current);
+
+ clear_all_slots(vept);
+}
+
+/*
+ * guest EPT walk and EPT violation
+ */
+struct ept_walk {
+ unsigned long gfn;
+ unsigned long gfn_remainder;
+ ept_entry_t l4e, l3e, l2e, l1e;
+ mfn_t l4mfn, l3mfn, l2mfn, l1mfn;
+ int sp;
+};
+typedef struct ept_walk ept_walk_t;
+
+#define GEPT_NORMAL_PAGE 0
+#define GEPT_SUPER_PAGE 1
+#define GEPT_NOT_PRESENT 2
+static int guest_ept_next_level(struct vcpu *v, ept_entry_t **table,
+ unsigned long *gfn_remainder, int level, u32 *ar,
+ ept_entry_t *entry, mfn_t *next_mfn)
+{
+ int index;
+ ept_entry_t *ept_entry;
+ ept_entry_t *next;
+ p2m_type_t p2mt;
+ int rc = GEPT_NORMAL_PAGE;
+ mfn_t mfn;
+
+ index = *gfn_remainder >> (level * EPT_TABLE_ORDER);
+
+ ept_entry = (*table) + index;
+ *entry = *ept_entry;
+ *ar &= entry->epte & 0x7;
+
+ *gfn_remainder &= (1UL << (level * EPT_TABLE_ORDER)) - 1;
+
+ if ( !(ept_entry->epte & 0x7) )
+ rc = GEPT_NOT_PRESENT;
+ else if ( ept_entry->sp_avail )
+ rc = GEPT_SUPER_PAGE;
+ else
+ {
+ mfn = gfn_to_mfn(v->domain, ept_entry->mfn, &p2mt);
+ if ( !p2m_is_ram(p2mt) )
+ return GEPT_NOT_PRESENT;
+
+ if ( next_mfn )
+ {
+ next = map_domain_page(mfn_x(mfn));
+ unmap_domain_page(*table);
+
+ *table = next;
+ *next_mfn = mfn;
+ }
+ }
+
+ return rc;
+}
+
+static u32 guest_walk_ept(struct vcpu *v, ept_walk_t *gw,
+ u64 geptp, u64 ggpa)
+{
+ ept_entry_t *table;
+ p2m_type_t p2mt;
+ int rc;
+ u32 ar = 0x7;
+
+ unsigned long gfn = (unsigned long) (ggpa >> PAGE_SHIFT);
+ unsigned long gfn_remainder = gfn;
+
+ memset(gw, 0, sizeof(*gw));
+ gw->gfn = gfn;
+ gw->sp = 0;
+
+ gw->l4mfn = gfn_to_mfn(v->domain, geptp >> PAGE_SHIFT, &p2mt);
+ if ( !p2m_is_ram(p2mt) )
+ return 0;
+
+ table = map_domain_page(mfn_x(gw->l4mfn));
+
+ rc = guest_ept_next_level(v, &table, &gfn_remainder, 3, &ar,
+ &gw->l4e, &gw->l3mfn);
+
+ if ( rc )
+ goto out;
+
+ rc = guest_ept_next_level(v, &table, &gfn_remainder, 2, &ar,
+ &gw->l3e, &gw->l2mfn);
+
+ if ( rc == GEPT_SUPER_PAGE )
+ gw->sp = 2;
+ if ( rc )
+ goto out;
+
+ rc = guest_ept_next_level(v, &table, &gfn_remainder, 1, &ar,
+ &gw->l2e, &gw->l1mfn);
+
+ if ( rc == GEPT_SUPER_PAGE )
+ gw->sp = 1;
+ if ( rc )
+ goto out;
+
+ rc = guest_ept_next_level(v, &table, &gfn_remainder, 0, &ar,
+ &gw->l1e, NULL);
+
+ out:
+ gw->gfn_remainder = gfn_remainder;
+ unmap_domain_page(table);
+ return ar;
+}
+
+static void epte_set_ar_bits(ept_entry_t *entry, unsigned long ar)
+{
+ entry->epte &= ~0x7f;
+ entry->epte |= ar & 0x7f;
+}
+
+static int shadow_ept_next_level(struct vept *vept, struct vept_slot *slot,
+ ept_entry_t **table, unsigned long *gfn_remainder,
+ int level, u32 *ar, ept_entry_t gentry)
+{
+ int index;
+ ept_entry_t *sentry;
+ ept_entry_t *next;
+ mfn_t mfn;
+ struct page_info *pg;
+
+ index = *gfn_remainder >> (level * EPT_TABLE_ORDER);
+
+ sentry = (*table) + index;
+ *ar = sentry->epte & 0x7;
+
+ *gfn_remainder &= (1UL << (level * EPT_TABLE_ORDER)) - 1;
+
+ if ( !(sentry->epte & 0x7) )
+ {
+ while ( !vept->free_pages )
+ if ( !free_some_pages(vept, slot) )
+ {
+ gdprintk(XENLOG_ERR, "nest: vept no free pages\n");
+ return 0;
+ }
+
+ vept->free_pages--;
+ pg = page_list_remove_head(&vept->freelist);
+ page_list_add_tail(pg, &slot->page_list);
+ mfn = page_to_mfn(pg);
+ next = map_domain_page(mfn_x(mfn));
+ clear_page(next);
+
+ sentry->mfn = mfn_x(mfn);
+ }
+ else
+ {
+ next = map_domain_page(sentry->mfn);
+ }
+
+ epte_set_ar_bits(sentry, gentry.epte);
+
+ unmap_domain_page(*table);
+ *table = next;
+
+ return 1;
+}
+
+int vept_ept_violation(struct vept *vept, u64 geptp,
+ unsigned long qualification, paddr_t addr)
+{
+ ept_walk_t gw;
+ struct vept_slot *slot;
+ ept_entry_t *table, *gept;
+ ept_entry_t *sentry, *gentry;
+ u32 old_entry, sp_ar = 0;
+ p2m_type_t p2mt;
+ unsigned long mfn_start = 0;
+ unsigned long gfn_remainder;
+ int rc, i;
+
+ ASSERT(vept->vcpu == current);
+
+ slot = __get_eptp_slot(vept, geptp);
+ if ( unlikely(slot == NULL) )
+ return 0;
+
+ rc = guest_walk_ept(vept->vcpu, &gw, geptp, addr);
+
+ if ( !(rc & (qualification & 0x7)) ) /* inject to guest */
+ return 1;
+
+ if ( gw.sp == 2 ) /* 1G */
+ {
+ sp_ar = gw.l3e.epte & 0x7;
+ mfn_start = gw.l3e.mfn +
+ (gw.gfn_remainder & ~((1UL << EPT_TABLE_ORDER) - 1));
+ }
+ else if ( gw.sp == 1 ) /* 2M */
+ {
+ sp_ar = gw.l2e.epte & 0x7;
+ mfn_start = gw.l2e.mfn;
+ }
+ else
+ mfn_start = 0;
+
+ table = map_domain_page(mfn_x(slot->root));
+ gfn_remainder = gw.gfn;
+
+ shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 3,
+ &old_entry, gw.l4e);
+
+ shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 2,
+ &old_entry, gw.l3e);
+
+ shadow_ept_next_level(vept, slot, &table, &gfn_remainder, 1,
+ &old_entry, (gw.sp == 2) ? gw.l3e : gw.l2e);
+
+ /* if l1p is just allocated, do a full prefetch */
+ if ( !old_entry && !gw.sp )
+ {
+ gept = map_domain_page(mfn_x(gw.l1mfn));
+ for ( i = 0; i < 512; i++ )
+ {
+ gentry = gept + i;
+ sentry = table + i;
+ if ( gentry->epte & 0x7 )
+ {
+ sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+ gentry->mfn, &p2mt));
+ epte_set_ar_bits(sentry, gentry->epte);
+ }
+ else
+ sentry->epte = 0;
+ }
+ unmap_domain_page(gept);
+ }
+ else if ( !old_entry && gw.sp )
+ {
+ for ( i = 0; i < 512; i++ )
+ {
+ sentry = table + i;
+ sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+ mfn_start + i, &p2mt));
+ epte_set_ar_bits(sentry, sp_ar);
+ }
+ }
+ else if ( old_entry && !gw.sp )
+ {
+ i = gw.gfn & ((1 << EPT_TABLE_ORDER) - 1);
+ sentry = table + i;
+ sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+ gw.l1e.mfn, &p2mt));
+ epte_set_ar_bits(sentry, gw.l1e.epte);
+ }
+ else /* old_entry && gw.sp */
+ {
+ i = gw.gfn & ((1 << EPT_TABLE_ORDER) - 1);
+ sentry = table + i;
+ sentry->mfn = mfn_x(gfn_to_mfn_guest(vept->vcpu->domain,
+ mfn_start + i, &p2mt));
+ epte_set_ar_bits(sentry, sp_ar);
+ }
+
+ unmap_domain_page(table);
+ return 0;
+}
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 22 22:30:10 2010 +0800
@@ -1032,6 +1032,14 @@
p2m_type_t p2mt;
char *p;
+ /*
+ * When nested EPT is in use, L0 has no knowledge of how to
+ * interpret L2's CR3; it is L1's responsibility to provide
+ * GUEST_PDPTRn, and we rely solely on those.
+ */
+ if ( v->arch.hvm_vcpu.in_nesting && vmx_nest_vept(v) )
+ return;
+
/* EPT needs to load PDPTRS into VMCS for PAE. */
if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
return;
@@ -2705,6 +2713,11 @@
if ( vmx_nest_handle_vmxon(regs) == X86EMUL_OKAY )
__update_guest_eip(inst_len);
break;
+ case EXIT_REASON_INVEPT:
+ inst_len = __get_instruction_length();
+ if ( vmx_nest_handle_invept(regs) == X86EMUL_OKAY )
+ __update_guest_eip(inst_len);
+ break;
case EXIT_REASON_MWAIT_INSTRUCTION:
case EXIT_REASON_MONITOR_INSTRUCTION:
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h Thu Apr 22 22:30:09 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h Thu Apr 22 22:30:10 2010 +0800
@@ -47,6 +47,9 @@
unsigned long intr_info;
unsigned long error_code;
+
+ u64 geptp;
+ struct vept *vept;
};
asmlinkage void vmx_nest_switch_mode(void);
@@ -64,6 +67,8 @@
int vmx_nest_handle_vmresume(struct cpu_user_regs *regs);
int vmx_nest_handle_vmlaunch(struct cpu_user_regs *regs);
+int vmx_nest_handle_invept(struct cpu_user_regs *regs);
+
void vmx_nest_update_exec_control(struct vcpu *v, unsigned long value);
void vmx_nest_update_secondary_exec_control(struct vcpu *v,
unsigned long value);
@@ -81,4 +86,6 @@
int vmx_nest_msr_write_intercept(struct cpu_user_regs *regs,
u64 msr_content);
+int vmx_nest_vept(struct vcpu *v);
+
#endif /* __ASM_X86_HVM_NEST_H__ */
diff -r 22df5f7ec6d3 -r 7f54e6615e1e xen/include/asm-x86/hvm/vmx/vept.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vept.h Thu Apr 22 22:30:10 2010 +0800
@@ -0,0 +1,10 @@
+#include <asm/hvm/vmx/vmx.h>
+
+
+struct vept *vept_init(struct vcpu *v);
+void vept_teardown(struct vept *vept);
+mfn_t vept_load_eptp(struct vept *vept, u64 eptp);
+mfn_t vept_invalidate(struct vept *vept, u64 eptp);
+void vept_invalidate_all(struct vept *vept);
+int vept_ept_violation(struct vept *vept, u64 eptp,
+ unsigned long qualification, paddr_t addr);