.. to have feature parity with SVM.
This required adding infrastructure to make use of VMX's MSR save/restore
feature, as well as making the MSR intercept bitmap per-vCPU.
(Applies cleanly only on top of the previously sent SVM/EFER and
HVM/CPUID patches.)
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmcs.c 2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmcs.c 2007-08-08 11:46:40.000000000 +0200
@@ -163,6 +163,10 @@ static void vmx_init_vmcs_config(void)
/* Require Write-Back (WB) memory type for VMCS accesses. */
BUG_ON(((vmx_msr_high >> 18) & 15) != 6);
+
+ rdmsr(MSR_IA32_VMX_MISC, vmx_msr_low, vmx_msr_high);
+ /* 16-byte entries in 512-entry steps */
+ vmx_msr_max_order = ((vmx_msr_low >> 25) & 7) + 13 - PAGE_SHIFT;
}
static struct vmcs_struct *vmx_alloc_vmcs(void)
@@ -378,7 +382,7 @@ static void vmx_set_host_env(struct vcpu
#define GUEST_SEGMENT_LIMIT 0xffffffff
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
{
unsigned long cr0, cr4;
union vmcs_arbytes arbytes;
@@ -394,8 +398,22 @@ static void construct_vmcs(struct vcpu *
if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
__vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+ /* MSR access bitmap. */
if ( cpu_has_vmx_msr_bitmap )
- __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+ {
+ char *msr_bitmap = alloc_xenheap_page();
+
+ if ( msr_bitmap == NULL)
+ return -ENOMEM;
+ memset(msr_bitmap, ~0, PAGE_SIZE);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP);
+ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP);
+ v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+ __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+ }
/* I/O access bitmap. */
__vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -427,10 +445,8 @@ static void construct_vmcs(struct vcpu *
__vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
/* MSR intercepts. */
- __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
- __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
- __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
__vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
__vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
__vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -537,6 +553,131 @@ static void construct_vmcs(struct vcpu *
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
vmx_vlapic_msr_changed(v);
+
+ return 0;
+}
+
+/*
+ * Look up @msr in the vCPU's guest MSR area (v->arch.hvm_vmx.msr_area).
+ * On a hit the recorded value is stored in *@val and 0 is returned;
+ * otherwise -ESRCH is returned and *@val is left untouched.
+ */
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+    const struct vmx_msr_entry *area = v->arch.hvm_vmx.msr_area;
+    unsigned int nr = v->arch.hvm_vmx.msr_count;
+    unsigned int slot = 0;
+
+    while ( slot < nr )
+    {
+        if ( area[slot].index == msr )
+        {
+            *val = area[slot].data;
+            return 0;
+        }
+        ++slot;
+    }
+
+    return -ESRCH;
+}
+
+/*
+ * Update the value recorded for @msr in the vCPU's guest MSR area
+ * (v->arch.hvm_vmx.msr_area).  Returns 0 when an entry for @msr exists and
+ * has been set to @val, or -ESRCH when the area has no entry for it.
+ */
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+    struct vmx_msr_entry *area = v->arch.hvm_vmx.msr_area;
+    unsigned int nr = v->arch.hvm_vmx.msr_count;
+    unsigned int slot = 0;
+
+    while ( slot < nr )
+    {
+        if ( area[slot].index == msr )
+        {
+            area[slot].data = val;
+            return 0;
+        }
+        ++slot;
+    }
+
+    return -ESRCH;
+}
+
+/*
+ * Ensure @msr has a slot in the vCPU's guest MSR area, i.e. the area whose
+ * address is programmed as both VM_EXIT_MSR_STORE_ADDR and
+ * VM_ENTRY_MSR_LOAD_ADDR.  A newly added slot starts with data 0.
+ *
+ * Returns 0 on success (including when @msr is already present), -ENOSPC
+ * if one more entry would need an allocation above vmx_msr_max_order, or
+ * -ENOMEM if the xenheap allocation fails.
+ */
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    /*
+     * Allocate when there is no area yet or the current one is too small.
+     * The NULL test matters: the first entries presumably need only an
+     * order-0 allocation, so testing msr_order < order alone (with
+     * msr_order == 0 meaning "nothing allocated") would skip the
+     * allocation and the stores below would go through a NULL msr_area.
+     */
+    if ( msr_area == NULL || v->arch.hvm_vmx.msr_order < order )
+    {
+        struct vmx_msr_entry *old_area = msr_area;
+
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( old_area != NULL )
+        {
+            /* Growing: carry the existing entries over, then drop the old area. */
+            memcpy(msr_area, old_area, msr_count * sizeof(*msr_area));
+            free_xenheap_pages(old_area, v->arch.hvm_vmx.msr_order);
+        }
+#ifdef __i386__
+        else
+        {
+            /* First allocation: zero the _HIGH halves of the address fields. */
+            __vmwrite(VM_EXIT_MSR_STORE_ADDR_HIGH, 0);
+            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR_HIGH, 0);
+        }
+#endif
+        v->arch.hvm_vmx.msr_area = msr_area;
+        v->arch.hvm_vmx.msr_order = order;
+        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    /* Append the new entry and publish the new count to the VMCS. */
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    msr_area[msr_count].data = 0;
+    v->arch.hvm_vmx.msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
+}
+
+/*
+ * Ensure @msr has a slot in the vCPU's host MSR area, i.e. the area whose
+ * address is programmed as VM_EXIT_MSR_LOAD_ADDR.  A newly added slot is
+ * filled with the value read from the MSR (rdmsrl) at the time of the call.
+ *
+ * Returns 0 on success (including when @msr is already present), -ENOSPC
+ * if one more entry would need an allocation above vmx_msr_max_order, or
+ * -ENOMEM if the xenheap allocation fails.
+ */
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+    unsigned int i, order;
+    unsigned int msr_count = v->arch.hvm_vmx.host_msr_count;
+    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+    for ( i = 0; i < msr_count; ++i )
+        if ( msr_area[i].index == msr )
+            return 0;
+
+    order = get_order_from_bytes((msr_count + 1) * sizeof(*msr_area));
+    if ( order > vmx_msr_max_order )
+        return -ENOSPC;
+
+    /*
+     * Allocate when there is no area yet or the current one is too small.
+     * The NULL test matters: the first entries presumably need only an
+     * order-0 allocation, so testing host_msr_order < order alone (with
+     * host_msr_order == 0 meaning "nothing allocated") would skip the
+     * allocation and the stores below would go through a NULL msr_area.
+     */
+    if ( msr_area == NULL || v->arch.hvm_vmx.host_msr_order < order )
+    {
+        struct vmx_msr_entry *old_area = msr_area;
+
+        if ( (msr_area = alloc_xenheap_pages(order)) == NULL )
+            return -ENOMEM;
+        if ( old_area != NULL )
+        {
+            /* Growing: carry the existing entries over, then drop the old area. */
+            memcpy(msr_area, old_area, msr_count * sizeof(*msr_area));
+            free_xenheap_pages(old_area, v->arch.hvm_vmx.host_msr_order);
+        }
+#ifdef __i386__
+        else
+            /* First allocation: zero the _HIGH half of the address field. */
+            __vmwrite(VM_EXIT_MSR_LOAD_ADDR_HIGH, 0);
+#endif
+        v->arch.hvm_vmx.host_msr_area = msr_area;
+        v->arch.hvm_vmx.host_msr_order = order;
+        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+    }
+
+    /* Append the new entry and publish the new count to the VMCS. */
+    msr_area[msr_count].index = msr;
+    msr_area[msr_count].mbz = 0;
+    rdmsrl(msr, msr_area[msr_count].data);
+    v->arch.hvm_vmx.host_msr_count = ++msr_count;
+    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+    return 0;
}
int vmx_create_vmcs(struct vcpu *v)
Index: 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c
===================================================================
--- 2007-08-08.orig/xen/arch/x86/hvm/vmx/vmx.c 2007-08-08 11:45:25.000000000 +0200
+++ 2007-08-08/xen/arch/x86/hvm/vmx/vmx.c 2007-08-08 11:56:05.000000000 +0200
@@ -53,7 +53,7 @@
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
-char *vmx_msr_bitmap;
+unsigned int vmx_msr_max_order = 0;
static void vmx_ctxt_switch_from(struct vcpu *v);
static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1170,26 +1170,6 @@ static int vmx_event_pending(struct vcpu
return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
}
-static void disable_intercept_for_msr(u32 msr)
-{
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if ( msr <= 0x1fff )
- {
- __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
- __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
- }
- else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
- {
- msr &= 0x1fff;
- __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
- __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
- }
-}
-
static struct hvm_function_table vmx_function_table = {
.name = "VMX",
.domain_initialise = vmx_domain_initialise,
@@ -1259,21 +1239,6 @@ void start_vmx(void)
setup_vmcs_dump();
hvm_enable(&vmx_function_table);
-
- if ( cpu_has_vmx_msr_bitmap )
- {
- printk("VMX: MSR intercept bitmap enabled\n");
- vmx_msr_bitmap = alloc_xenheap_page();
- BUG_ON(vmx_msr_bitmap == NULL);
- memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
- disable_intercept_for_msr(MSR_FS_BASE);
- disable_intercept_for_msr(MSR_GS_BASE);
-
- disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
- disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
- disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
- }
}
/*
@@ -1380,7 +1345,10 @@ static void vmx_do_cpuid(struct cpu_user
bitmaskof(X86_FEATURE_ACC));
/* Unsupported for virtualised CPUs. */
- ecx &= ~(bitmaskof(X86_FEATURE_PDCM));
+ ecx &= ~(bitmaskof(X86_FEATURE_PDCM) |
+ bitmaskof(X86_FEATURE_DSCPL));
+
+ edx &= ~bitmaskof(X86_FEATURE_DTES);
break;
@@ -2572,6 +2540,82 @@ static int vmx_cr_access(unsigned long e
return 1;
}
+/*
+ * A contiguous run of last-branch-record MSRs: `count` MSRs starting at
+ * MSR number `base`.  Each table below ends with a { 0, 0 } sentinel.
+ */
+struct lbr_info {
+    u32 base;
+    u32 count;
+};
+
+/* Returned by last_branch_msr_get() for family 15, models 3, 4 and 6. */
+static const struct lbr_info p4_lbr[] = {
+    { MSR_P4_LER_FROM_LIP,          1 },
+    { MSR_P4_LER_TO_LIP,            1 },
+    { MSR_P4_LASTBRANCH_TOS,        1 },
+    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+};
+
+/* Returned by last_branch_msr_get() for family 6, model 15. */
+static const struct lbr_info c2_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_P6_LASTBRANCH_TOS,        1 },
+    { MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
+    { 0, 0 }
+};
+
+#ifdef __i386__
+/* Returned by last_branch_msr_get() for family 6, models 9, 13 and 14. */
+static const struct lbr_info pm_lbr[] = {
+    { MSR_IA32_LASTINTFROMIP,       1 },
+    { MSR_IA32_LASTINTTOIP,         1 },
+    { MSR_P6_LASTBRANCH_TOS,        1 },
+    { MSR_PM_LASTBRANCH_0,          NUM_MSR_PM_LASTBRANCH },
+    { 0, 0 }
+};
+#endif
+
+/*
+ * Select the last-branch-record MSR table matching the boot CPU's family
+ * and model.  Returns NULL when the family/model pair is not one of the
+ * recognised ones.
+ */
+static const struct lbr_info *last_branch_msr_get(void)
+{
+    unsigned int family = boot_cpu_data.x86;
+    unsigned int model = boot_cpu_data.x86_model;
+
+    if ( family == 6 )
+    {
+#ifdef __i386__
+        /* PentiumM (9, 13) and Core Solo/Duo (14) */
+        if ( model == 9 || model == 13 || model == 14 )
+            return pm_lbr;
+#endif
+        /* Core2 Duo */
+        if ( model == 15 )
+            return c2_lbr;
+    }
+    else if ( family == 15 )
+    {
+        /* Pentium4/Xeon with em64t */
+        if ( model == 3 || model == 4 || model == 6 )
+            return p4_lbr;
+    }
+
+    return NULL;
+}
+
+/*
+ * Report whether MSR number @ecx falls inside one of the MSR ranges of the
+ * table returned by last_branch_msr_get().  Returns 1 if it does, 0
+ * otherwise (including when no table is available for this CPU).
+ */
+static int last_branch_msr(u32 ecx)
+{
+    const struct lbr_info *range = last_branch_msr_get();
+
+    if ( range == NULL )
+        return 0;
+
+    /* Walk the table up to its { 0, 0 } sentinel. */
+    while ( range->count )
+    {
+        if ( ecx >= range->base && ecx < range->base + range->count )
+            return 1;
+        ++range;
+    }
+
+    return 0;
+}
+
static int vmx_do_msr_read(struct cpu_user_regs *regs)
{
u64 msr_content = 0;
@@ -2597,6 +2641,10 @@ static int vmx_do_msr_read(struct cpu_us
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
+ case MSR_IA32_DEBUGCTLMSR:
+ if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0)
+ msr_content = 0;
+ break;
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
case MSR_IA32_MCG_STATUS:
@@ -2610,6 +2658,15 @@ static int vmx_do_msr_read(struct cpu_us
msr_content = 0;
break;
default:
+ if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0)
+ break;
+
+ if ( last_branch_msr(ecx) )
+ {
+ msr_content = 0;
+ break;
+ }
+
switch ( long_mode_do_msr_read(regs) )
{
case HNDL_unhandled:
@@ -2736,13 +2793,50 @@ static int vmx_do_msr_write(struct cpu_u
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+ case MSR_IA32_DEBUGCTLMSR:
+ if ( msr_content & ~3 )
+ break;
+ if ( msr_content )
+ {
+ int rc = 0;
+
+ if ( msr_content & 1 )
+ {
+ const struct lbr_info *lbr = last_branch_msr_get();
+
+ if ( lbr == NULL )
+ break;
+ for ( ; rc == 0 && lbr->count; ++lbr )
+ {
+ u32 i;
+
+ for ( i = 0; rc == 0 && i < lbr->count; ++i )
+ {
+ rc = vmx_add_guest_msr(v, lbr->base + i);
+ if ( rc == 0 && cpu_has_vmx_msr_bitmap )
+ vmx_disable_intercept_for_msr(v->arch.hvm_vmx.msr_bitmap,
+ lbr->base + i);
+ }
+ }
+ }
+
+ if ( rc < 0 ||
+ vmx_add_guest_msr(v, ecx) < 0 ||
+ vmx_add_host_load_msr(v, ecx) < 0)
+ vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+ else
+ vmx_write_guest_msr(v, ecx, msr_content);
+ }
+ break;
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
switch ( long_mode_do_msr_write(regs) )
{
case HNDL_unhandled:
- wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+ if ( vmx_write_guest_msr(v, ecx, msr_content) != 0 &&
+ !last_branch_msr(ecx) )
+ wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
break;
case HNDL_exception_raised:
return 0;
Index: 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/hvm/vmx/vmcs.h 2007-08-06 15:08:41.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/hvm/vmx/vmcs.h 2007-08-08 11:45:33.000000000 +0200
@@ -35,6 +35,13 @@ struct vmcs_struct {
unsigned char data [0]; /* vmcs size is read from MSR */
};
+struct vmx_msr_entry { /* one slot of a per-vCPU MSR area (msr_area / host_msr_area) */
+ u32 index; /* MSR number; the key the lookup helpers compare against */
+ u32 mbz; /* always written as 0 when a slot is created ("must be zero", presumably) */
+ u64 data; /* MSR value held in this slot */
+};
+extern unsigned int vmx_msr_max_order;
+
enum {
VMX_INDEX_MSR_LSTAR = 0,
VMX_INDEX_MSR_STAR,
@@ -79,6 +86,14 @@ struct arch_vmx_struct {
#endif
unsigned long efer;
+ char *msr_bitmap;
+ unsigned int msr_order;
+ unsigned int msr_count;
+ struct vmx_msr_entry *msr_area;
+ unsigned int host_msr_order;
+ unsigned int host_msr_count;
+ struct vmx_msr_entry *host_msr_area;
+
/* Following fields are all specific to vmxassist. */
unsigned long vmxassist_enabled:1;
unsigned long irqbase_mode:1;
@@ -137,7 +152,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
#define cpu_has_vmx_msr_bitmap \
(vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -274,6 +288,31 @@ enum vmcs_field {
HOST_RIP = 0x00006c16,
};
+/*
+ * Clear both the read and the write bit for @msr in @msr_bitmap.  MSR
+ * numbers outside the two ranges the bitmap covers are silently ignored.
+ */
+static inline void vmx_disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    char *read_map, *write_map;
+
+    /*
+     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+     * have the write-low and read-high bitmap offsets the wrong way round.
+     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+     */
+    if ( msr <= 0x1fff )
+    {
+        read_map = msr_bitmap + 0x000;  /* read-low */
+        write_map = msr_bitmap + 0x800; /* write-low */
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        read_map = msr_bitmap + 0x400;  /* read-high */
+        write_map = msr_bitmap + 0xc00; /* write-high */
+    }
+    else
+        return;
+
+    __clear_bit(msr, read_map);
+    __clear_bit(msr, write_map);
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
#endif /* ASM_X86_HVM_VMX_VMCS_H__ */
/*
Index: 2007-08-08/xen/include/asm-x86/msr.h
===================================================================
--- 2007-08-08.orig/xen/include/asm-x86/msr.h 2007-08-08 11:43:53.000000000 +0200
+++ 2007-08-08/xen/include/asm-x86/msr.h 2007-08-08 11:45:33.000000000 +0200
@@ -200,6 +200,13 @@ static inline void write_efer(__u64 val)
#define MSR_P6_EVNTSEL0 0x186
#define MSR_P6_EVNTSEL1 0x187
+#define MSR_P6_LASTBRANCH_TOS 0x1c9
+#define MSR_PM_LASTBRANCH_0 0x40
+#define NUM_MSR_PM_LASTBRANCH 8
+#define MSR_C2_LASTBRANCH_0_FROM_IP 0x40
+#define MSR_C2_LASTBRANCH_0_TO_IP 0x60
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
+
#define MSR_IA32_PERF_STATUS 0x198
#define MSR_IA32_PERF_CTL 0x199
@@ -223,6 +230,8 @@ static inline void write_efer(__u64 val)
#define MSR_IA32_MC0_ADDR 0x402
#define MSR_IA32_MC0_MISC 0x403
+#define MSR_IA32_DS_AREA 0x600
+
/* K8 Machine Check MSRs */
#define MSR_K8_MC1_CTL 0x404
#define MSR_K8_MC1_STATUS 0x405
@@ -333,6 +342,15 @@ static inline void write_efer(__u64 val)
#define MSR_P4_U2L_ESCR0 0x3b0
#define MSR_P4_U2L_ESCR1 0x3b1
+#define MSR_P4_LER_FROM_LIP 0x1d7
+#define MSR_P4_LER_TO_LIP 0x1d8
+#define MSR_P4_LASTBRANCH_TOS 0x1da
+#define MSR_P4_LASTBRANCH_0 0x1db
+#define NUM_MSR_P4_LASTBRANCH 4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP 0x680
+#define MSR_P4_LASTBRANCH_0_TO_LIP 0x6c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
+
#define MSR_K6_WHCR 0xC0000082
#define MSR_K6_UWCCR 0xC0000085
#define MSR_K6_EPMR 0xC0000086
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|