# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1192630745 -3600
# Node ID 765600a13e4a05aa27c4c8810abf7882aad46406
# Parent ca2984b17fcf134cd675248499e8ed90125774ba
vmx: last branch recording MSR emulation
This required adding infrastructure to make use of VMX's MSR save/restore
feature, as well as making the MSR intercept bitmap per-VM.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
xen/arch/x86/hvm/vmx/vmcs.c | 154 +++++++++++++++++++++++++++++++++-
xen/arch/x86/hvm/vmx/vmx.c | 163 +++++++++++++++++++++++++++----------
xen/include/asm-x86/hvm/vmx/vmcs.h | 19 ++++
xen/include/asm-x86/msr-index.h | 21 ++++
4 files changed, 310 insertions(+), 47 deletions(-)
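
(Not part of the changeset: a minimal standalone C sketch of the MSR-bitmap
layout that the new vmx_disable_intercept_for_msr() below relies on, with the
read-low region at 0x000, read-high at 0x400, write-low at 0x800 and
write-high at 0xc00. clear_intercept() and the local bitmap are hypothetical
stand-ins for __clear_bit() and the per-vCPU page.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void clear_intercept(uint8_t *bitmap, uint32_t msr)
{
    uint32_t base;

    if ( msr <= 0x1fff )
        base = 0x000;                 /* MSRs 0x00000000-0x00001fff */
    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
    {
        base = 0x400;                 /* MSRs 0xc0000000-0xc0001fff */
        msr &= 0x1fff;
    }
    else
        return;                       /* MSR not covered by the bitmap */

    bitmap[base + msr / 8]         &= ~(1u << (msr & 7));  /* read intercept */
    bitmap[base + 0x800 + msr / 8] &= ~(1u << (msr & 7));  /* write intercept */
}

int main(void)
{
    uint8_t bitmap[4096];

    memset(bitmap, 0xff, sizeof(bitmap));   /* start with everything intercepted */
    clear_intercept(bitmap, 0xc0000100);    /* MSR_FS_BASE */

    /* Bit 0x100 of the read-high region lives in byte 0x420. */
    printf("read-high byte 0x420 = %#x\n", (unsigned)bitmap[0x420]);
    return 0;
}
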
diff -r ca2984b17fcf -r 765600a13e4a xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Oct 17 15:19:05 2007 +0100
@@ -413,9 +413,35 @@ static void vmx_set_host_env(struct vcpu
(unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
}
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
+{
+ char *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
+
+ /* VMX MSR bitmap supported? */
+ if ( msr_bitmap == NULL )
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if ( msr <= 0x1fff )
+ {
+ __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
+ __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
+ }
+ else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+ {
+ msr &= 0x1fff;
+ __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
+ __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
+ }
+}
+
#define GUEST_SEGMENT_LIMIT 0xffffffff
-static void construct_vmcs(struct vcpu *v)
+static int construct_vmcs(struct vcpu *v)
{
union vmcs_arbytes arbytes;
@@ -430,8 +456,24 @@ static void construct_vmcs(struct vcpu *
if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
__vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+ /* MSR access bitmap. */
if ( cpu_has_vmx_msr_bitmap )
- __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
+ {
+ char *msr_bitmap = alloc_xenheap_page();
+
+ if ( msr_bitmap == NULL )
+ return -ENOMEM;
+
+ memset(msr_bitmap, ~0, PAGE_SIZE);
+ v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+ __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+ vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
+ vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+ }
/* I/O access bitmap. */
__vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
@@ -463,10 +505,8 @@ static void construct_vmcs(struct vcpu *
__vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
/* MSR intercepts. */
- __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
- __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
+ __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
__vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
- __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
__vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
__vmwrite(VM_ENTRY_INTR_INFO, 0);
@@ -565,11 +605,108 @@ static void construct_vmcs(struct vcpu *
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
vmx_vlapic_msr_changed(v);
+
+ return 0;
+}
+
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
+{
+ unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+ const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+ for ( i = 0; i < msr_count; i++ )
+ {
+ if ( msr_area[i].index == msr )
+ {
+ *val = msr_area[i].data;
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
+{
+ unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+ struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+ for ( i = 0; i < msr_count; i++ )
+ {
+ if ( msr_area[i].index == msr )
+ {
+ msr_area[i].data = val;
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+int vmx_add_guest_msr(struct vcpu *v, u32 msr)
+{
+ unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
+ struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
+
+ for ( i = 0; i < msr_count; i++ )
+ if ( msr_area[i].index == msr )
+ return 0;
+
+ if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
+ return -ENOSPC;
+
+ if ( msr_area == NULL )
+ {
+ if ( (msr_area = alloc_xenheap_page()) == NULL )
+ return -ENOMEM;
+ v->arch.hvm_vmx.msr_area = msr_area;
+ __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
+ __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+ }
+
+ msr_area[msr_count].index = msr;
+ msr_area[msr_count].mbz = 0;
+ msr_area[msr_count].data = 0;
+ v->arch.hvm_vmx.msr_count = ++msr_count;
+ __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
+ __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
+
+ return 0;
+}
+
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
+{
+ unsigned int i, msr_count = v->arch.hvm_vmx.host_msr_count;
+ struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
+
+ for ( i = 0; i < msr_count; i++ )
+ if ( msr_area[i].index == msr )
+ return 0;
+
+ if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
+ return -ENOSPC;
+
+ if ( msr_area == NULL )
+ {
+ if ( (msr_area = alloc_xenheap_page()) == NULL )
+ return -ENOMEM;
+ v->arch.hvm_vmx.host_msr_area = msr_area;
+ __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
+ }
+
+ msr_area[msr_count].index = msr;
+ msr_area[msr_count].mbz = 0;
+ rdmsrl(msr, msr_area[msr_count].data);
+ v->arch.hvm_vmx.host_msr_count = ++msr_count;
+ __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
+
+ return 0;
}
int vmx_create_vmcs(struct vcpu *v)
{
struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
+ int rc;
if ( arch_vmx->vmcs == NULL )
{
@@ -582,7 +719,12 @@ int vmx_create_vmcs(struct vcpu *v)
arch_vmx->launched = 0;
}
- construct_vmcs(v);
+ if ( (rc = construct_vmcs(v)) != 0 )
+ {
+ vmx_free_vmcs(arch_vmx->vmcs);
+ arch_vmx->vmcs = NULL;
+ return rc;
+ }
return 0;
}
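
(Not part of the changeset: a rough sketch of the semantics behind the
vmx_add_guest_msr()/vmx_write_guest_msr() helpers added above. The guest MSR
area is a single page of {index, mbz, data} slots, searched linearly and
appended to; the real code additionally programs the VMCS store/load
addresses and counts. add_msr(), write_msr() and the static area below are
illustrative stand-ins.)

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct msr_entry { uint32_t index, mbz; uint64_t data; };

#define MSR_AREA_ENTRIES (4096 / sizeof(struct msr_entry))  /* 256 per page */

static struct msr_entry area[MSR_AREA_ENTRIES];
static unsigned int count;

static int add_msr(uint32_t msr)
{
    unsigned int i;

    for ( i = 0; i < count; i++ )
        if ( area[i].index == msr )
            return 0;                  /* already tracked */

    if ( count == MSR_AREA_ENTRIES )
        return -ENOSPC;

    area[count].index = msr;
    area[count].mbz = 0;
    area[count].data = 0;
    count++;                           /* real code also bumps the VMCS counts */
    return 0;
}

static int write_msr(uint32_t msr, uint64_t val)
{
    unsigned int i;

    for ( i = 0; i < count; i++ )
        if ( area[i].index == msr )
        {
            area[i].data = val;
            return 0;
        }

    return -ESRCH;                     /* not tracked: caller falls back */
}

int main(void)
{
    add_msr(0x000001d9);               /* MSR_IA32_DEBUGCTLMSR */
    write_msr(0x000001d9, 1);          /* LBR enable bit */
    printf("%u tracked MSR(s), MSR 0x%x = %llu\n",
           count, area[0].index, (unsigned long long)area[0].data);
    return 0;
}
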
diff -r ca2984b17fcf -r 765600a13e4a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Oct 17 15:19:05 2007 +0100
@@ -53,8 +53,6 @@
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
-char *vmx_msr_bitmap;
-
static void vmx_ctxt_switch_from(struct vcpu *v);
static void vmx_ctxt_switch_to(struct vcpu *v);
@@ -1104,26 +1102,6 @@ static int vmx_event_pending(struct vcpu
{
ASSERT(v == current);
return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
-}
-
-static void disable_intercept_for_msr(u32 msr)
-{
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if ( msr <= 0x1fff )
- {
- __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
- __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
- }
- else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
- {
- msr &= 0x1fff;
- __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
- __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
- }
}
static struct hvm_function_table vmx_function_table = {
@@ -1190,21 +1168,6 @@ void start_vmx(void)
setup_vmcs_dump();
hvm_enable(&vmx_function_table);
-
- if ( cpu_has_vmx_msr_bitmap )
- {
- printk("VMX: MSR intercept bitmap enabled\n");
- vmx_msr_bitmap = alloc_xenheap_page();
- BUG_ON(vmx_msr_bitmap == NULL);
- memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
-
- disable_intercept_for_msr(MSR_FS_BASE);
- disable_intercept_for_msr(MSR_GS_BASE);
-
- disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
- disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
- disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
- }
}
/*
@@ -1302,10 +1265,12 @@ static void vmx_do_cpuid(struct cpu_user
bitmaskof(X86_FEATURE_EST) |
bitmaskof(X86_FEATURE_TM2) |
bitmaskof(X86_FEATURE_CID) |
- bitmaskof(X86_FEATURE_PDCM));
+ bitmaskof(X86_FEATURE_PDCM) |
+ bitmaskof(X86_FEATURE_DSCPL));
edx &= ~(bitmaskof(X86_FEATURE_HT) |
bitmaskof(X86_FEATURE_ACPI) |
- bitmaskof(X86_FEATURE_ACC));
+ bitmaskof(X86_FEATURE_ACC) |
+ bitmaskof(X86_FEATURE_DS));
break;
case 0x00000004:
@@ -2239,6 +2204,82 @@ static int vmx_cr_access(unsigned long e
return 1;
}
+static const struct lbr_info {
+ u32 base, count;
+} p4_lbr[] = {
+ { MSR_P4_LER_FROM_LIP, 1 },
+ { MSR_P4_LER_TO_LIP, 1 },
+ { MSR_P4_LASTBRANCH_TOS, 1 },
+ { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+ { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
+ { 0, 0 }
+}, c2_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_C2_LASTBRANCH_TOS, 1 },
+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
+ { 0, 0 }
+#ifdef __i386__
+}, pm_lbr[] = {
+ { MSR_IA32_LASTINTFROMIP, 1 },
+ { MSR_IA32_LASTINTTOIP, 1 },
+ { MSR_PM_LASTBRANCH_TOS, 1 },
+ { MSR_PM_LASTBRANCH_0, NUM_MSR_PM_LASTBRANCH },
+ { 0, 0 }
+#endif
+};
+
+static const struct lbr_info *last_branch_msr_get(void)
+{
+ switch ( boot_cpu_data.x86 )
+ {
+ case 6:
+ switch ( boot_cpu_data.x86_model )
+ {
+#ifdef __i386__
+ /* PentiumM */
+ case 9: case 13:
+ /* Core Solo/Duo */
+ case 14:
+ return pm_lbr;
+ break;
+#endif
+ /* Core2 Duo */
+ case 15:
+ return c2_lbr;
+ break;
+ }
+ break;
+
+ case 15:
+ switch ( boot_cpu_data.x86_model )
+ {
+ /* Pentium4/Xeon with em64t */
+ case 3: case 4: case 6:
+ return p4_lbr;
+ break;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
+static int is_last_branch_msr(u32 ecx)
+{
+ const struct lbr_info *lbr = last_branch_msr_get();
+
+ if ( lbr == NULL )
+ return 0;
+
+ for ( ; lbr->count; lbr++ )
+ if ( (ecx >= lbr->base) && (ecx < (lbr->base + lbr->count)) )
+ return 1;
+
+ return 0;
+}
+
static int vmx_do_msr_read(struct cpu_user_regs *regs)
{
u64 msr_content = 0;
@@ -2263,6 +2304,10 @@ static int vmx_do_msr_read(struct cpu_us
break;
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+ break;
+ case MSR_IA32_DEBUGCTLMSR:
+ if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
+ msr_content = 0;
break;
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
@@ -2288,6 +2333,15 @@ static int vmx_do_msr_read(struct cpu_us
goto done;
}
+ if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0 )
+ break;
+
+ if ( is_last_branch_msr(ecx) )
+ {
+ msr_content = 0;
+ break;
+ }
+
if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
rdmsr_safe(ecx, eax, edx) == 0 )
{
@@ -2405,13 +2459,42 @@ static int vmx_do_msr_write(struct cpu_u
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+ case MSR_IA32_DEBUGCTLMSR: {
+ int i, rc = 0;
+
+ if ( !msr_content || (msr_content & ~3) )
+ break;
+
+ if ( msr_content & 1 )
+ {
+ const struct lbr_info *lbr = last_branch_msr_get();
+ if ( lbr == NULL )
+ break;
+
+ for ( ; (rc == 0) && lbr->count; lbr++ )
+ for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
+ if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
+ vmx_disable_intercept_for_msr(v, lbr->base + i);
+ }
+
+ if ( (rc < 0) ||
+ (vmx_add_guest_msr(v, ecx) < 0) ||
+ (vmx_add_host_load_msr(v, ecx) < 0) )
+ vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+ else
+ vmx_write_guest_msr(v, ecx, msr_content);
+
+ break;
+ }
case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
switch ( long_mode_do_msr_write(regs) )
{
case HNDL_unhandled:
- wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
+ if ( (vmx_write_guest_msr(v, ecx, msr_content) != 0) &&
+ !is_last_branch_msr(ecx) )
+ wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
break;
case HNDL_exception_raised:
return 0;
diff -r ca2984b17fcf -r 765600a13e4a xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Wed Oct 17 15:19:05 2007 +0100
@@ -33,6 +33,12 @@ struct vmcs_struct {
struct vmcs_struct {
u32 vmcs_revision_id;
unsigned char data [0]; /* vmcs size is read from MSR */
+};
+
+struct vmx_msr_entry {
+ u32 index;
+ u32 mbz;
+ u64 data;
};
enum {
@@ -72,6 +78,12 @@ struct arch_vmx_struct {
unsigned long shadow_gs;
unsigned long cstar;
#endif
+
+ char *msr_bitmap;
+ unsigned int msr_count;
+ struct vmx_msr_entry *msr_area;
+ unsigned int host_msr_count;
+ struct vmx_msr_entry *host_msr_area;
/* Following fields are all specific to vmxassist. */
unsigned long vmxassist_enabled:1;
@@ -131,7 +143,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
(vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
#define cpu_has_vmx_msr_bitmap \
(vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
-extern char *vmx_msr_bitmap;
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -268,6 +279,12 @@ enum vmcs_field {
HOST_RIP = 0x00006c16,
};
+void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
+int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
+int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
+int vmx_add_guest_msr(struct vcpu *v, u32 msr);
+int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
+
#endif /* ASM_X86_HVM_VMX_VMCS_H__ */
/*
diff -r ca2984b17fcf -r 765600a13e4a xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h Wed Oct 17 14:38:19 2007 +0100
+++ b/xen/include/asm-x86/msr-index.h Wed Oct 17 15:19:05 2007 +0100
@@ -323,6 +323,27 @@
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1
+/* Netburst (P4) last-branch recording */
+#define MSR_P4_LER_FROM_LIP 0x000001d7
+#define MSR_P4_LER_TO_LIP 0x000001d8
+#define MSR_P4_LASTBRANCH_TOS 0x000001da
+#define MSR_P4_LASTBRANCH_0 0x000001db
+#define NUM_MSR_P4_LASTBRANCH 4
+#define MSR_P4_LASTBRANCH_0_FROM_LIP 0x00000680
+#define MSR_P4_LASTBRANCH_0_TO_LIP 0x000006c0
+#define NUM_MSR_P4_LASTBRANCH_FROM_TO 16
+
+/* Pentium M (and Core) last-branch recording */
+#define MSR_PM_LASTBRANCH_TOS 0x000001c9
+#define MSR_PM_LASTBRANCH_0 0x00000040
+#define NUM_MSR_PM_LASTBRANCH 8
+
+/* Core 2 last-branch recording */
+#define MSR_C2_LASTBRANCH_TOS 0x000001c9
+#define MSR_C2_LASTBRANCH_0_FROM_IP 0x00000040
+#define MSR_C2_LASTBRANCH_0_TO_IP 0x00000060
+#define NUM_MSR_C2_LASTBRANCH_FROM_TO 4
+
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
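
(Not part of the changeset: a rough standalone sketch of the control flow the
new MSR_IA32_DEBUGCTLMSR write handler follows when a guest sets the LBR
enable bit, using only the Core 2 constants added above and omitting the
LASTINT MSRs. add_guest_msr() and disable_intercept() are hypothetical stubs
standing in for vmx_add_guest_msr() and vmx_disable_intercept_for_msr().)

#include <stdint.h>
#include <stdio.h>

/* Core 2 LBR constants, as added to msr-index.h above. */
#define MSR_C2_LASTBRANCH_TOS           0x000001c9
#define MSR_C2_LASTBRANCH_0_FROM_IP     0x00000040
#define MSR_C2_LASTBRANCH_0_TO_IP       0x00000060
#define NUM_MSR_C2_LASTBRANCH_FROM_TO   4

struct lbr_info { uint32_t base, count; };

static const struct lbr_info c2_lbr[] = {
    { MSR_C2_LASTBRANCH_TOS, 1 },
    { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
    { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
    { 0, 0 }
};

/* Stubs standing in for vmx_add_guest_msr() / vmx_disable_intercept_for_msr(). */
static int add_guest_msr(uint32_t msr)
{
    printf("  track MSR 0x%08x in the guest save/restore area\n", msr);
    return 0;
}

static void disable_intercept(uint32_t msr)
{
    printf("  pass MSR 0x%08x through to hardware\n", msr);
}

/* What the MSR_IA32_DEBUGCTLMSR write handler does for the LBR bit. */
static void handle_debugctl_write(uint64_t msr_content)
{
    const struct lbr_info *lbr;
    uint32_t i;

    if ( !(msr_content & 1) )          /* DEBUGCTL.LBR not being enabled */
        return;

    printf("guest enables LBR:\n");
    for ( lbr = c2_lbr; lbr->count; lbr++ )
        for ( i = 0; i < lbr->count; i++ )
            if ( add_guest_msr(lbr->base + i) == 0 )
                disable_intercept(lbr->base + i);
}

int main(void)
{
    handle_debugctl_write(1);          /* guest writes DEBUGCTL.LBR = 1 */
    return 0;
}
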