# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1178993075 -3600
# Node ID 384a29655270532dfb6b07fe7c5b13ca0c1514ac
# Parent 05c128b0188a7013de3806990916ff7425d78cfb
svm: Avoid VMSAVE/VMLOAD/VMSAVE/VMLOAD sequence on every vmexit/vmentry.

Instead do this only on context switches. In cases where we need
access to state that is only saved to the VMCB on VMSAVE, we track
whether the state is in sync via a per-vcpu flag and VMSAVE on demand.

The context switch code can be further improved:
 1. No need to VMLOAD host state if we are switching to another SVM VCPU.
 2. No need to VMSAVE host state at all (except once at start of day)
    because the registers that are saved do not change (or at least, none
    of the ones that matter change).

The performance improvement is about 650 cycles for a null
hypercall. This reduces the total null-hypercall time for a non-debug
build of Xen down to around 3300 cycles on my AMD X2 system.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
xen/arch/x86/hvm/svm/svm.c | 110 ++++++++++++++++++++++++++++--------
xen/arch/x86/hvm/svm/x86_32/exits.S | 12 ---
xen/arch/x86/hvm/svm/x86_64/exits.S | 13 ----
xen/arch/x86/smpboot.c | 4 -
xen/arch/x86/traps.c | 10 ++-
xen/arch/x86/x86_32/asm-offsets.c | 3
xen/arch/x86/x86_32/traps.c | 6 -
xen/arch/x86/x86_64/asm-offsets.c | 3
xen/include/asm-x86/hvm/svm/svm.h | 2
xen/include/asm-x86/hvm/svm/vmcb.h | 6 -
10 files changed, 100 insertions(+), 69 deletions(-)
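
[Editorial note] For orientation before the individual hunks, here is a
condensed sketch of the lazy-sync scheme the commit message describes. It is
a reader's summary rather than extra patch content: the flow comment ties
together pieces that land in svm.c and the two exits.S files, and the function
body mirrors the svm_sync_vmcb() added below (Xen's struct vcpu and __pa()
are assumed from the surrounding tree).

    /*
     * Sketch only -- how the pieces of this patch cooperate:
     *
     *   #VMEXIT (exits.S)        : vmcb_in_sync = 0; the guest's hidden
     *                              segment state now lives only in the CPU.
     *   fs/gs/tr/ldtr accessors  : svm_sync_vmcb(v) spills that state with
     *                              VMSAVE, but only the first time needed.
     *   svm_ctxt_switch_from()   : svm_sync_vmcb(v), then VMLOAD from the
     *                              per-CPU root VMCB to restore host state.
     *   svm_ctxt_switch_to()     : VMSAVE host state to the root VMCB, then
     *                              VMLOAD the guest VMCB before VMRUN.
     */
    static void svm_sync_vmcb(struct vcpu *v)
    {
        struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;

        if ( arch_svm->vmcb_in_sync )
            return;                     /* already spilled to the VMCB */

        arch_svm->vmcb_in_sync = 1;

        asm volatile (
            ".byte 0x0f,0x01,0xdb"      /* VMSAVE */
            : : "a" (__pa(arch_svm->vmcb)) );
    }
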
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c Sat May 12 19:04:35 2007 +0100
@@ -65,9 +65,6 @@ static void *hsa[NR_CPUS] __read_mostly;
/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;
-/* physical address of above for host VMSAVE/VMLOAD */
-u64 root_vmcb_pa[NR_CPUS] __read_mostly;
-
/* hardware assisted paging bits */
extern int opt_hap_enabled;
@@ -551,14 +548,12 @@ int svm_load_vmcb_ctxt(struct vcpu *v, s
return 0;
}
-
static inline void svm_restore_dr(struct vcpu *v)
{
if ( unlikely(v->arch.guest_context.debugreg[7] & 0xFF) )
__restore_debug_registers(v);
}
-
static int svm_realmode(struct vcpu *v)
{
unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
@@ -586,12 +581,12 @@ static int svm_guest_x86_mode(struct vcp
return (vmcb->cs.attr.fields.db ? 4 : 2);
}
-void svm_update_host_cr3(struct vcpu *v)
+static void svm_update_host_cr3(struct vcpu *v)
{
/* SVM doesn't have a HOST_CR3 equivalent to update. */
}
-void svm_update_guest_cr3(struct vcpu *v)
+static void svm_update_guest_cr3(struct vcpu *v)
{
v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
}
@@ -603,7 +598,7 @@ static void svm_update_vtpr(struct vcpu
vmcb->vintr.fields.tpr = value & 0x0f;
}
-unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
+static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
{
switch ( num )
{
@@ -621,6 +616,20 @@ unsigned long svm_get_ctrl_reg(struct vc
return 0; /* dummy */
}
+static void svm_sync_vmcb(struct vcpu *v)
+{
+ struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
+
+ if ( arch_svm->vmcb_in_sync )
+ return;
+
+ arch_svm->vmcb_in_sync = 1;
+
+ asm volatile (
+ ".byte 0x0f,0x01,0xdb" /* vmsave */
+ : : "a" (__pa(arch_svm->vmcb)) );
+}
+
static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -634,13 +643,13 @@ static unsigned long svm_get_segment_bas
case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
- case x86_seg_fs: return vmcb->fs.base;
- case x86_seg_gs: return vmcb->gs.base;
+ case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
+ case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
- case x86_seg_tr: return vmcb->tr.base;
+ case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
case x86_seg_gdtr: return vmcb->gdtr.base;
case x86_seg_idtr: return vmcb->idtr.base;
- case x86_seg_ldtr: return vmcb->ldtr.base;
+ case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
}
BUG();
return 0;
@@ -652,16 +661,40 @@ static void svm_get_segment_register(str
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
switch ( seg )
{
- case x86_seg_cs: memcpy(reg, &vmcb->cs, sizeof(*reg)); break;
- case x86_seg_ds: memcpy(reg, &vmcb->ds, sizeof(*reg)); break;
- case x86_seg_es: memcpy(reg, &vmcb->es, sizeof(*reg)); break;
- case x86_seg_fs: memcpy(reg, &vmcb->fs, sizeof(*reg)); break;
- case x86_seg_gs: memcpy(reg, &vmcb->gs, sizeof(*reg)); break;
- case x86_seg_ss: memcpy(reg, &vmcb->ss, sizeof(*reg)); break;
- case x86_seg_tr: memcpy(reg, &vmcb->tr, sizeof(*reg)); break;
- case x86_seg_gdtr: memcpy(reg, &vmcb->gdtr, sizeof(*reg)); break;
- case x86_seg_idtr: memcpy(reg, &vmcb->idtr, sizeof(*reg)); break;
- case x86_seg_ldtr: memcpy(reg, &vmcb->ldtr, sizeof(*reg)); break;
+ case x86_seg_cs:
+ memcpy(reg, &vmcb->cs, sizeof(*reg));
+ break;
+ case x86_seg_ds:
+ memcpy(reg, &vmcb->ds, sizeof(*reg));
+ break;
+ case x86_seg_es:
+ memcpy(reg, &vmcb->es, sizeof(*reg));
+ break;
+ case x86_seg_fs:
+ svm_sync_vmcb(v);
+ memcpy(reg, &vmcb->fs, sizeof(*reg));
+ break;
+ case x86_seg_gs:
+ svm_sync_vmcb(v);
+ memcpy(reg, &vmcb->gs, sizeof(*reg));
+ break;
+ case x86_seg_ss:
+ memcpy(reg, &vmcb->ss, sizeof(*reg));
+ break;
+ case x86_seg_tr:
+ svm_sync_vmcb(v);
+ memcpy(reg, &vmcb->tr, sizeof(*reg));
+ break;
+ case x86_seg_gdtr:
+ memcpy(reg, &vmcb->gdtr, sizeof(*reg));
+ break;
+ case x86_seg_idtr:
+ memcpy(reg, &vmcb->idtr, sizeof(*reg));
+ break;
+ case x86_seg_ldtr:
+ svm_sync_vmcb(v);
+ memcpy(reg, &vmcb->ldtr, sizeof(*reg));
+ break;
default: BUG();
}
}
@@ -761,11 +794,26 @@ static void svm_load_cpu_guest_regs(stru
static void svm_ctxt_switch_from(struct vcpu *v)
{
+ int cpu = smp_processor_id();
+
svm_save_dr(v);
+
+ svm_sync_vmcb(v);
+
+ asm volatile (
+ ".byte 0x0f,0x01,0xda" /* vmload */
+ : : "a" (__pa(root_vmcb[cpu])) );
+
+#ifdef __x86_64__
+ /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
+ idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+#endif
}
static void svm_ctxt_switch_to(struct vcpu *v)
{
+ int cpu = smp_processor_id();
+
#ifdef __x86_64__
/*
* This is required, because VMRUN does consistency check
@@ -776,8 +824,22 @@ static void svm_ctxt_switch_to(struct vc
set_segment_register(ds, 0);
set_segment_register(es, 0);
set_segment_register(ss, 0);
+
+ /*
+ * Cannot use IST2 for NMIs while we are running with the guest TR. But
+ * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+ */
+ idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
#endif
+
svm_restore_dr(v);
+
+ asm volatile (
+ ".byte 0x0f,0x01,0xdb" /* vmsave */
+ : : "a" (__pa(root_vmcb[cpu])) );
+ asm volatile (
+ ".byte 0x0f,0x01,0xda" /* vmload */
+ : : "a" (__pa(v->arch.hvm_svm.vmcb)) );
}
static void svm_do_resume(struct vcpu *v)
@@ -925,8 +987,6 @@ int start_svm(void)
phys_hsa_hi = (u32) (phys_hsa >> 32);
wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
- root_vmcb_pa[cpu] = virt_to_maddr(root_vmcb[cpu]);
-
if ( cpu != 0 )
return 1;
@@ -1196,9 +1256,11 @@ static void svm_get_prefix_info(struct v
*seg = &vmcb->es;
continue;
case 0x64: /* FS */
+ svm_sync_vmcb(v);
*seg = &vmcb->fs;
continue;
case 0x65: /* GS */
+ svm_sync_vmcb(v);
*seg = &vmcb->gs;
continue;
case 0x3e: /* DS */
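
[Editorial note] On the IST manipulation in svm_ctxt_switch_{from,to}() above
(my reading of the descriptor layout, not something the patch states): on
x86_64 an IDT entry is a 16-byte gate whose low quadword Xen exposes as the
field 'a', with the 3-bit IST index at bits 32-34. Because the NMI gate only
ever alternates between IST 0 and IST 2, toggling bit 33 via 2UL << 32 is
sufficient. A minimal sketch, using a hypothetical stand-in type rather than
Xen's idt_entry_t:

    /* Hypothetical stand-in for Xen's x86_64 idt_entry_t. */
    typedef struct { unsigned long a, b; } gate64_t;   /* low, high quadwords */

    static inline void nmi_gate_use_ist2(gate64_t *gate, int enable)
    {
        if ( enable )
            gate->a |= (2UL << 32);     /* IST field := 2 -> NMIs on IST2 stack */
        else
            gate->a &= ~(2UL << 32);    /* IST field := 0 -> current stack */
    }
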
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/hvm/svm/x86_32/exits.S
--- a/xen/arch/x86/hvm/svm/x86_32/exits.S Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/hvm/svm/x86_32/exits.S Sat May 12 19:04:35 2007 +0100
@@ -45,8 +45,6 @@
pushl %ebx;
#define VMRUN .byte 0x0F,0x01,0xD8
-#define VMLOAD .byte 0x0F,0x01,0xDA
-#define VMSAVE .byte 0x0F,0x01,0xDB
#define STGI .byte 0x0F,0x01,0xDC
#define CLGI .byte 0x0F,0x01,0xDD
@@ -66,9 +64,6 @@ ENTRY(svm_asm_do_resume)
movl VCPU_svm_vmcb(%ebx),%ecx
movl UREGS_eax(%esp),%eax
movl %eax,VMCB_rax(%ecx)
- movl VCPU_processor(%ebx),%eax
- movl root_vmcb_pa(,%eax,8),%eax
- VMSAVE
movl VCPU_svm_vmcb_pa(%ebx),%eax
popl %ebx
@@ -78,19 +73,16 @@ ENTRY(svm_asm_do_resume)
popl %edi
popl %ebp
addl $(NR_SKIPPED_REGS*4),%esp
- VMLOAD
+
VMRUN
- VMSAVE
HVM_SAVE_ALL_NOSEGREGS
GET_CURRENT(%ebx)
+ movb $0,VCPU_svm_vmcb_in_sync(%ebx)
movl VCPU_svm_vmcb(%ebx),%ecx
movl VMCB_rax(%ecx),%eax
movl %eax,UREGS_eax(%esp)
- movl VCPU_processor(%ebx),%eax
- movl root_vmcb_pa(,%eax,8),%eax
- VMLOAD
STGI
.globl svm_stgi_label;
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/hvm/svm/x86_64/exits.S
--- a/xen/arch/x86/hvm/svm/x86_64/exits.S Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Sat May 12 19:04:35 2007 +0100
@@ -54,8 +54,6 @@
pushq %r15;
#define VMRUN .byte 0x0F,0x01,0xD8
-#define VMLOAD .byte 0x0F,0x01,0xDA
-#define VMSAVE .byte 0x0F,0x01,0xDB
#define STGI .byte 0x0F,0x01,0xDC
#define CLGI .byte 0x0F,0x01,0xDD
@@ -76,10 +74,6 @@ ENTRY(svm_asm_do_resume)
movq VCPU_svm_vmcb(%rbx),%rcx
movq UREGS_rax(%rsp),%rax
movq %rax,VMCB_rax(%rcx)
- leaq root_vmcb_pa(%rip),%rax
- movl VCPU_processor(%rbx),%ecx
- movq (%rax,%rcx,8),%rax
- VMSAVE
movq VCPU_svm_vmcb_pa(%rbx),%rax
popq %r15
@@ -99,20 +93,15 @@ ENTRY(svm_asm_do_resume)
popq %rdi
addq $(NR_SKIPPED_REGS*8),%rsp
- VMLOAD
VMRUN
- VMSAVE
HVM_SAVE_ALL_NOSEGREGS
GET_CURRENT(%rbx)
+ movb $0,VCPU_svm_vmcb_in_sync(%rbx)
movq VCPU_svm_vmcb(%rbx),%rcx
movq VMCB_rax(%rcx),%rax
movq %rax,UREGS_rax(%rsp)
- leaq root_vmcb_pa(%rip),%rax
- movl VCPU_processor(%rbx),%ecx
- movq (%rax,%rcx,8),%rax
- VMLOAD
STGI
.globl svm_stgi_label;
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/smpboot.c Sat May 12 19:04:35 2007 +0100
@@ -460,7 +460,6 @@ set_cpu_sibling_map(int cpu)
}
}
-#ifdef CONFIG_X86_32
static void construct_percpu_idt(unsigned int cpu)
{
unsigned char idt_load[10];
@@ -472,7 +471,6 @@ static void construct_percpu_idt(unsigne
*(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
__asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
}
-#endif
/*
* Activate a secondary processor.
@@ -500,13 +498,11 @@ void __devinit start_secondary(void *unu
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
rep_nop();
-#ifdef CONFIG_X86_32
/*
* At this point, boot CPU has fully initialised the IDT. It is
* now safe to make ourselves a private copy.
*/
construct_percpu_idt(cpu);
-#endif
setup_secondary_APIC_clock();
enable_APIC_timer();
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/traps.c Sat May 12 19:04:35 2007 +0100
@@ -75,8 +75,11 @@ char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);
-/* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
+/* Master table, used by CPU0. */
idt_entry_t idt_table[IDT_ENTRIES];
+
+/* Pointer to the IDT of every CPU. */
+idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
#define DECLARE_TRAP_HANDLER(_name) \
asmlinkage void _name(void); \
@@ -2025,13 +2028,11 @@ asmlinkage int do_spurious_interrupt_bug
void set_intr_gate(unsigned int n, void *addr)
{
-#ifdef __i386__
int i;
/* Keep secondary tables in sync with IRQ updates. */
for ( i = 1; i < NR_CPUS; i++ )
if ( idt_tables[i] != NULL )
_set_gate(&idt_tables[i][n], 14, 0, addr);
-#endif
_set_gate(&idt_table[n], 14, 0, addr);
}
@@ -2093,6 +2094,9 @@ void __init trap_init(void)
set_intr_gate(TRAP_alignment_check,&alignment_check);
set_intr_gate(TRAP_machine_check,&machine_check);
set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
+
+ /* CPU0 uses the master IDT. */
+ idt_tables[0] = idt_table;
percpu_traps_init();
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/x86_32/asm-offsets.c Sat May 12 19:04:35 2007 +0100
@@ -81,7 +81,7 @@ void __dummy__(void)
OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
OFFSET(VCPU_svm_vmcb, struct vcpu, arch.hvm_svm.vmcb);
- OFFSET(VCPU_svm_vmexit_tsc, struct vcpu, arch.hvm_svm.vmexit_tsc);
+ OFFSET(VCPU_svm_vmcb_in_sync, struct vcpu, arch.hvm_svm.vmcb_in_sync);
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
@@ -89,7 +89,6 @@ void __dummy__(void)
BLANK();
OFFSET(VMCB_rax, struct vmcb_struct, rax);
- OFFSET(VMCB_tsc_offset, struct vmcb_struct, tsc_offset);
BLANK();
OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/x86_32/traps.c Sat May 12 19:04:35 2007 +0100
@@ -18,9 +18,6 @@
#include <public/callback.h>
-/* All CPUs have their own IDT to allow int80 direct trap. */
-idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
-
static void print_xen_info(void)
{
char taint_str[TAINT_STRING_MAX_LEN];
@@ -252,9 +249,6 @@ void __init percpu_traps_init(void)
if ( smp_processor_id() != 0 )
return;
- /* CPU0 uses the master IDT. */
- idt_tables[0] = idt_table;
-
/* The hypercall entry vector is only accessible from ring 1. */
_set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
diff -r 05c128b0188a -r 384a29655270 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Sat May 12 16:24:50 2007 +0100
+++ b/xen/arch/x86/x86_64/asm-offsets.c Sat May 12 19:04:35 2007 +0100
@@ -84,7 +84,7 @@ void __dummy__(void)
OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
OFFSET(VCPU_svm_vmcb, struct vcpu, arch.hvm_svm.vmcb);
- OFFSET(VCPU_svm_vmexit_tsc, struct vcpu, arch.hvm_svm.vmexit_tsc);
+ OFFSET(VCPU_svm_vmcb_in_sync, struct vcpu, arch.hvm_svm.vmcb_in_sync);
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
@@ -95,7 +95,6 @@ void __dummy__(void)
BLANK();
OFFSET(VMCB_rax, struct vmcb_struct, rax);
- OFFSET(VMCB_tsc_offset, struct vmcb_struct, tsc_offset);
BLANK();
OFFSET(VCPUINFO_upcall_pending, struct vcpu_info, evtchn_upcall_pending);
diff -r 05c128b0188a -r 384a29655270 xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Sat May 12 16:24:50 2007 +0100
+++ b/xen/include/asm-x86/hvm/svm/svm.h Sat May 12 19:04:35 2007 +0100
@@ -29,8 +29,6 @@
#include <asm/i387.h>
extern void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
-
-extern u64 root_vmcb_pa[NR_CPUS];
static inline int svm_long_mode_enabled(struct vcpu *v)
{
diff -r 05c128b0188a -r 384a29655270 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Sat May 12 16:24:50 2007 +0100
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Sat May 12 19:04:35 2007 +0100
@@ -444,11 +444,9 @@ struct arch_svm_struct {
struct arch_svm_struct {
struct vmcb_struct *vmcb;
u64 vmcb_pa;
- u32 *msrpm;
- u64 vmexit_tsc; /* tsc read at #VMEXIT. for TSC_OFFSET */
+ u32 *msrpm;
int launch_core;
-
- unsigned long flags; /* VMCB flags */
+ bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */
unsigned long cpu_shadow_cr0; /* Guest value for CR0 */
unsigned long cpu_shadow_cr4; /* Guest value for CR4 */
unsigned long cpu_shadow_efer; /* Guest value for EFER */
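
[Editorial note] The commit message lists two context-switch improvements that
this changeset intentionally leaves for later. Purely to illustrate item 2
(VMSAVE host state only once per CPU, since the saved registers do not change
after start of day), a hypothetical sketch assuming Xen's per-CPU variable
helpers (DEFINE_PER_CPU/per_cpu) and the root_vmcb[] array from svm.c; the
debug-register and segment handling of the real svm_ctxt_switch_to() is
omitted:

    /* Hypothetical follow-on -- NOT part of this changeset. */
    DEFINE_PER_CPU(int, host_vmcb_saved);               /* assumed flag */

    static void svm_ctxt_switch_to_sketch(struct vcpu *v)
    {
        int cpu = smp_processor_id();

        if ( !per_cpu(host_vmcb_saved, cpu) )
        {
            /* Host state captured by VMSAVE is invariant after start of day. */
            asm volatile (
                ".byte 0x0f,0x01,0xdb"  /* VMSAVE */
                : : "a" (__pa(root_vmcb[cpu])) );
            per_cpu(host_vmcb_saved, cpu) = 1;
        }

        asm volatile (
            ".byte 0x0f,0x01,0xda"      /* VMLOAD */
            : : "a" (__pa(v->arch.hvm_svm.vmcb)) );
    }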