This adds syscall and sysenter support both for 32-bit applications in
64-bit pv guests and for 32on64 guests.

I implemented both a hypercall interface and MSR emulation for registering
the new callbacks, but I'm not sure both mechanisms need to stay.

Save/restore/migrate now depends on more than just guest_context being
preserved as guest state (namely on the new fields holding the callback
addresses); that part isn't implemented yet, and I likely won't do it.

Since the 32-bit kernel doesn't make use of syscall (it could do so now
when running on a 64-bit hv), the compat mode guest code path for syscall
is untested.
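
For reference, registration through the hypercall interface is meant to look
roughly like the sketch below when done from a 64-bit pv guest kernel. This
is only an illustration: HYPERVISOR_callback_op() is the usual guest wrapper,
and xen_syscall32_target/xen_sysenter_target are hypothetical guest entry
points, not part of this patch.

    /* Sketch: registering the new 32-bit entry callbacks from a 64-bit
     * pv guest kernel.  Failure is non-fatal; the guest would then just
     * hide SYSCALL/SEP from its 32-bit applications. */
    #include <xen/interface/callback.h>

    extern void xen_syscall32_target(void), xen_sysenter_target(void);

    static void register_compat_fast_syscalls(void)
    {
        struct callback_register syscall32 = {
            .type    = CALLBACKTYPE_syscall32,
            .flags   = CALLBACKF_mask_events,  /* enter with events masked */
            .address = (unsigned long)xen_syscall32_target,
        };
        struct callback_register sysenter = {
            .type    = CALLBACKTYPE_sysenter,
            .flags   = 0,
            .address = (unsigned long)xen_sysenter_target,
        };

        if ( HYPERVISOR_callback_op(CALLBACKOP_register, &syscall32) ||
             HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) )
            printk("Xen: 32-bit fast system call setup failed\n");
    }
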
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Index: 2007-07-03/xen/arch/x86/domain.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/domain.c 2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/domain.c 2007-07-03 10:39:13.000000000 +0200
@@ -395,6 +395,12 @@ int vcpu_initialise(struct vcpu *v)
v->arch.perdomain_ptes =
d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
+#ifdef __x86_64__
+ v->arch.sysexit_cs = 3;
+ v->arch.syscall_eflags_mask = X86_EFLAGS_DF|X86_EFLAGS_TF|X86_EFLAGS_NT|
+ X86_EFLAGS_RF|X86_EFLAGS_VM;
+#endif
+
return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}
@@ -607,7 +613,18 @@ int arch_set_info_guest(
v->arch.flags |= TF_kernel_mode;
if ( !compat )
+ {
memcpy(&v->arch.guest_context, c.nat, sizeof(*c.nat));
+#ifdef __x86_64__
+ /*
+ * Despite this not being correct, stay backwards compatible: most
+ * importantly, prevent the guest from crashing due to use of syscall
+ * from compatibility mode when the kernel didn't set the compatibility
+ * mode callback.
+ */
+ v->arch.syscall32_callback_eip = c.nat->syscall_callback_eip;
+#endif
+ }
#ifdef CONFIG_COMPAT
else
{
@@ -1274,7 +1291,9 @@ void context_switch(struct vcpu *prev, s
local_flush_tlb_one(GDT_VIRT_START(next) +
FIRST_RESERVED_GDT_BYTE);
- if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
+ if ( (!is_pv_32on64_vcpu(next)
+ || (next->arch.syscall32_callback_cs & ~3)) ==
+ !(efer & EFER_SCE) )
write_efer(efer ^ EFER_SCE);
}
#endif
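
The modified EFER.SCE condition in context_switch() above is rather dense; in
plain C it is equivalent to the following sketch (illustration only, the
actual logic is in the hunk above):

    /* Sketch: SYSCALL must be enabled for native 64-bit guests always,
     * and for 32on64 guests only once a non-null syscall32 callback
     * selector was registered. */
    static void sync_efer_sce(const struct vcpu *next, unsigned long efer)
    {
        int want_sce = !is_pv_32on64_vcpu(next) ||
                       (next->arch.syscall32_callback_cs & ~3);

        if ( want_sce != !!(efer & EFER_SCE) )
            write_efer(efer ^ EFER_SCE);
    }
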
Index: 2007-07-03/xen/arch/x86/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/traps.c 2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/traps.c 2007-07-04 13:21:20.000000000 +0200
@@ -609,16 +609,21 @@ static int emulate_forced_invalid_op(str
clear_bit(X86_FEATURE_DE, &d);
clear_bit(X86_FEATURE_PSE, &d);
clear_bit(X86_FEATURE_PGE, &d);
+ if ( !cpu_has_sep )
+ clear_bit(X86_FEATURE_SEP, &d);
+#ifdef __i386__
if ( !supervisor_mode_kernel )
clear_bit(X86_FEATURE_SEP, &d);
+#endif
if ( !IS_PRIV(current->domain) )
clear_bit(X86_FEATURE_MTRR, &d);
}
else if ( regs->eax == 0x80000001 )
{
/* Modify Feature Information. */
- if ( is_pv_32bit_vcpu(current) )
- clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#ifdef __i386__
+ clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#endif
clear_bit(X86_FEATURE_RDTSCP % 32, &d);
}
else
@@ -1695,6 +1700,8 @@ static int emulate_privileged_op(struct
break;
case 0x30: /* WRMSR */
+ data = regs->eax;
+ res = ((u64)regs->edx << 32) | data;
switch ( regs->ecx )
{
#ifdef CONFIG_X86_64
@@ -1703,24 +1710,87 @@ static int emulate_privileged_op(struct
goto fail;
if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
goto fail;
- v->arch.guest_context.fs_base =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.fs_base = res;
break;
case MSR_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
goto fail;
- v->arch.guest_context.gs_base_kernel =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.gs_base_kernel = res;
break;
case MSR_SHADOW_GS_BASE:
if ( is_pv_32on64_vcpu(v) )
goto fail;
if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
goto fail;
- v->arch.guest_context.gs_base_user =
- ((u64)regs->edx << 32) | regs->eax;
+ v->arch.guest_context.gs_base_user = res;
+ break;
+ case MSR_STAR:
+ if ( is_pv_32on64_vcpu(v) )
+ {
+ v->arch.syscall32_callback_eip = data;
+ v->arch.syscall32_callback_cs = (uint16_t)regs->edx;
+ fixup_guest_code_selector(v->domain,
+ v->arch.syscall32_callback_cs);
+ }
+ break;
+ case MSR_LSTAR:
+ if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+ goto fail;
+ v->arch.guest_context.syscall_callback_eip = res;
+ break;
+ case MSR_CSTAR:
+ if ( is_pv_32on64_vcpu(v) || !is_canonical_address(res) )
+ goto fail;
+ v->arch.syscall32_callback_eip = res;
+ break;
+ case MSR_SYSCALL_MASK:
+ if ( is_pv_32on64_vcpu(v) || (uint32_t)regs->edx != 0 )
+ goto fail;
+ v->arch.syscall_eflags_mask = data &
+ ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+ if ( data & X86_EFLAGS_IF )
+ {
+ set_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
+ v->arch.syscall32_disables_events = 1;
+ }
+ else
+ {
+ clear_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
+ v->arch.syscall32_disables_events = 0;
+ }
+ break;
+ case MSR_IA32_SYSENTER_CS:
+ if ( is_pv_32on64_vcpu(v) )
+ {
+ v->arch.sysenter_callback_cs = data;
+ fixup_guest_code_selector(v->domain,
+ v->arch.sysenter_callback_cs);
+ /*
+ * While this doesn't match real SYSENTER behavior, the guest
+ * generally doesn't have a need to switch stacks (or anything
+ * else that needs to keep interrupts disabled). If the guest
+ * really needs interrupts disabled on entry, it can still use
+ * the corresponding hypercall.
+ */
+ v->arch.sysenter_disables_events = 0;
+ }
+ v->arch.sysexit_cs = (data + 16) | 3;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+ goto fail;
+ v->arch.sysenter_callback_eip = is_pv_32on64_vcpu(v) ? data : res;
+ if ( !is_pv_32on64_vcpu(v) )
+ /* See comment above. */
+ v->arch.sysenter_disables_events = 0;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ if ( !is_pv_32on64_vcpu(v) && !is_canonical_address(res) )
+ goto fail;
break;
#endif
default:
@@ -1758,6 +1828,53 @@ static int emulate_privileged_op(struct
regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
regs->edx = v->arch.guest_context.gs_base_user >> 32;
break;
+ case MSR_STAR:
+ if ( is_pv_32on64_vcpu(v) )
+ {
+ regs->eax = v->arch.syscall32_callback_eip;
+ regs->edx = v->arch.syscall32_callback_cs |
+ (FLAT_COMPAT_USER_CS << 16);
+ }
+ else
+ regs->edx = FLAT_KERNEL_CS64 | (FLAT_USER_CS64 << 16);
+ break;
+ case MSR_LSTAR:
+ if ( is_pv_32on64_vcpu(v) )
+ goto fail;
+ regs->eax = (uint32_t)v->arch.guest_context.syscall_callback_eip;
+ regs->edx = v->arch.guest_context.syscall_callback_eip >> 32;
+ break;
+ case MSR_CSTAR:
+ if ( is_pv_32on64_vcpu(v) )
+ goto fail;
+ regs->eax = (uint32_t)v->arch.syscall32_callback_eip;
+ regs->edx = v->arch.syscall32_callback_eip >> 32;
+ break;
+ case MSR_SYSCALL_MASK:
+ if ( is_pv_32on64_vcpu(v) )
+ goto fail;
+ data = v->arch.syscall_eflags_mask;
+ if ( test_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags) )
+ data |= X86_EFLAGS_IF;
+ regs->eax = data;
+ regs->edx = 0;
+ break;
+ case MSR_IA32_SYSENTER_CS:
+ if ( is_pv_32on64_vcpu(v) )
+ regs->eax = v->arch.sysenter_callback_cs;
+ else
+ regs->eax = FLAT_KERNEL_CS64;
+ regs->edx = 0;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ regs->eax = (uint32_t)v->arch.sysenter_callback_eip;
+ regs->edx = v->arch.sysenter_callback_eip >> 32;
+ break;
+ case MSR_IA32_SYSENTER_ESP:
+ regs->eax = (uint32_t)v->arch.guest_context.kernel_sp;
+ regs->edx = v->arch.guest_context.kernel_sp >> 32;
+ break;
#endif
case MSR_EFER:
if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
@@ -2026,6 +2143,13 @@ asmlinkage int do_debug(struct cpu_user_
if ( !guest_mode(regs) )
{
+#ifdef __x86_64__
+ /*
+ * Single stepping across sysenter must not result in the single step
+ * flag being lost: record it here for create_bounce_frame to pick up.
+ */
+ v->arch.eflags_mask |= (regs->eflags & EF_TF);
+#endif
/* Clear TF just for absolute sanity. */
regs->eflags &= ~EF_TF;
/*
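
The MSR emulation above lets a 32on64 guest kernel program its entry points
exactly as it would program the real MSRs on native hardware. A minimal
sketch of the guest side (wrmsr() is the usual guest helper; syscall_entry,
sysenter_entry and GUEST_SYSCALL_CS are hypothetical):

    /* Sketch: 32on64 guest kernel registering entry points via WRMSR.
     * Per the STAR emulation above, %eax carries the 32-bit entry EIP
     * and the low 16 bits of %edx carry the code selector. */
    #define GUEST_SYSCALL_CS 0x61   /* hypothetical ring-1 selector */

    extern void syscall_entry(void), sysenter_entry(void);

    static void setup_fast_syscall(void)
    {
        /* SYSCALL from 32-bit userland: EIP in EAX, CS in EDX[15:0]. */
        wrmsr(MSR_STAR, (u32)(unsigned long)syscall_entry, GUEST_SYSCALL_CS);

        /* SYSENTER: Xen forces events to stay enabled on entry (see the
         * comment in the SYSENTER_CS case above). */
        wrmsr(MSR_IA32_SYSENTER_CS, GUEST_SYSCALL_CS, 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (u32)(unsigned long)sysenter_entry, 0);
        wrmsr(MSR_IA32_SYSENTER_ESP, 0, 0);  /* accepted but unused here */
    }
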
Index: 2007-07-03/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_32/traps.c 2007-06-22 16:57:45.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_32/traps.c 2007-07-03 10:39:14.000000000 +0200
@@ -329,12 +329,19 @@ static long register_guest_callback(stru
break;
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- case CALLBACKTYPE_sysenter:
+ case CALLBACKTYPE_sysenter_deprecated:
if ( ! cpu_has_sep )
ret = -EINVAL;
else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
ret = -EIO;
break;
+
+ case CALLBACKTYPE_sysenter:
+ if ( ! cpu_has_sep )
+ ret = -EINVAL;
+ else
+ do_update_sysenter(&reg->address);
+ break;
#endif
case CALLBACKTYPE_nmi:
@@ -358,6 +365,7 @@ static long unregister_guest_callback(st
case CALLBACKTYPE_event:
case CALLBACKTYPE_failsafe:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+ case CALLBACKTYPE_sysenter_deprecated:
case CALLBACKTYPE_sysenter:
#endif
ret = -EINVAL;
Index: 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/asm-offsets.c 2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/asm-offsets.c 2007-07-04 12:51:10.000000000 +0200
@@ -71,6 +71,22 @@ void __dummy__(void)
arch.guest_context.failsafe_callback_cs);
OFFSET(VCPU_syscall_addr, struct vcpu,
arch.guest_context.syscall_callback_eip);
+ OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
+ OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
+ OFFSET(VCPU_syscall32_disables_events, struct vcpu,
+ arch.syscall32_disables_events);
+ OFFSET(VCPU_syscall_eflags_mask, struct vcpu, arch.syscall_eflags_mask);
+ OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
+ OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
+ OFFSET(VCPU_sysenter_disables_events, struct vcpu,
+ arch.sysenter_disables_events);
+ OFFSET(VCPU_sysexit_addr, struct vcpu, arch.sysexit_eip);
+ OFFSET(VCPU_sysexit_sel, struct vcpu, arch.sysexit_cs);
+ OFFSET(VCPU_eflags_mask, struct vcpu, arch.eflags_mask);
+ OFFSET(VCPU_gp_fault_addr, struct vcpu,
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+ OFFSET(VCPU_gp_fault_sel, struct vcpu,
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
Index: 2007-07-03/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/entry.S 2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/entry.S 2007-07-04 13:26:46.000000000 +0200
@@ -188,6 +188,39 @@ ENTRY(compat_post_handle_exception)
movb $0,TRAPBOUNCE_flags(%rdx)
jmp compat_test_all_events
+ENTRY(compat_syscall)
+ cmpb $0,VCPU_syscall32_disables_events(%rbx)
+ movzwl VCPU_syscall32_sel(%rbx),%esi
+ movq VCPU_syscall32_addr(%rbx),%rax
+ setne %cl
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testl $~3,%esi
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ jz 2f
+1: movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw %si,TRAPBOUNCE_cs(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ call compat_create_bounce_frame
+ jmp compat_test_all_events
+2: movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+ movzwl VCPU_gp_fault_sel(%rbx),%esi
+ movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $0,TRAPBOUNCE_error_code(%rdx)
+ jmp 1b
+
+ENTRY(compat_sysenter)
+ cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ movzwl VCPU_sysenter_sel(%rbx),%eax
+ movzwl VCPU_gp_fault_sel(%rbx),%ecx
+ cmovel %ecx,%eax
+ testl $~3,%eax
+ movl $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp)
+ cmovzl %ecx,%eax
+ movw %ax,TRAPBOUNCE_cs(%rdx)
+ call compat_create_bounce_frame
+ jmp compat_test_all_events
+
ENTRY(compat_int80_direct_trap)
call compat_create_bounce_frame
jmp compat_test_all_events
@@ -230,7 +263,9 @@ compat_create_bounce_frame:
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
andl $~X86_EFLAGS_IF,%eax
- shlb $1,%ch # Bit 9 (EFLAGS.IF)
+ addb %ch,%ch # Bit 9 (EFLAGS.IF)
+ orl VCPU_eflags_mask(%rbx),%eax
+ movl $0,VCPU_eflags_mask(%rbx)
orb %ch,%ah # Fold EFLAGS.IF into %eax
.Lft6: movl %eax,%fs:2*4(%rsi) # EFLAGS
movl UREGS_rip+8(%rsp),%eax
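
In C terms, the reworked EFLAGS handling in compat_create_bounce_frame (and
its 64-bit counterpart in entry.S below) computes the following; this is just
an explanatory sketch of the assembly above:

    /* Sketch: EFLAGS value placed in the bounce frame.  saved_upcall_mask
     * stands in for EFLAGS.IF, and eflags_mask lets do_debug() re-inject
     * a TF bit that would otherwise be lost across sysenter. */
    static unsigned int bounce_frame_eflags(struct vcpu *v,
                                            unsigned int guest_eflags,
                                            uint8_t saved_upcall_mask)
    {
        unsigned int eflags = guest_eflags & ~X86_EFLAGS_IF;

        eflags |= v->arch.eflags_mask;   /* e.g. a deferred X86_EFLAGS_TF */
        v->arch.eflags_mask = 0;         /* one-shot */

        if ( !saved_upcall_mask )        /* events enabled -> virtual IF */
            eflags |= X86_EFLAGS_IF;

        return eflags;
    }
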
Index: 2007-07-03/xen/arch/x86/x86_64/compat/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/compat/traps.c 2007-07-03 10:35:22.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/compat/traps.c 2007-07-03 11:54:46.000000000 +0200
@@ -160,12 +160,35 @@ static long compat_register_guest_callba
&v->arch.guest_context.flags);
break;
+ case CALLBACKTYPE_syscall:
+ v->arch.syscall32_callback_cs = reg->address.cs;
+ v->arch.syscall32_callback_eip = reg->address.eip;
+ v->arch.syscall32_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ if ( v->arch.syscall32_callback_cs & ~3 )
+ write_efer(read_efer() | EFER_SCE);
+ else
+ write_efer(read_efer() & ~EFER_SCE);
+ break;
+
+ case CALLBACKTYPE_sysenter:
+ v->arch.sysenter_callback_cs = reg->address.cs;
+ v->arch.sysenter_callback_eip = reg->address.eip;
+ v->arch.sysenter_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ break;
+
+ case CALLBACKTYPE_sysexit:
+ v->arch.sysexit_cs = reg->address.cs | 3;
+ v->arch.sysexit_eip = reg->address.eip;
+ break;
+
case CALLBACKTYPE_nmi:
ret = register_guest_nmi_callback(reg->address.eip);
break;
default:
- ret = -EINVAL;
+ ret = -ENOSYS;
break;
}
@@ -178,12 +201,20 @@ static long compat_unregister_guest_call
switch ( unreg->type )
{
+ case CALLBACKTYPE_event:
+ case CALLBACKTYPE_failsafe:
+ case CALLBACKTYPE_syscall:
+ case CALLBACKTYPE_sysenter:
+ case CALLBACKTYPE_sysexit:
+ ret = -EINVAL;
+ break;
+
case CALLBACKTYPE_nmi:
ret = unregister_guest_nmi_callback();
break;
default:
- ret = -EINVAL;
+ ret = -ENOSYS;
break;
}
Index: 2007-07-03/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/entry.S 2007-07-03 10:35:37.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/entry.S 2007-07-04 12:48:33.000000000 +0200
@@ -26,15 +26,19 @@
ALIGN
/* %rbx: struct vcpu */
switch_to_kernel:
- leaq VCPU_trap_bounce(%rbx),%rdx
+ cmpw $FLAT_USER_CS32,UREGS_cs(%rsp)
movq VCPU_syscall_addr(%rbx),%rax
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ cmoveq VCPU_syscall32_addr(%rbx),%rax
+ btl $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
movq %rax,TRAPBOUNCE_eip(%rdx)
- movb $0,TRAPBOUNCE_flags(%rdx)
- bt $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
- jnc 1f
- movb $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
-1: call create_bounce_frame
- andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
+ setc %cl
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
+ movl VCPU_syscall_eflags_mask(%rbx),%eax
+ notl %eax
+ andl %eax,UREGS_eflags(%rsp)
jmp test_all_events
/* %rbx: struct vcpu, interrupts disabled */
@@ -47,7 +51,7 @@ restore_all_guest:
addq $8,%rsp
popq %rcx # RIP
popq %r11 # CS
- cmpw $FLAT_KERNEL_CS32,%r11
+ cmpw $FLAT_USER_CS32,%r11
popq %r11 # RFLAGS
popq %rsp # RSP
je 1f
@@ -127,6 +131,9 @@ ENTRY(syscall_enter)
movl $TRAP_syscall,4(%rsp)
SAVE_ALL
GET_CURRENT(%rbx)
+ movq VCPU_domain(%rbx),%rcx
+ testb $1,DOMAIN_is_32bit_pv(%rcx)
+ jnz compat_syscall
testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
jz switch_to_kernel
@@ -224,6 +231,41 @@ bad_hypercall:
movq $-ENOSYS,UREGS_rax(%rsp)
jmp test_all_events
+ENTRY(sysenter_entry)
+ sti
+ pushq $FLAT_USER_SS
+ pushq $0
+ pushfq
+ pushq $0
+ pushq $0
+ pushq $0
+ movl $TRAP_syscall,4(%rsp)
+ SAVE_ALL
+ GET_CURRENT(%rbx)
+ movq VCPU_sysexit_addr(%rbx),%rax
+ movzwl VCPU_sysexit_sel(%rbx),%edx
+ cmpb $0,VCPU_sysenter_disables_events(%rbx)
+ movq %rax,UREGS_rip(%rsp)
+ movl %edx,UREGS_cs(%rsp)
+ movq VCPU_sysenter_addr(%rbx),%rax
+ setne %cl
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testq %rax,%rax
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ jz 2f
+1: movq VCPU_domain(%rbx),%rdi
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ testb $1,DOMAIN_is_32bit_pv(%rdi)
+ jnz compat_sysenter
+ call create_bounce_frame
+ jmp test_all_events
+2: movl %eax,TRAPBOUNCE_error_code(%rdx)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+ movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ jmp 1b
+
ENTRY(int80_direct_trap)
pushq $0
SAVE_ALL
@@ -296,9 +338,11 @@ create_bounce_frame:
shrq $32,%rax
testb $0xFF,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
- movq UREGS_eflags+8(%rsp),%rax
- andq $~X86_EFLAGS_IF,%rax
- shlb $1,%ch # Bit 9 (EFLAGS.IF)
+ movl UREGS_eflags+8(%rsp),%eax
+ andl $~X86_EFLAGS_IF,%eax
+ addb %ch,%ch # Bit 9 (EFLAGS.IF)
+ orl VCPU_eflags_mask(%rbx),%eax
+ movl $0,VCPU_eflags_mask(%rbx)
orb %ch,%ah # Fold EFLAGS.IF into %eax
.Lft5: movq %rax,16(%rsi) # RFLAGS
movq UREGS_rip+8(%rsp),%rax
Index: 2007-07-03/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2007-07-03.orig/xen/arch/x86/x86_64/traps.c 2007-07-03 10:34:30.000000000 +0200
+++ 2007-07-03/xen/arch/x86/x86_64/traps.c 2007-07-03 12:06:05.000000000 +0200
@@ -22,6 +22,7 @@
#include <public/callback.h>
asmlinkage void syscall_enter(void);
+asmlinkage void sysenter_entry(void);
asmlinkage void compat_hypercall(void);
asmlinkage void int80_direct_trap(void);
@@ -323,12 +324,26 @@ void __init percpu_traps_init(void)
/* Trampoline for SYSCALL entry from long mode. */
stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
- wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+ wrmsrl(MSR_LSTAR, (unsigned long)stack);
stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
- /* Trampoline for SYSCALL entry from compatibility mode. */
- wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
- stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
+ switch ( boot_cpu_data.x86_vendor )
+ {
+ case X86_VENDOR_INTEL:
+ /* SYSENTER entry. */
+ wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
+ wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
+ wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
+ break;
+ case X86_VENDOR_AMD:
+ /* Trampoline for SYSCALL entry from compatibility mode. */
+ stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
+ wrmsrl(MSR_CSTAR, (unsigned long)stack);
+ stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
+ break;
+ default:
+ BUG();
+ }
/* Common SYSCALL parameters. */
wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
@@ -353,6 +368,9 @@ static long register_guest_callback(stru
long ret = 0;
struct vcpu *v = current;
+ if ( !is_canonical_address(reg->address) )
+ return -EINVAL;
+
switch ( reg->type )
{
case CALLBACKTYPE_event:
@@ -370,6 +388,14 @@ static long register_guest_callback(stru
break;
case CALLBACKTYPE_syscall:
+ /* See arch_set_info_guest() for why this is being done. */
+ if ( v->arch.syscall32_callback_eip ==
+ v->arch.guest_context.syscall_callback_eip )
+ {
+ v->arch.syscall32_callback_eip = reg->address;
+ v->arch.syscall32_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ }
v->arch.guest_context.syscall_callback_eip = reg->address;
if ( reg->flags & CALLBACKF_mask_events )
set_bit(_VGCF_syscall_disables_events,
@@ -379,6 +405,43 @@ static long register_guest_callback(stru
&v->arch.guest_context.flags);
break;
+ case CALLBACKTYPE_syscall32:
+ v->arch.syscall32_callback_eip = reg->address;
+ v->arch.syscall32_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ break;
+
+ case CALLBACKTYPE_sfmask:
+ v->arch.syscall_eflags_mask = reg->address &
+ ~(X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+ if ( reg->address & X86_EFLAGS_IF )
+ {
+ set_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
+ v->arch.syscall32_disables_events = 1;
+ }
+ else
+ {
+ clear_bit(_VGCF_syscall_disables_events,
+ &v->arch.guest_context.flags);
+ v->arch.syscall32_disables_events = 0;
+ }
+ break;
+
+ case CALLBACKTYPE_sysenter:
+ v->arch.sysenter_callback_eip = reg->address;
+ v->arch.sysenter_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ break;
+
+ case CALLBACKTYPE_sysexit:
+ v->arch.sysexit_eip = reg->address;
+ if ( reg->flags & CALLBACKF_mask_events )
+ v->arch.sysexit_cs = FLAT_USER_CS32;
+ else
+ v->arch.sysexit_cs = FLAT_USER_CS64;
+ break;
+
case CALLBACKTYPE_nmi:
ret = register_guest_nmi_callback(reg->address);
break;
@@ -400,6 +463,10 @@ static long unregister_guest_callback(st
case CALLBACKTYPE_event:
case CALLBACKTYPE_failsafe:
case CALLBACKTYPE_syscall:
+ case CALLBACKTYPE_syscall32:
+ case CALLBACKTYPE_sfmask:
+ case CALLBACKTYPE_sysenter:
+ case CALLBACKTYPE_sysexit:
ret = -EINVAL;
break;
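
The CALLBACKTYPE_sfmask handling above mirrors the MSR_SYSCALL_MASK emulation
in traps.c: the IF bit is split out into the two disables-events flags, while
the remaining bits become the literal mask applied in switch_to_kernel. A
64-bit pv guest wanting Linux-like SYSCALL semantics might use it as in this
sketch (HYPERVISOR_callback_op() again being the assumed guest wrapper):

    /* Sketch: request that SYSCALL entry clears DF/TF/NT/RF and disables
     * event delivery.  The address field is (ab)used to carry the mask. */
    static void set_syscall_flag_mask(void)
    {
        struct callback_register sfmask = {
            .type    = CALLBACKTYPE_sfmask,
            .address = X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_TF |
                       X86_EFLAGS_NT | X86_EFLAGS_RF,
        };

        (void)HYPERVISOR_callback_op(CALLBACKOP_register, &sfmask);
    }
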
Index: 2007-07-03/xen/include/asm-x86/cpufeature.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/cpufeature.h 2007-07-03 10:35:30.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/cpufeature.h 2007-07-03 10:39:14.000000000 +0200
@@ -130,7 +130,7 @@
#define cpu_has_pae 1
#define cpu_has_pge 1
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep 0
+#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
#define cpu_has_mtrr 1
#define cpu_has_mmx 1
#define cpu_has_fxsr 1
Index: 2007-07-03/xen/include/asm-x86/domain.h
===================================================================
--- 2007-07-03.orig/xen/include/asm-x86/domain.h 2007-06-15 14:05:46.000000000 +0200
+++ 2007-07-03/xen/include/asm-x86/domain.h 2007-07-04 12:51:40.000000000 +0200
@@ -281,6 +281,16 @@ struct arch_vcpu
#endif
#ifdef CONFIG_X86_64
struct trap_bounce int80_bounce;
+ unsigned long syscall32_callback_eip;
+ unsigned long sysenter_callback_eip;
+ unsigned long sysexit_eip;
+ unsigned short syscall32_callback_cs;
+ unsigned short sysenter_callback_cs;
+ unsigned short sysexit_cs;
+ bool_t syscall32_disables_events;
+ bool_t sysenter_disables_events;
+ unsigned int syscall_eflags_mask;
+ unsigned int eflags_mask;
#endif
/* Virtual Machine Extensions */
Index: 2007-07-03/xen/include/public/callback.h
===================================================================
--- 2007-07-03.orig/xen/include/public/callback.h 2006-11-08 10:37:31.000000000 +0100
+++ 2007-07-03/xen/include/public/callback.h 2007-07-03 10:39:14.000000000 +0200
@@ -38,13 +38,34 @@
#define CALLBACKTYPE_event 0
#define CALLBACKTYPE_failsafe 1
-#define CALLBACKTYPE_syscall 2 /* x86_64 only */
+#define CALLBACKTYPE_syscall 2 /* x86_64 hv only */
/*
- * sysenter is only available on x86_32 with the
- * supervisor_mode_kernel option enabled.
+ * sysenter_deprecated is only available on x86_32 with the
+ * supervisor_mode_kernel option enabled, and should not be used in new code.
*/
-#define CALLBACKTYPE_sysenter 3
+#define CALLBACKTYPE_sysenter_deprecated 3
#define CALLBACKTYPE_nmi 4
+#if __XEN_INTERFACE_VERSION__ < 0x00030206
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#else
+/*
+ * sysenter is only available
+ * - on x86_32 with the supervisor_mode_kernel option enabled,
+ * - on x86_64 hv, for x86_32 pv guests and for 32-bit application
+ *   support in x86_64 pv guests.
+ */
+#define CALLBACKTYPE_sysenter 5
+/*
+ * sysexit is only available on x86_64 hv, and is only used to fill a
+ * sysenter frame's return address (if the guest desires to have a non-NULL
+ * value there). Additionally, since CALLBACKF_mask_events is meaningless
+ * here, it is being (mis-)used for 64-bit guests to distinguish sysenter
+ * callers expected to be in 64-bit mode (flag set) from 32-bit ones (flag
+ * clear).
+ */
+#define CALLBACKTYPE_sysexit 6
+#define CALLBACKTYPE_syscall32 7 /* x86_64 only */
+#define CALLBACKTYPE_sfmask 8 /* x86_64 only */
+#endif
/*
* Disable event deliver during callback? This flag is ignored for event and
Index: 2007-07-03/xen/include/public/xen-compat.h
===================================================================
--- 2007-07-03.orig/xen/include/public/xen-compat.h 2006-11-16 14:06:41.000000000 +0100
+++ 2007-07-03/xen/include/public/xen-compat.h 2007-07-03 10:39:14.000000000 +0200
@@ -27,7 +27,7 @@
#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
#define __XEN_PUBLIC_XEN_COMPAT_H__
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
#if defined(__XEN__) || defined(__XEN_TOOLS__)
/* Xen is built with matching headers and implements the latest interface. */
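
For completeness, a guest built against the new interface version selects it
explicitly before including the public headers; a minimal sketch:

    #define __XEN_INTERFACE_VERSION__ 0x00030206
    #include <xen/interface/callback.h>

    /* CALLBACKTYPE_sysenter now names the new callback (value 5); guests
     * built against older versions keep the deprecated x86_32 one (3). */
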