... thus further shrinking overall size of struct arch_vcpu. This has a
minor effect on XEN_DOMCTL_{get,set}_ext_vcpucontext - for HVM guests,
some meaningless fields will no longer get stored or retrieved: reads
will now return zero, and writes are required to be (mostly) zero (the
same as was already done on x86-32).

Signed-off-by: Jan Beulich

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -342,8 +342,6 @@ int vcpu_initialise(struct vcpu *v)
     struct domain *d = v->domain;
     int rc;
 
-    v->arch.vcpu_info_mfn = INVALID_MFN;
-
     v->arch.flags = TF_kernel_mode;
 
 #if defined(__i386__)
@@ -372,8 +370,6 @@ int vcpu_initialise(struct vcpu *v)
 
     v->arch.perdomain_ptes = perdomain_ptes(d, v);
 
-    spin_lock_init(&v->arch.shadow_ldt_lock);
-
     if ( (rc = xsave_alloc_save_area(v)) != 0 )
         return rc;
     if ( v->arch.xsave_area )
@@ -395,6 +391,10 @@ int vcpu_initialise(struct vcpu *v)
         goto done;
     }
 
+    v->arch.pv_vcpu.vcpu_info_mfn = INVALID_MFN;
+
+    spin_lock_init(&v->arch.pv_vcpu.shadow_ldt_lock);
+
     if ( !is_idle_domain(d) )
     {
         if ( standalone_trap_ctxt(v) )
@@ -868,7 +868,7 @@ int arch_set_info_guest(
     init_int80_direct_trap(v);
 
     /* IOPL privileges are virtualised. */
-    v->arch.iopl = (v->arch.user_regs.eflags >> 12) & 3;
+    v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
     v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
 
     /* Ensure real hardware interrupts are enabled. */
@@ -1016,14 +1016,14 @@ unmap_vcpu_info(struct vcpu *v)
 {
     unsigned long mfn;
 
-    if ( v->arch.vcpu_info_mfn == INVALID_MFN )
+    if ( v->arch.pv_vcpu.vcpu_info_mfn == INVALID_MFN )
         return;
 
-    mfn = v->arch.vcpu_info_mfn;
+    mfn = v->arch.pv_vcpu.vcpu_info_mfn;
     unmap_domain_page_global(v->vcpu_info);
 
     v->vcpu_info = &dummy_vcpu_info;
-    v->arch.vcpu_info_mfn = INVALID_MFN;
+    v->arch.pv_vcpu.vcpu_info_mfn = INVALID_MFN;
 
     put_page_and_type(mfn_to_page(mfn));
 }
@@ -1045,7 +1045,7 @@ map_vcpu_info(struct vcpu *v, unsigned l
     if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
         return -EINVAL;
 
-    if ( v->arch.vcpu_info_mfn != INVALID_MFN )
+    if ( v->arch.pv_vcpu.vcpu_info_mfn != INVALID_MFN )
         return -EINVAL;
 
     /* Run this command on yourself or on other offline VCPUS. */
@@ -1077,7 +1077,7 @@ map_vcpu_info(struct vcpu *v, unsigned l
     }
 
     v->vcpu_info = new_info;
-    v->arch.vcpu_info_mfn = mfn;
+    v->arch.pv_vcpu.vcpu_info_mfn = mfn;
 
     /* Set new vcpu_info pointer /before/ setting pending flags. */
     wmb();
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1123,20 +1123,31 @@ long arch_do_domctl(
         {
             evc->size = sizeof(*evc);
 #ifdef __x86_64__
-            evc->sysenter_callback_cs = v->arch.sysenter_callback_cs;
-            evc->sysenter_callback_eip = v->arch.sysenter_callback_eip;
-            evc->sysenter_disables_events = v->arch.sysenter_disables_events;
-            evc->syscall32_callback_cs = v->arch.syscall32_callback_cs;
-            evc->syscall32_callback_eip = v->arch.syscall32_callback_eip;
-            evc->syscall32_disables_events = v->arch.syscall32_disables_events;
-#else
-            evc->sysenter_callback_cs = 0;
-            evc->sysenter_callback_eip = 0;
-            evc->sysenter_disables_events = 0;
-            evc->syscall32_callback_cs = 0;
-            evc->syscall32_callback_eip = 0;
-            evc->syscall32_disables_events = 0;
+            if ( !is_hvm_domain(d) )
+            {
+                evc->sysenter_callback_cs =
+                    v->arch.pv_vcpu.sysenter_callback_cs;
+                evc->sysenter_callback_eip =
+                    v->arch.pv_vcpu.sysenter_callback_eip;
+                evc->sysenter_disables_events =
+                    v->arch.pv_vcpu.sysenter_disables_events;
+                evc->syscall32_callback_cs =
+                    v->arch.pv_vcpu.syscall32_callback_cs;
+                evc->syscall32_callback_eip =
+                    v->arch.pv_vcpu.syscall32_callback_eip;
+                evc->syscall32_disables_events =
+                    v->arch.pv_vcpu.syscall32_disables_events;
+            }
+            else
 #endif
+            {
+                evc->sysenter_callback_cs = 0;
+                evc->sysenter_callback_eip = 0;
+                evc->sysenter_disables_events = 0;
+                evc->syscall32_callback_cs = 0;
+                evc->syscall32_callback_eip = 0;
+                evc->syscall32_disables_events = 0;
+            }
         }
         else
         {
@@ -1144,22 +1155,31 @@ long arch_do_domctl(
             if ( evc->size != sizeof(*evc) )
                 goto ext_vcpucontext_out;
 #ifdef __x86_64__
-            fixup_guest_code_selector(d, evc->sysenter_callback_cs);
-            v->arch.sysenter_callback_cs = evc->sysenter_callback_cs;
-            v->arch.sysenter_callback_eip = evc->sysenter_callback_eip;
-            v->arch.sysenter_disables_events = evc->sysenter_disables_events;
-            fixup_guest_code_selector(d, evc->syscall32_callback_cs);
-            v->arch.syscall32_callback_cs = evc->syscall32_callback_cs;
-            v->arch.syscall32_callback_eip = evc->syscall32_callback_eip;
-            v->arch.syscall32_disables_events = evc->syscall32_disables_events;
-#else
+            if ( !is_hvm_domain(d) )
+            {
+                fixup_guest_code_selector(d, evc->sysenter_callback_cs);
+                v->arch.pv_vcpu.sysenter_callback_cs =
+                    evc->sysenter_callback_cs;
+                v->arch.pv_vcpu.sysenter_callback_eip =
+                    evc->sysenter_callback_eip;
+                v->arch.pv_vcpu.sysenter_disables_events =
+                    evc->sysenter_disables_events;
+                fixup_guest_code_selector(d, evc->syscall32_callback_cs);
+                v->arch.pv_vcpu.syscall32_callback_cs =
+                    evc->syscall32_callback_cs;
+                v->arch.pv_vcpu.syscall32_callback_eip =
+                    evc->syscall32_callback_eip;
+                v->arch.pv_vcpu.syscall32_disables_events =
+                    evc->syscall32_disables_events;
+            }
+            else
+#endif
             /* We do not support syscall/syscall32/sysenter on 32-bit Xen. */
             if ( (evc->sysenter_callback_cs & ~3) ||
                  evc->sysenter_callback_eip ||
                  (evc->syscall32_callback_cs & ~3) ||
                  evc->syscall32_callback_eip )
                 goto ext_vcpucontext_out;
-#endif
         }
 
         ret = 0;
@@ -1698,7 +1718,7 @@ void arch_get_info_guest(struct vcpu *v,
 
     /* IOPL privileges are virtualised: merge back into returned eflags. */
     BUG_ON((c(user_regs.eflags) & X86_EFLAGS_IOPL) != 0);
-    c(user_regs.eflags |= v->arch.iopl << 12);
+    c(user_regs.eflags |= v->arch.pv_vcpu.iopl << 12);
 
     if ( !is_pv_32on64_domain(v->domain) )
     {
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -592,12 +592,12 @@ static void invalidate_shadow_ldt(struct
 
     BUG_ON(unlikely(in_irq()));
 
-    spin_lock(&v->arch.shadow_ldt_lock);
+    spin_lock(&v->arch.pv_vcpu.shadow_ldt_lock);
 
-    if ( v->arch.shadow_ldt_mapcnt == 0 )
+    if ( v->arch.pv_vcpu.shadow_ldt_mapcnt == 0 )
         goto out;
 
-    v->arch.shadow_ldt_mapcnt = 0;
+    v->arch.pv_vcpu.shadow_ldt_mapcnt = 0;
 
     for ( i = 16; i < 32; i++ )
     {
@@ -615,7 +615,7 @@ static void invalidate_shadow_ldt(struct
     flush_tlb_mask(v->vcpu_dirty_cpumask);
 
  out:
-    spin_unlock(&v->arch.shadow_ldt_lock);
+    spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock);
 }
 
 
@@ -666,10 +666,10 @@ int map_ldt_shadow_page(unsigned int off
 
     nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
 
-    spin_lock(&v->arch.shadow_ldt_lock);
+    spin_lock(&v->arch.pv_vcpu.shadow_ldt_lock);
     l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
-    v->arch.shadow_ldt_mapcnt++;
-    spin_unlock(&v->arch.shadow_ldt_lock);
+    v->arch.pv_vcpu.shadow_ldt_mapcnt++;
+    spin_unlock(&v->arch.pv_vcpu.shadow_ldt_lock);
 
     return 1;
 }
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -440,7 +440,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( set_iopl.iopl > 3 )
             break;
         ret = 0;
-        v->arch.iopl = set_iopl.iopl;
+        v->arch.pv_vcpu.iopl = set_iopl.iopl;
         break;
     }
 
@@ -455,11 +455,11 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
             break;
         ret = 0;
 #ifndef COMPAT
-        v->arch.iobmp = set_iobitmap.bitmap;
+        v->arch.pv_vcpu.iobmp = set_iobitmap.bitmap;
 #else
-        guest_from_compat_handle(v->arch.iobmp, set_iobitmap.bitmap);
+        guest_from_compat_handle(v->arch.pv_vcpu.iobmp, set_iobitmap.bitmap);
 #endif
-        v->arch.iobmp_limit = set_iobitmap.nr_ports;
+        v->arch.pv_vcpu.iobmp_limit = set_iobitmap.nr_ports;
         break;
     }
 
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -429,7 +429,7 @@ static void do_guest_trap(
 
     trace_pv_trap(trapnr, regs->eip, use_error_code, regs->error_code);
 
-    tb = &v->arch.trap_bounce;
+    tb = &v->arch.pv_vcpu.trap_bounce;
     ti = &v->arch.pv_vcpu.trap_ctxt[trapnr];
 
     tb->flags = TBF_EXCEPTION;
@@ -507,7 +507,7 @@ static unsigned int check_guest_io_break
 asmlinkage int set_guest_machinecheck_trapbounce(void)
 {
     struct vcpu *v = current;
-    struct trap_bounce *tb = &v->arch.trap_bounce;
+    struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
 
     do_guest_trap(TRAP_machine_check, guest_cpu_user_regs(), 0);
     tb->flags &= ~TBF_EXCEPTION; /* not needed for MCE delivery path */
@@ -521,7 +521,7 @@ asmlinkage int set_guest_machinecheck_tr
 asmlinkage int set_guest_nmi_trapbounce(void)
 {
     struct vcpu *v = current;
-    struct trap_bounce *tb = &v->arch.trap_bounce;
+    struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
     do_guest_trap(TRAP_nmi, guest_cpu_user_regs(), 0);
     tb->flags &= ~TBF_EXCEPTION; /* not needed for NMI delivery path */
     return !null_trap_bounce(v, tb);
@@ -1007,7 +1007,7 @@ void propagate_page_fault(unsigned long
 {
     struct trap_info *ti;
     struct vcpu *v = current;
-    struct trap_bounce *tb = &v->arch.trap_bounce;
+    struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
 
     v->arch.pv_vcpu.ctrlreg[2] = addr;
     arch_set_cr2(v, addr);
@@ -1504,10 +1504,10 @@ static int guest_io_okay(
 #endif
 
     if ( !vm86_mode(regs) &&
-         (v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
+         (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
         return 1;
 
-    if ( v->arch.iobmp_limit > (port + bytes) )
+    if ( v->arch.pv_vcpu.iobmp_limit > (port + bytes) )
     {
         union { uint8_t bytes[2]; uint16_t mask; } x;
 
@@ -1516,7 +1516,7 @@ static int guest_io_okay(
          * read as 0xff (no access allowed).
          */
         TOGGLE_MODE();
-        switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
+        switch ( __copy_from_guest_offset(x.bytes, v->arch.pv_vcpu.iobmp,
                                           port>>3, 2) )
         {
         default: x.bytes[0] = ~0;
@@ -2020,7 +2020,7 @@ static int emulate_privileged_op(struct
 
     case 0xfa: /* CLI */
     case 0xfb: /* STI */
-        if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
+        if ( v->arch.pv_vcpu.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
            goto fail;
         /*
         * This is just too dangerous to allow, in my opinion. Consider if the
--- a/xen/arch/x86/x86_32/asm-offsets.c
+++ b/xen/arch/x86/x86_32/asm-offsets.c
@@ -53,7 +53,7 @@ void __dummy__(void)
 
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
-    OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce);
+    OFFSET(VCPU_trap_bounce, struct vcpu, arch.pv_vcpu.trap_bounce);
     OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
     OFFSET(VCPU_event_sel, struct vcpu, arch.pv_vcpu.event_callback_cs);
     OFFSET(VCPU_event_addr, struct vcpu, arch.pv_vcpu.event_callback_eip);
--- a/xen/arch/x86/x86_32/seg_fixup.c
+++ b/xen/arch/x86/x86_32/seg_fixup.c
@@ -546,7 +546,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
     if ( VM_ASSIST(curr->domain, VMASST_TYPE_4gb_segments_notify) )
     {
         struct trap_info *ti = &curr->arch.pv_vcpu.trap_ctxt[15];
-        struct trap_bounce *tb = &curr->arch.trap_bounce;
+        struct trap_bounce *tb = &curr->arch.pv_vcpu.trap_bounce;
 
         tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
         tb->error_code = pb - eip;
--- a/xen/arch/x86/x86_32/traps.c
+++ b/xen/arch/x86/x86_32/traps.c
@@ -387,12 +387,12 @@ void init_int80_direct_trap(struct vcpu
     if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) ||
          supervisor_mode_kernel )
     {
-        v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
+        v->arch.pv_vcpu.int80_desc.a = v->arch.pv_vcpu.int80_desc.b = 0;
         return;
     }
 
-    v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
-    v->arch.int80_desc.b =
+    v->arch.pv_vcpu.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
+    v->arch.pv_vcpu.int80_desc.b =
         (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);
 
     if ( v == current )
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -65,8 +65,8 @@ void __dummy__(void)
     OFFSET(VCPU_processor, struct vcpu, processor);
     OFFSET(VCPU_domain, struct vcpu, domain);
     OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
-    OFFSET(VCPU_trap_bounce, struct vcpu, arch.trap_bounce);
-    OFFSET(VCPU_int80_bounce, struct vcpu, arch.int80_bounce);
+    OFFSET(VCPU_trap_bounce, struct vcpu, arch.pv_vcpu.trap_bounce);
+    OFFSET(VCPU_int80_bounce, struct vcpu, arch.pv_vcpu.int80_bounce);
     OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
     OFFSET(VCPU_event_addr, struct vcpu, arch.pv_vcpu.event_callback_eip);
     OFFSET(VCPU_event_sel, struct vcpu, arch.pv_vcpu.event_callback_cs);
@@ -76,14 +76,18 @@ void __dummy__(void)
            arch.pv_vcpu.failsafe_callback_cs);
     OFFSET(VCPU_syscall_addr, struct vcpu,
            arch.pv_vcpu.syscall_callback_eip);
-    OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
-    OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
+    OFFSET(VCPU_syscall32_addr, struct vcpu,
+           arch.pv_vcpu.syscall32_callback_eip);
+    OFFSET(VCPU_syscall32_sel, struct vcpu,
+           arch.pv_vcpu.syscall32_callback_cs);
     OFFSET(VCPU_syscall32_disables_events, struct vcpu,
-           arch.syscall32_disables_events);
-    OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
-    OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
+           arch.pv_vcpu.syscall32_disables_events);
+    OFFSET(VCPU_sysenter_addr, struct vcpu,
+           arch.pv_vcpu.sysenter_callback_eip);
+    OFFSET(VCPU_sysenter_sel, struct vcpu,
+           arch.pv_vcpu.sysenter_callback_cs);
     OFFSET(VCPU_sysenter_disables_events, struct vcpu,
-           arch.sysenter_disables_events);
+           arch.pv_vcpu.sysenter_disables_events);
     OFFSET(VCPU_trap_ctxt, struct vcpu, arch.pv_vcpu.trap_ctxt);
     OFFSET(VCPU_kernel_sp, struct vcpu, arch.pv_vcpu.kernel_sp);
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
--- a/xen/arch/x86/x86_64/compat/traps.c
+++ b/xen/arch/x86/x86_64/compat/traps.c
@@ -190,16 +190,16 @@ static long compat_register_guest_callba
         break;
 
     case CALLBACKTYPE_syscall32:
-        v->arch.syscall32_callback_cs = reg->address.cs;
-        v->arch.syscall32_callback_eip = reg->address.eip;
-        v->arch.syscall32_disables_events =
+        v->arch.pv_vcpu.syscall32_callback_cs = reg->address.cs;
+        v->arch.pv_vcpu.syscall32_callback_eip = reg->address.eip;
+        v->arch.pv_vcpu.syscall32_disables_events =
             (reg->flags & CALLBACKF_mask_events) != 0;
         break;
 
     case CALLBACKTYPE_sysenter:
-        v->arch.sysenter_callback_cs = reg->address.cs;
-        v->arch.sysenter_callback_eip = reg->address.eip;
-        v->arch.sysenter_disables_events =
+        v->arch.pv_vcpu.sysenter_callback_cs = reg->address.cs;
+        v->arch.pv_vcpu.sysenter_callback_eip = reg->address.eip;
+        v->arch.pv_vcpu.sysenter_disables_events =
             (reg->flags & CALLBACKF_mask_events) != 0;
         break;
 
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -422,7 +422,7 @@ void __devinit subarch_percpu_traps_init
 void init_int80_direct_trap(struct vcpu *v)
 {
     struct trap_info *ti = &v->arch.pv_vcpu.trap_ctxt[0x80];
-    struct trap_bounce *tb = &v->arch.int80_bounce;
+    struct trap_bounce *tb = &v->arch.pv_vcpu.int80_bounce;
 
     tb->flags = TBF_EXCEPTION;
     tb->cs = ti->cs;
@@ -467,14 +467,14 @@ static long register_guest_callback(stru
         break;
 
     case CALLBACKTYPE_syscall32:
-        v->arch.syscall32_callback_eip = reg->address;
-        v->arch.syscall32_disables_events =
+        v->arch.pv_vcpu.syscall32_callback_eip = reg->address;
+        v->arch.pv_vcpu.syscall32_disables_events =
             !!(reg->flags & CALLBACKF_mask_events);
         break;
 
     case CALLBACKTYPE_sysenter:
-        v->arch.sysenter_callback_eip = reg->address;
-        v->arch.sysenter_disables_events =
+        v->arch.pv_vcpu.sysenter_callback_eip = reg->address;
+        v->arch.pv_vcpu.sysenter_disables_events =
             !!(reg->flags & CALLBACKF_mask_events);
         break;
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -378,11 +378,38 @@ struct pv_vcpu
     unsigned long vm_assist;
 
 #ifdef CONFIG_X86_64
+    unsigned long syscall32_callback_eip;
+    unsigned long sysenter_callback_eip;
+    unsigned short syscall32_callback_cs;
+    unsigned short sysenter_callback_cs;
+    bool_t syscall32_disables_events;
+    bool_t sysenter_disables_events;
+
     /* Segment base addresses. */
     unsigned long fs_base;
     unsigned long gs_base_kernel;
     unsigned long gs_base_user;
 #endif
+
+    /* Bounce information for propagating an exception to guest OS. */
+    struct trap_bounce trap_bounce;
+#ifdef CONFIG_X86_64
+    struct trap_bounce int80_bounce;
+#else
+    struct desc_struct int80_desc;
+#endif
+
+    /* I/O-port access bitmap. */
+    XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
+    unsigned int iobmp_limit; /* Number of ports represented in the bitmap. */
+    unsigned int iopl; /* Current IOPL for this VCPU. */
+
+    /* Current LDT details. */
+    unsigned long shadow_ldt_mapcnt;
+    spinlock_t shadow_ldt_lock;
+
+    /* Guest-specified relocation of vcpu_info. */
+    unsigned long vcpu_info_mfn;
 };
 
 struct arch_vcpu
@@ -408,27 +435,6 @@ struct arch_vcpu
     void (*ctxt_switch_from) (struct vcpu *);
     void (*ctxt_switch_to) (struct vcpu *);
 
-    /* Bounce information for propagating an exception to guest OS. */
-    struct trap_bounce trap_bounce;
-
-    /* I/O-port access bitmap. */
-    XEN_GUEST_HANDLE(uint8) iobmp; /* Guest kernel vaddr of the bitmap. */
-    int iobmp_limit; /* Number of ports represented in the bitmap. */
-    int iopl; /* Current IOPL for this VCPU. */
-
-#ifdef CONFIG_X86_32
-    struct desc_struct int80_desc;
-#endif
-#ifdef CONFIG_X86_64
-    struct trap_bounce int80_bounce;
-    unsigned long syscall32_callback_eip;
-    unsigned long sysenter_callback_eip;
-    unsigned short syscall32_callback_cs;
-    unsigned short sysenter_callback_cs;
-    bool_t syscall32_disables_events;
-    bool_t sysenter_disables_events;
-#endif
-
     /* Virtual Machine Extensions */
     union {
         struct pv_vcpu pv_vcpu;
@@ -468,15 +474,8 @@ struct arch_vcpu
      */
     uint64_t xcr0_accum;
 
-    /* Current LDT details. */
-    unsigned long shadow_ldt_mapcnt;
-    spinlock_t shadow_ldt_lock;
-
     struct paging_vcpu paging;
 
-    /* Guest-specified relocation of vcpu_info. */
-    unsigned long vcpu_info_mfn;
-
 #ifdef CONFIG_X86_32
     /* map_domain_page() mapping cache. */
     struct mapcache_vcpu mapcache;
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -474,7 +474,7 @@ extern void init_int80_direct_trap(struc
 
 #define set_int80_direct_trap(_ed)                  \
     (memcpy(idt_tables[(_ed)->processor] + 0x80,    \
-            &((_ed)->arch.int80_desc), 8))
+            &((_ed)->arch.pv_vcpu.int80_desc), 8))
 #else
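
(Not part of the patch - an illustrative caller-side sketch of the domctl
behaviour change described above: with this change, writes of the
sysenter/syscall32 fields for an HVM vcpu are required to be zero, so a
toolstack caller simply leaves the structure zeroed. Field names follow
the public xen_domctl_ext_vcpucontext layout in xen/include/public/domctl.h;
issue_domctl() is a hypothetical stand-in for whatever hypercall wrapper
the toolstack uses.)

    #include <string.h>
    #include <stdint.h>
    #include <xen/domctl.h>

    /* Hypothetical wrapper issuing a domctl hypercall. */
    int issue_domctl(struct xen_domctl *domctl);

    int set_hvm_ext_vcpucontext(domid_t domid, uint32_t vcpu)
    {
        struct xen_domctl domctl;

        memset(&domctl, 0, sizeof(domctl));
        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
        domctl.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl.domain = domid;
        domctl.u.ext_vcpucontext.vcpu = vcpu;
        domctl.u.ext_vcpucontext.size = sizeof(domctl.u.ext_vcpucontext);

        /*
         * For an HVM guest the sysenter/syscall32 callback fields are
         * meaningless; the hypervisor now insists they are (mostly) zero,
         * which the memset() above already guarantees.
         */
        return issue_domctl(&domctl);
    }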