At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
> +/*
> + * VMX instructions support functions
> + */
> +
> +enum vmx_regs_enc {
> + VMX_REG_RAX,
> + VMX_REG_RCX,
> + VMX_REG_RDX,
> + VMX_REG_RBX,
> + VMX_REG_RSP,
> + VMX_REG_RBP,
> + VMX_REG_RSI,
> + VMX_REG_RDI,
> +#ifdef CONFIG_X86_64
> + VMX_REG_R8,
> + VMX_REG_R9,
> + VMX_REG_R10,
> + VMX_REG_R11,
> + VMX_REG_R12,
> + VMX_REG_R13,
> + VMX_REG_R14,
> + VMX_REG_R15,
> +#endif
> +};
> +
> +enum vmx_sregs_enc {
> + VMX_SREG_ES,
> + VMX_SREG_CS,
> + VMX_SREG_SS,
> + VMX_SREG_DS,
> + VMX_SREG_FS,
> + VMX_SREG_GS,
> +};
> +
> +enum x86_segment sreg_to_index[] = {
> + [VMX_SREG_ES] = x86_seg_es,
> + [VMX_SREG_CS] = x86_seg_cs,
> + [VMX_SREG_SS] = x86_seg_ss,
> + [VMX_SREG_DS] = x86_seg_ds,
> + [VMX_SREG_FS] = x86_seg_fs,
> + [VMX_SREG_GS] = x86_seg_gs,
> +};
Since you dislike adding new namespaces and translations, I'm sure you
can get rid of these. :) It might even simplify some of the macros
below.
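
If you do keep a GPR lookup, a single offset table keyed by the hardware
register numbering (which is what the instruction-info field already uses)
would at least replace the enum and both sets of CASE_* macros.  Untested
sketch, and the field names may need adjusting between 32-bit and 64-bit
builds of struct cpu_user_regs:

    static const unsigned int gpr_off[] = {
        offsetof(struct cpu_user_regs, eax),
        offsetof(struct cpu_user_regs, ecx),
        offsetof(struct cpu_user_regs, edx),
        offsetof(struct cpu_user_regs, ebx),
        offsetof(struct cpu_user_regs, esp),
        offsetof(struct cpu_user_regs, ebp),
        offsetof(struct cpu_user_regs, esi),
        offsetof(struct cpu_user_regs, edi),
    #ifdef CONFIG_X86_64
        offsetof(struct cpu_user_regs, r8),
        offsetof(struct cpu_user_regs, r9),
        offsetof(struct cpu_user_regs, r10),
        offsetof(struct cpu_user_regs, r11),
        offsetof(struct cpu_user_regs, r12),
        offsetof(struct cpu_user_regs, r13),
        offsetof(struct cpu_user_regs, r14),
        offsetof(struct cpu_user_regs, r15),
    #endif
    };

    static unsigned long *decode_gpr(struct cpu_user_regs *regs,
                                     unsigned int index)
    {
        BUG_ON(index >= ARRAY_SIZE(gpr_off));
        return (unsigned long *)((char *)regs + gpr_off[index]);
    }

reg_read() and reg_write() then collapse to a dereference of decode_gpr().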
> +union vmx_inst_info {
> + struct {
> + unsigned int scaling :2; /* bit 0-1 */
> + unsigned int __rsvd0 :1; /* bit 2 */
> + unsigned int reg1 :4; /* bit 3-6 */
> + unsigned int addr_size :3; /* bit 7-9 */
> + unsigned int memreg :1; /* bit 10 */
> + unsigned int __rsvd1 :4; /* bit 11-14 */
> + unsigned int segment :3; /* bit 15-17 */
> + unsigned int index_reg :4; /* bit 18-21 */
> + unsigned int index_reg_invalid :1; /* bit 22 */
> + unsigned int base_reg :4; /* bit 23-26 */
> + unsigned int base_reg_invalid :1; /* bit 27 */
> + unsigned int reg2 :4; /* bit 28-31 */
> + } fields;
> + u32 word;
> +};
> +
> +struct vmx_inst_decoded {
> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
> +#define VMX_INST_MEMREG_TYPE_REG 1
> + int type;
> + union {
> + struct {
> + unsigned long mem;
> + unsigned int len;
> + };
> + enum vmx_regs_enc reg1;
> + };
> +
> + enum vmx_regs_enc reg2;
> +};
> +
> +enum vmx_ops_result {
> + VMSUCCEED,
> + VMFAIL_VALID,
> + VMFAIL_INVALID,
> +};
> +
> +#define CASE_SET_REG(REG, reg) \
> + case VMX_REG_ ## REG: regs->reg = value; break
> +#define CASE_GET_REG(REG, reg) \
> + case VMX_REG_ ## REG: value = regs->reg; break
> +
> +#define CASE_EXTEND_SET_REG \
> + CASE_EXTEND_REG(S)
> +#define CASE_EXTEND_GET_REG \
> + CASE_EXTEND_REG(G)
> +
> +#ifdef __i386__
> +#define CASE_EXTEND_REG(T)
> +#else
> +#define CASE_EXTEND_REG(T) \
> + CASE_ ## T ## ET_REG(R8, r8); \
> + CASE_ ## T ## ET_REG(R9, r9); \
> + CASE_ ## T ## ET_REG(R10, r10); \
> + CASE_ ## T ## ET_REG(R11, r11); \
> + CASE_ ## T ## ET_REG(R12, r12); \
> + CASE_ ## T ## ET_REG(R13, r13); \
> + CASE_ ## T ## ET_REG(R14, r14); \
> + CASE_ ## T ## ET_REG(R15, r15)
> +#endif
> +
> +static unsigned long reg_read(struct cpu_user_regs *regs,
> + enum vmx_regs_enc index)
> +{
> + unsigned long value = 0;
> +
> + switch ( index ) {
> + CASE_GET_REG(RAX, eax);
> + CASE_GET_REG(RCX, ecx);
> + CASE_GET_REG(RDX, edx);
> + CASE_GET_REG(RBX, ebx);
> + CASE_GET_REG(RBP, ebp);
> + CASE_GET_REG(RSI, esi);
> + CASE_GET_REG(RDI, edi);
> + CASE_GET_REG(RSP, esp);
> + CASE_EXTEND_GET_REG;
> + default:
> + break;
> + }
> +
> + return value;
> +}
> +
> +static void reg_write(struct cpu_user_regs *regs,
> + enum vmx_regs_enc index,
> + unsigned long value)
> +{
> + switch ( index ) {
> + CASE_SET_REG(RAX, eax);
> + CASE_SET_REG(RCX, ecx);
> + CASE_SET_REG(RDX, edx);
> + CASE_SET_REG(RBX, ebx);
> + CASE_SET_REG(RBP, ebp);
> + CASE_SET_REG(RSI, esi);
> + CASE_SET_REG(RDI, edi);
> + CASE_SET_REG(RSP, esp);
> + CASE_EXTEND_SET_REG;
> + default:
> + break;
> + }
> +}
> +
> +static int decode_vmx_inst(struct cpu_user_regs *regs,
> + struct vmx_inst_decoded *decode)
> +{
> + struct vcpu *v = current;
> + union vmx_inst_info info;
> + struct segment_register seg;
> + unsigned long base, index, seg_base, disp, offset;
> + int scale;
> +
> + info.word = __vmread(VMX_INSTRUCTION_INFO);
> +
> + if ( info.fields.memreg ) {
> + decode->type = VMX_INST_MEMREG_TYPE_REG;
> + decode->reg1 = info.fields.reg1;
> + }
> + else
> + {
> + decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
> + hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
> + /* TODO: segment type check */
Indeed. :)
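Roughly the sort of thing that's missing, I think (untested; 'is_write'
is a hypothetical flag for operands that get stored to, e.g. VMPTRST's,
and the precise fault rules are in the SDM's VM-instruction reference):

        if ( !seg.attr.fields.p || !seg.attr.fields.s )
            /* Unusable or system segment. */
            goto gp_fault;
        if ( is_write && !(seg.attr.fields.type & 0x2) )
            /* Destination operand in a non-writable data segment. */
            goto gp_fault;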
> + seg_base = seg.base;
> +
> + base = info.fields.base_reg_invalid ? 0 :
> + reg_read(regs, info.fields.base_reg);
> +
> + index = info.fields.index_reg_invalid ? 0 :
> + reg_read(regs, info.fields.index_reg);
> +
> + scale = 1 << info.fields.scaling;
> +
> + disp = __vmread(EXIT_QUALIFICATION);
> +
> + offset = base + index * scale + disp;
> + if ( offset > seg.limit )
Do you mean if ( offset + len > seg.limit )?
Would it be worth trying to fold this into the existing x86_emulate
code, which already has careful memory access checks?
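If not, then something like the below, once decode->len has been worked
out (sketch only; expand-down segments, and 64-bit mode where the limit
check doesn't apply, need extra care):

        if ( offset + decode->len - 1 > seg.limit ||
             offset + decode->len - 1 < offset )   /* offset wrapped */
            goto gp_fault;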
> + goto gp_fault;
> +
> + decode->mem = seg_base + base + index * scale + disp;
> + decode->len = 1 << (info.fields.addr_size + 1);
> + }
> +
> + decode->reg2 = info.fields.reg2;
> +
> + return X86EMUL_OKAY;
> +
> +gp_fault:
> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct segment_register cs;
> +
> + hvm_get_segment_register(v, x86_seg_cs, &cs);
> +
> + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
> + !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
> + (regs->eflags & X86_EFLAGS_VM) ||
> + (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
> + goto invalid_op;
> +
> + if ( (cs.sel & 3) > 0 )
> + goto gp_fault;
> +
> + return X86EMUL_OKAY;
> +
> +invalid_op:
> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
> + return X86EMUL_EXCEPTION;
> +
> +gp_fault:
> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
> +{
> + unsigned long eflags = regs->eflags;
> + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
> + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
> +
> + eflags &= ~mask;
> +
> + switch ( res ) {
> + case VMSUCCEED:
> + break;
> + case VMFAIL_VALID:
> + /* TODO: error number, useful for guest VMM debugging */
> + eflags |= X86_EFLAGS_ZF;
> + break;
> + case VMFAIL_INVALID:
> + default:
> + eflags |= X86_EFLAGS_CF;
> + break;
> + }
> +
> + regs->eflags = eflags;
> +}
> +
> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
> +{
> + int rc;
> +
> + if ( nest->svmcs )
> + __vmpclear(virt_to_maddr(nest->svmcs));
> +
> +#if !CONFIG_VVMCS_MAPPING
> + rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
> + if ( rc != HVMCOPY_okay )
> + return X86EMUL_EXCEPTION;
> +#endif
> +
> + nest->vmcs_valid = 0;
> +
> + return X86EMUL_OKAY;
> +}
> +
> +/*
> + * VMX instructions handling
> + */
> +
> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> + struct vmx_inst_decoded decode;
> + unsigned long gpa = 0;
> + int rc;
> +
> + if ( !is_nested_avail(v->domain) )
> + goto invalid_op;
> +
> + rc = vmx_inst_check_privilege(regs);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
I think you could fold these checks and the goto target into
decode_vmx_inst(), just to avoid repeating them in every
vmx_nest_handle_* function.
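Something like this, maybe (untested sketch; 'vmx_inst_prologue' is just
a name I've made up):

    static int vmx_inst_prologue(struct cpu_user_regs *regs,
                                 struct vmx_inst_decoded *decode,
                                 int need_vmxon)
    {
        struct vcpu *v = current;
        int rc;

        /* #UD if nesting isn't available, or (except for VMXON itself)
         * if the guest hasn't executed VMXON yet. */
        if ( !is_nested_avail(v->domain) ||
             (need_vmxon && !v->arch.hvm_vmx.nest.guest_vmxon_pa) )
        {
            hvm_inject_exception(TRAP_invalid_op, 0, 0);
            return X86EMUL_EXCEPTION;
        }

        rc = vmx_inst_check_privilege(regs);
        if ( rc != X86EMUL_OKAY )
            return rc;

        return decode ? decode_vmx_inst(regs, decode) : X86EMUL_OKAY;
    }

Then each handler's preamble becomes a single call, passing 0 for VMXON
and NULL for the instructions that take no memory operand.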
> + rc = decode_vmx_inst(regs, &decode);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
> + if ( rc != HVMCOPY_okay )
> + return X86EMUL_EXCEPTION;
> +
> + nest->guest_vmxon_pa = gpa;
> + nest->gvmcs_pa = 0;
> + nest->vmcs_valid = 0;
> +#if !CONFIG_VVMCS_MAPPING
> + nest->vvmcs = alloc_xenheap_page();
> + if ( !nest->vvmcs )
> + {
> + gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
> + vmreturn(regs, VMFAIL_INVALID);
> + goto out;
> + }
> +#endif
> + nest->svmcs = alloc_xenheap_page();
> + if ( !nest->svmcs )
> + {
> + gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
> + free_xenheap_page(nest->vvmcs);
> + vmreturn(regs, VMFAIL_INVALID);
> + goto out;
> + }
> +
> + /*
> + * `fork' the host vmcs to shadow_vmcs
> + * vmcs_lock is not needed since we are on current
> + */
> + nest->hvmcs = v->arch.hvm_vmx.vmcs;
> + __vmpclear(virt_to_maddr(nest->hvmcs));
> + memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
> + __vmptrld(virt_to_maddr(nest->hvmcs));
> + v->arch.hvm_vmx.launched = 0;
> +
> + vmreturn(regs, VMSUCCEED);
> +
> +out:
> + return X86EMUL_OKAY;
> +
> +invalid_op:
> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> + int rc;
> +
> + if ( unlikely(!nest->guest_vmxon_pa) )
> + goto invalid_op;
> +
> + rc = vmx_inst_check_privilege(regs);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + nest->guest_vmxon_pa = 0;
> + __vmpclear(virt_to_maddr(nest->svmcs));
> +
> +#if !CONFIG_VVMCS_MAPPING
> + free_xenheap_page(nest->vvmcs);
> +#endif
> + free_xenheap_page(nest->svmcs);
These also need to be freed on domain teardown.
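Something along these lines, called from wherever the rest of the
per-vcpu VMX state is torn down (just a sketch, and 'vmx_nest_teardown'
is a made-up name):

    void vmx_nest_teardown(struct vcpu *v)
    {
        struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;

    #if CONFIG_VVMCS_MAPPING
        if ( nest->vvmcs )
            unmap_domain_page_global(nest->vvmcs);
    #else
        if ( nest->vvmcs )
            free_xenheap_page(nest->vvmcs);
    #endif
        nest->vvmcs = NULL;

        if ( nest->svmcs )
            free_xenheap_page(nest->svmcs);
        nest->svmcs = NULL;

        nest->guest_vmxon_pa = 0;
    }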
> + vmreturn(regs, VMSUCCEED);
> + return X86EMUL_OKAY;
> +
> +invalid_op:
> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct vmx_inst_decoded decode;
> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> + unsigned long gpa = 0;
> + int rc;
> +
> + if ( unlikely(!nest->guest_vmxon_pa) )
> + goto invalid_op;
> +
> + rc = vmx_inst_check_privilege(regs);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + rc = decode_vmx_inst(regs, &decode);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
> + if ( rc != HVMCOPY_okay )
> + return X86EMUL_EXCEPTION;
> +
> + if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
> + {
> + vmreturn(regs, VMFAIL_INVALID);
> + goto out;
> + }
> +
> + if ( nest->gvmcs_pa != gpa )
> + {
> + if ( nest->vmcs_valid )
> + {
> + rc = __clear_current_vvmcs(nest);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> + }
> + nest->gvmcs_pa = gpa;
> + ASSERT(nest->vmcs_valid == 0);
> + }
> +
> +
> + if ( !nest->vmcs_valid )
> + {
> +#if CONFIG_VVMCS_MAPPING
> + unsigned long mfn;
> + p2m_type_t p2mt;
> +
> + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
> + nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
> + nest->vvmcs = map_domain_page_global(mfn);
> +#else
> + rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
> + if ( rc != HVMCOPY_okay )
> + return X86EMUL_EXCEPTION;
> +#endif
> + nest->vmcs_valid = 1;
> + }
> +
> + vmreturn(regs, VMSUCCEED);
> +
> +out:
> + return X86EMUL_OKAY;
> +
> +invalid_op:
> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct vmx_inst_decoded decode;
> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> + unsigned long gpa = 0;
> + int rc;
> +
> + if ( unlikely(!nest->guest_vmxon_pa) )
> + goto invalid_op;
> +
> + rc = vmx_inst_check_privilege(regs);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + rc = decode_vmx_inst(regs, &decode);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> +
> + gpa = nest->gvmcs_pa;
> +
> + rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
> + if ( rc != HVMCOPY_okay )
> + return X86EMUL_EXCEPTION;
> +
> + vmreturn(regs, VMSUCCEED);
> + return X86EMUL_OKAY;
> +
> +invalid_op:
> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
> + return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
> +{
> + struct vcpu *v = current;
> + struct vmx_inst_decoded decode;
> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> + unsigned long gpa = 0;
> + int rc;
> +
> + if ( unlikely(!nest->guest_vmxon_pa) )
> + goto invalid_op;
> +
> + rc = vmx_inst_check_privilege(regs);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + rc = decode_vmx_inst(regs, &decode);
> + if ( rc != X86EMUL_OKAY )
> + return rc;
> +
> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
Is it guaranteed that decode.len is always <= sizeof gpa here, even with
a malicious guest?
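If not, an 8-byte copy into a 4-byte unsigned long on a 32-bit build
overruns the stack.  IIRC the operand is architecturally 64 bits wide, so
copying into a u64 and rejecting oversized lengths seems safer -- a rough
sketch:

        u64 gpa = 0;
        ...
        if ( decode.len > sizeof(gpa) )
        {
            hvm_inject_exception(TRAP_gp_fault, 0, 0);
            return X86EMUL_EXCEPTION;
        }
        rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);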
Cheers,
Tim.
--
Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Principal Software Engineer, XenServer Engineering
Citrix Systems UK Ltd. (Company #02937203, SL9 0BG)