Tim Deegan wrote:
> At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
>> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
>> +/*
>> + * VMX instructions support functions
>> + */
>> +
>> +enum vmx_regs_enc {
>> + VMX_REG_RAX,
>> + VMX_REG_RCX,
>> + VMX_REG_RDX,
>> + VMX_REG_RBX,
>> + VMX_REG_RSP,
>> + VMX_REG_RBP,
>> + VMX_REG_RSI,
>> + VMX_REG_RDI,
>> +#ifdef CONFIG_X86_64
>> + VMX_REG_R8,
>> + VMX_REG_R9,
>> + VMX_REG_R10,
>> + VMX_REG_R11,
>> + VMX_REG_R12,
>> + VMX_REG_R13,
>> + VMX_REG_R14,
>> + VMX_REG_R15,
>> +#endif
>> +};
>> +
>> +enum vmx_sregs_enc {
>> + VMX_SREG_ES,
>> + VMX_SREG_CS,
>> + VMX_SREG_SS,
>> + VMX_SREG_DS,
>> + VMX_SREG_FS,
>> + VMX_SREG_GS,
>> +};
>> +
>> +enum x86_segment sreg_to_index[] = {
>> + [VMX_SREG_ES] = x86_seg_es,
>> + [VMX_SREG_CS] = x86_seg_cs,
>> + [VMX_SREG_SS] = x86_seg_ss,
>> + [VMX_SREG_DS] = x86_seg_ds,
>> + [VMX_SREG_FS] = x86_seg_fs,
>> + [VMX_SREG_GS] = x86_seg_gs,
>> +};
>
> Since you dislike adding new namespaces and translations, I'm sure you
> can get rid of these. :) It might even simplify some of the macros
> below.
True, there is some duplication here. We can reuse the following definition from
x86_emulate.c:
static enum x86_segment
decode_segment(uint8_t modrm_reg)
{
    switch ( modrm_reg )
    {
    case 0: return x86_seg_es;
    case 1: return x86_seg_cs;
    case 2: return x86_seg_ss;
    case 3: return x86_seg_ds;
    case 4: return x86_seg_fs;
    case 5: return x86_seg_gs;
    default: break;
    }
    return decode_segment_failed;
}
BTW, should we use macros rather than magic numbers here? And can the
x86_segment enumeration be changed to share the same namespace?
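For instance, if decode_segment() were made non-static and shared (just a sketch,
it is static in x86_emulate.c today), the sreg_to_index[] table could go away:

    /* Sketch only: assumes decode_segment() is exported from x86_emulate.c.
     * The segment field in the VM-exit instruction info uses the same
     * 0=ES..5=GS encoding as the ModRM reg field. */
    enum x86_segment seg_idx = decode_segment(info.fields.segment);

    if ( seg_idx == decode_segment_failed )
        goto gp_fault;
    hvm_get_segment_register(v, seg_idx, &seg);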
>
>> +union vmx_inst_info {
>> + struct {
>> + unsigned int scaling :2; /* bit 0-1 */
>> + unsigned int __rsvd0 :1; /* bit 2 */
>> + unsigned int reg1 :4; /* bit 3-6 */
>> + unsigned int addr_size :3; /* bit 7-9 */
>> + unsigned int memreg :1; /* bit 10 */
>> + unsigned int __rsvd1 :4; /* bit 11-14 */
>> + unsigned int segment :3; /* bit 15-17 */
>> + unsigned int index_reg :4; /* bit 18-21 */
>> + unsigned int index_reg_invalid :1; /* bit 22 */
>> + unsigned int base_reg :4; /* bit 23-26 */
>> + unsigned int base_reg_invalid :1; /* bit 27 */
>> + unsigned int reg2 :4; /* bit 28-31 */
>> + } fields;
>> + u32 word;
>> +};
>> +
>> +struct vmx_inst_decoded {
>> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
>> +#define VMX_INST_MEMREG_TYPE_REG 1
>> + int type;
>> + union {
>> + struct {
>> + unsigned long mem;
>> + unsigned int len;
>> + };
>> + enum vmx_regs_enc reg1;
>> + };
>> +
>> + enum vmx_regs_enc reg2;
>> +};
>> +
>> +enum vmx_ops_result {
>> + VMSUCCEED,
>> + VMFAIL_VALID,
>> + VMFAIL_INVALID,
>> +};
>> +
>> +#define CASE_SET_REG(REG, reg) \
>> + case VMX_REG_ ## REG: regs->reg = value; break
>> +#define CASE_GET_REG(REG, reg) \
>> + case VMX_REG_ ## REG: value = regs->reg; break
>> +
>> +#define CASE_EXTEND_SET_REG \
>> + CASE_EXTEND_REG(S)
>> +#define CASE_EXTEND_GET_REG \
>> + CASE_EXTEND_REG(G)
>> +
>> +#ifdef __i386__
>> +#define CASE_EXTEND_REG(T)
>> +#else
>> +#define CASE_EXTEND_REG(T) \
>> + CASE_ ## T ## ET_REG(R8, r8); \
>> + CASE_ ## T ## ET_REG(R9, r9); \
>> + CASE_ ## T ## ET_REG(R10, r10); \
>> + CASE_ ## T ## ET_REG(R11, r11); \
>> + CASE_ ## T ## ET_REG(R12, r12); \
>> + CASE_ ## T ## ET_REG(R13, r13); \
>> + CASE_ ## T ## ET_REG(R14, r14); \
>> + CASE_ ## T ## ET_REG(R15, r15)
>> +#endif
>> +
>> +static unsigned long reg_read(struct cpu_user_regs *regs,
>> + enum vmx_regs_enc index)
>> +{
>> + unsigned long value = 0;
>> +
>> + switch ( index ) {
>> + CASE_GET_REG(RAX, eax);
>> + CASE_GET_REG(RCX, ecx);
>> + CASE_GET_REG(RDX, edx);
>> + CASE_GET_REG(RBX, ebx);
>> + CASE_GET_REG(RBP, ebp);
>> + CASE_GET_REG(RSI, esi);
>> + CASE_GET_REG(RDI, edi);
>> + CASE_GET_REG(RSP, esp);
>> + CASE_EXTEND_GET_REG;
>> + default:
>> + break;
>> + }
>> +
>> + return value;
>> +}
>> +
>> +static void reg_write(struct cpu_user_regs *regs,
>> + enum vmx_regs_enc index,
>> + unsigned long value)
>> +{
>> + switch ( index ) {
>> + CASE_SET_REG(RAX, eax);
>> + CASE_SET_REG(RCX, ecx);
>> + CASE_SET_REG(RDX, edx);
>> + CASE_SET_REG(RBX, ebx);
>> + CASE_SET_REG(RBP, ebp);
>> + CASE_SET_REG(RSI, esi);
>> + CASE_SET_REG(RDI, edi);
>> + CASE_SET_REG(RSP, esp);
>> + CASE_EXTEND_SET_REG;
>> + default:
>> + break;
>> + }
>> +}
>> +
>> +static int decode_vmx_inst(struct cpu_user_regs *regs,
>> + struct vmx_inst_decoded *decode)
>> +{
>> + struct vcpu *v = current;
>> + union vmx_inst_info info;
>> + struct segment_register seg;
>> + unsigned long base, index, seg_base, disp, offset;
>> + int scale;
>> +
>> + info.word = __vmread(VMX_INSTRUCTION_INFO);
>> +
>> + if ( info.fields.memreg ) {
>> + decode->type = VMX_INST_MEMREG_TYPE_REG;
>> + decode->reg1 = info.fields.reg1;
>> + }
>> + else
>> + {
>> + decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
>> + hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
>> + /* TODO: segment type check */
>
> Indeed. :)
>
>> + seg_base = seg.base;
>> +
>> + base = info.fields.base_reg_invalid ? 0 :
>> + reg_read(regs, info.fields.base_reg);
>> +
>> + index = info.fields.index_reg_invalid ? 0 :
>> + reg_read(regs, info.fields.index_reg);
>> +
>> + scale = 1 << info.fields.scaling;
>> +
>> + disp = __vmread(EXIT_QUALIFICATION);
>> +
>> + offset = base + index * scale + disp;
>> + if ( offset > seg.limit )
>
> DYM if ( offset + len > limit )?
>
> Would it be worth trying to fold this into the existing x86_emulate
> code, which already has careful memory access checks?
Can you give more details? Do you mean re-constructing this around
hvm_virtual_to_linear_addr?
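For what it's worth, a rough sketch of what reusing it could look like, assuming
hvm_virtual_to_linear_addr() keeps its current prototype (seg, reg, offset, bytes,
access_type, addr_size, linear_addr) -- please double-check the details:

    /* Sketch only: let hvm_virtual_to_linear_addr() do the limit check
     * (including the last byte of the access) and the long-mode/canonical
     * handling instead of open-coding it.  The access type should really
     * come from the caller (read for VMXON/VMPTRLD, write for VMPTRST). */
    unsigned int bytes = 1 << (info.fields.addr_size + 1);

    offset = base + index * scale + disp;
    if ( !hvm_virtual_to_linear_addr(sreg_to_index[info.fields.segment], &seg,
                                     offset, bytes, hvm_access_read,
                                     16 << info.fields.addr_size, /* 16/32/64 */
                                     &decode->mem) )
        goto gp_fault;
    decode->len = bytes;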
>
>> + goto gp_fault;
>> +
>> + decode->mem = seg_base + base + index * scale + disp;
>> + decode->len = 1 << (info.fields.addr_size + 1);
>> + }
>> +
>> + decode->reg2 = info.fields.reg2;
>> +
>> + return X86EMUL_OKAY;
>> +
>> +gp_fault:
>> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct segment_register cs;
>> +
>> + hvm_get_segment_register(v, x86_seg_cs, &cs);
>> +
>> + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
>> + !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
>> + (regs->eflags & X86_EFLAGS_VM) ||
>> + (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
>> + goto invalid_op;
>> +
>> + if ( (cs.sel & 3) > 0 )
>> + goto gp_fault;
>> +
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +
>> +gp_fault:
>> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
>> +{
>> + unsigned long eflags = regs->eflags;
>> + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
>> + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
>> +
>> + eflags &= ~mask;
>> +
>> + switch ( res ) {
>> + case VMSUCCEED:
>> + break;
>> + case VMFAIL_VALID:
>> + /* TODO: error number, useful for guest VMM debugging */
>> + eflags |= X86_EFLAGS_ZF;
>> + break;
>> + case VMFAIL_INVALID:
>> + default:
>> + eflags |= X86_EFLAGS_CF;
>> + break;
>> + }
>> +
>> + regs->eflags = eflags;
>> +}
>> +
>> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
>> +{
>> + int rc;
>> +
>> + if ( nest->svmcs )
>> + __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> + rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +#endif
>> +
>> + nest->vmcs_valid = 0;
>> +
>> + return X86EMUL_OKAY;
>> +}
>> +
>> +/*
>> + * VMX instructions handling
>> + */
>> +
>> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + struct vmx_inst_decoded decode;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( !is_nested_avail(v->domain) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>
> I think you could fold these checks and the goto target into
> decode_vmx_inst(), just to avoid repeating them in every
> vmx_nest_handle_* function.
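A sketch of what that folding could look like (untested, just to illustrate; the
VMXON-availability / guest_vmxon_pa checks would stay in the individual handlers):

    static int decode_vmx_inst(struct cpu_user_regs *regs,
                               struct vmx_inst_decoded *decode)
    {
        /* Do the common privilege check here so every vmx_nest_handle_*
         * starts with a single call; the exception is already injected
         * on failure. */
        int rc = vmx_inst_check_privilege(regs);

        if ( rc != X86EMUL_OKAY )
            return rc;

        /* ... existing operand decoding ... */
        return X86EMUL_OKAY;
    }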
>
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + nest->guest_vmxon_pa = gpa;
>> + nest->gvmcs_pa = 0;
>> + nest->vmcs_valid = 0;
>> +#if !CONFIG_VVMCS_MAPPING
>> + nest->vvmcs = alloc_xenheap_page();
>> + if ( !nest->vvmcs )
>> + {
>> + gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +#endif
>> + nest->svmcs = alloc_xenheap_page();
>> + if ( !nest->svmcs )
>> + {
>> + gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
>> + free_xenheap_page(nest->vvmcs);
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +
>> + /*
>> + * `fork' the host vmcs to shadow_vmcs
>> + * vmcs_lock is not needed since we are on current
>> + */
>> + nest->hvmcs = v->arch.hvm_vmx.vmcs;
>> + __vmpclear(virt_to_maddr(nest->hvmcs));
>> + memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
>> + __vmptrld(virt_to_maddr(nest->hvmcs));
>> + v->arch.hvm_vmx.launched = 0;
>> +
>> + vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + nest->guest_vmxon_pa = 0;
>> + __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> + free_xenheap_page(nest->vvmcs);
>> +#endif
>> + free_xenheap_page(nest->svmcs);
>
> These also need to be freed on domain teardown.
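A teardown helper along these lines would be needed (sketch only; the function
name and call site are illustrative):

    /* Sketch: release the nested-VMX pages when the vcpu goes away; would be
     * called from the vcpu/domain destroy path.  With CONFIG_VVMCS_MAPPING the
     * global vvmcs mapping would need unmapping instead of freeing. */
    static void vmx_nest_vcpu_destroy(struct vcpu *v)
    {
        struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;

        if ( !nest->guest_vmxon_pa )
            return;

    #if !CONFIG_VVMCS_MAPPING
        free_xenheap_page(nest->vvmcs);
    #endif
        free_xenheap_page(nest->svmcs);
        nest->guest_vmxon_pa = 0;
    }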
>
>> + vmreturn(regs, VMSUCCEED);
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
>> + {
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +
>> + if ( nest->gvmcs_pa != gpa )
>> + {
>> + if ( nest->vmcs_valid )
>> + {
>> + rc = __clear_current_vvmcs(nest);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> + }
>> + nest->gvmcs_pa = gpa;
>> + ASSERT(nest->vmcs_valid == 0);
>> + }
>> +
>> +
>> + if ( !nest->vmcs_valid )
>> + {
>> +#if CONFIG_VVMCS_MAPPING
>> + unsigned long mfn;
>> + p2m_type_t p2mt;
>> +
>> + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
>> + nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
>> + nest->vvmcs = map_domain_page_global(mfn);
>> +#else
>> + rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +#endif
>> + nest->vmcs_valid = 1;
>> + }
>> +
>> + vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +
>> + gpa = nest->gvmcs_pa;
>> +
>> + rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + vmreturn(regs, VMSUCCEED);
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>
> Is it guaranteed that decode.len is always <= sizeof gpa here, even
> with a malicious guest?
Reusing hvm_virtual_to_linear_addr, with the last byte of the access taken into
account, may be the best choice :)
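Until that rework, an explicit bound in the handlers would close the hole, e.g.
(sketch):

    /* Sketch: refuse operand sizes larger than the destination, so a
     * malicious guest cannot make the copy overrun gpa. */
    if ( decode.len > sizeof(gpa) )
    {
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return X86EMUL_EXCEPTION;
    }
    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);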
Thx, Eddie
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel