Tim Deegan wrote:
> At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
>> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
>> +/*
>> + * VMX instructions support functions
>> + */
>> +
>> +enum vmx_regs_enc {
>> + VMX_REG_RAX,
>> + VMX_REG_RCX,
>> + VMX_REG_RDX,
>> + VMX_REG_RBX,
>> + VMX_REG_RSP,
>> + VMX_REG_RBP,
>> + VMX_REG_RSI,
>> + VMX_REG_RDI,
>> +#ifdef CONFIG_X86_64
>> + VMX_REG_R8,
>> + VMX_REG_R9,
>> + VMX_REG_R10,
>> + VMX_REG_R11,
>> + VMX_REG_R12,
>> + VMX_REG_R13,
>> + VMX_REG_R14,
>> + VMX_REG_R15,
>> +#endif
>> +};
>> +
>> +enum vmx_sregs_enc {
>> + VMX_SREG_ES,
>> + VMX_SREG_CS,
>> + VMX_SREG_SS,
>> + VMX_SREG_DS,
>> + VMX_SREG_FS,
>> + VMX_SREG_GS,
>> +};
>> +
>> +enum x86_segment sreg_to_index[] = {
>> + [VMX_SREG_ES] = x86_seg_es,
>> + [VMX_SREG_CS] = x86_seg_cs,
>> + [VMX_SREG_SS] = x86_seg_ss,
>> + [VMX_SREG_DS] = x86_seg_ds,
>> + [VMX_SREG_FS] = x86_seg_fs,
>> + [VMX_SREG_GS] = x86_seg_gs,
>> +};
>
> Since you dislike adding new namespaces and translations, I'm sure you
> can get rid of these. :) It might even simplify some of the macros
> below.
True, there is some duplication here. We can reuse the following definition from
x86_emulate.c:
static enum x86_segment
decode_segment(uint8_t modrm_reg)
{
    switch ( modrm_reg )
    {
    case 0: return x86_seg_es;
    case 1: return x86_seg_cs;
    case 2: return x86_seg_ss;
    case 3: return x86_seg_ds;
    case 4: return x86_seg_fs;
    case 5: return x86_seg_gs;
    default: break;
    }
    return decode_segment_failed;
}
BTW, should we use macros rather than magic numbers here? And can the
x86_segment enumeration be changed to share the same namespace?
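For instance, if decode_segment() were made non-static and shared (just a sketch,
it is static in x86_emulate.c today), the sreg_to_index[] table could go away:

    /* Sketch only: assumes decode_segment() is exported from x86_emulate.c.
     * The segment field in the VM-exit instruction info uses the same
     * 0=ES..5=GS encoding as the ModRM reg field. */
    enum x86_segment seg_idx = decode_segment(info.fields.segment);

    if ( seg_idx == decode_segment_failed )
        goto gp_fault;
    hvm_get_segment_register(v, seg_idx, &seg);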
>
>> +union vmx_inst_info {
>> + struct {
>> + unsigned int scaling :2; /* bit 0-1 */
>> + unsigned int __rsvd0 :1; /* bit 2 */
>> + unsigned int reg1 :4; /* bit 3-6 */
>> + unsigned int addr_size :3; /* bit 7-9 */
>> + unsigned int memreg :1; /* bit 10 */
>> + unsigned int __rsvd1 :4; /* bit 11-14 */
>> + unsigned int segment :3; /* bit 15-17 */
>> + unsigned int index_reg :4; /* bit 18-21 */
>> + unsigned int index_reg_invalid :1; /* bit 22 */
>> + unsigned int base_reg :4; /* bit 23-26 */
>> + unsigned int base_reg_invalid :1; /* bit 27 */
>> + unsigned int reg2 :4; /* bit 28-31 */
>> + } fields;
>> + u32 word;
>> +};
>> +
>> +struct vmx_inst_decoded {
>> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
>> +#define VMX_INST_MEMREG_TYPE_REG 1
>> + int type;
>> + union {
>> + struct {
>> + unsigned long mem;
>> + unsigned int len;
>> + };
>> + enum vmx_regs_enc reg1;
>> + };
>> +
>> + enum vmx_regs_enc reg2;
>> +};
>> +
>> +enum vmx_ops_result {
>> + VMSUCCEED,
>> + VMFAIL_VALID,
>> + VMFAIL_INVALID,
>> +};
>> +
>> +#define CASE_SET_REG(REG, reg) \
>> + case VMX_REG_ ## REG: regs->reg = value; break
>> +#define CASE_GET_REG(REG, reg) \
>> + case VMX_REG_ ## REG: value = regs->reg; break
>> +
>> +#define CASE_EXTEND_SET_REG \
>> + CASE_EXTEND_REG(S)
>> +#define CASE_EXTEND_GET_REG \
>> + CASE_EXTEND_REG(G)
>> +
>> +#ifdef __i386__
>> +#define CASE_EXTEND_REG(T)
>> +#else
>> +#define CASE_EXTEND_REG(T) \
>> + CASE_ ## T ## ET_REG(R8, r8); \
>> + CASE_ ## T ## ET_REG(R9, r9); \
>> + CASE_ ## T ## ET_REG(R10, r10); \
>> + CASE_ ## T ## ET_REG(R11, r11); \
>> + CASE_ ## T ## ET_REG(R12, r12); \
>> + CASE_ ## T ## ET_REG(R13, r13); \
>> + CASE_ ## T ## ET_REG(R14, r14); \
>> + CASE_ ## T ## ET_REG(R15, r15)
>> +#endif
>> +
>> +static unsigned long reg_read(struct cpu_user_regs *regs,
>> + enum vmx_regs_enc index)
>> +{
>> + unsigned long value = 0;
>> +
>> + switch ( index ) {
>> + CASE_GET_REG(RAX, eax);
>> + CASE_GET_REG(RCX, ecx);
>> + CASE_GET_REG(RDX, edx);
>> + CASE_GET_REG(RBX, ebx);
>> + CASE_GET_REG(RBP, ebp);
>> + CASE_GET_REG(RSI, esi);
>> + CASE_GET_REG(RDI, edi);
>> + CASE_GET_REG(RSP, esp);
>> + CASE_EXTEND_GET_REG;
>> + default:
>> + break;
>> + }
>> +
>> + return value;
>> +}
>> +
>> +static void reg_write(struct cpu_user_regs *regs,
>> + enum vmx_regs_enc index,
>> + unsigned long value)
>> +{
>> + switch ( index ) {
>> + CASE_SET_REG(RAX, eax);
>> + CASE_SET_REG(RCX, ecx);
>> + CASE_SET_REG(RDX, edx);
>> + CASE_SET_REG(RBX, ebx);
>> + CASE_SET_REG(RBP, ebp);
>> + CASE_SET_REG(RSI, esi);
>> + CASE_SET_REG(RDI, edi);
>> + CASE_SET_REG(RSP, esp);
>> + CASE_EXTEND_SET_REG;
>> + default:
>> + break;
>> + }
>> +}
>> +
>> +static int decode_vmx_inst(struct cpu_user_regs *regs,
>> + struct vmx_inst_decoded *decode)
>> +{
>> + struct vcpu *v = current;
>> + union vmx_inst_info info;
>> + struct segment_register seg;
>> + unsigned long base, index, seg_base, disp, offset;
>> + int scale;
>> +
>> + info.word = __vmread(VMX_INSTRUCTION_INFO);
>> +
>> + if ( info.fields.memreg ) {
>> + decode->type = VMX_INST_MEMREG_TYPE_REG;
>> + decode->reg1 = info.fields.reg1;
>> + }
>> + else
>> + {
>> + decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
>> + hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
>> + /* TODO: segment type check */
>
> Indeed. :)
>
>> + seg_base = seg.base;
>> +
>> + base = info.fields.base_reg_invalid ? 0 :
>> + reg_read(regs, info.fields.base_reg);
>> +
>> + index = info.fields.index_reg_invalid ? 0 :
>> + reg_read(regs, info.fields.index_reg);
>> +
>> + scale = 1 << info.fields.scaling;
>> +
>> + disp = __vmread(EXIT_QUALIFICATION);
>> +
>> + offset = base + index * scale + disp;
>> + if ( offset > seg.limit )
>
> DYM if ( offset + len > limit )?
>
> Would it be worth trying to fold this into the existing x86_emulate
> code, which already has careful memory access checks?
Can you give more details? Do you mean re-constructing this around
hvm_virtual_to_linear_addr?
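For what it's worth, a rough sketch of what reusing it could look like, assuming
hvm_virtual_to_linear_addr() keeps its current prototype (seg, reg, offset, bytes,
access_type, addr_size, linear_addr) -- please double-check the details:

    /* Sketch only: let hvm_virtual_to_linear_addr() do the limit check
     * (including the last byte of the access) and the long-mode/canonical
     * handling instead of open-coding it.  The access type should really
     * come from the caller (read for VMXON/VMPTRLD, write for VMPTRST). */
    unsigned int bytes = 1 << (info.fields.addr_size + 1);

    offset = base + index * scale + disp;
    if ( !hvm_virtual_to_linear_addr(sreg_to_index[info.fields.segment], &seg,
                                     offset, bytes, hvm_access_read,
                                     16 << info.fields.addr_size, /* 16/32/64 */
                                     &decode->mem) )
        goto gp_fault;
    decode->len = bytes;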
>
>> + goto gp_fault;
>> +
>> + decode->mem = seg_base + base + index * scale + disp;
>> + decode->len = 1 << (info.fields.addr_size + 1);
>> + }
>> +
>> + decode->reg2 = info.fields.reg2;
>> +
>> + return X86EMUL_OKAY;
>> +
>> +gp_fault:
>> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct segment_register cs;
>> +
>> + hvm_get_segment_register(v, x86_seg_cs, &cs);
>> +
>> + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
>> + !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
>> + (regs->eflags & X86_EFLAGS_VM) ||
>> + (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
>> + goto invalid_op;
>> +
>> + if ( (cs.sel & 3) > 0 )
>> + goto gp_fault;
>> +
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +
>> +gp_fault:
>> + hvm_inject_exception(TRAP_gp_fault, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
>> +{
>> + unsigned long eflags = regs->eflags;
>> + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
>> + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
>> +
>> + eflags &= ~mask;
>> +
>> + switch ( res ) {
>> + case VMSUCCEED:
>> + break;
>> + case VMFAIL_VALID:
>> + /* TODO: error number, useful for guest VMM debugging */
>> + eflags |= X86_EFLAGS_ZF;
>> + break;
>> + case VMFAIL_INVALID:
>> + default:
>> + eflags |= X86_EFLAGS_CF;
>> + break;
>> + }
>> +
>> + regs->eflags = eflags;
>> +}
>> +
>> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
>> +{
>> + int rc;
>> +
>> + if ( nest->svmcs )
>> + __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> + rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +#endif
>> +
>> + nest->vmcs_valid = 0;
>> +
>> + return X86EMUL_OKAY;
>> +}
>> +
>> +/*
>> + * VMX instructions handling
>> + */
>> +
>> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + struct vmx_inst_decoded decode;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( !is_nested_avail(v->domain) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>
> I think you could fold these checks and the goto target into
> decode_vmx_inst(), just to avoid repeating them in every
> vmx_nest_handle_* function.
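A sketch of what that folding could look like (untested, just to illustrate; the
VMXON-availability / guest_vmxon_pa checks would stay in the individual handlers):

    static int decode_vmx_inst(struct cpu_user_regs *regs,
                               struct vmx_inst_decoded *decode)
    {
        /* Do the common privilege check here so every vmx_nest_handle_*
         * starts with a single call; the exception is already injected
         * on failure. */
        int rc = vmx_inst_check_privilege(regs);

        if ( rc != X86EMUL_OKAY )
            return rc;

        /* ... existing operand decoding ... */
        return X86EMUL_OKAY;
    }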
>
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + nest->guest_vmxon_pa = gpa;
>> + nest->gvmcs_pa = 0;
>> + nest->vmcs_valid = 0;
>> +#if !CONFIG_VVMCS_MAPPING
>> + nest->vvmcs = alloc_xenheap_page();
>> + if ( !nest->vvmcs )
>> + {
>> + gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +#endif
>> + nest->svmcs = alloc_xenheap_page();
>> + if ( !nest->svmcs )
>> + {
>> + gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
>> + free_xenheap_page(nest->vvmcs);
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +
>> + /*
>> + * `fork' the host vmcs to shadow_vmcs
>> + * vmcs_lock is not needed since we are on current
>> + */
>> + nest->hvmcs = v->arch.hvm_vmx.vmcs;
>> + __vmpclear(virt_to_maddr(nest->hvmcs));
>> + memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
>> + __vmptrld(virt_to_maddr(nest->hvmcs));
>> + v->arch.hvm_vmx.launched = 0;
>> +
>> + vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + nest->guest_vmxon_pa = 0;
>> + __vmpclear(virt_to_maddr(nest->svmcs));
>> +
>> +#if !CONFIG_VVMCS_MAPPING
>> + free_xenheap_page(nest->vvmcs);
>> +#endif
>> + free_xenheap_page(nest->svmcs);
>
> These also need to be freed on domain teardown.
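A teardown helper along these lines would be needed (sketch only; the function
name and call site are illustrative):

    /* Sketch: release the nested-VMX pages when the vcpu goes away; would be
     * called from the vcpu/domain destroy path.  With CONFIG_VVMCS_MAPPING the
     * global vvmcs mapping would need unmapping instead of freeing. */
    static void vmx_nest_vcpu_destroy(struct vcpu *v)
    {
        struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;

        if ( !nest->guest_vmxon_pa )
            return;

    #if !CONFIG_VVMCS_MAPPING
        free_xenheap_page(nest->vvmcs);
    #endif
        free_xenheap_page(nest->svmcs);
        nest->guest_vmxon_pa = 0;
    }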
>
>> + vmreturn(regs, VMSUCCEED);
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
>> + {
>> + vmreturn(regs, VMFAIL_INVALID);
>> + goto out;
>> + }
>> +
>> + if ( nest->gvmcs_pa != gpa )
>> + {
>> + if ( nest->vmcs_valid )
>> + {
>> + rc = __clear_current_vvmcs(nest);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> + }
>> + nest->gvmcs_pa = gpa;
>> + ASSERT(nest->vmcs_valid == 0);
>> + }
>> +
>> +
>> + if ( !nest->vmcs_valid )
>> + {
>> +#if CONFIG_VVMCS_MAPPING
>> + unsigned long mfn;
>> + p2m_type_t p2mt;
>> +
>> + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
>> + nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
>> + nest->vvmcs = map_domain_page_global(mfn);
>> +#else
>> + rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +#endif
>> + nest->vmcs_valid = 1;
>> + }
>> +
>> + vmreturn(regs, VMSUCCEED);
>> +
>> +out:
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> +
>> + gpa = nest->gvmcs_pa;
>> +
>> + rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
>> + if ( rc != HVMCOPY_okay )
>> + return X86EMUL_EXCEPTION;
>> +
>> + vmreturn(regs, VMSUCCEED);
>> + return X86EMUL_OKAY;
>> +
>> +invalid_op:
>> + hvm_inject_exception(TRAP_invalid_op, 0, 0);
>> + return X86EMUL_EXCEPTION;
>> +}
>> +
>> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
>> +{
>> + struct vcpu *v = current;
>> + struct vmx_inst_decoded decode;
>> + struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
>> + unsigned long gpa = 0;
>> + int rc;
>> +
>> + if ( unlikely(!nest->guest_vmxon_pa) )
>> + goto invalid_op;
>> +
>> + rc = vmx_inst_check_privilege(regs);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + rc = decode_vmx_inst(regs, &decode);
>> + if ( rc != X86EMUL_OKAY )
>> + return rc;
>> +
>> + ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
>> + rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
>
> Is it guaranteed that decode.len is always <= sizeof gpa here, even
> with a malicious guest?
Reusing hvm_virtual_to_linear_addr, with the last byte of the access taken into
account, may be the best choice :)
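Until that rework, an explicit bound in the handlers would close the hole, e.g.
(sketch):

    /* Sketch: refuse operand sizes larger than the destination, so a
     * malicious guest cannot make the copy overrun gpa. */
    if ( decode.len > sizeof(gpa) )
    {
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return X86EMUL_EXCEPTION;
    }
    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);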
Thx, Eddie
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel