To: Qing He <qing.he@xxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH 06/16] vmx: nest: handling VMX instruction exits
From: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Date: Mon, 13 Sep 2010 12:10:01 +0100
Cc: "Dong, Eddie" <eddie.dong@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Mon, 13 Sep 2010 04:11:46 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1283959344-3837-7-git-send-email-qing.he@xxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1283959344-3837-1-git-send-email-qing.he@xxxxxxxxx> <1283959344-3837-7-git-send-email-qing.he@xxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.18 (2008-05-17)
At 16:22 +0100 on 08 Sep (1283962934), Qing He wrote:
> diff -r f1c1d3077337 xen/arch/x86/hvm/vmx/nest.c
> +/*
> + * VMX instructions support functions
> + */
> +
> +enum vmx_regs_enc {
> +    VMX_REG_RAX,
> +    VMX_REG_RCX,
> +    VMX_REG_RDX,
> +    VMX_REG_RBX,
> +    VMX_REG_RSP,
> +    VMX_REG_RBP,
> +    VMX_REG_RSI,
> +    VMX_REG_RDI,
> +#ifdef CONFIG_X86_64
> +    VMX_REG_R8,
> +    VMX_REG_R9,
> +    VMX_REG_R10,
> +    VMX_REG_R11,
> +    VMX_REG_R12,
> +    VMX_REG_R13,
> +    VMX_REG_R14,
> +    VMX_REG_R15,
> +#endif
> +};
> +
> +enum vmx_sregs_enc {
> +    VMX_SREG_ES,
> +    VMX_SREG_CS,
> +    VMX_SREG_SS,
> +    VMX_SREG_DS,
> +    VMX_SREG_FS,
> +    VMX_SREG_GS,
> +};
> +
> +enum x86_segment sreg_to_index[] = {
> +    [VMX_SREG_ES] = x86_seg_es,
> +    [VMX_SREG_CS] = x86_seg_cs,
> +    [VMX_SREG_SS] = x86_seg_ss,
> +    [VMX_SREG_DS] = x86_seg_ds,
> +    [VMX_SREG_FS] = x86_seg_fs,
> +    [VMX_SREG_GS] = x86_seg_gs,
> +};

Since you dislike adding new namespaces and translations, I'm sure you
can get rid of these. :)  It might even simplify some of the macros
below. 
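
For illustration only (a rough sketch, not something I've tested): a single
offset table into struct cpu_user_regs, indexed by the hardware's 4-bit
register encoding, could replace the VMX_REG_* enum, reg_read()/reg_write()
and the CASE_* macros in one go:

static const unsigned int vmx_gpr_offset[] = {
    offsetof(struct cpu_user_regs, eax), offsetof(struct cpu_user_regs, ecx),
    offsetof(struct cpu_user_regs, edx), offsetof(struct cpu_user_regs, ebx),
    offsetof(struct cpu_user_regs, esp), offsetof(struct cpu_user_regs, ebp),
    offsetof(struct cpu_user_regs, esi), offsetof(struct cpu_user_regs, edi),
#ifdef CONFIG_X86_64
    offsetof(struct cpu_user_regs, r8),  offsetof(struct cpu_user_regs, r9),
    offsetof(struct cpu_user_regs, r10), offsetof(struct cpu_user_regs, r11),
    offsetof(struct cpu_user_regs, r12), offsetof(struct cpu_user_regs, r13),
    offsetof(struct cpu_user_regs, r14), offsetof(struct cpu_user_regs, r15),
#endif
};

static unsigned long *decode_gpr(struct cpu_user_regs *regs, unsigned int index)
{
    /* The instruction-information register field is 4 bits wide; on 32-bit
     * builds the hardware only reports 0-7, matching the shorter table. */
    BUG_ON(index >= ARRAY_SIZE(vmx_gpr_offset));
    return (unsigned long *)((char *)regs + vmx_gpr_offset[index]);
}

reg_read(regs, i) then becomes *decode_gpr(regs, i), and reg_write() an
assignment through the same pointer.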

> +union vmx_inst_info {
> +    struct {
> +        unsigned int scaling           :2; /* bit 0-1 */
> +        unsigned int __rsvd0           :1; /* bit 2 */
> +        unsigned int reg1              :4; /* bit 3-6 */
> +        unsigned int addr_size         :3; /* bit 7-9 */
> +        unsigned int memreg            :1; /* bit 10 */
> +        unsigned int __rsvd1           :4; /* bit 11-14 */
> +        unsigned int segment           :3; /* bit 15-17 */
> +        unsigned int index_reg         :4; /* bit 18-21 */
> +        unsigned int index_reg_invalid :1; /* bit 22 */
> +        unsigned int base_reg          :4; /* bit 23-26 */
> +        unsigned int base_reg_invalid  :1; /* bit 27 */
> +        unsigned int reg2              :4; /* bit 28-31 */
> +    } fields;
> +    u32 word;
> +};
> +
> +struct vmx_inst_decoded {
> +#define VMX_INST_MEMREG_TYPE_MEMORY 0
> +#define VMX_INST_MEMREG_TYPE_REG    1
> +    int type;
> +    union {
> +        struct {
> +            unsigned long mem;
> +            unsigned int  len;
> +        };
> +        enum vmx_regs_enc reg1;
> +    };
> +
> +    enum vmx_regs_enc reg2;
> +};
> +
> +enum vmx_ops_result {
> +    VMSUCCEED,
> +    VMFAIL_VALID,
> +    VMFAIL_INVALID,
> +};
> +
> +#define CASE_SET_REG(REG, reg)      \
> +    case VMX_REG_ ## REG: regs->reg = value; break
> +#define CASE_GET_REG(REG, reg)      \
> +    case VMX_REG_ ## REG: value = regs->reg; break
> +
> +#define CASE_EXTEND_SET_REG         \
> +    CASE_EXTEND_REG(S)
> +#define CASE_EXTEND_GET_REG         \
> +    CASE_EXTEND_REG(G)
> +
> +#ifdef __i386__
> +#define CASE_EXTEND_REG(T)
> +#else
> +#define CASE_EXTEND_REG(T)          \
> +    CASE_ ## T ## ET_REG(R8, r8);   \
> +    CASE_ ## T ## ET_REG(R9, r9);   \
> +    CASE_ ## T ## ET_REG(R10, r10); \
> +    CASE_ ## T ## ET_REG(R11, r11); \
> +    CASE_ ## T ## ET_REG(R12, r12); \
> +    CASE_ ## T ## ET_REG(R13, r13); \
> +    CASE_ ## T ## ET_REG(R14, r14); \
> +    CASE_ ## T ## ET_REG(R15, r15)
> +#endif
> +
> +static unsigned long reg_read(struct cpu_user_regs *regs,
> +                              enum vmx_regs_enc index)
> +{
> +    unsigned long value = 0;
> +
> +    switch ( index ) {
> +    CASE_GET_REG(RAX, eax);
> +    CASE_GET_REG(RCX, ecx);
> +    CASE_GET_REG(RDX, edx);
> +    CASE_GET_REG(RBX, ebx);
> +    CASE_GET_REG(RBP, ebp);
> +    CASE_GET_REG(RSI, esi);
> +    CASE_GET_REG(RDI, edi);
> +    CASE_GET_REG(RSP, esp);
> +    CASE_EXTEND_GET_REG;
> +    default:
> +        break;
> +    }
> +
> +    return value;
> +}
> +
> +static void reg_write(struct cpu_user_regs *regs,
> +                      enum vmx_regs_enc index,
> +                      unsigned long value)
> +{
> +    switch ( index ) {
> +    CASE_SET_REG(RAX, eax);
> +    CASE_SET_REG(RCX, ecx);
> +    CASE_SET_REG(RDX, edx);
> +    CASE_SET_REG(RBX, ebx);
> +    CASE_SET_REG(RBP, ebp);
> +    CASE_SET_REG(RSI, esi);
> +    CASE_SET_REG(RDI, edi);
> +    CASE_SET_REG(RSP, esp);
> +    CASE_EXTEND_SET_REG;
> +    default:
> +        break;
> +    }
> +}
> +
> +static int decode_vmx_inst(struct cpu_user_regs *regs,
> +                           struct vmx_inst_decoded *decode)
> +{
> +    struct vcpu *v = current;
> +    union vmx_inst_info info;
> +    struct segment_register seg;
> +    unsigned long base, index, seg_base, disp, offset;
> +    int scale;
> +
> +    info.word = __vmread(VMX_INSTRUCTION_INFO);
> +
> +    if ( info.fields.memreg ) {
> +        decode->type = VMX_INST_MEMREG_TYPE_REG;
> +        decode->reg1 = info.fields.reg1;
> +    }
> +    else
> +    {
> +        decode->type = VMX_INST_MEMREG_TYPE_MEMORY;
> +        hvm_get_segment_register(v, sreg_to_index[info.fields.segment], &seg);
> +        /* TODO: segment type check */

Indeed. :)
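
For what it's worth, the minimal shape of such a check (field names per
struct segment_register; this deliberately ignores expand-down segments and
64-bit mode, where base/limit are mostly ignored, which is another reason to
reuse the emulator's existing access checks) might be:

/* Sketch only: refuse to use seg.base/seg.limit from an unusable segment. */
static int vmx_seg_is_usable(const struct segment_register *seg)
{
    if ( !seg->attr.fields.p )                /* not present */
        return 0;
    if ( !seg->attr.fields.s )                /* system segment */
        return 0;
    if ( (seg->attr.fields.type & 0x8) &&     /* code segment ... */
         !(seg->attr.fields.type & 0x2) )     /* ... that isn't readable */
        return 0;
    return 1;
}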

> +        seg_base = seg.base;
> +
> +        base = info.fields.base_reg_invalid ? 0 :
> +            reg_read(regs, info.fields.base_reg);
> +
> +        index = info.fields.index_reg_invalid ? 0 :
> +            reg_read(regs, info.fields.index_reg);
> +
> +        scale = 1 << info.fields.scaling;
> +
> +        disp = __vmread(EXIT_QUALIFICATION);
> +
> +        offset = base + index * scale + disp;
> +        if ( offset > seg.limit )

DYM if ( offset + len > limit )?

Would it be worth trying to fold this into the existing x86_emulate
code, which already has careful memory access checks?
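
I.e. something like (a sketch; it still ignores expand-down segments and
64-bit mode, which is another argument for reusing the emulator's checks):

        unsigned int len = 1 << (info.fields.addr_size + 1);

        /* The last byte of the access must fit within the limit, and the
         * offset arithmetic must not wrap. */
        if ( offset + len - 1 < offset || offset + len - 1 > seg.limit )
            goto gp_fault;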

> +            goto gp_fault;
> +
> +        decode->mem = seg_base + base + index * scale + disp;
> +        decode->len = 1 << (info.fields.addr_size + 1);
> +    }
> +
> +    decode->reg2 = info.fields.reg2;
> +
> +    return X86EMUL_OKAY;
> +
> +gp_fault:
> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +static int vmx_inst_check_privilege(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct segment_register cs;
> +
> +    hvm_get_segment_register(v, x86_seg_cs, &cs);
> +
> +    if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
> +         !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VMXE) ||
> +         (regs->eflags & X86_EFLAGS_VM) ||
> +         (hvm_long_mode_enabled(v) && cs.attr.fields.l == 0) )
> +        goto invalid_op;
> +
> +    if ( (cs.sel & 3) > 0 )
> +        goto gp_fault;
> +
> +    return X86EMUL_OKAY;
> +
> +invalid_op:
> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +
> +gp_fault:
> +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +static void vmreturn(struct cpu_user_regs *regs, enum vmx_ops_result res)
> +{
> +    unsigned long eflags = regs->eflags;
> +    unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
> +                         X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF;
> +
> +    eflags &= ~mask;
> +
> +    switch ( res ) {
> +    case VMSUCCEED:
> +        break;
> +    case VMFAIL_VALID:
> +        /* TODO: error number, useful for guest VMM debugging */
> +        eflags |= X86_EFLAGS_ZF;
> +        break;
> +    case VMFAIL_INVALID:
> +    default:
> +        eflags |= X86_EFLAGS_CF;
> +        break;
> +    }
> +
> +    regs->eflags = eflags;
> +}
> +
> +static int __clear_current_vvmcs(struct vmx_nest_struct *nest)
> +{
> +    int rc;
> +
> +    if ( nest->svmcs )
> +        __vmpclear(virt_to_maddr(nest->svmcs));
> +
> +#if !CONFIG_VVMCS_MAPPING
> +    rc = hvm_copy_to_guest_phys(nest->gvmcs_pa, nest->vvmcs, PAGE_SIZE);
> +    if ( rc != HVMCOPY_okay )
> +        return X86EMUL_EXCEPTION;
> +#endif
> +
> +    nest->vmcs_valid = 0;
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +/*
> + * VMX instructions handling
> + */
> +
> +int vmx_nest_handle_vmxon(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> +    struct vmx_inst_decoded decode;
> +    unsigned long gpa = 0;
> +    int rc;
> +
> +    if ( !is_nested_avail(v->domain) )
> +        goto invalid_op;
> + 
> +    rc = vmx_inst_check_privilege(regs);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;

I think you could fold these checks and the goto target into
decode_vmx_inst(), just to avoid repeating them in every
vmx_nest_handle_* function. 
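
Roughly (sketch only; VMXON's own is_nested_avail() / "not yet in VMX
operation" test is the odd one out and would either stay in its caller or be
selected by a flag):

static int decode_vmx_inst(struct cpu_user_regs *regs,
                           struct vmx_inst_decoded *decode)
{
    int rc;

    /* Common to every VMX instruction exit: CPL 0, CR4.VMXE, protected
     * mode, etc.  On failure the exception has already been injected. */
    rc = vmx_inst_check_privilege(regs);
    if ( rc != X86EMUL_OKAY )
        return rc;

    /* ... existing VMX_INSTRUCTION_INFO decode as above ... */
    return X86EMUL_OKAY;
}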

> +    rc = decode_vmx_inst(regs, &decode);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
> +    if ( rc != HVMCOPY_okay )
> +        return X86EMUL_EXCEPTION;
> +
> +    nest->guest_vmxon_pa = gpa;
> +    nest->gvmcs_pa = 0;
> +    nest->vmcs_valid = 0;
> +#if !CONFIG_VVMCS_MAPPING
> +    nest->vvmcs = alloc_xenheap_page();
> +    if ( !nest->vvmcs )
> +    {
> +        gdprintk(XENLOG_ERR, "nest: allocation for virtual vmcs failed\n");
> +        vmreturn(regs, VMFAIL_INVALID);
> +        goto out;
> +    }
> +#endif
> +    nest->svmcs = alloc_xenheap_page();
> +    if ( !nest->svmcs )
> +    {
> +        gdprintk(XENLOG_ERR, "nest: allocation for shadow vmcs failed\n");
> +        free_xenheap_page(nest->vvmcs);
> +        vmreturn(regs, VMFAIL_INVALID);
> +        goto out;
> +    }
> +
> +    /*
> +     * `fork' the host vmcs to shadow_vmcs
> +     * vmcs_lock is not needed since we are on current
> +     */
> +    nest->hvmcs = v->arch.hvm_vmx.vmcs;
> +    __vmpclear(virt_to_maddr(nest->hvmcs));
> +    memcpy(nest->svmcs, nest->hvmcs, PAGE_SIZE);
> +    __vmptrld(virt_to_maddr(nest->hvmcs));
> +    v->arch.hvm_vmx.launched = 0;
> +
> +    vmreturn(regs, VMSUCCEED);
> +
> +out:
> +    return X86EMUL_OKAY;
> +
> +invalid_op:
> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmxoff(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> +    int rc;
> +
> +    if ( unlikely(!nest->guest_vmxon_pa) )
> +        goto invalid_op;
> +
> +    rc = vmx_inst_check_privilege(regs);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    nest->guest_vmxon_pa = 0;
> +    __vmpclear(virt_to_maddr(nest->svmcs));
> +
> +#if !CONFIG_VVMCS_MAPPING
> +    free_xenheap_page(nest->vvmcs);
> +#endif
> +    free_xenheap_page(nest->svmcs);

These also need to be freed on domain teardown.
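
Something like the following, called from the VMX vcpu-destroy path (the
function name here is made up; with CONFIG_VVMCS_MAPPING the vvmcs mapping
would need unmapping rather than freeing):

static void vmx_nest_destroy(struct vcpu *v)
{
    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;

    /* Nothing to free if the guest never did VMXON, or already did VMXOFF. */
    if ( !nest->guest_vmxon_pa )
        return;

#if !CONFIG_VVMCS_MAPPING
    free_xenheap_page(nest->vvmcs);
#endif
    free_xenheap_page(nest->svmcs);
    nest->guest_vmxon_pa = 0;
}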

> +    vmreturn(regs, VMSUCCEED);
> +    return X86EMUL_OKAY;
> +
> +invalid_op:
> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmptrld(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct vmx_inst_decoded decode;
> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> +    unsigned long gpa = 0;
> +    int rc;
> +
> +    if ( unlikely(!nest->guest_vmxon_pa) )
> +        goto invalid_op;
> +
> +    rc = vmx_inst_check_privilege(regs);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    rc = decode_vmx_inst(regs, &decode);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);
> +    if ( rc != HVMCOPY_okay )
> +        return X86EMUL_EXCEPTION;
> +
> +    if ( gpa == nest->guest_vmxon_pa || gpa & 0xfff )
> +    {
> +        vmreturn(regs, VMFAIL_INVALID);
> +        goto out;
> +    }
> +
> +    if ( nest->gvmcs_pa != gpa )
> +    {
> +        if ( nest->vmcs_valid )
> +        {
> +            rc = __clear_current_vvmcs(nest);
> +            if ( rc != X86EMUL_OKAY )
> +                return rc;
> +        }
> +        nest->gvmcs_pa = gpa;
> +        ASSERT(nest->vmcs_valid == 0);
> +    }
> +
> +
> +    if ( !nest->vmcs_valid )
> +    {
> +#if CONFIG_VVMCS_MAPPING
> +        unsigned long mfn;
> +        p2m_type_t p2mt;
> +
> +        mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
> +                               nest->gvmcs_pa >> PAGE_SHIFT, &p2mt));
> +        nest->vvmcs = map_domain_page_global(mfn);
> +#else
> +        rc = hvm_copy_from_guest_phys(nest->vvmcs, nest->gvmcs_pa, PAGE_SIZE);
> +        if ( rc != HVMCOPY_okay )
> +            return X86EMUL_EXCEPTION;
> +#endif
> +        nest->vmcs_valid = 1;
> +    }
> +
> +    vmreturn(regs, VMSUCCEED);
> +
> +out:
> +    return X86EMUL_OKAY;
> +
> +invalid_op:
> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmptrst(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct vmx_inst_decoded decode;
> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> +    unsigned long gpa = 0;
> +    int rc;
> +
> +    if ( unlikely(!nest->guest_vmxon_pa) )
> +        goto invalid_op;
> +
> +    rc = vmx_inst_check_privilege(regs);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    rc = decode_vmx_inst(regs, &decode);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> +
> +    gpa = nest->gvmcs_pa;
> +
> +    rc = hvm_copy_to_guest_virt(decode.mem, &gpa, decode.len, 0);
> +    if ( rc != HVMCOPY_okay )
> +        return X86EMUL_EXCEPTION;
> +
> +    vmreturn(regs, VMSUCCEED);
> +    return X86EMUL_OKAY;
> +
> +invalid_op:
> +    hvm_inject_exception(TRAP_invalid_op, 0, 0);
> +    return X86EMUL_EXCEPTION;
> +}
> +
> +int vmx_nest_handle_vmclear(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *v = current;
> +    struct vmx_inst_decoded decode;
> +    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
> +    unsigned long gpa = 0;
> +    int rc;
> +
> +    if ( unlikely(!nest->guest_vmxon_pa) )
> +        goto invalid_op;
> +
> +    rc = vmx_inst_check_privilege(regs);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    rc = decode_vmx_inst(regs, &decode);
> +    if ( rc != X86EMUL_OKAY )
> +        return rc;
> +
> +    ASSERT(decode.type == VMX_INST_MEMREG_TYPE_MEMORY);
> +    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);

Is it guaranteed that decode.len is always <= sizeof gpa here, even with
a malicious guest?
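
If not, a defensive clamp before the copy (purely illustrative) would at
least keep the on-stack gpa safe:

    /* Never copy more than the destination holds, whatever the
     * instruction-information field claims. */
    if ( decode.len > sizeof(gpa) )
        decode.len = sizeof(gpa);
    rc = hvm_copy_from_guest_virt(&gpa, decode.mem, decode.len, 0);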

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Principal Software Engineer, XenServer Engineering
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

