# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 519a74928bd407fb5f1f3d62f4e28493a0443611
# Parent 62b0b520ea53a7e3bd4e34dc226716a688be698a
[HVM] Non-flat protected mode HVM support.
This is now the full set of changes needed to eliminate the assumption
that segments in protected mode always have zero base addresses. At the
same time, it simplifies the instruction-length determination code used
for MMIO handling in HVM domains.
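As an illustrative sketch (not part of the patch itself): the core idea is that
a guest instruction address must now always be formed from the CS segment base,
rather than assuming a zero base in protected mode or special-casing "cs << 4"
in real mode. The helper below mirrors how handle_mmio() computes the address
in this changeset; the name guest_insn_address() is made up for illustration.

    /* Hypothetical helper, for illustration only: hvm_get_segment_base()
     * returns "cs << 4" in real mode, the descriptor base in protected
     * mode, and 0 for CS in long mode, so one expression covers all
     * guest modes. */
    static unsigned long guest_insn_address(struct vcpu *v,
                                            struct cpu_user_regs *regs)
    {
        return hvm_get_segment_base(v, seg_cs) + regs->eip;
    }

    /* Callers then pass a linear address, e.g. as handle_mmio() now does:
     *   inst_len = hvm_instruction_length(guest_insn_address(v, regs),
     *                                     hvm_guest_x86_mode(v));
     */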
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
xen/arch/x86/hvm/instrlen.c | 163 ++++++++++++++++------------------
xen/arch/x86/hvm/platform.c | 137 ++++++++++++++++++-----------
xen/arch/x86/hvm/svm/svm.c | 59 ++++++------
xen/arch/x86/hvm/vmx/vmx.c | 196 +++++++++++++++++++++++++++---------------
xen/include/asm-x86/hvm/hvm.h | 19 +---
5 files changed, 327 insertions(+), 247 deletions(-)
diff -r 62b0b520ea53 -r 519a74928bd4 xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c Tue Nov 28 11:43:39 2006 +0000
+++ b/xen/arch/x86/hvm/instrlen.c Tue Nov 28 11:45:54 2006 +0000
@@ -20,7 +20,6 @@
#include <xen/config.h>
#include <xen/sched.h>
#include <xen/mm.h>
-#include <asm/regs.h>
#include <asm-x86/x86_emulate.h>
/* read from guest memory */
@@ -121,9 +120,7 @@ static uint8_t opcode_table[256] = {
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps, ImplicitOps,
/* 0xB0 - 0xBF */
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
- 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xC0 - 0xC7 */
ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
@@ -195,54 +192,52 @@ static uint8_t twobyte_table[256] = {
};
/*
- * insn_fetch - fetch the next 1 to 4 bytes from instruction stream
- * @_type: u8, u16, u32, s8, s16, or s32
- * @_size: 1, 2, or 4 bytes
- */
-#define insn_fetch(_type, _size) \
-({ unsigned long _x, _ptr = _regs.eip; \
- if ( mode == X86EMUL_MODE_REAL ) _ptr += _regs.cs << 4; \
- rc = inst_copy_from_guest((unsigned char *)(&(_x)), _ptr, _size); \
- if ( rc != _size ) goto done; \
- _regs.eip += (_size); \
- length += (_size); \
- (_type)_x; \
+ * insn_fetch - fetch the next byte from instruction stream
+ */
+#define insn_fetch() \
+({ uint8_t _x; \
+ if ( length >= 15 ) \
+ return -1; \
+ if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
+ gdprintk(XENLOG_WARNING, \
+ "Cannot read from address %lx (eip %lx, mode %d)\n", \
+ pc, org_pc, mode); \
+ return -1; \
+ } \
+ pc += 1; \
+ length += 1; \
+ _x; \
})
/**
* hvm_instruction_length - returns the current instructions length
*
- * @regs: guest register state
+ * @org_pc: guest instruction pointer
* @mode: guest operating mode
*
* EXTERNAL this routine calculates the length of the current instruction
- * pointed to by eip. The guest state is _not_ changed by this routine.
- */
-int hvm_instruction_length(struct cpu_user_regs *regs, int mode)
+ * pointed to by org_pc. The guest state is _not_ changed by this routine.
+ */
+int hvm_instruction_length(unsigned long org_pc, int mode)
{
- uint8_t b, d, twobyte = 0, rex_prefix = 0;
- uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
- unsigned int op_bytes, ad_bytes, i;
- int rc = 0;
+ uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
+ unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
int length = 0;
- unsigned int tmp;
-
- /* Shadow copy of register state. Committed on successful emulation. */
- struct cpu_user_regs _regs = *regs;
+ unsigned long pc = org_pc;
switch ( mode )
{
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_PROT16:
- op_bytes = ad_bytes = 2;
+ op_bytes = op_default = ad_bytes = ad_default = 2;
break;
case X86EMUL_MODE_PROT32:
- op_bytes = ad_bytes = 4;
+ op_bytes = op_default = ad_bytes = ad_default = 4;
break;
#ifdef __x86_64__
case X86EMUL_MODE_PROT64:
- op_bytes = 4;
- ad_bytes = 8;
+ op_bytes = op_default = 4;
+ ad_bytes = ad_default = 8;
break;
#endif
default:
@@ -250,18 +245,18 @@ int hvm_instruction_length(struct cpu_us
}
/* Legacy prefixes. */
- for ( i = 0; i < 8; i++ )
- {
- switch ( b = insn_fetch(uint8_t, 1) )
+ for ( ; ; )
+ {
+ switch ( b = insn_fetch() )
{
case 0x66: /* operand-size override */
- op_bytes ^= 6; /* switch between 2/4 bytes */
+ op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
break;
case 0x67: /* address-size override */
if ( mode == X86EMUL_MODE_PROT64 )
- ad_bytes ^= 12; /* switch between 4/8 bytes */
+ ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
else
- ad_bytes ^= 6; /* switch between 2/4 bytes */
+ ad_bytes = ad_default ^ 6; /* switch between 2/4 bytes */
break;
case 0x2e: /* CS override */
case 0x3e: /* DS override */
@@ -273,22 +268,26 @@ int hvm_instruction_length(struct cpu_us
case 0xf3: /* REP/REPE/REPZ */
case 0xf2: /* REPNE/REPNZ */
break;
+#ifdef __x86_64__
+ case 0x40 ... 0x4f:
+ if ( mode == X86EMUL_MODE_PROT64 )
+ {
+ rex_prefix = b;
+ continue;
+ }
+ /* FALLTHRU */
+#endif
default:
goto done_prefixes;
}
+ rex_prefix = 0;
}
done_prefixes:
/* REX prefix. */
- if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
- {
- rex_prefix = b;
- if ( b & 8 )
- op_bytes = 8; /* REX.W */
- modrm_reg = (b & 4) << 1; /* REX.R */
- /* REX.B and REX.X do not need to be decoded. */
- b = insn_fetch(uint8_t, 1);
- }
+ if ( rex_prefix & 8 )
+ op_bytes = 8; /* REX.W */
+ /* REX.B, REX.R, and REX.X do not need to be decoded. */
/* Opcode byte(s). */
d = opcode_table[b];
@@ -298,7 +297,7 @@ done_prefixes:
if ( b == 0x0f )
{
twobyte = 1;
- b = insn_fetch(uint8_t, 1);
+ b = insn_fetch();
d = twobyte_table[b];
}
@@ -310,11 +309,11 @@ done_prefixes:
/* ModRM and SIB bytes. */
if ( d & ModRM )
{
- modrm = insn_fetch(uint8_t, 1);
- modrm_mod |= (modrm & 0xc0) >> 6;
- modrm_reg |= (modrm & 0x38) >> 3;
- modrm_rm |= (modrm & 0x07);
-
+ uint8_t modrm = insn_fetch();
+ uint8_t modrm_mod = (modrm & 0xc0) >> 6;
+ uint8_t modrm_rm = (modrm & 0x07);
+
+ modrm_reg = (modrm & 0x38) >> 3;
if ( modrm_mod == 3 )
{
gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n");
@@ -330,16 +329,16 @@ done_prefixes:
if ( modrm_rm == 6 )
{
length += 2;
- _regs.eip += 2; /* skip disp16 */
+ pc += 2; /* skip disp16 */
}
break;
case 1:
length += 1;
- _regs.eip += 1; /* skip disp8 */
+ pc += 1; /* skip disp8 */
break;
case 2:
length += 2;
- _regs.eip += 2; /* skip disp16 */
+ pc += 2; /* skip disp16 */
break;
}
}
@@ -350,33 +349,34 @@ done_prefixes:
{
case 0:
if ( (modrm_rm == 4) &&
- (((insn_fetch(uint8_t, 1)) & 7)
- == 5) )
+ ((insn_fetch() & 7) == 5) )
{
length += 4;
- _regs.eip += 4; /* skip disp32 specified by SIB.base */
+ pc += 4; /* skip disp32 specified by SIB.base */
}
else if ( modrm_rm == 5 )
{
length += 4;
- _regs.eip += 4; /* skip disp32 */
+ pc += 4; /* skip disp32 */
}
break;
case 1:
if ( modrm_rm == 4 )
{
- insn_fetch(uint8_t, 1);
+ length += 1;
+ pc += 1;
}
length += 1;
- _regs.eip += 1; /* skip disp8 */
+ pc += 1; /* skip disp8 */
break;
case 2:
if ( modrm_rm == 4 )
{
- insn_fetch(uint8_t, 1);
+ length += 1;
+ pc += 1;
}
length += 4;
- _regs.eip += 4; /* skip disp32 */
+ pc += 4; /* skip disp32 */
break;
}
}
@@ -397,15 +397,12 @@ done_prefixes:
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
/* NB. Immediates are sign-extended as necessary. */
- switch ( tmp )
- {
- case 1: insn_fetch(int8_t, 1); break;
- case 2: insn_fetch(int16_t, 2); break;
- case 4: insn_fetch(int32_t, 4); break;
- }
+ length += tmp;
+ pc += tmp;
break;
case SrcImmByte:
- insn_fetch(int8_t, 1);
+ length += 1;
+ pc += 1;
break;
}
@@ -414,13 +411,9 @@ done_prefixes:
switch ( b )
{
- case 0xa0 ... 0xa1: /* mov */
+ case 0xa0 ... 0xa3: /* mov */
length += ad_bytes;
- _regs.eip += ad_bytes; /* skip src displacement */
- break;
- case 0xa2 ... 0xa3: /* mov */
- length += ad_bytes;
- _regs.eip += ad_bytes; /* skip dst displacement */
+ pc += ad_bytes; /* skip src/dst displacement */
break;
case 0xf6 ... 0xf7: /* Grp3 */
switch ( modrm_reg )
@@ -429,23 +422,19 @@ done_prefixes:
/* Special case in Grp3: test has an immediate source operand. */
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
- switch ( tmp )
- {
- case 1: insn_fetch(int8_t, 1); break;
- case 2: insn_fetch(int16_t, 2); break;
- case 4: insn_fetch(int32_t, 4); break;
- }
- goto done;
+ length += tmp;
+ pc += tmp;
+ break;
}
break;
}
done:
- return length;
+ return length < 16 ? length : -1;
cannot_emulate:
gdprintk(XENLOG_WARNING,
- "Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
- b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
+ "Cannot emulate %02x at address %lx (%lx, mode %d)\n",
+ b, pc - 1, org_pc, mode);
return -1;
}
diff -r 62b0b520ea53 -r 519a74928bd4 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Tue Nov 28 11:43:39 2006 +0000
+++ b/xen/arch/x86/hvm/platform.c Tue Nov 28 11:45:54 2006 +0000
@@ -28,6 +28,7 @@
#include <xen/trace.h>
#include <xen/sched.h>
#include <asm/regs.h>
+#include <asm/x86_emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
@@ -168,13 +169,15 @@ long get_reg_value(int size, int index,
static inline unsigned char *check_prefix(unsigned char *inst,
struct hvm_io_op *mmio_op,
+ unsigned char *ad_size,
unsigned char *op_size,
+ unsigned char *seg_sel,
unsigned char *rex_p)
{
while ( 1 ) {
switch ( *inst ) {
/* rex prefix for em64t instructions */
- case 0x40 ... 0x4e:
+ case 0x40 ... 0x4f:
*rex_p = *inst;
break;
case 0xf3: /* REPZ */
@@ -191,12 +194,13 @@ static inline unsigned char *check_prefi
case 0x26: /* ES */
case 0x64: /* FS */
case 0x65: /* GS */
- //mmio_op->seg_sel = *inst;
+ *seg_sel = *inst;
break;
case 0x66: /* 32bit->16bit */
*op_size = WORD;
break;
case 0x67:
+ *ad_size = WORD;
break;
default:
return inst;
@@ -205,7 +209,7 @@ static inline unsigned char *check_prefi
}
}
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
+static inline unsigned long get_immediate(int ad_size, const unsigned char *inst, int op_size)
{
int mod, reg, rm;
unsigned long val = 0;
@@ -216,16 +220,19 @@ static inline unsigned long get_immediat
rm = *inst & 7;
inst++; //skip ModR/M byte
- if ( mod != 3 && rm == 4 ) {
+ if ( ad_size != WORD && mod != 3 && rm == 4 ) {
+ rm = *inst & 7;
inst++; //skip SIB byte
}
switch ( mod ) {
case 0:
- if ( rm == 5 || rm == 4 ) {
- if ( op16 )
+ if ( ad_size == WORD ) {
+ if ( rm == 6 )
inst = inst + 2; //disp16, skip 2 bytes
- else
+ }
+ else {
+ if ( rm == 5 )
inst = inst + 4; //disp32, skip 4 bytes
}
break;
@@ -233,7 +240,7 @@ static inline unsigned long get_immediat
inst++; //disp8, skip 1 byte
break;
case 2:
- if ( op16 )
+ if ( ad_size == WORD )
inst = inst + 2; //disp16, skip 2 bytes
else
inst = inst + 4; //disp32, skip 4 bytes
@@ -276,7 +283,6 @@ static void init_instruction(struct hvm_
mmio_op->instr = 0;
mmio_op->flags = 0;
- //mmio_op->seg_sel = 0;
mmio_op->operand[0] = 0;
mmio_op->operand[1] = 0;
@@ -346,25 +352,52 @@ static int reg_mem(unsigned char size, u
return DECODE_success;
}
-static int mmio_decode(int realmode, unsigned char *opcode,
- struct hvm_io_op *mmio_op, unsigned char *op_size)
+static int mmio_decode(int mode, unsigned char *opcode,
+ struct hvm_io_op *mmio_op,
+ unsigned char *ad_size, unsigned char *op_size,
+ unsigned char *seg_sel)
{
unsigned char size_reg = 0;
unsigned char rex = 0;
int index;
+ *ad_size = 0;
*op_size = 0;
+ *seg_sel = 0;
init_instruction(mmio_op);
- opcode = check_prefix(opcode, mmio_op, op_size, &rex);
-
- if ( realmode ) { /* meaning is reversed */
+ opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
+
+ switch ( mode ) {
+ case X86EMUL_MODE_REAL: /* meaning is reversed */
+ case X86EMUL_MODE_PROT16:
if ( *op_size == WORD )
*op_size = LONG;
else if ( *op_size == LONG )
*op_size = WORD;
else if ( *op_size == 0 )
*op_size = WORD;
+ if ( *ad_size == WORD )
+ *ad_size = LONG;
+ else if ( *ad_size == LONG )
+ *ad_size = WORD;
+ else if ( *ad_size == 0 )
+ *ad_size = WORD;
+ break;
+ case X86EMUL_MODE_PROT32:
+ if ( *op_size == 0 )
+ *op_size = LONG;
+ if ( *ad_size == 0 )
+ *ad_size = LONG;
+ break;
+#ifdef __x86_64__
+ case X86EMUL_MODE_PROT64:
+ if ( *op_size == 0 )
+ *op_size = rex & 0x8 ? QUAD : LONG;
+ if ( *ad_size == 0 )
+ *ad_size = QUAD;
+ break;
+#endif
}
/* the operands order in comments conforms to AT&T convention */
@@ -471,10 +504,10 @@ static int mmio_decode(int realmode, uns
/* opcode 0x83 always has a single byte operand */
if ( opcode[0] == 0x83 )
mmio_op->immediate =
- (signed char)get_immediate(realmode, opcode + 1, BYTE);
+ (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
else
mmio_op->immediate =
- get_immediate(realmode, opcode + 1, *op_size);
+ get_immediate(*ad_size, opcode + 1, *op_size);
mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
@@ -598,7 +631,7 @@ static int mmio_decode(int realmode, uns
mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
mmio_op->immediate =
- get_immediate(realmode, opcode + 1, *op_size);
+ get_immediate(*ad_size, opcode + 1, *op_size);
mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
return DECODE_success;
@@ -612,7 +645,7 @@ static int mmio_decode(int realmode, uns
mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
mmio_op->immediate =
- get_immediate(realmode, opcode + 1, *op_size);
+ get_immediate(*ad_size, opcode + 1, *op_size);
mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
return DECODE_success;
@@ -634,7 +667,7 @@ static int mmio_decode(int realmode, uns
mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
mmio_op->immediate =
- get_immediate(realmode, opcode + 1, *op_size);
+ get_immediate(*ad_size, opcode + 1, *op_size);
mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
return DECODE_success;
@@ -697,7 +730,7 @@ static int mmio_decode(int realmode, uns
GET_OP_SIZE_FOR_NONEBYTE(*op_size);
mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
mmio_op->immediate =
- (signed char)get_immediate(realmode, opcode + 1, BYTE);
+ (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
return DECODE_success;
}
@@ -866,15 +899,15 @@ static void mmio_operands(int type, unsi
}
#define GET_REPEAT_COUNT() \
- (mmio_op->flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1)
+ (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : regs->ecx) : 1)
void handle_mmio(unsigned long gpa)
{
unsigned long inst_addr;
struct hvm_io_op *mmio_op;
struct cpu_user_regs *regs;
- unsigned char inst[MAX_INST_LEN], op_size;
- int i, realmode, df, inst_len;
+ unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
+ int i, mode, df, inst_len;
struct vcpu *v = current;
mmio_op = &v->arch.hvm_vcpu.io_op;
@@ -886,27 +919,23 @@ void handle_mmio(unsigned long gpa)
df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
- inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v));
+ mode = hvm_guest_x86_mode(v);
+ inst_addr = hvm_get_segment_base(v, seg_cs) + regs->eip;
+ inst_len = hvm_instruction_length(inst_addr, mode);
if ( inst_len <= 0 )
{
printk("handle_mmio: failed to get instruction length\n");
domain_crash_synchronous();
}
- realmode = hvm_realmode(v);
- if ( realmode )
- inst_addr = regs->cs << 4;
- else
- inst_addr = hvm_get_segment_base(current, seg_cs);
- inst_addr += regs->eip;
-
memset(inst, 0, MAX_INST_LEN);
if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) {
printk("handle_mmio: failed to copy instruction\n");
domain_crash_synchronous();
}
- if ( mmio_decode(realmode, inst, mmio_op, &op_size) == DECODE_failure ) {
+ if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel)
+ == DECODE_failure ) {
printk("handle_mmio: failed to decode instruction\n");
printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len);
for ( i = 0; i < inst_len; i++ )
@@ -926,29 +955,39 @@ void handle_mmio(unsigned long gpa)
{
unsigned long count = GET_REPEAT_COUNT();
int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
- unsigned long addr = 0;
+ unsigned long addr;
int dir, size = op_size;
ASSERT(count);
/* determine non-MMIO address */
- if ( realmode ) {
- if ( ((regs->es << 4) + (regs->edi & 0xFFFF)) == gpa ) {
- dir = IOREQ_WRITE;
- addr = (regs->ds << 4) + (regs->esi & 0xFFFF);
- } else {
- dir = IOREQ_READ;
- addr = (regs->es << 4) + (regs->edi & 0xFFFF);
+ addr = regs->edi;
+ if ( ad_size == WORD )
+ addr &= 0xFFFF;
+ addr += hvm_get_segment_base(v, seg_es);
+ if ( addr == gpa )
+ {
+ enum segment seg;
+
+ dir = IOREQ_WRITE;
+ addr = regs->esi;
+ if ( ad_size == WORD )
+ addr &= 0xFFFF;
+ switch ( seg_sel )
+ {
+ case 0x26: seg = seg_es; break;
+ case 0x2e: seg = seg_cs; break;
+ case 0x36: seg = seg_ss; break;
+ case 0:
+ case 0x3e: seg = seg_ds; break;
+ case 0x64: seg = seg_fs; break;
+ case 0x65: seg = seg_gs; break;
+ default: domain_crash_synchronous();
}
- } else {
- if ( gpa == regs->edi ) {
- dir = IOREQ_WRITE;
- addr = regs->esi;
- } else {
- dir = IOREQ_READ;
- addr = regs->edi;
- }
- }
+ addr += hvm_get_segment_base(v, seg);
+ }
+ else
+ dir = IOREQ_READ;
if ( addr & (size - 1) )
gdprintk(XENLOG_WARNING,
diff -r 62b0b520ea53 -r 519a74928bd4 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Tue Nov 28 11:43:39 2006 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c Tue Nov 28 11:45:54 2006 +0000
@@ -511,18 +511,24 @@ unsigned long svm_get_ctrl_reg(struct vc
static unsigned long svm_get_segment_base(struct vcpu *v, enum segment seg)
{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ int long_mode = 0;
+
+#ifdef __x86_64__
+ long_mode = vmcb->cs.attributes.fields.l && (vmcb->efer & EFER_LMA);
+#endif
switch ( seg )
{
- case seg_cs: return v->arch.hvm_svm.vmcb->cs.base;
- case seg_ds: return v->arch.hvm_svm.vmcb->ds.base;
- case seg_es: return v->arch.hvm_svm.vmcb->es.base;
- case seg_fs: return v->arch.hvm_svm.vmcb->fs.base;
- case seg_gs: return v->arch.hvm_svm.vmcb->gs.base;
- case seg_ss: return v->arch.hvm_svm.vmcb->ss.base;
- case seg_tr: return v->arch.hvm_svm.vmcb->tr.base;
- case seg_gdtr: return v->arch.hvm_svm.vmcb->gdtr.base;
- case seg_idtr: return v->arch.hvm_svm.vmcb->idtr.base;
- case seg_ldtr: return v->arch.hvm_svm.vmcb->ldtr.base;
+ case seg_cs: return long_mode ? 0 : vmcb->cs.base;
+ case seg_ds: return long_mode ? 0 : vmcb->ds.base;
+ case seg_es: return long_mode ? 0 : vmcb->es.base;
+ case seg_fs: return vmcb->fs.base;
+ case seg_gs: return vmcb->gs.base;
+ case seg_ss: return long_mode ? 0 : vmcb->ss.base;
+ case seg_tr: return vmcb->tr.base;
+ case seg_gdtr: return vmcb->gdtr.base;
+ case seg_idtr: return vmcb->idtr.base;
+ case seg_ldtr: return vmcb->ldtr.base;
}
BUG();
return 0;
@@ -832,7 +838,6 @@ int start_svm(void)
hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
- hvm_funcs.realmode = svm_realmode;
hvm_funcs.paging_enabled = svm_paging_enabled;
hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
hvm_funcs.pae_enabled = svm_pae_enabled;
@@ -925,7 +930,7 @@ static void svm_do_general_protection_fa
printk("Huh? We got a GP Fault with an invalid IDTR!\n");
svm_dump_vmcb(__func__, vmcb);
svm_dump_regs(__func__, regs);
- svm_dump_inst(vmcb->rip);
+ svm_dump_inst(svm_rip2pointer(vmcb));
domain_crash(v->domain);
return;
}
@@ -1223,22 +1228,21 @@ static void svm_get_prefix_info(
/* Get the address of INS/OUTS instruction */
static inline int svm_get_io_address(
- struct vcpu *v,
- struct cpu_user_regs *regs, unsigned int dir,
+ struct vcpu *v, struct cpu_user_regs *regs,
+ unsigned int size, ioio_info_t info,
unsigned long *count, unsigned long *addr)
{
unsigned long reg;
unsigned int asize = 0;
unsigned int isize;
- int long_mode;
- ioio_info_t info;
+ int long_mode = 0;
segment_selector_t *seg = NULL;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- info.bytes = vmcb->exitinfo1;
-
+#ifdef __x86_64__
/* If we're in long mode, we shouldn't check the segment presence & limit */
long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
+#endif
/* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit.
* l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit.
@@ -1256,11 +1260,9 @@ static inline int svm_get_io_address(
isize --;
if (isize > 1)
- svm_get_prefix_info(vmcb, dir, &seg, &asize);
-
- ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
-
- if (dir == IOREQ_WRITE)
+ svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
+
+ if (info.fields.type == IOREQ_WRITE)
{
reg = regs->esi;
if (!seg) /* If no prefix, used DS. */
@@ -1289,6 +1291,8 @@ static inline int svm_get_io_address(
*addr = reg;
*count = regs->ecx;
}
+ if (!info.fields.rep)
+ *count = 1;
if (!long_mode) {
if (*addr > seg->limit)
@@ -1301,7 +1305,8 @@ static inline int svm_get_io_address(
*addr += seg->base;
}
}
-
+ else if (seg == &vmcb->fs || seg == &vmcb->gs)
+ *addr += seg->base;
return 1;
}
@@ -1351,7 +1356,7 @@ static void svm_io_instruction(struct vc
unsigned long addr, count;
int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
- if (!svm_get_io_address(v, regs, dir, &count, &addr))
+ if (!svm_get_io_address(v, regs, size, info, &count, &addr))
{
/* We failed to get a valid address, so don't do the IO operation -
* it would just get worse if we do! Hopefully the guest is handing
@@ -1364,10 +1369,6 @@ static void svm_io_instruction(struct vc
if (info.fields.rep)
{
pio_opp->flags |= REPZ;
- }
- else
- {
- count = 1;
}
/*
diff -r 62b0b520ea53 -r 519a74928bd4 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 28 11:43:39 2006 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 28 11:45:54 2006 +0000
@@ -503,23 +503,32 @@ static unsigned long vmx_get_ctrl_reg(st
static unsigned long vmx_get_segment_base(struct vcpu *v, enum segment seg)
{
- unsigned long base;
-
- BUG_ON(v != current);
+ unsigned long base = 0;
+ int long_mode = 0;
+
+ ASSERT(v == current);
+
+#ifdef __x86_64__
+ if ( vmx_long_mode_enabled(v) &&
+ (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) )
+ long_mode = 1;
+#endif
+
switch ( seg )
{
- case seg_cs: base = __vmread(GUEST_CS_BASE); break;
- case seg_ds: base = __vmread(GUEST_DS_BASE); break;
- case seg_es: base = __vmread(GUEST_ES_BASE); break;
+ case seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
+ case seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
+ case seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
case seg_fs: base = __vmread(GUEST_FS_BASE); break;
case seg_gs: base = __vmread(GUEST_GS_BASE); break;
- case seg_ss: base = __vmread(GUEST_SS_BASE); break;
+ case seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
case seg_tr: base = __vmread(GUEST_TR_BASE); break;
case seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
case seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
case seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
- default: BUG(); base = 0; break;
- }
+ default: BUG(); break;
+ }
+
return base;
}
@@ -635,7 +644,6 @@ static void vmx_setup_hvm_funcs(void)
hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
- hvm_funcs.realmode = vmx_realmode;
hvm_funcs.paging_enabled = vmx_paging_enabled;
hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
hvm_funcs.pae_enabled = vmx_pae_enabled;
@@ -949,63 +957,100 @@ static void vmx_do_invlpg(unsigned long
}
-static int check_for_null_selector(unsigned long eip, int inst_len, int dir)
-{
- unsigned char inst[MAX_INST_LEN];
- unsigned long sel;
- int i;
- int inst_copy_from_guest(unsigned char *, unsigned long, int);
-
- /* INS can only use ES segment register, and it can't be overridden */
- if ( dir == IOREQ_READ )
- {
- sel = __vmread(GUEST_ES_SELECTOR);
- return sel == 0 ? 1 : 0;
- }
-
- memset(inst, 0, MAX_INST_LEN);
- if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
- {
- gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
- domain_crash(current->domain);
+static int vmx_check_descriptor(int long_mode, unsigned long eip, int inst_len,
+ enum segment seg, unsigned long *base)
+{
+ enum vmcs_field ar_field, base_field;
+ u32 ar_bytes;
+
+ *base = 0;
+ if ( seg != seg_es )
+ {
+ unsigned char inst[MAX_INST_LEN];
+ int i;
+ extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
+
+ if ( !long_mode )
+ eip += __vmread(GUEST_CS_BASE);
+ memset(inst, 0, MAX_INST_LEN);
+ if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
+ {
+ gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
+ domain_crash(current->domain);
+ return 0;
+ }
+
+ for ( i = 0; i < inst_len; i++ )
+ {
+ switch ( inst[i] )
+ {
+ case 0xf3: /* REPZ */
+ case 0xf2: /* REPNZ */
+ case 0xf0: /* LOCK */
+ case 0x66: /* data32 */
+ case 0x67: /* addr32 */
+#ifdef __x86_64__
+ case 0x40 ... 0x4f: /* REX */
+#endif
+ continue;
+ case 0x2e: /* CS */
+ seg = seg_cs;
+ continue;
+ case 0x36: /* SS */
+ seg = seg_ss;
+ continue;
+ case 0x26: /* ES */
+ seg = seg_es;
+ continue;
+ case 0x64: /* FS */
+ seg = seg_fs;
+ continue;
+ case 0x65: /* GS */
+ seg = seg_gs;
+ continue;
+ case 0x3e: /* DS */
+ seg = seg_ds;
+ continue;
+ }
+ }
+ }
+
+ switch ( seg )
+ {
+ case seg_cs:
+ ar_field = GUEST_CS_AR_BYTES;
+ base_field = GUEST_CS_BASE;
+ break;
+ case seg_ds:
+ ar_field = GUEST_DS_AR_BYTES;
+ base_field = GUEST_DS_BASE;
+ break;
+ case seg_es:
+ ar_field = GUEST_ES_AR_BYTES;
+ base_field = GUEST_ES_BASE;
+ break;
+ case seg_fs:
+ ar_field = GUEST_FS_AR_BYTES;
+ base_field = GUEST_FS_BASE;
+ break;
+ case seg_gs:
+ ar_field = GUEST_GS_AR_BYTES;
+ base_field = GUEST_GS_BASE;
+ break;
+ case seg_ss:
+ ar_field = GUEST_SS_AR_BYTES;
+ base_field = GUEST_SS_BASE;
+ break;
+ default:
+ BUG();
return 0;
}
- for ( i = 0; i < inst_len; i++ )
- {
- switch ( inst[i] )
- {
- case 0xf3: /* REPZ */
- case 0xf2: /* REPNZ */
- case 0xf0: /* LOCK */
- case 0x66: /* data32 */
- case 0x67: /* addr32 */
- continue;
- case 0x2e: /* CS */
- sel = __vmread(GUEST_CS_SELECTOR);
- break;
- case 0x36: /* SS */
- sel = __vmread(GUEST_SS_SELECTOR);
- break;
- case 0x26: /* ES */
- sel = __vmread(GUEST_ES_SELECTOR);
- break;
- case 0x64: /* FS */
- sel = __vmread(GUEST_FS_SELECTOR);
- break;
- case 0x65: /* GS */
- sel = __vmread(GUEST_GS_SELECTOR);
- break;
- case 0x3e: /* DS */
- /* FALLTHROUGH */
- default:
- /* DS is the default */
- sel = __vmread(GUEST_DS_SELECTOR);
- }
- return sel == 0 ? 1 : 0;
- }
-
- return 0;
+ if ( !long_mode || seg == seg_fs || seg == seg_gs )
+ *base = __vmread(base_field);
+ ar_bytes = __vmread(ar_field);
+
+ return !(ar_bytes & 0x10000);
}
static void vmx_io_instruction(unsigned long exit_qualification,
@@ -1013,7 +1058,7 @@ static void vmx_io_instruction(unsigned
{
struct cpu_user_regs *regs;
struct hvm_io_op *pio_opp;
- unsigned long port, size;
+ unsigned int port, size;
int dir, df, vm86;
pio_opp = &current->arch.hvm_vcpu.io_op;
@@ -1044,21 +1089,32 @@ static void vmx_io_instruction(unsigned
dir = test_bit(3, &exit_qualification); /* direction */
if ( test_bit(4, &exit_qualification) ) { /* string instruction */
- unsigned long addr, count = 1;
+ unsigned long addr, count = 1, base;
+ u32 ar_bytes;
int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
-
+ int long_mode = 0;
+
+ ar_bytes = __vmread(GUEST_CS_AR_BYTES);
+#ifdef __x86_64__
+ if ( vmx_long_mode_enabled(current) && (ar_bytes & (1u<<13)) )
+ long_mode = 1;
+#endif
addr = __vmread(GUEST_LINEAR_ADDRESS);
/*
* In protected mode, guest linear address is invalid if the
* selector is null.
*/
- if ( !vm86 && check_for_null_selector(regs->eip, inst_len, dir) )
- addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
+ if ( !vmx_check_descriptor(long_mode, regs->eip, inst_len,
+ dir == IOREQ_WRITE ? seg_ds : seg_es,
+ &base) )
+ addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
pio_opp->flags |= REPZ;
- count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+ count = regs->ecx;
+ if ( !long_mode && (vm86 || !(ar_bytes & (1u<<14))) )
+ count &= 0xFFFF;
}
/*
diff -r 62b0b520ea53 -r 519a74928bd4 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Tue Nov 28 11:43:39 2006 +0000
+++ b/xen/include/asm-x86/hvm/hvm.h Tue Nov 28 11:45:54 2006 +0000
@@ -62,12 +62,13 @@ struct hvm_function_table {
struct vcpu *v, struct cpu_user_regs *r);
/*
* Examine specifics of the guest state:
- * 1) determine whether the guest is in real or vm8086 mode,
- * 2) determine whether paging is enabled,
- * 3) return the current guest control-register value
- * 4) return the current guest segment descriptor base
+ * 1) determine whether paging is enabled,
+ * 2) determine whether long mode is enabled,
+ * 3) determine whether PAE paging is enabled,
+ * 4) determine the mode the guest is running in,
+ * 5) return the current guest control-register value
+ * 6) return the current guest segment descriptor base
*/
- int (*realmode)(struct vcpu *v);
int (*paging_enabled)(struct vcpu *v);
int (*long_mode_enabled)(struct vcpu *v);
int (*pae_enabled)(struct vcpu *v);
@@ -128,12 +129,6 @@ hvm_load_cpu_guest_regs(struct vcpu *v,
}
static inline int
-hvm_realmode(struct vcpu *v)
-{
- return hvm_funcs.realmode(v);
-}
-
-static inline int
hvm_paging_enabled(struct vcpu *v)
{
return hvm_funcs.paging_enabled(v);
@@ -157,7 +152,7 @@ hvm_guest_x86_mode(struct vcpu *v)
return hvm_funcs.guest_x86_mode(v);
}
-int hvm_instruction_length(struct cpu_user_regs *regs, int mode);
+int hvm_instruction_length(unsigned long pc, int mode);
static inline void
hvm_update_host_cr3(struct vcpu *v)
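For completeness, a hedged sketch of the long-mode rule that both
svm_get_segment_base() and vmx_get_segment_base() implement above: code, data
and stack segment bases are treated as flat (zero) in 64-bit mode, while FS,
GS and the system segments keep their programmed base. The helper name and the
raw_base parameter are illustrative only, not part of the patch.

    /* Illustrative only; mirrors the switch statements added above. */
    static unsigned long long_mode_segment_base(enum segment seg,
                                                unsigned long raw_base)
    {
        switch ( seg )
        {
        case seg_cs:
        case seg_ds:
        case seg_es:
        case seg_ss:
            return 0;           /* architecturally flat in 64-bit mode */
        case seg_fs:
        case seg_gs:
            return raw_base;    /* FS/GS bases remain significant */
        default:
            return raw_base;    /* TR, GDTR, IDTR, LDTR keep their base */
        }
    }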