Memory-mapped and port I/O are currently broken under VMX when the
partition is running in VM8086 mode. The reason is that the instruction
decoding support uses 32-bit opcode/address decodes rather than 16-bit
decodes. This patch fixes that. In addition, the patch adds support for
decoding the "stos" instruction, because this is a frequently used way
to clear MMIO areas such as the screen.
As an aside, vmx_platform.c should really reuse x86_emulate.c as much
as possible.
Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
===== tools/ioemu/iodev/cpu.cc 1.7 vs edited =====
--- 1.7/tools/ioemu/iodev/cpu.cc 2005-03-28 11:56:48 -05:00
+++ edited/tools/ioemu/iodev/cpu.cc 2005-03-31 13:55:11 -05:00
@@ -51,7 +51,7 @@
if (req->state == STATE_IOREQ_READY) {
req->state = STATE_IOREQ_INPROCESS;
} else {
- BX_INFO(("False I/O requrest ... in-service already: %lx,
pvalid: %lx,port: %lx, data: %lx, count: %lx, size: %lx\n", req->state,
req->pdata_valid, req->addr, req->u.data, req->count, req->size));
+ BX_INFO(("False I/O request ... in-service already: %lx,
pvalid: %lx,port: %lx, data: %lx, count: %lx, size: %lx\n", req->state,
req->pdata_valid, req->addr, req->u.data, req->count, req->size));
req = NULL;
}
@@ -95,6 +95,8 @@
}
if (req->port_mm == 0){//port io
if(req->dir == IOREQ_READ){//read
+ //BX_INFO(("pio: <READ>addr:%llx, value:%llx, size:
%llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
+
if (!req->pdata_valid)
req->u.data = BX_INP(req->addr, req->size);
else {
@@ -107,6 +109,8 @@
}
}
} else if(req->dir == IOREQ_WRITE) {
+ //BX_INFO(("pio: <WRITE>addr:%llx, value:%llx, size:
%llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
+
if (!req->pdata_valid) {
BX_OUTP(req->addr, (dma_addr_t) req->u.data,
req->size);
} else {
@@ -123,20 +127,29 @@
} else if (req->port_mm == 1){//memory map io
if (!req->pdata_valid) {
if(req->dir == IOREQ_READ){//read
- BX_MEM_READ_PHYSICAL(req->addr, req->size,
&req->u.data);
- } else if(req->dir == IOREQ_WRITE)//write
- BX_MEM_WRITE_PHYSICAL(req->addr, req->size,
&req->u.data);
+ //BX_INFO(("mmio[value]: <READ> addr:%llx,
value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size,
req->count));
+
+ for (i = 0; i < req->count; i++) {
+ BX_MEM_READ_PHYSICAL(req->addr,
req->size, &req->u.data);
+ }
+ } else if(req->dir == IOREQ_WRITE) {//write
+ //BX_INFO(("mmio[value]: <WRITE> addr:%llx,
value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size,
req->count));
+
+ for (i = 0; i < req->count; i++) {
+ BX_MEM_WRITE_PHYSICAL(req->addr,
req->size, &req->u.data);
+ }
+ }
} else {
//handle movs
unsigned long tmp;
if (req->dir == IOREQ_READ) {
- //BX_INFO(("<READ>addr:%llx, pdata:%llx, size:
%x, count: %x\n", req->addr, req->u.pdata, req->size, req->count));
+ //BX_INFO(("mmio[pdata]: <READ>addr:%llx,
pdata:%llx, size: %x, count: %x\n", req->addr, req->u.pdata, req->size,
req->count));
for (i = 0; i < req->count; i++) {
BX_MEM_READ_PHYSICAL(req->addr + (sign
* i * req->size), req->size, &tmp);
BX_MEM_WRITE_PHYSICAL((dma_addr_t)
req->u.pdata + (sign * i * req->size), req->size, &tmp);
}
} else if (req->dir == IOREQ_WRITE) {
- //BX_INFO(("<WRITE>addr:%llx, pdata:%llx, size:
%x, count: %x\n", req->addr, req->u.pdata, req->size, req->count));
+ //BX_INFO(("mmio[pdata]: <WRITE>addr:%llx,
pdata:%llx, size: %x, count: %x\n", req->addr, req->u.pdata, req->size,
req->count));
for (i = 0; i < req->count; i++) {
BX_MEM_READ_PHYSICAL((dma_addr_t)req->u.pdata + (sign * i * req->size),
req->size, &tmp);
BX_MEM_WRITE_PHYSICAL(req->addr + (sign
* i * req->size), req->size, &tmp);
===== xen/arch/x86/vmx.c 1.35 vs edited =====
--- 1.35/xen/arch/x86/vmx.c 2005-03-25 08:46:18 -05:00
+++ edited/xen/arch/x86/vmx.c 2005-03-31 13:41:48 -05:00
@@ -294,13 +294,17 @@
vcpu_iodata_t *vio;
ioreq_t *p;
unsigned long addr;
- unsigned long eip;
+ unsigned long eip, cs, eflags;
+ int vm86;
__vmread(GUEST_EIP, &eip);
+ __vmread(GUEST_CS_SELECTOR, &cs);
+ __vmread(GUEST_EFLAGS, &eflags);
+ vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
VMX_DBG_LOG(DBG_LEVEL_1,
- "vmx_io_instruction: eip=%p, exit_qualification = %lx",
- eip, exit_qualification);
+ "vmx_io_instruction: vm86 %d, eip=%p:%p, exit_qualification = %lx",
+ vm86, cs, eip, exit_qualification);
if (test_bit(6, &exit_qualification))
addr = (exit_qualification >> 16) & (0xffff);
@@ -325,17 +329,29 @@
p->size = (exit_qualification & 7) + 1;
if (test_bit(4, &exit_qualification)) {
- unsigned long eflags;
-
- __vmread(GUEST_EFLAGS, &eflags);
p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
p->pdata_valid = 1;
- p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
- regs->esi
- : regs->edi);
+
+ if (vm86) {
+ unsigned long seg;
+ if (p->dir == IOREQ_WRITE) {
+ __vmread(GUEST_DS_SELECTOR, &seg);
+ p->u.pdata = (void *)
+ ((seg << 4) | (regs->esi & 0xFFFF));
+ } else {
+ __vmread(GUEST_ES_SELECTOR, &seg);
+ p->u.pdata = (void *)
+ ((seg << 4) | (regs->edi & 0xFFFF));
+ }
+ } else {
+ p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
+ regs->esi : regs->edi);
+ }
p->u.pdata = (void *) gva_to_gpa(p->u.data);
+
+
if (test_bit(5, &exit_qualification))
- p->count = regs->ecx;
+ p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
if ((p->u.data & PAGE_MASK) !=
((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
printk("stringio crosses page boundary!\n");
@@ -368,13 +384,20 @@
do_block();
}
+static int
+vm86assist(struct exec_domain *d)
+{
+ /* stay tuned ... */
+ return 0;
+}
+
#define CASE_GET_REG(REG, reg) \
case REG_ ## REG: value = regs->reg; break
/*
* Write to control registers
*/
-static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
+static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
{
unsigned long value;
unsigned long old_cr;
@@ -454,8 +477,21 @@
d->arch.arch_vmx.cpu_cr3, mfn);
/* undo the get_page done in the para virt case */
put_page_and_type(&frame_table[old_base_mfn]);
+ } else {
+ if ((value & X86_CR0_PE) == 0) {
+ unsigned long eip;
- }
+ __vmread(GUEST_EIP, &eip);
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "Disabling CR0.PE at %%eip 0x%lx", eip);
+ if (vm86assist(d)) {
+ __vmread(GUEST_EIP, &eip);
+ VMX_DBG_LOG(DBG_LEVEL_1,
+ "Transfering control to vm86assist %%eip 0x%lx", eip);
+ return 0; /* do not update eip! */
+ }
+ }
+ }
break;
}
case 3:
@@ -534,7 +570,9 @@
printk("invalid cr: %d\n", gp);
__vmx_bug(regs);
}
-}
+
+ return 1;
+}
#define CASE_SET_REG(REG, reg) \
case REG_ ## REG: \
@@ -575,7 +613,7 @@
VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
}
-static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs
*regs)
+static int vmx_cr_access(unsigned long exit_qualification, struct xen_regs
*regs)
{
unsigned int gp, cr;
unsigned long value;
@@ -584,8 +622,7 @@
case TYPE_MOV_TO_CR:
gp = exit_qualification & CONTROL_REG_ACCESS_REG;
cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
- mov_to_cr(gp, cr, regs);
- break;
+ return mov_to_cr(gp, cr, regs);
case TYPE_MOV_FROM_CR:
gp = exit_qualification & CONTROL_REG_ACCESS_REG;
cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
@@ -604,6 +641,7 @@
__vmx_bug(regs);
break;
}
+ return 1;
}
static inline void vmx_do_msr_read(struct xen_regs *regs)
@@ -619,7 +657,7 @@
}
/*
- * Need to use this exit to rescheule
+ * Need to use this exit to reschedule
*/
static inline void vmx_vmexit_do_hlt(void)
{
@@ -891,8 +929,8 @@
VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification
= %lx",
eip, inst_len, exit_qualification);
- vmx_cr_access(exit_qualification, &regs);
- __update_guest_eip(inst_len);
+ if (vmx_cr_access(exit_qualification, &regs))
+ __update_guest_eip(inst_len);
break;
}
case EXIT_REASON_DR_ACCESS:
===== xen/arch/x86/vmx_platform.c 1.11 vs edited =====
--- 1.11/xen/arch/x86/vmx_platform.c 2005-03-25 08:46:18 -05:00
+++ edited/xen/arch/x86/vmx_platform.c 2005-03-31 13:39:35 -05:00
@@ -55,6 +55,8 @@
__vmread(GUEST_ESP, &regs->esp);
__vmread(GUEST_EFLAGS, &regs->eflags);
__vmread(GUEST_CS_SELECTOR, &regs->cs);
+ __vmread(GUEST_DS_SELECTOR, &regs->ds);
+ __vmread(GUEST_ES_SELECTOR, &regs->es);
__vmread(GUEST_EIP, &regs->eip);
}
@@ -144,19 +146,27 @@
while (1) {
switch (*inst) {
case 0xf3: //REPZ
+ thread_inst->flags = REPZ;
+ break;
case 0xf2: //REPNZ
+ thread_inst->flags = REPNZ;
+ break;
case 0xf0: //LOCK
+ break;
case 0x2e: //CS
case 0x36: //SS
case 0x3e: //DS
case 0x26: //ES
case 0x64: //FS
case 0x65: //GS
+ thread_inst->seg_sel = *inst;
break;
case 0x66: //32bit->16bit
thread_inst->op_size = WORD;
break;
case 0x67:
+ printf("Not handling 0x67 (yet)\n");
+ domain_crash_synchronous();
break;
default:
return inst;
@@ -165,7 +175,7 @@
}
}
-static inline unsigned long get_immediate(const unsigned char *inst, int
op_size)
+static inline unsigned long get_immediate(int op16, const unsigned char *inst,
int op_size)
{
int mod, reg, rm;
unsigned long val = 0;
@@ -183,14 +193,21 @@
switch(mod) {
case 0:
if (rm == 5) {
- inst = inst + 4; //disp32, skip 4 bytes
+ if (op16)
+ inst = inst + 2; //disp16, skip 2 bytes
+ else
+ inst = inst + 4; //disp32, skip 4 bytes
}
break;
case 1:
inst++; //disp8, skip 1 byte
break;
case 2:
- inst = inst + 4; //disp32, skip 4 bytes
+ if (op16)
+ inst = inst + 2; //disp16, skip 2 bytes
+ else
+ inst = inst + 4; //disp32, skip 4 bytes
+ break;
}
for (i = 0; i < op_size; i++) {
val |= (*inst++ & 0xff) << (8 * i);
@@ -218,7 +235,21 @@
static int vmx_decode(const unsigned char *inst, struct instruction
*thread_inst)
{
- int index;
+ unsigned long eflags;
+ int index, vm86 = 0;
+
+ __vmread(GUEST_EFLAGS, &eflags);
+ if (eflags & X86_EFLAGS_VM)
+ vm86 = 1;
+
+ if (vm86) { /* meaning is reversed */
+ if (thread_inst->op_size == WORD)
+ thread_inst->op_size = LONG;
+ else if (thread_inst->op_size == LONG)
+ thread_inst->op_size = WORD;
+ else if (thread_inst->op_size == 0)
+ thread_inst->op_size = WORD;
+ }
switch(*inst) {
case 0x88:
@@ -258,7 +289,6 @@
printk("%x, This opcode hasn't been handled yet!", *inst);
return DECODE_failure;
/* Not handle it yet. */
-
case 0xa0:
/* mov byte to al */
thread_inst->op_size = BYTE;
@@ -291,7 +321,6 @@
/* movsb */
thread_inst->op_size = BYTE;
strcpy((char *)thread_inst->i_name, "movs");
-
return DECODE_success;
case 0xa5:
/* movsw/movsl */
@@ -299,16 +328,28 @@
} else {
thread_inst->op_size = LONG;
}
-
strcpy((char *)thread_inst->i_name, "movs");
-
return DECODE_success;
-
+ case 0xaa:
+ /* stosb */
+ thread_inst->op_size = BYTE;
+ strcpy((char *)thread_inst->i_name, "stosb");
+ return DECODE_success;
+ case 0xab:
+ /* stosw/stosl */
+ if (thread_inst->op_size == WORD) {
+ strcpy((char *)thread_inst->i_name, "stosw");
+ } else {
+ thread_inst->op_size = LONG;
+ strcpy((char *)thread_inst->i_name, "stosl");
+ }
+ return DECODE_success;
case 0xc6:
/* mov imm8 to m8 */
thread_inst->op_size = BYTE;
thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
- thread_inst->immediate = get_immediate((inst+1),
thread_inst->op_size);
+ thread_inst->immediate = get_immediate(vm86,
+ (inst+1), thread_inst->op_size);
break;
case 0xc7:
/* mov imm16/32 to m16/32 */
@@ -318,9 +359,9 @@
thread_inst->op_size = LONG;
thread_inst->operand[0] = mk_operand(LONG, 0, 0, IMMEDIATE);
}
- thread_inst->immediate = get_immediate((inst+1),
thread_inst->op_size);
+ thread_inst->immediate = get_immediate(vm86,
+ (inst+1), thread_inst->op_size);
break;
-
case 0x0f:
break;
default:
@@ -425,6 +466,7 @@
struct exec_domain *d = current;
vcpu_iodata_t *vio;
ioreq_t *p;
+ int vm86;
struct mi_per_cpu_info *mpci_p;
struct xen_regs *inst_decoder_regs;
extern long evtchn_send(int lport);
@@ -432,53 +474,59 @@
mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
inst_decoder_regs = mpci_p->inst_decoder_regs;
+
vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
-
if (vio == NULL) {
printk("bad shared page\n");
domain_crash_synchronous();
}
p = &vio->vp_ioreq;
-
+
+ vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM;
+
set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
p->dir = dir;
p->pdata_valid = pvalid;
- p->count = 1;
p->port_mm = 1;
p->size = inst_p->op_size;
p->addr = gpa;
p->u.data = value;
- // p->state = STATE_UPSTREAM_SENDING;
p->state = STATE_IOREQ_READY;
- // Try to use ins/outs' framework
- if (pvalid) {
- // Handle "movs"
- p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
- inst_decoder_regs->esi
- : inst_decoder_regs->edi);
- p->u.pdata = (void *) gva_to_gpa(p->u.data);
- p->count = inst_decoder_regs->ecx;
- inst_decoder_regs->ecx = 0;
+ if (inst_p->flags & REPZ) {
+ if (vm86)
+ p->count = inst_decoder_regs->ecx & 0xFFFF;
+ else
+ p->count = inst_decoder_regs->ecx;
p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0;
- }
+ } else
+ p->count = 1;
+
+ if (pvalid)
+ p->u.pdata = (void *) gva_to_gpa(p->u.data);
+
+#if 0
+ printf("send_mmio_req: eip 0x%lx:0x%lx, dir %d, pdata_valid %d, ",
+ inst_decoder_regs->cs, inst_decoder_regs->eip, p->dir, p->pdata_valid);
+ printf("port_mm %d, size %lld, addr 0x%llx, value 0x%lx, count %lld\n",
+ p->port_mm, p->size, p->addr, value, p->count);
+#endif
evtchn_send(IOPACKET_PORT);
do_block();
-
}
void handle_mmio(unsigned long va, unsigned long gpa)
{
- unsigned long eip;
- unsigned long inst_len;
+ unsigned long eip, eflags, cs;
+ unsigned long inst_len, inst_addr;
struct mi_per_cpu_info *mpci_p;
struct xen_regs *inst_decoder_regs;
struct instruction mmio_inst;
unsigned char inst[MAX_INST_LEN];
- int ret;
+ int vm86, ret;
mpci_p = &current->arch.arch_vmx.vmx_platform.mpci;
inst_decoder_regs = mpci_p->inst_decoder_regs;
@@ -486,13 +534,30 @@
__vmread(GUEST_EIP, &eip);
__vmread(INSTRUCTION_LEN, &inst_len);
+ __vmread(GUEST_EFLAGS, &eflags);
+ vm86 = eflags & X86_EFLAGS_VM;
+
+ if (vm86) {
+ __vmread(GUEST_CS_SELECTOR, &cs);
+ inst_addr = (cs << 4) | eip;
+ } else
+ inst_addr = eip; /* XXX should really look at GDT[cs].base too */
+
memset(inst, '0', MAX_INST_LEN);
- ret = inst_copy_from_guest(inst, eip, inst_len);
+ ret = inst_copy_from_guest(inst, inst_addr, inst_len);
if (ret != inst_len) {
printk("handle_mmio - EXIT: get guest instruction fault\n");
domain_crash_synchronous();
}
+#if 0
+ printk("handle_mmio: cs:eip 0x%lx:0x%lx(0x%lx): opcode",
+ cs, eip, inst_addr, inst_len);
+ for (ret = 0; ret < inst_len; ret++)
+ printk(" %02x", inst[ret]);
+ printk("\n");
+#endif
+
init_instruction(&mmio_inst);
if (vmx_decode(check_prefix(inst, &mmio_inst), &mmio_inst) ==
DECODE_failure)
@@ -506,7 +571,7 @@
if (read_from_mmio(&mmio_inst)) {
// Send the request and waiting for return value.
mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND;
- send_mmio_req(gpa, &mmio_inst, 0, 1, 0);
+ send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0);
return ;
} else {
printk("handle_mmio - EXIT: movz error!\n");
@@ -515,10 +580,32 @@
}
if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) {
- int tmp_dir;
+ unsigned long addr = 0;
+ int dir;
- tmp_dir = ((va == inst_decoder_regs->edi) ? IOREQ_WRITE : IOREQ_READ);
- send_mmio_req(gpa, &mmio_inst, 0, tmp_dir, 1);
+ if (vm86) {
+ unsigned long seg;
+
+ __vmread(GUEST_ES_SELECTOR, &seg);
+ if (((seg << 4) | (inst_decoder_regs->edi & 0xFFFF)) == va) {
+ dir = IOREQ_WRITE;
+ __vmread(GUEST_DS_SELECTOR, &seg);
+ addr = (seg << 4) | (inst_decoder_regs->esi & 0xFFFF);
+ } else {
+ dir = IOREQ_READ;
+ addr = (seg << 4) | (inst_decoder_regs->edi & 0xFFFF);
+ }
+ } else { /* XXX should really look at GDT[ds/es].base too */
+ if (va == inst_decoder_regs->edi) {
+ dir = IOREQ_WRITE;
+ addr = inst_decoder_regs->esi;
+ } else {
+ dir = IOREQ_READ;
+ addr = inst_decoder_regs->edi;
+ }
+ }
+
+ send_mmio_req(gpa, &mmio_inst, addr, dir, 1);
return;
}
@@ -529,7 +616,7 @@
if (read_from_mmio(&mmio_inst)) {
// Send the request and waiting for return value.
mpci_p->mmio_target = mmio_inst.operand[1];
- send_mmio_req(gpa, &mmio_inst, value, 1, 0);
+ send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0);
} else {
// Write to MMIO
if (mmio_inst.operand[0] & IMMEDIATE) {
@@ -541,9 +628,14 @@
} else {
domain_crash_synchronous();
}
- send_mmio_req(gpa, &mmio_inst, value, 0, 0);
+ send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0);
return;
}
+ }
+
+ if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) {
+ send_mmio_req(gpa, &mmio_inst,
+ inst_decoder_regs->eax, IOREQ_WRITE, 0);
}
domain_crash_synchronous();
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|