> Subject: [Xen-devel] [PATCH] Augment vmxassist emulation support and
> more accurate realmode GDT addressing
>
> This patch allows Solaris 10 (and all S10 updates) to boot and run on
> Intel processors.
How much testing have you given this patch with other OSes? Vmxassist is
known to be quite fragile, and I'd feel much happier if you'd tested
with a good selection of Windows, Linux and *BSD versions. The various
syslinux modules Ubuntu uses have been a particular problem.
Thanks,
Ian
> --
>
> -----------------------------------------------------
> Russ Blaine | Solaris Kernel | russell.blaine@xxxxxxx
>
> # HG changeset patch
> # User russell.blaine@xxxxxxx
> # Date 1187906490 25200
> # Node ID a77889d180ea0c8e0dd1297e394a755968de45cb
> # Parent 149ca6d04923ea8b2b7b9bbe9c9f983974e40df9
> Augment vmxassist emulation support and more accurate realmode GDT
> addressing
>
> vmxassist does not support all instructions needed for the transition
> to and
> from real mode when running non-Windows/non-Linux HVM guests. Add
> support needed
> to allow Solaris multiboot (Solaris 10 and S10 updates) to boot. Also,
> modify
> how the emulator finds the GDT while in real mode. The emulator
> currently reads
> segment descriptors out of the GDT in guest memory every time it
> calculates an
> address. To more accurately reflect what actual hardware does, the
> emulator
> should use the hidden portion of the segment registers to find the
> address of
> the GDT. In addition, the current code assumes that the GDT resides on
> a single
> physical page of memory. To accommodate larger GDTs, the emulator should
> calculate the virtual address of the needed entry in the GDT before
> converting
> it to a physical address for reading. The vmxassist tool used to help
> HVM guests
> transition to and from real mode has some deficiencies and
> inaccuracies which
> prevent certain
>
> Signed-off-by: Russell Blaine <russell.blaine@xxxxxxx>
>
> diff --git a/tools/firmware/vmxassist/vm86.c
> b/tools/firmware/vmxassist/vm86.c
> --- a/tools/firmware/vmxassist/vm86.c
> +++ b/tools/firmware/vmxassist/vm86.c
> @@ -50,10 +50,14 @@ char *states[] = {
> };
>
> static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si",
> "di" };
> +static char *srnames[] = { "es", "cs", "ss", "ds", "fs", "gs",
"invl",
> "invl" };
> #endif /* DEBUG */
>
> #define PDE_PS (1 << 7)
> #define PT_ENTRY_PRESENT 0x1
> +
> +static void load_or_clear_seg(unsigned long, uint32_t *, uint32_t *,
> + union vmcs_arbytes *);
>
> /* We only support access to <=4G physical memory due to 1:1 mapping
> */
> static uint64_t
> @@ -93,7 +97,8 @@ guest_linear_to_phys(uint32_t base)
>
> if (l2_mfn & 0xf00000000ULL) {
> printf("l2 page above 4G\n");
> - cpuid_addr_value(l2_mfn + 8 * ((base >> 21) &
0x1ff),
> &l1_mfn);
> + cpuid_addr_value(l2_mfn + 8 * ((base >> 21) &
0x1ff),
> + &l1_mfn);
> } else
> l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21)
&
> 0x1ff];
> if (!(l1_mfn & PT_ENTRY_PRESENT))
> @@ -108,7 +113,8 @@ guest_linear_to_phys(uint32_t base)
>
> if (l1_mfn & 0xf00000000ULL) {
> printf("l1 page above 4G\n");
> - cpuid_addr_value(l1_mfn + 8 * ((base >> 12) &
0x1ff),
> &l0_mfn);
> + cpuid_addr_value(l1_mfn + 8 * ((base >> 12) &
0x1ff),
> + &l0_mfn);
> } else
> l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12)
&
> 0x1ff];
> if (!(l0_mfn & PT_ENTRY_PRESENT))
> @@ -123,7 +129,8 @@ static unsigned
> static unsigned
> address(struct regs *regs, unsigned seg, unsigned off)
> {
> - uint64_t gdt_phys_base;
> + uint64_t gdt_entry_pa;
> + unsigned gdt_entry_va;
> unsigned long long entry;
> unsigned seg_base, seg_limit;
> unsigned entry_low, entry_high;
> @@ -139,12 +146,32 @@ address(struct regs *regs, unsigned seg,
> (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
> return ((seg & 0xFFFF) << 4) + off;
>
> - gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
> - if (gdt_phys_base != (uint32_t)gdt_phys_base) {
> + if (mode == VM86_PROTECTED_TO_REAL && !(oldctx.cr0 &
> (CR0_PG|CR0_PE))) {
> + if (seg == regs->cs)
> + return oldctx.cs_base + off;
> + if (seg == regs->ves)
> + return oldctx.es_base + off;
> + if (seg == regs->vds)
> + return oldctx.ds_base + off;
> + if (seg == regs->uss)
> + return oldctx.ss_base + off;
> + if (seg == regs->vfs)
> + return oldctx.fs_base + off;
> + if (seg == regs->vgs)
> + return oldctx.gs_base + off;
> +
> + dump_regs(regs);
> + panic("address(): unknown segment selector 0x%x\n",
seg);
> + }
> +
> + gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base
> +
> + (seg >> 3));
> + gdt_entry_pa = guest_linear_to_phys(gdt_entry_va);
> + if (gdt_entry_pa != (uint32_t)gdt_entry_pa) {
> printf("gdt base address above 4G\n");
> - cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3),
&entry);
> + cpuid_addr_value(gdt_entry_pa, &entry);
> } else
> - entry = ((unsigned long long *)(long)gdt_phys_base)[seg
>>
> 3];
> + entry = *(unsigned long long *)(long)gdt_entry_pa;
>
> entry_high = entry >> 32;
> entry_low = entry & 0xFFFFFFFF;
> @@ -688,7 +715,8 @@ movcr(struct regs *regs, unsigned prefix
> }
> break;
> case 0x22: /* mov Cd, Rd */
> - TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d",
cr));
> + TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d
> [[0x%x]]",
> + cr, getreg32(regs, modrm)));
> switch (cr) {
> case 0:
> oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE |
> CR0_NE);
> @@ -815,16 +843,83 @@ pop(struct regs *regs, unsigned prefix,
> write16(addr, pop16(regs));
> TRACE((regs, regs->eip - eip, "pop *0x%x", addr));
> break;
> -
> - /* other pop opcodes ... */
> + default: /* other pop opcodes ... */
> + return (0);
> }
>
> return 1;
> +}
> +
> +static void
> +push(struct regs *regs, unsigned prefix, unsigned opc)
> +{
> + unsigned eip = regs->eip - 1;
> + unsigned data;
> +
> + if (prefix & DATA32) {
> + data = getreg32(regs, opc & 0xF);
> + push32(regs, data);
> + } else {
> + data = getreg16(regs, opc & 0xF);
> + push16(regs, data);
> + }
> +
> + TRACE((regs, regs->eip - eip, "push%s %s%s (val 0x%x)",
> + (prefix & DATA32) ? "l" : "",
> + (prefix & DATA32) ? "e" : "", rnames[opc & 0xF],
> data));
> +}
> +
> +static int
> +mov_from_seg(struct regs *regs, unsigned prefix, unsigned opc)
> +{
> + unsigned eip = regs->eip - 1;
> + unsigned modrm = fetch8(regs);
> + unsigned r = modrm & 3; /* dest reg is in r/m field */
> + unsigned data;
> +
> + if ((modrm & 0xC0) != 0xC0) /* reg destinations only. memory
> unimpl */
> + return 0;
> +
> + switch ((modrm & 0x38) >> 3) { /* source reg is in reg field
> */
> + case 0: /* es */
> + data = regs->ves;
> + break;
> +
> + case 1: /* cs */
> + data = regs->cs;
> + break;
> +
> + case 2: /* ss */
> + data = regs->uss;
> + break;
> +
> + case 3: /* ds */
> + data = regs->vds;
> + break;
> +
> + case 4: /* fs */
> + data = regs->vfs;
> + break;
> +
> + case 5: /* gs */
> + data = regs->vgs;
> + break;
> +
> + default:
> + return 0;
> + }
> +
> + TRACE((regs, regs->eip - eip, "mov %%%s %%%s (val 0x%x)\n",
> + srnames[(modrm & 0x38) >> 3], rnames[r], data));
> +
> + setreg16(regs, r, data);
> + return 1;
> }
>
> static int
> mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc)
> {
> + unsigned eip = regs->eip - 1;
> unsigned modrm = fetch8(regs);
>
> /*
> @@ -836,54 +931,130 @@ mov_to_seg(struct regs *regs, unsigned p
> mode != VM86_PROTECTED_TO_REAL)
> return 0;
>
> - /* Register source only. */
> - if ((modrm & 0xC0) != 0xC0)
> - goto fail;
> -
> - switch ((modrm & 0x38) >> 3) {
> - case 0: /* es */
> - regs->ves = getreg16(regs, modrm);
> - if (mode == VM86_PROTECTED_TO_REAL)
> - return 1;
> - saved_rm_regs.ves = 0;
> - oldctx.es_sel = regs->ves;
> - return 1;
> -
> - /* case 1: cs */
> -
> - case 2: /* ss */
> - regs->uss = getreg16(regs, modrm);
> - if (mode == VM86_PROTECTED_TO_REAL)
> - return 1;
> - saved_rm_regs.uss = 0;
> - oldctx.ss_sel = regs->uss;
> - return 1;
> - case 3: /* ds */
> - regs->vds = getreg16(regs, modrm);
> - if (mode == VM86_PROTECTED_TO_REAL)
> - return 1;
> - saved_rm_regs.vds = 0;
> - oldctx.ds_sel = regs->vds;
> - return 1;
> - case 4: /* fs */
> - regs->vfs = getreg16(regs, modrm);
> - if (mode == VM86_PROTECTED_TO_REAL)
> - return 1;
> - saved_rm_regs.vfs = 0;
> - oldctx.fs_sel = regs->vfs;
> - return 1;
> - case 5: /* gs */
> - regs->vgs = getreg16(regs, modrm);
> - if (mode == VM86_PROTECTED_TO_REAL)
> - return 1;
> - saved_rm_regs.vgs = 0;
> - oldctx.gs_sel = regs->vgs;
> - return 1;
> - }
> -
> - fail:
> - printf("%s:%d: missed opcode %02x %02x\n",
> - __FUNCTION__, __LINE__, opc, modrm);
> + if ((modrm & 0xC0) == 0xC0) /* register source */
> + {
> + TRACE((regs, regs->eip - eip, "mov %s %s",
> + rnames[modrm & 0x7],
> + srnames[(modrm & 0x38) >> 3]));
> +
> + switch ((modrm & 0x38) >> 3) {
> + case 0: /* es */
> + regs->ves = getreg16(regs, modrm);
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.ves = 0;
> + oldctx.es_sel = regs->ves;
> + load_or_clear_seg(oldctx.es_sel,
> &oldctx.es_base,
> + &oldctx.es_limit, &oldctx.es_arbytes);
> + return 1;
> +
> + /* case 1: cs */
> +
> + case 2: /* ss */
> + regs->uss = getreg16(regs, modrm);
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.uss = 0;
> + oldctx.ss_sel = regs->uss;
> + load_or_clear_seg(oldctx.ss_sel,
> &oldctx.ss_base,
> + &oldctx.ss_limit, &oldctx.ss_arbytes);
> + return 1;
> + case 3: /* ds */
> + regs->vds = getreg16(regs, modrm);
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vds = 0;
> + oldctx.ds_sel = regs->vds;
> + load_or_clear_seg(oldctx.ds_sel,
> &oldctx.ds_base,
> + &oldctx.ds_limit, &oldctx.ds_arbytes);
> + return 1;
> + case 4: /* fs */
> + regs->vfs = getreg16(regs, modrm);
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vfs = 0;
> + oldctx.fs_sel = regs->vfs;
> + load_or_clear_seg(oldctx.fs_sel,
> &oldctx.fs_base,
> + &oldctx.fs_limit, &oldctx.fs_arbytes);
> + return 1;
> + case 5: /* gs */
> + regs->vgs = getreg16(regs, modrm);
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vgs = 0;
> + oldctx.gs_sel = regs->vgs;
> + load_or_clear_seg(oldctx.gs_sel,
> &oldctx.gs_base,
> + &oldctx.gs_limit, &oldctx.gs_arbytes);
> + return 1;
> + default:
> + break;
> + }
> + } else if ((modrm & 0xC0) == 0) /* memory source */
> + {
> + unsigned addr = operand(prefix, regs, modrm);
> + unsigned data = read16(addr);
> +
> + TRACE((regs, regs->eip - eip, "mov [0x%x] %s", addr,
> + srnames[(modrm & 0x38) >> 3]));
> +
> + switch ((modrm & 0x38) >> 3) {
> + case 0: /* es */
> + regs->ves = data;
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.ves = 0;
> + oldctx.es_sel = regs->ves;
> + load_or_clear_seg(oldctx.es_sel,
> &oldctx.es_base,
> + &oldctx.es_limit, &oldctx.es_arbytes);
> + return 1;
> +
> + case 1: /* cs */
> + break;
> +
> + case 2: /* ss */
> + regs->uss = data;
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.uss = 0;
> + oldctx.ss_sel = regs->uss;
> + load_or_clear_seg(oldctx.ss_sel,
> &oldctx.ss_base,
> + &oldctx.ss_limit, &oldctx.ss_arbytes);
> + return 1;
> + case 3: /* ds */
> + regs->vds = data;
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vds = 0;
> + oldctx.ds_sel = regs->vds;
> + load_or_clear_seg(oldctx.ds_sel,
> &oldctx.ds_base,
> + &oldctx.ds_limit, &oldctx.ds_arbytes);
> + return 1;
> + case 4: /* fs */
> + regs->vfs = data;
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vfs = 0;
> + oldctx.fs_sel = regs->vfs;
> + load_or_clear_seg(oldctx.fs_sel,
> &oldctx.fs_base,
> + &oldctx.fs_limit, &oldctx.fs_arbytes);
> + return 1;
> + case 5: /* gs */
> + regs->vgs = data;
> + if (mode == VM86_PROTECTED_TO_REAL)
> + return 1;
> + saved_rm_regs.vgs = 0;
> + oldctx.gs_sel = regs->vgs;
> + load_or_clear_seg(oldctx.gs_sel,
> &oldctx.gs_base,
> + &oldctx.gs_limit, &oldctx.gs_arbytes);
> + return 1;
> + default:
> + break;
> + }
> +
> + }
> +
> + TRACE((regs, regs->eip - eip, "%s: missed opcode %02x modrm
> %02x\n",
> + __FUNCTION__, opc, modrm));
> return 0;
> }
>
> @@ -891,9 +1062,11 @@ mov_to_seg(struct regs *regs, unsigned p
> * Emulate a segment load in protected mode
> */
> static int
> -load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union
> vmcs_arbytes
> *arbytes)
> -{
> - uint64_t gdt_phys_base;
> +load_seg(unsigned long sel, uint32_t *base, uint32_t *limit,
> + union vmcs_arbytes *arbytes)
> +{
> + unsigned gdt_entry_va;
> + uint64_t gdt_entry_pa;
> unsigned long long entry;
>
> /* protected mode: use seg as index into gdt */
> @@ -905,12 +1078,14 @@ load_seg(unsigned long sel, uint32_t *ba
> return 1;
> }
>
> - gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
> - if (gdt_phys_base != (uint32_t)gdt_phys_base) {
> + gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base
> +
> + (sel >> 3));
> + gdt_entry_pa = guest_linear_to_phys(gdt_entry_va);
> + if (gdt_entry_pa != (uint32_t)gdt_entry_pa) {
> printf("gdt base address above 4G\n");
> - cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3),
&entry);
> + cpuid_addr_value(gdt_entry_pa, &entry);
> } else
> - entry = ((unsigned long long *)(long)gdt_phys_base)[sel
>>
> 3];
> + entry = *(unsigned long long *)(long)gdt_entry_pa;
>
> /* Check the P bit first */
> if (!((entry >> (15+32)) & 0x1) && sel != 0)
> @@ -945,7 +1120,8 @@ load_seg(unsigned long sel, uint32_t *ba
> * the descriptor was invalid.
> */
> static void
> -load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit,
> union
> vmcs_arbytes *arbytes)
> +load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit,
> + union vmcs_arbytes *arbytes)
> {
> if (!load_seg(sel, base, limit, arbytes))
> load_seg(0, base, limit, arbytes);
> @@ -972,8 +1148,11 @@ protected_mode(struct regs *regs)
>
> /* reload all segment registers */
> if (!load_seg(regs->cs, &oldctx.cs_base,
> - &oldctx.cs_limit, &oldctx.cs_arbytes))
> + &oldctx.cs_limit, &oldctx.cs_arbytes)) {
> + dump_regs(regs);
> panic("Invalid %%cs=0x%x for protected mode\n",
regs->cs);
> + }
> +
> oldctx.cs_sel = regs->cs;
>
> load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
> @@ -1109,7 +1288,7 @@ jmpl(struct regs *regs, int prefix)
> regs->cs = cs;
> regs->eip = eip;
>
> - if (mode == VM86_REAL_TO_PROTECTED) /* jump to
protected
> mode */
> + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected
mode */
> set_mode(regs, VM86_PROTECTED);
> else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real
mode */
> set_mode(regs, VM86_REAL);
> @@ -1135,12 +1314,12 @@ jmpl_indirect(struct regs *regs, int pre
> regs->cs = cs;
> regs->eip = eip;
>
> - if (mode == VM86_REAL_TO_PROTECTED) /* jump to
protected
> mode */
> + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected
mode */
> set_mode(regs, VM86_PROTECTED);
> else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real
mode */
> set_mode(regs, VM86_REAL);
> else
> - panic("jmpl");
> + panic("jmpl_indirect");
> }
>
> static void
> @@ -1409,6 +1588,10 @@ opcode(struct regs *regs)
> prefix |= SEG_DS;
> continue;
>
> + case 0x50 ... 0x57:
> + push(regs, prefix, opc);
> + continue;
> +
> case 0x64:
> TRACE((regs, regs->eip - eip, "%%fs:"));
> prefix |= SEG_FS;
> @@ -1457,7 +1640,12 @@ opcode(struct regs *regs)
> goto invalid;
> return OPC_EMULATED;
>
> - case 0x8E: /* mov r16, sreg */
> + case 0x8C: /* mov sreg, r/m16 */
> + if (!mov_from_seg(regs, prefix, opc))
> + goto invalid;
> + return OPC_EMULATED;
> +
> + case 0x8E: /* mov r/m16, sreg */
> if (!mov_to_seg(regs, prefix, opc))
> goto invalid;
> return OPC_EMULATED;
> @@ -1535,7 +1723,7 @@ opcode(struct regs *regs)
> if (mode == VM86_REAL_TO_PROTECTED ||
> mode == VM86_PROTECTED_TO_REAL) {
> retl(regs, prefix);
> - return OPC_INVALID;
> + return OPC_INVALID; /* try to exit
emulator */
> }
> goto invalid;
>
> @@ -1573,7 +1761,7 @@ opcode(struct regs *regs)
> if (mode == VM86_REAL_TO_PROTECTED ||
> mode == VM86_PROTECTED_TO_REAL) {
> jmpl(regs, prefix);
> - return OPC_INVALID;
> + return OPC_INVALID; /* try to exit
emulator */
> }
> goto invalid;
>
> @@ -1707,8 +1895,10 @@ trap(int trapno, int errno, struct regs
>
> default:
> invalid:
> - printf("Trap (0x%x) while in %s mode\n",
> - trapno, regs->eflags & EFLAGS_VM ? "real" :
> "protected");
> + printf("Trap (0x%x) while in %s mode (emulator in mode
> %s\n",
> + trapno, regs->eflags & EFLAGS_VM ? "real" :
> "protected",
> + states[mode]);
> +
> if (trapno == 14)
> printf("Page fault address 0x%x\n", get_cr2());
> dump_regs(regs);
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|