Index: 2006-10-04/config/x86_64.mk
===================================================================
--- 2006-10-04.orig/config/x86_64.mk 2006-07-17 08:27:43.000000000 +0200
+++ 2006-10-04/config/x86_64.mk 2006-10-04 15:03:07.000000000 +0200
@@ -1,4 +1,5 @@
CONFIG_X86 := y
+CONFIG_COMPAT := y
CONFIG_HVM := y
CONFIG_MIGRATE := y
CONFIG_XCUTILS := y
Index: 2006-10-04/xen/arch/x86/boot/x86_64.S
===================================================================
--- 2006-10-04.orig/xen/arch/x86/boot/x86_64.S 2006-09-21 11:05:00.000000000 +0200
+++ 2006-10-04/xen/arch/x86/boot/x86_64.S 2006-10-04 15:03:07.000000000 +0200
@@ -223,15 +223,34 @@ high_start:
.align PAGE_SIZE, 0
ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
- .quad 0x00cf9a000000ffff /* 0xe008 ring 0 code, compatibility */
- .quad 0x00af9a000000ffff /* 0xe010 ring 0 code, 64-bit mode */
- .quad 0x00cf92000000ffff /* 0xe018 ring 0 data */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad 0x0000000000000000 /* reserved */
.quad 0x00cffa000000ffff /* 0xe023 ring 3 code, compatibility */
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
- .quad 0x0000000000000000 /* unused */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+#ifdef CONFIG_COMPAT /* GDT page mapped for compatibility-mode domains */
+ .align PAGE_SIZE, 0
+/* NB. Even rings != 0 get access to the full 4Gb, as only the */
+/* (compatibility) machine->physical mapping table lives there. */
+ENTRY(compat_gdt_table) /* ring 0 entries use the same selectors as gdt_table */
+ .quad 0x0000000000000000 /* unused */
+ .quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
+ .quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
+ .quad 0x00cfba000000ffff /* 0xe019 ring 1 code, compatibility */
+ .quad 0x00cfb2000000ffff /* 0xe021 ring 1 data */
+ .quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */
+ .quad 0x00cff2000000ffff /* 0xe033 ring 3 data */
+ .quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8 /* advance to the __TSS(0) slot */
+ .fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+# undef LIMIT /* NOTE(review): no LIMIT definition is visible in this patch - leftover? */
+#endif /* CONFIG_COMPAT */
+
/* Initial PML4 -- level-4 page table. */
.align PAGE_SIZE, 0
ENTRY(idle_pg_table)
Index: 2006-10-04/xen/arch/x86/domain.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain.c 2006-10-04 09:27:29.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain.c 2006-10-04 15:03:07.000000000 +0200
@@ -263,17 +263,18 @@ int arch_set_info_guest(
if ( !(c->flags & VGCF_HVM_GUEST) )
{
- fixup_guest_stack_selector(c->user_regs.ss);
- fixup_guest_stack_selector(c->kernel_ss);
- fixup_guest_code_selector(c->user_regs.cs);
-
-#ifdef __i386__
- fixup_guest_code_selector(c->event_callback_cs);
- fixup_guest_code_selector(c->failsafe_callback_cs);
-#endif
+ fixup_guest_stack_selector(d, c->user_regs.ss);
+ fixup_guest_stack_selector(d, c->kernel_ss);
+ fixup_guest_code_selector(d, c->user_regs.cs);
+
+ if ( CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d) )
+ {
+ fixup_guest_code_selector(d, c->event_callback_cs);
+ fixup_guest_code_selector(d, c->failsafe_callback_cs);
+ }
for ( i = 0; i < 256; i++ )
- fixup_guest_code_selector(c->trap_ctxt[i].cs);
+ fixup_guest_code_selector(d, c->trap_ctxt[i].cs);
}
else if ( !hvm_enabled )
return -EINVAL;
@@ -422,9 +423,11 @@ void new_thread(struct vcpu *d,
* ESI = start_info
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
- regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
- regs->ss = FLAT_KERNEL_SS;
- regs->cs = FLAT_KERNEL_CS;
+ regs->ds = regs->es = regs->fs = regs->gs = !IS_COMPAT(d->domain)
+ ? FLAT_KERNEL_DS
+ : FLAT_COMPAT_KERNEL_DS;
+ regs->ss = !IS_COMPAT(d->domain) ? FLAT_KERNEL_SS : FLAT_COMPAT_KERNEL_SS;
+ regs->cs = !IS_COMPAT(d->domain) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
regs->eip = start_pc;
regs->esp = start_stack;
regs->esi = start_info;
@@ -503,27 +506,30 @@ static void load_segments(struct vcpu *n
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
}
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->fs_base )
- wrmsr(MSR_FS_BASE,
- nctxt->fs_base,
- nctxt->fs_base>>32);
-
- /* Most kernels have non-zero GS base, so don't bother testing. */
- /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
- wrmsr(MSR_SHADOW_GS_BASE,
- nctxt->gs_base_kernel,
- nctxt->gs_base_kernel>>32);
-
- /* This can only be non-zero if selector is NULL. */
- if ( nctxt->gs_base_user )
- wrmsr(MSR_GS_BASE,
- nctxt->gs_base_user,
- nctxt->gs_base_user>>32);
-
- /* If in kernel mode then switch the GS bases around. */
- if ( n->arch.flags & TF_kernel_mode )
- __asm__ __volatile__ ( "swapgs" );
+ if ( !IS_COMPAT(n->domain) )
+ {
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->fs_base )
+ wrmsr(MSR_FS_BASE,
+ nctxt->fs_base,
+ nctxt->fs_base>>32);
+
+ /* Most kernels have non-zero GS base, so don't bother testing. */
+ /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
+ wrmsr(MSR_SHADOW_GS_BASE,
+ nctxt->gs_base_kernel,
+ nctxt->gs_base_kernel>>32);
+
+ /* This can only be non-zero if selector is NULL. */
+ if ( nctxt->gs_base_user )
+ wrmsr(MSR_GS_BASE,
+ nctxt->gs_base_user,
+ nctxt->gs_base_user>>32);
+
+ /* If in kernel mode then switch the GS bases around. */
+ if ( (n->arch.flags & TF_kernel_mode) )
+ __asm__ __volatile__ ( "swapgs" );
+ }
if ( unlikely(!all_segs_okay) )
{
@@ -534,6 +540,54 @@ static void load_segments(struct vcpu *n
(unsigned long *)nctxt->kernel_sp;
unsigned long cs_and_mask, rflags;
+ if ( IS_COMPAT(n->domain) )
+ {
+ unsigned int *esp = ring_1(regs) ?
+ (unsigned int *)regs->rsp :
+ (unsigned int *)nctxt->kernel_sp;
+ unsigned int cs_and_mask, eflags;
+ int ret = 0;
+
+ /* CS longword also contains full evtchn_upcall_mask. */
+ cs_and_mask = (unsigned short)regs->cs |
+ ((unsigned int)n->vcpu_info->evtchn_upcall_mask << 16);
+ /* Fold upcall mask into RFLAGS.IF. */
+ eflags = regs->_eflags & ~X86_EFLAGS_IF;
+ eflags |= !n->vcpu_info->evtchn_upcall_mask << 9;
+
+ if ( !ring_1(regs) )
+ {
+ ret = put_user(regs->ss, esp-1);
+ ret |= put_user(regs->_esp, esp-2);
+ esp -= 2;
+ }
+
+ if ( ret |
+ put_user(eflags, esp-1) |
+ put_user(cs_and_mask, esp-2) |
+ put_user(regs->_eip, esp-3) |
+ put_user(nctxt->user_regs.gs, esp-4) |
+ put_user(nctxt->user_regs.fs, esp-5) |
+ put_user(nctxt->user_regs.es, esp-6) |
+ put_user(nctxt->user_regs.ds, esp-7) )
+ {
+ DPRINTK("Error while creating failsafe callback frame.\n");
+ domain_crash(n->domain);
+ }
+
+ if ( test_bit(_VGCF_failsafe_disables_events,
+ &n->arch.guest_context.flags) )
+ n->vcpu_info->evtchn_upcall_mask = 1;
+
+ regs->entry_vector = TRAP_syscall;
+ regs->_eflags &= 0xFFFCBEFFUL;
+ regs->ss = FLAT_COMPAT_KERNEL_SS;
+ regs->_esp = (unsigned long)(esp-7);
+ regs->cs = FLAT_COMPAT_KERNEL_CS;
+ regs->_eip = nctxt->failsafe_callback_eip;
+ return;
+ }
+
if ( !(n->arch.flags & TF_kernel_mode) )
toggle_guest_mode(n);
else
@@ -594,7 +648,7 @@ static void save_segments(struct vcpu *v
if ( regs->es )
dirty_segment_mask |= DIRTY_ES;
- if ( regs->fs )
+ if ( regs->fs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_FS;
ctxt->fs_base = 0; /* != 0 selector kills fs_base */
@@ -604,7 +658,7 @@ static void save_segments(struct vcpu *v
dirty_segment_mask |= DIRTY_FS_BASE;
}
- if ( regs->gs )
+ if ( regs->gs || IS_COMPAT(v->domain) )
{
dirty_segment_mask |= DIRTY_GS;
ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
@@ -736,6 +790,21 @@ void context_switch(struct vcpu *prev, s
{
__context_switch();
+ if ( IS_COMPAT(prev->domain) != IS_COMPAT(next->domain) )
+ {
+ uint32_t efer_lo, efer_hi;
+
+ local_flush_tlb_one(GDT_VIRT_START(next)
+ + FIRST_RESERVED_GDT_BYTE);
+
+ rdmsr(MSR_EFER, efer_lo, efer_hi);
+ if ( !IS_COMPAT(next->domain) == !(efer_lo & EFER_SCE) )
+ {
+ efer_lo ^= EFER_SCE;
+ wrmsr(MSR_EFER, efer_lo, efer_hi);
+ }
+ }
+
/* Re-enable interrupts before restoring state which may fault. */
local_irq_enable();
@@ -948,6 +1017,10 @@ void domain_relinquish_resources(struct
put_page(mfn_to_page(pfn));
else
put_page_and_type(mfn_to_page(pfn));
+#ifdef __x86_64__
+ if ( pfn == pagetable_get_pfn(v->arch.guest_table_user) )
+ v->arch.guest_table_user = pagetable_null();
+#endif
v->arch.guest_table = pagetable_null();
}
Index: 2006-10-04/xen/arch/x86/domain_build.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain_build.c 2006-09-21 10:56:11.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain_build.c 2006-10-04 15:03:07.000000000 +0200
@@ -316,11 +316,39 @@ int construct_dom0(struct domain *d,
else
nr_pages = dom0_nrpages;
- if ( (rc = parseelfimage(&dsi)) != 0 )
+ rc = parseelfimage(&dsi);
+#ifdef CONFIG_COMPAT
+ if ( rc == -ENOSYS
+ && (rc = parseelf32image(&dsi)) == 0 )
+ {
+ l1_pgentry_t gdt_l1e;
+
+ set_bit(_DOMF_compat, &d->domain_flags);
+
+ if ( nr_pages != (unsigned int)nr_pages )
+ nr_pages = UINT_MAX;
+
+ /*
+ * Map compatibility Xen segments into every VCPU's GDT. See
+ * arch_domain_create() for further comments.
+ */
+ gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
+ PAGE_HYPERVISOR);
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
+ }
+#endif
+ if ( rc != 0)
+ {
+ if ( rc == -ENOSYS )
+ printk("DOM0 image is not a Xen-compatible Elf image.\n");
return rc;
+ }
dom0_pae = (dsi.pae_kernel != PAEKERN_no);
- xen_pae = (CONFIG_PAGING_LEVELS == 3);
+ xen_pae = (CONFIG_PAGING_LEVELS == 3) || IS_COMPAT(d);
if ( dom0_pae != xen_pae )
{
printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n",
@@ -331,7 +359,13 @@ int construct_dom0(struct domain *d,
if ( xen_pae && dsi.pae_kernel == PAEKERN_extended_cr3 )
set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist);
- if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL )
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES);
+ else
+#endif
+ p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES);
+ if ( p != NULL )
{
parse_features(p,
dom0_features_supported,
@@ -444,9 +478,9 @@ int construct_dom0(struct domain *d,
* We're basically forcing default RPLs to 1, so that our "what privilege
* level are we returning to?" logic works.
*/
- v->arch.guest_context.kernel_ss = FLAT_KERNEL_SS;
+ v->arch.guest_context.kernel_ss = !IS_COMPAT(d) ? FLAT_KERNEL_SS :
FLAT_COMPAT_KERNEL_SS;
for ( i = 0; i < 256; i++ )
- v->arch.guest_context.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ v->arch.guest_context.trap_ctxt[i].cs = !IS_COMPAT(d) ? FLAT_KERNEL_CS
: FLAT_COMPAT_KERNEL_CS;
#if defined(__i386__)
@@ -595,6 +629,12 @@ int construct_dom0(struct domain *d,
return -EINVAL;
}
+ if ( IS_COMPAT(d) )
+ {
+ v->arch.guest_context.failsafe_callback_cs = FLAT_COMPAT_KERNEL_CS;
+ v->arch.guest_context.event_callback_cs = FLAT_COMPAT_KERNEL_CS;
+ }
+
/* WARNING: The new domain must have its 'processor' field filled in! */
maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
@@ -604,6 +644,8 @@ int construct_dom0(struct domain *d,
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
+ if ( IS_COMPAT(d) )
+ v->arch.guest_table_user = v->arch.guest_table;
l4tab += l4_table_offset(dsi.v_start);
mfn = alloc_spfn;
@@ -714,8 +756,19 @@ int construct_dom0(struct domain *d,
write_ptbase(v);
/* Copy the OS image and free temporary buffer. */
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ (void)loadelf32image(&dsi);
+ else
+#endif
(void)loadelfimage(&dsi);
+#ifdef CONFIG_COMPAT
+ if ( IS_COMPAT(d) )
+ hypercall_page =
+ xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE,
&hypercall_page_defined);
+ else
+#endif
hypercall_page =
xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE,
&hypercall_page_defined);
if ( hypercall_page_defined )
@@ -750,7 +803,7 @@ int construct_dom0(struct domain *d,
si->mfn_list = vphysmap_start;
sprintf(si->magic, "xen-%i.%i-x86_%d%s",
xen_major_version(), xen_minor_version(),
- BITS_PER_LONG, xen_pae ? "p" : "");
+ !IS_COMPAT(d) ? BITS_PER_LONG : 32, xen_pae ? "p" : "");
/* Write the phys->machine and machine->phys table entries. */
for ( pfn = 0; pfn < d->tot_pages; pfn++ )
@@ -914,13 +967,28 @@ int elf_sanity_check(Elf_Ehdr *ehdr)
(ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
(ehdr->e_type != ET_EXEC) )
{
- printk("DOM0 image is not a Xen-compatible Elf image.\n");
return 0;
}
return 1;
}
+#ifdef CONFIG_COMPAT
+/* Accept only 32-bit, LSB-encoded, i386 ELF executables (1 = ok, 0 = no). */
+int elf32_sanity_check(Elf32_Ehdr *ehdr)
+{
+    int acceptable;
+
+    acceptable = IS_ELF(*ehdr) &&
+                 (ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
+                 (ehdr->e_machine == EM_386) &&
+                 (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) &&
+                 (ehdr->e_type == ET_EXEC);
+
+    return acceptable;
+}
+#endif
+
/*
* Local variables:
* mode: C
Index: 2006-10-04/xen/arch/x86/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/mm.c 2006-10-04 09:27:53.000000000 +0200
+++ 2006-10-04/xen/arch/x86/mm.c 2006-10-04 15:03:07.000000000 +0200
@@ -407,7 +407,7 @@ static int alloc_segdesc_page(struct pag
descs = map_domain_page(page_to_mfn(page));
for ( i = 0; i < 512; i++ )
- if ( unlikely(!check_descriptor(&descs[i])) )
+ if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
goto fail;
unmap_domain_page(descs);
@@ -2848,7 +2848,7 @@ long do_update_descriptor(u64 pa, u64 de
if ( !VALID_MFN(mfn = gmfn_to_mfn(dom, gmfn)) ||
(((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
!mfn_valid(mfn) ||
- !check_descriptor(&d) )
+ !check_descriptor(dom, &d) )
{
UNLOCK_BIGLOCK(dom);
return -EINVAL;
Index: 2006-10-04/xen/arch/x86/traps.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/traps.c 2006-10-04 09:28:00.000000000 +0200
+++ 2006-10-04/xen/arch/x86/traps.c 2006-10-04 15:03:07.000000000 +0200
@@ -1812,6 +1812,13 @@ void set_tss_desc(unsigned int n, void *
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
9);
+#ifdef CONFIG_COMPAT
+ _set_tssldt_desc(
+ compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)addr,
+ offsetof(struct tss_struct, __cacheline_filler) - 1,
+ 11);
+#endif
}
void __init trap_init(void)
@@ -1886,7 +1893,7 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
if ( cur.address == 0 )
break;
- fixup_guest_code_selector(cur.cs);
+ fixup_guest_code_selector(current->domain, cur.cs);
memcpy(&dst[cur.vector], &cur, sizeof(cur));
Index: 2006-10-04/xen/arch/x86/x86_32/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_32/mm.c 2006-08-23 11:24:59.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_32/mm.c 2006-10-04 15:03:07.000000000 +0200
@@ -227,7 +227,7 @@ long do_stack_switch(unsigned long ss, u
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
- fixup_guest_stack_selector(ss);
+ fixup_guest_stack_selector(current->domain, ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
@@ -238,7 +238,7 @@ long do_stack_switch(unsigned long ss, u
}
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(struct desc_struct *d)
+int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
unsigned long base, limit;
u32 a = d->a, b = d->b;
@@ -258,8 +258,8 @@ int check_descriptor(struct desc_struct
* gates (consider a call gate pointing at another kernel descriptor with
* DPL 0 -- this would get the OS ring-0 privileges).
*/
- if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
- d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
if ( !(b & _SEGMENT_S) )
{
@@ -281,8 +281,8 @@ int check_descriptor(struct desc_struct
/* Validate and fix up the target code selector. */
cs = a >> 16;
- fixup_guest_code_selector(cs);
- if ( !guest_gate_selector_okay(cs) )
+ fixup_guest_code_selector(dom, cs);
+ if ( !guest_gate_selector_okay(dom, cs) )
goto bad;
a = d->a = (d->a & 0xffffU) | (cs << 16);
Index: 2006-10-04/xen/arch/x86/x86_32/traps.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_32/traps.c 2006-09-21 10:56:11.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_32/traps.c 2006-10-04 15:03:07.000000000 +0200
@@ -323,7 +323,7 @@ void init_int80_direct_trap(struct vcpu
* switch to the Xen stack and we need to swap back to the guest
* kernel stack before passing control to the system call entry point.
*/
- if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) ||
+ if ( TI_GET_IF(ti) || !guest_gate_selector_okay(v->domain, ti->cs) ||
supervisor_mode_kernel )
{
v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
@@ -353,7 +353,7 @@ static long register_guest_callback(stru
long ret = 0;
struct vcpu *v = current;
- fixup_guest_code_selector(reg->address.cs);
+ fixup_guest_code_selector(v->domain, reg->address.cs);
switch ( reg->type )
{
Index: 2006-10-04/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/asm-offsets.c 2006-09-25 14:59:15.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/asm-offsets.c 2006-10-04 15:03:07.000000000 +0200
@@ -58,12 +58,16 @@ void __dummy__(void)
OFFSET(VCPU_thread_flags, struct vcpu, arch.flags);
OFFSET(VCPU_event_addr, struct vcpu,
arch.guest_context.event_callback_eip);
+ OFFSET(VCPU_event_sel, struct vcpu,
+ arch.guest_context.event_callback_cs);
OFFSET(VCPU_failsafe_addr, struct vcpu,
arch.guest_context.failsafe_callback_eip);
+ OFFSET(VCPU_failsafe_sel, struct vcpu,
+ arch.guest_context.failsafe_callback_cs);
OFFSET(VCPU_syscall_addr, struct vcpu,
arch.guest_context.syscall_callback_eip);
- OFFSET(VCPU_kernel_sp, struct vcpu,
- arch.guest_context.kernel_sp);
+ OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
+ OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
Index: 2006-10-04/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/mm.c 2006-09-21 13:23:19.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/mm.c 2006-10-04 15:03:07.000000000 +0200
@@ -224,7 +224,7 @@ long subarch_memory_op(int op, XEN_GUEST
long do_stack_switch(unsigned long ss, unsigned long esp)
{
- fixup_guest_stack_selector(ss);
+ fixup_guest_stack_selector(current->domain, ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
return 0;
@@ -284,7 +284,7 @@ long do_set_segment_base(unsigned int wh
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(struct desc_struct *d)
+int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
u32 a = d->a, b = d->b;
u16 cs;
@@ -294,8 +294,8 @@ int check_descriptor(struct desc_struct
goto good;
/* Check and fix up the DPL. */
- if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
- d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
/* All code and data segments are okay. No base/limit checking. */
if ( (b & _SEGMENT_S) )
@@ -311,8 +311,8 @@ int check_descriptor(struct desc_struct
/* Validate and fix up the target code selector. */
cs = a >> 16;
- fixup_guest_code_selector(cs);
- if ( !guest_gate_selector_okay(cs) )
+ fixup_guest_code_selector(dom, cs);
+ if ( !guest_gate_selector_okay(dom, cs) )
goto bad;
a = d->a = (d->a & 0xffffU) | (cs << 16);
Index: 2006-10-04/xen/arch/x86/x86_64/traps.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/traps.c 2006-10-04 09:17:44.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/traps.c 2006-10-04 15:03:07.000000000 +0200
@@ -187,6 +187,8 @@ asmlinkage void do_double_fault(struct c
void toggle_guest_mode(struct vcpu *v)
{
+ if ( IS_COMPAT(v->domain) )
+ return;
v->arch.flags ^= TF_kernel_mode;
__asm__ __volatile__ ( "swapgs" );
update_cr3(v);
Index: 2006-10-04/xen/common/Makefile
===================================================================
--- 2006-10-04.orig/xen/common/Makefile 2006-08-28 08:32:38.000000000 +0200
+++ 2006-10-04/xen/common/Makefile 2006-10-04 15:03:07.000000000 +0200
@@ -3,6 +3,7 @@ obj-y += bitmap.o
obj-y += domctl.o
obj-y += domain.o
obj-y += elf.o
+obj-$(CONFIG_COMPAT) += elf32.o
obj-y += event_channel.o
obj-y += grant_table.o
obj-y += kernel.o
Index: 2006-10-04/xen/common/elf.c
===================================================================
--- 2006-10-04.orig/xen/common/elf.c 2006-08-28 08:32:38.000000000 +0200
+++ 2006-10-04/xen/common/elf.c 2006-10-04 15:03:07.000000000 +0200
@@ -202,7 +202,7 @@ int parseelfimage(struct domain_setup_in
int h, virt_base_defined, elf_pa_off_defined, virt_entry_defined;
if ( !elf_sanity_check(ehdr) )
- return -EINVAL;
+ return -ENOSYS;
if ( (ehdr->e_phoff + (ehdr->e_phnum*ehdr->e_phentsize)) > image_len )
{
Index: 2006-10-04/xen/common/elf32.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/common/elf32.c 2006-10-04 15:03:07.000000000 +0200
@@ -0,0 +1,19 @@
+/******************************************************************************
+ * elf32.c - stub to support 32-bit ELF images on 64-bit platforms.
+ *
+ * Re-includes elf.c with ELFSIZE forced to 32 and its entry points renamed.
+ */
+
+#include <xen/config.h>
+#undef ELFSIZE /* discard any previous definition before forcing 32 */
+#define ELFSIZE 32
+#include <xen/types.h>
+#include <xen/elf.h>
+
+#define xen_elfnote_string xen_elf32note_string /* rename what elf.c defines... */
+#define xen_elfnote_numeric xen_elf32note_numeric
+#define parseelfimage parseelf32image
+#define loadelfimage loadelf32image
+#define elf_sanity_check elf32_sanity_check /* ...and the checker it calls */
+
+#include "elf.c" /* build the generic ELF code under the 32-bit names */
Index: 2006-10-04/xen/include/asm-x86/config.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/config.h 2006-10-04 09:17:44.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/config.h 2006-10-04 15:03:07.000000000 +0200
@@ -93,6 +93,7 @@ static inline void FORCE_CRASH(void)
#if defined(__x86_64__)
#define CONFIG_X86_64 1
+#define CONFIG_COMPAT 1
#define asmlinkage
@@ -187,13 +188,21 @@ static inline void FORCE_CRASH(void)
#define DIRECTMAP_VIRT_START (PML4_ADDR(262))
#define DIRECTMAP_VIRT_END (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
+#define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000
+#define __MACH2PHYS_COMPAT_VIRT_START 0xF5800000 /* same address as the compat hypervisor base */
+#define __MACH2PHYS_COMPAT_VIRT_END 0xF6800000 /* XXX bump this ? */
+#define HYPERVISOR_COMPAT_VIRT_START mk_unsigned_long(__HYPERVISOR_COMPAT_VIRT_START)
+#define MACH2PHYS_COMPAT_VIRT_START mk_unsigned_long(__MACH2PHYS_COMPAT_VIRT_START)
+#define MACH2PHYS_COMPAT_VIRT_END mk_unsigned_long(__MACH2PHYS_COMPAT_VIRT_END)
+#define MACH2PHYS_COMPAT_NR_ENTRIES ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START)>>2) /* byte span / 4 */
+
#define PGT_base_page_table PGT_l4_page_table
-#define __HYPERVISOR_CS64 0xe010
-#define __HYPERVISOR_CS32 0xe008
+#define __HYPERVISOR_CS64 0xe008
+#define __HYPERVISOR_CS32 0xe038
#define __HYPERVISOR_CS __HYPERVISOR_CS64
#define __HYPERVISOR_DS64 0x0000
-#define __HYPERVISOR_DS32 0xe018
+#define __HYPERVISOR_DS32 0xe010
#define __HYPERVISOR_DS __HYPERVISOR_DS64
/* For generic assembly code: use macros to define operation/operand sizes. */
Index: 2006-10-04/xen/include/asm-x86/desc.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/desc.h 2006-09-21 11:04:30.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/desc.h 2006-10-04 15:03:07.000000000 +0200
@@ -18,31 +18,76 @@
#define LDT_ENTRY_SIZE 8
+#if defined(__x86_64__) /* selector values match the compat_gdt_table layout */
+
+#define FLAT_COMPAT_RING1_CS 0xe019 /* GDT index 259 */
+#define FLAT_COMPAT_RING1_DS 0xe021 /* GDT index 260 */
+#define FLAT_COMPAT_RING1_SS 0xe021 /* GDT index 260 */
+#define FLAT_COMPAT_RING3_CS 0xe02b /* GDT index 261 */
+#define FLAT_COMPAT_RING3_DS 0xe033 /* GDT index 262 */
+#define FLAT_COMPAT_RING3_SS 0xe033 /* GDT index 262 */
+
+#define FLAT_COMPAT_KERNEL_DS FLAT_COMPAT_RING1_DS /* compat kernel uses the ring 1 selectors */
+#define FLAT_COMPAT_KERNEL_CS FLAT_COMPAT_RING1_CS
+#define FLAT_COMPAT_KERNEL_SS FLAT_COMPAT_RING1_SS
+#define FLAT_COMPAT_USER_DS FLAT_COMPAT_RING3_DS /* compat user space uses the ring 3 selectors */
+#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS
+#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2) /* LDT slot is 2 entries after the TSS slot */
+
+#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) /* stride of 4 GDT entries per CPU */
+#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+
+#elif defined(__i386__)
+
+#define FLAT_COMPAT_KERNEL_CS FLAT_KERNEL_CS /* on i386 the compat names alias the native ones */
+#define FLAT_COMPAT_KERNEL_DS FLAT_KERNEL_DS
+#define FLAT_COMPAT_KERNEL_SS FLAT_KERNEL_SS
+#define FLAT_COMPAT_USER_CS FLAT_USER_CS
+#define FLAT_COMPAT_USER_DS FLAT_USER_DS
+#define FLAT_COMPAT_USER_SS FLAT_USER_SS
+
+#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
+
+#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1) /* LDT slot directly follows the TSS slot */
+
+#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY) /* stride of 2 GDT entries per CPU */
+#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+
+#endif /* __x86_64__ / __i386__ */
+
+#ifndef __ASSEMBLY__
+
#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
#if defined(__x86_64__)
-#define GUEST_KERNEL_RPL 3
+#define GUEST_KERNEL_RPL(d) (!IS_COMPAT(d) ? 3 : 1) /* 3 for non-compat domains, 1 for compat domains */
#elif defined(__i386__)
-#define GUEST_KERNEL_RPL 1
+#define GUEST_KERNEL_RPL(d) ((void)(d), 1) /* d is evaluated but ignored: always 1 */
#endif
/* Fix up the RPL of a guest segment selector. */
-#define __fixup_guest_selector(sel) \
- ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) : \
- (((sel) & ~3) | GUEST_KERNEL_RPL))
+#define __fixup_guest_selector(d, sel) \
+({ /* statement expression, so GUEST_KERNEL_RPL(d) is computed once */ \
+ uint16_t _rpl = GUEST_KERNEL_RPL(d); /* lowest RPL the selector may keep */ \
+ (sel) = (((sel) & 3) >= _rpl) ? (sel) : (((sel) & ~3) | _rpl); \
+})
/* Stack selectors don't need fixing up if the kernel runs in ring 0. */
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
-#define fixup_guest_stack_selector(ss) ((void)0)
+#define fixup_guest_stack_selector(d, ss) ((void)0)
#else
-#define fixup_guest_stack_selector(ss) __fixup_guest_selector(ss)
+#define fixup_guest_stack_selector(d, ss) __fixup_guest_selector(d, ss)
#endif
/*
* Code selectors are always fixed up. It allows the Xen exit stub to detect
* return to guest context, even when the guest kernel runs in ring 0.
*/
-#define fixup_guest_code_selector(cs) __fixup_guest_selector(cs)
+#define fixup_guest_code_selector(d, cs) __fixup_guest_selector(d, cs)
/*
* We need this function because enforcing the correct guest kernel RPL is
@@ -57,11 +102,15 @@
* DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs
* from CS RPL then we'll #GP.
*/
-#define guest_gate_selector_okay(sel) \
+#define guest_gate_selector_okay(d, sel) \
((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */ \
- ((sel) == FLAT_KERNEL_CS) || /* Xen default seg? */ \
+ ((sel) == (!IS_COMPAT(d) ? \
+ FLAT_KERNEL_CS : /* Xen default seg? */ \
+ FLAT_COMPAT_KERNEL_CS)) || /* Xen default compat seg? */ \
((sel) & 4)) /* LDT seg? */
+#endif /* __ASSEMBLY__ */
+
/* These are bitmasks for the high 32 bits of a descriptor table entry. */
#define _SEGMENT_TYPE (15<< 8)
#define _SEGMENT_EC ( 1<<10) /* Expand-down or Conforming segment */
@@ -81,12 +130,6 @@ struct desc_struct {
#if defined(__x86_64__)
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
-
typedef struct {
u64 a, b;
} idt_entry_t;
@@ -118,14 +161,6 @@ do {
#elif defined(__i386__)
-#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
-
typedef struct desc_struct idt_entry_t;
#define _set_gate(gate_addr,type,dpl,addr) \
@@ -155,6 +190,11 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\
#endif
extern struct desc_struct gdt_table[];
+#ifdef CONFIG_COMPAT
+extern struct desc_struct compat_gdt_table[];
+#else
+# define compat_gdt_table gdt_table /* without CONFIG_COMPAT the name aliases the native GDT */
+#endif /* CONFIG_COMPAT */
struct Xgt_desc_struct {
unsigned short size;
Index: 2006-10-04/xen/include/asm-x86/ldt.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/ldt.h 2005-11-17 15:51:06.000000000 +0100
+++ 2006-10-04/xen/include/asm-x86/ldt.h 2006-10-04 15:03:07.000000000 +0200
@@ -17,7 +17,8 @@ static inline void load_LDT(struct vcpu
else
{
cpu = smp_processor_id();
- desc = gdt_table + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+ desc = (!IS_COMPAT(v->domain) ? gdt_table : compat_gdt_table)
+ + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
_set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
__asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
}
Index: 2006-10-04/xen/include/asm-x86/mm.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/mm.h 2006-10-04 08:49:31.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/mm.h 2006-10-04 15:03:07.000000000 +0200
@@ -280,7 +280,7 @@ unsigned long
pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
#endif /* CONFIG_PAGING_LEVELS == 3 */
-int check_descriptor(struct desc_struct *d);
+int check_descriptor(const struct domain *, struct desc_struct *d);
/*
* The MPT (machine->physical mapping table) is an array of word-sized
Index: 2006-10-04/xen/include/asm-x86/regs.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/regs.h 2006-03-09 13:13:42.000000000 +0100
+++ 2006-10-04/xen/include/asm-x86/regs.h 2006-10-04 15:03:07.000000000 +0200
@@ -38,7 +38,8 @@ enum EFLAGS {
ASSERT(diff < STACK_SIZE); \
/* If a guest frame, it must be have guest privs (unless HVM guest). */ \
/* We permit CS==0 which can come from an uninitialised trap entry. */ \
- ASSERT((diff != 0) || vm86_mode(r) || ((r->cs&3) >= GUEST_KERNEL_RPL) || \
+ ASSERT((diff != 0) || vm86_mode(r) || \
+ ((r->cs&3) >= GUEST_KERNEL_RPL(current->domain)) || \
(r->cs == 0) || hvm_guest(current)); \
/* If not a guest frame, it must be a hypervisor frame. */ \
ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS))); \
Index: 2006-10-04/xen/include/asm-x86/x86_64/regs.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/x86_64/regs.h 2006-03-09 13:13:42.000000000 +0100
+++ 2006-10-04/xen/include/asm-x86/x86_64/regs.h 2006-10-04 15:03:07.000000000 +0200
@@ -11,7 +11,9 @@
#define ring_3(r) (((r)->cs & 3) == 3)
#define guest_kernel_mode(v, r) \
- (ring_3(r) && ((v)->arch.flags & TF_kernel_mode))
+ (!IS_COMPAT((v)->domain) ? /* non-compat: ring 3 plus TF_kernel_mode */ \
+ ring_3(r) && ((v)->arch.flags & TF_kernel_mode) : \
+ ring_1(r)) /* compat: guest kernel frames are ring 1 */
#define permit_softint(dpl, v, r) \
((dpl) >= (guest_kernel_mode(v, r) ? 1 : 3))
Index: 2006-10-04/xen/include/public/arch-x86_64.h
===================================================================
--- 2006-10-04.orig/xen/include/public/arch-x86_64.h 2006-09-11 09:06:11.000000000 +0200
+++ 2006-10-04/xen/include/public/arch-x86_64.h 2006-10-04 15:03:07.000000000 +0200
@@ -192,7 +192,10 @@ DEFINE_XEN_GUEST_HANDLE(trap_info_t);
#ifdef __GNUC__
/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
-#define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; /* 32-bit member sharing the same storage, e.g. _eip */ \
+}
#else
/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
#define __DECL_REG(name) uint64_t r ## name
@@ -265,7 +268,17 @@ struct vcpu_guest_context {
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
unsigned long event_callback_eip;
unsigned long failsafe_callback_eip;
+#ifdef __GNUC__
+ union {
+ unsigned long syscall_callback_eip;
+ struct {
+ unsigned int event_callback_cs; /* compat CS of event cb */
+ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
+ };
+ };
+#else
unsigned long syscall_callback_eip;
+#endif
unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
/* Segment base addresses. */
uint64_t fs_base;
Index: 2006-10-04/xen/include/xen/elf.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/elf.h 2006-08-25 15:36:10.000000000 +0200
+++ 2006-10-04/xen/include/xen/elf.h 2006-10-04 15:03:07.000000000 +0200
@@ -533,6 +533,15 @@ extern unsigned long long xen_elfnote_nu
int type, int *defined);
extern const char *xen_elfnote_string(struct domain_setup_info *dsi, int type);
+#ifdef CONFIG_COMPAT /* ELF32 counterparts of the generic ELF routines */
+extern int elf32_sanity_check(Elf32_Ehdr *ehdr);
+extern int loadelf32image(struct domain_setup_info *);
+extern int parseelf32image(struct domain_setup_info *);
+extern unsigned long long xen_elf32note_numeric(struct domain_setup_info *,
+ int type, int *defined);
+extern const char *xen_elf32note_string(struct domain_setup_info *, int type);
+#endif /* CONFIG_COMPAT */
+
#ifdef Elf_Ehdr
extern int elf_sanity_check(Elf_Ehdr *ehdr);
#endif
Index: 2006-10-04/xen/include/xen/sched.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/sched.h 2006-09-21 11:09:00.000000000 +0200
+++ 2006-10-04/xen/include/xen/sched.h 2006-10-04 15:03:07.000000000 +0200
@@ -417,6 +417,9 @@ extern struct domain *domain_list;
/* Domain is paused by the hypervisor? */
#define _DOMF_paused 6
#define DOMF_paused (1UL<<_DOMF_paused)
+ /* Domain is a compatibility one? */
+#define _DOMF_compat 7
+#define DOMF_compat (1UL<<_DOMF_compat)
static inline int vcpu_runnable(struct vcpu *v)
{
@@ -453,6 +456,13 @@ static inline void vcpu_unblock(struct v
#define IS_PRIV(_d) \
(test_bit(_DOMF_privileged, &(_d)->domain_flags))
+#ifdef CONFIG_COMPAT
+#define IS_COMPAT(_d) \
+ (test_bit(_DOMF_compat, &(_d)->domain_flags)) /* true once _DOMF_compat is set on the domain */
+#else
+#define IS_COMPAT(_d) 0 /* constant 0; note _d is not evaluated here */
+#endif /* CONFIG_COMPAT */
+
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
#endif /* __SCHED_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel