WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] xen: Big changes to x86 start-of-day:

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] xen: Big changes to x86 start-of-day:
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 15 May 2007 08:20:38 -0700
Delivery-date: Tue, 15 May 2007 08:20:59 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1178816575 -3600
# Node ID 23c4790512dbc889c4deed8ae1f8d54813c4b474
# Parent  07b1e917c9d8146af0acefd1f039b57729c841af
xen: Big changes to x86 start-of-day:

 1. x86/64 Xen now relocates itself to physical high memory. This is
    useful if we have devices that need very low memory, or if in
    future we want to grant a 1:1 mapping of low physical memory to a
    special 'native client domain'.

 2. We now only map low 16MB RAM statically. All other RAM is mapped
    dynamically within the constraints of the e820 map. It is
    recommended never to map MMIO regions, and this change means that
    Xen now obeys this constraint.

 3. The CPU bootup trampoline is now permanently installed at
    0x90000. This is a necessary prerequisite for CPU hotplug.

 4. Start-of-day asm is generally cleaned up and diff between x86/32
    and x86/64 is reduced.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/trampoline.S          |   67 -----
 xen/arch/ia64/xen/mm.c             |    2 
 xen/arch/x86/Makefile              |   14 -
 xen/arch/x86/boot/Makefile         |    3 
 xen/arch/x86/boot/head.S           |  180 +++++++++++++++
 xen/arch/x86/boot/trampoline.S     |  107 +++++++++
 xen/arch/x86/boot/x86_32.S         |  205 ++---------------
 xen/arch/x86/boot/x86_64.S         |  200 +----------------
 xen/arch/x86/e820.c                |    2 
 xen/arch/x86/mm.c                  |   13 -
 xen/arch/x86/setup.c               |  427 +++++++++++++++++++++++--------------
 xen/arch/x86/smpboot.c             |   56 ----
 xen/arch/x86/traps.c               |   28 +-
 xen/arch/x86/x86_32/gpr_switch.S   |    1 
 xen/arch/x86/x86_32/mm.c           |    8 
 xen/arch/x86/x86_64/compat_kexec.S |   24 +-
 xen/arch/x86/x86_64/gpr_switch.S   |    3 
 xen/arch/x86/x86_64/mm.c           |   16 +
 xen/arch/x86/x86_64/traps.c        |  118 ++++------
 xen/arch/x86/x86_64/xen.lds.S      |    2 
 xen/common/grant_table.c           |    2 
 xen/common/page_alloc.c            |   15 -
 xen/include/asm-ia64/mm.h          |    4 
 xen/include/asm-powerpc/mm.h       |    2 
 xen/include/asm-x86/config.h       |   36 +--
 xen/include/asm-x86/mm.h           |    5 
 xen/include/asm-x86/page.h         |   20 -
 xen/include/asm-x86/x86_32/page.h  |    4 
 xen/include/asm-x86/x86_64/page.h  |   18 +
 29 files changed, 800 insertions(+), 782 deletions(-)

diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/ia64/xen/mm.c    Thu May 10 18:02:55 2007 +0100
@@ -2118,7 +2118,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
 
         /* Remove previously mapped page if it was present. */
         if (prev_mfn && mfn_valid(prev_mfn)) {
-            if (IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)))
+            if (is_xen_heap_frame(mfn_to_page(prev_mfn)))
                 /* Xen heap frames are simply unhooked from this phys slot. */
                 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
             else
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/Makefile     Thu May 10 18:02:55 2007 +0100
@@ -39,7 +39,6 @@ obj-y += string.o
 obj-y += string.o
 obj-y += sysctl.o
 obj-y += time.o
-obj-y += trampoline.o
 obj-y += traps.o
 obj-y += usercopy.o
 obj-y += x86_emulate.o
@@ -52,20 +51,19 @@ obj-$(crash_debug) += gdbstub.o
        ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
        `$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
 
-$(TARGET)-syms: boot/$(TARGET_SUBARCH).o $(ALL_OBJS) xen.lds
+ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(ALL_OBJS)
+
+$(TARGET)-syms: $(ALL_OBJS) xen.lds
        $(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/common/symbols-dummy.o
-       $(LD) $(LDFLAGS) -T xen.lds -N \
-           boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+       $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
            $(BASEDIR)/common/symbols-dummy.o -o $(@D)/.$(@F).0
        $(NM) -n $(@D)/.$(@F).0 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).0.S
        $(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).0.o
-       $(LD) $(LDFLAGS) -T xen.lds -N \
-           boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+       $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
            $(@D)/.$(@F).0.o -o $(@D)/.$(@F).1
        $(NM) -n $(@D)/.$(@F).1 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).1.S
        $(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).1.o
-       $(LD) $(LDFLAGS) -T xen.lds -N \
-           boot/$(TARGET_SUBARCH).o $(ALL_OBJS) \
+       $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
            $(@D)/.$(@F).1.o -o $@
        rm -f $(@D)/.$(@F).[0-9]*
 
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/boot/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/Makefile        Thu May 10 18:02:55 2007 +0100
@@ -0,0 +1,3 @@
+obj-y += head.o
+
+head.o: head.S trampoline.S $(TARGET_SUBARCH).S
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/boot/head.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/head.S  Thu May 10 18:02:55 2007 +0100
@@ -0,0 +1,180 @@
+#include <xen/config.h>
+#include <xen/multiboot.h>
+#include <public/xen.h>
+#include <asm/asm_defns.h>
+#include <asm/desc.h>
+#include <asm/page.h>
+#include <asm/msr.h>
+
+        .text
+        .code32
+
+#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
+#define SYM_TRAMP_PHYS(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
+
+#define TRAMP_CS32 0x0008
+#define TRAMP_CS64 0x0010
+#define TRAMP_DS   0x0018
+
+ENTRY(start)
+        jmp     __start
+
+        .align 4
+/*** MULTIBOOT HEADER ****/
+#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
+                                MULTIBOOT_HEADER_WANT_MEMORY)
+        /* Magic number indicating a Multiboot header. */
+        .long   MULTIBOOT_HEADER_MAGIC
+        /* Flags to bootloader (see Multiboot spec). */
+        .long   MULTIBOOT_HEADER_FLAGS
+        /* Checksum: must be the negated sum of the first two fields. */
+        .long   -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
+
+.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
+.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
+
+bad_cpu:
+        mov     $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
+        jmp     print_err
+not_multiboot:
+        mov     $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
+print_err:
+        mov     $0xB8000,%edi  # VGA framebuffer
+1:      mov     (%esi),%bl
+        test    %bl,%bl        # Terminate on '\0' sentinel
+2:      je      2b
+        mov     $0x3f8+5,%dx   # UART Line Status Register
+3:      in      %dx,%al
+        test    $0x20,%al      # Test THR Empty flag
+        je      3b
+        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
+        mov     %bl,%al
+        out     %al,%dx        # Send a character over the serial line
+        movsb                  # Write a character to the VGA framebuffer
+        mov     $7,%al
+        stosb                  # Write an attribute to the VGA framebuffer
+        jmp     1b
+
+gdt_boot_descr:
+        .word   4*8-1
+        .long   SYM_PHYS(trampoline_gdt)
+
+__start:
+        cld
+        cli
+
+        /* Initialise GDT and basic data segments. */
+        lgdt    %cs:SYM_PHYS(gdt_boot_descr)
+        mov     $TRAMP_DS,%ecx
+        mov     %ecx,%ds
+        mov     %ecx,%es
+
+        /* Check for Multiboot bootloader */
+        cmp     $0x2BADB002,%eax
+        jne     not_multiboot
+
+        /* Save the Multiboot info structure for later use. */
+        mov     %ebx,SYM_PHYS(multiboot_ptr)
+
+        /* Initialize BSS (no nasty surprises!) */
+        mov     $SYM_PHYS(__bss_start),%edi
+        mov     $SYM_PHYS(_end),%ecx
+        sub     %edi,%ecx
+        xor     %eax,%eax
+        rep     stosb
+
+        /* Interrogate CPU extended features via CPUID. */
+        mov     $0x80000000,%eax
+        cpuid
+        xor     %edx,%edx
+        cmp     $0x80000000,%eax    # any function > 0x80000000?
+        jbe     1f
+        mov     $0x80000001,%eax
+        cpuid
+1:      mov     %edx,SYM_PHYS(cpuid_ext_features)
+
+#if defined(__x86_64__)
+        /* Check for availability of long mode. */
+        bt      $29,%edx
+        jnc     bad_cpu
+        /* Initialise L2 identity-map and xen page table entries (16MB). */
+        mov     $SYM_PHYS(l2_identmap),%edi
+        mov     $SYM_PHYS(l2_xenmap),%esi
+        mov     $0x1e3,%eax                  /* PRESENT+RW+A+D+2MB+GLOBAL */
+        mov     $8,%ecx
+1:      mov     %eax,(%edi)
+        add     $8,%edi
+        mov     %eax,(%esi)
+        add     $8,%esi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        loop    1b
+        /* Initialise L3 identity-map page directory entries. */
+        mov     $SYM_PHYS(l3_identmap),%edi
+        mov     $(SYM_PHYS(l2_identmap)+7),%eax
+        mov     $4,%ecx
+1:      mov     %eax,(%edi)
+        add     $8,%edi
+        add     $PAGE_SIZE,%eax
+        loop    1b
+        /* Initialise L3 xen-map page directory entry. */
+        mov     $(SYM_PHYS(l2_xenmap)+7),%eax
+        mov     %eax,SYM_PHYS(l3_xenmap) + (50*8)
+        /* Hook indentity-map and xen-map L3 tables into PML4. */
+        mov     $(SYM_PHYS(l3_identmap)+7),%eax
+        mov     %eax,SYM_PHYS(idle_pg_table) + (  0*8) /* PML4[  0]: 1:1 map */
+        mov     %eax,SYM_PHYS(idle_pg_table) + (262*8) /* PML4[262]: 1:1 map */
+        mov     $(SYM_PHYS(l3_xenmap)+7),%eax
+        mov     %eax,SYM_PHYS(idle_pg_table) + (261*8) /* PML4[261]: xen map */
+#elif defined(CONFIG_X86_PAE)
+        /* Initialize low and high mappings of memory with 2MB pages */
+        mov     $SYM_PHYS(idle_pg_table_l2),%edi
+        mov     $0xe3,%eax                   /* PRESENT+RW+A+D+2MB */
+1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
+        jne     1b
+1:      stosl   /* low mappings cover up to 16MB */
+        add     $4,%edi
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $(16<<20)+0xe3,%eax
+        jne     1b
+#else
+        /* Initialize low and high mappings of memory with 4MB pages */
+        mov     $SYM_PHYS(idle_pg_table),%edi
+        mov     $0xe3,%eax                   /* PRESENT+RW+A+D+4MB */
+1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
+        jne     1b
+1:      stosl   /* low mappings cover up to 16MB */
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $(16<<20)+0xe3,%eax
+        jne     1b
+#endif
+
+        /* Copy bootstrap trampoline to low memory, below 1MB. */
+        mov     $SYM_PHYS(trampoline_start),%esi
+        mov     $SYM_TRAMP_PHYS(trampoline_start),%edi
+        mov     $trampoline_end - trampoline_start,%ecx
+        rep     movsb
+
+        /* EBX == 0 indicates we are the BP (Boot Processor). */
+        xor     %ebx,%ebx
+
+        /* Jump into the relocated trampoline. */
+        jmp     $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
+
+        .globl trampoline_start, trampoline_end
+trampoline_start:
+#include "trampoline.S"
+trampoline_end:
+
+__high_start:
+#ifdef __x86_64__
+#include "x86_64.S"
+#else
+#include "x86_32.S"
+#endif
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/boot/trampoline.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/boot/trampoline.S    Thu May 10 18:02:55 2007 +0100
@@ -0,0 +1,107 @@
+        .code16
+
+        .globl trampoline_realmode_entry
+trampoline_realmode_entry:
+        nop                               # We use this byte as a progress flag
+        movb    $0xA5,trampoline_cpu_started - trampoline_start
+        cld
+        cli
+        lidt    %cs:idt_48 - trampoline_start
+        lgdt    %cs:gdt_48 - trampoline_start
+        xor     %ax, %ax
+        inc     %ax
+        lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
+        mov     $1,%bl                    # EBX != 0 indicates we are an AP
+        jmp     1f
+1:      ljmpl   $TRAMP_CS32,$SYM_TRAMP_PHYS(trampoline_protmode_entry)
+
+idt_48: .word   0, 0, 0 # base = limit = 0
+gdt_48: .word   4*8-1
+        .long   SYM_TRAMP_PHYS(trampoline_gdt)
+trampoline_gdt:
+        .quad   0x0000000000000000     /* 0x0000: unused */
+        .quad   0x00cf9a000000ffff     /* 0x0008: ring 0 code, 32-bit mode */
+        .quad   0x00af9a000000ffff     /* 0x0010: ring 0 code, 64-bit mode */
+        .quad   0x00cf92000000ffff     /* 0x0018: ring 0 data */
+
+cpuid_ext_features:
+        .long   0
+
+        .globl trampoline_xen_phys_start
+trampoline_xen_phys_start:
+        .long   0
+
+        .globl trampoline_cpu_started
+trampoline_cpu_started:
+        .byte   0
+
+        .code32
+trampoline_protmode_entry:
+        /* Set up a few descriptors: on entry only CS is guaranteed good. */
+        mov     $TRAMP_DS,%eax
+        mov     %eax,%ds
+        mov     %eax,%es
+
+        /* Set up FPU. */
+        fninit
+
+        /* Initialise CR4. */
+#if CONFIG_PAGING_LEVELS == 2
+        mov     $X86_CR4_PSE,%ecx
+#else
+        mov     $X86_CR4_PAE,%ecx
+#endif
+        mov     %ecx,%cr4
+
+        /* Load pagetable base register. */
+        mov     $SYM_PHYS(idle_pg_table),%eax
+        add     SYM_TRAMP_PHYS(trampoline_xen_phys_start),%eax
+        mov     %eax,%cr3
+
+#if CONFIG_PAGING_LEVELS != 2
+        /* Set up EFER (Extended Feature Enable Register). */
+        movl    $MSR_EFER,%ecx
+        rdmsr
+#if CONFIG_PAGING_LEVELS == 4
+        btsl    $_EFER_LME,%eax /* Long Mode      */
+        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
+#endif
+        mov     SYM_TRAMP_PHYS(cpuid_ext_features),%edi
+        btl     $20,%edi        /* CPUID 0x80000001, EDX[20] */
+        jnc     1f
+        btsl    $_EFER_NX,%eax  /* No-Execute     */
+1:      wrmsr
+#endif
+
+        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
+        mov     %eax,%cr0
+        jmp     1f
+1:
+
+#if defined(__x86_64__)
+
+        /* Now in compatibility mode. Long-jump into 64-bit mode. */
+        ljmp    $TRAMP_CS64,$SYM_TRAMP_PHYS(start64)
+
+        .code64
+start64:
+        /* Jump to high mappings. */
+        mov     high_start(%rip),%rax
+        jmpq    *%rax
+
+high_start:
+        .quad   __high_start
+
+#else /* !defined(__x86_64__) */
+
+        /* Install relocated selectors. */
+        lgdt    gdt_descr
+        mov     $(__HYPERVISOR_DS),%eax
+        mov     %eax,%ds
+        mov     %eax,%es
+        mov     %eax,%fs
+        mov     %eax,%gs
+        mov     %eax,%ss
+        ljmp    $(__HYPERVISOR_CS),$__high_start
+
+#endif
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/boot/x86_32.S        Thu May 10 18:02:55 2007 +0100
@@ -1,178 +1,38 @@
-#include <xen/config.h>
-#include <xen/multiboot.h>
-#include <public/xen.h>
-#include <asm/asm_defns.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-#include <asm/msr.h>
-
-#define  SECONDARY_CPU_FLAG 0xA5A5A5A5
-                
-        .text
-
-ENTRY(start)
-        jmp __start
-
-        .align 4
-
-/*** MULTIBOOT HEADER ****/
-#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
-                                MULTIBOOT_HEADER_WANT_MEMORY)
-        /* Magic number indicating a Multiboot header. */
-        .long MULTIBOOT_HEADER_MAGIC
-        /* Flags to bootloader (see Multiboot spec). */
-        .long MULTIBOOT_HEADER_FLAGS
-        /* Checksum: must be the negated sum of the first two fields. */
-        .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
+        .code32
         
-not_multiboot_msg:
-        .asciz "ERR: Not a Multiboot bootloader!"
-not_multiboot:
-        mov     $not_multiboot_msg-__PAGE_OFFSET,%esi
-        mov     $0xB8000,%edi  # VGA framebuffer
-1:      mov     (%esi),%bl
-        test    %bl,%bl        # Terminate on '\0' sentinel
-2:      je      2b
-        mov     $0x3f8+5,%dx   # UART Line Status Register
-3:      in      %dx,%al
-        test    $0x20,%al      # Test THR Empty flag
-        je      3b
-        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
-        mov     %bl,%al
-        out     %al,%dx        # Send a character over the serial line
-        movsb                  # Write a character to the VGA framebuffer
-        mov     $7,%al
-        stosb                  # Write an attribute to the VGA framebuffer
-        jmp     1b
+        /* Enable full CR4 features. */
+        mov     mmu_cr4_features,%eax
+        mov     %eax,%cr4
         
-__start:
-        /* Set up a few descriptors: on entry only CS is guaranteed good. */
-        lgdt    %cs:nopaging_gdt_descr-__PAGE_OFFSET
-        mov     $(__HYPERVISOR_DS),%ecx
-        mov     %ecx,%ds
-        mov     %ecx,%es
-        mov     %ecx,%fs
-        mov     %ecx,%gs
-        ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
-1:      lss     stack_start-__PAGE_OFFSET,%esp
-        add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
-
+        /* Initialise stack. */
+        mov     stack_start,%esp
+        or      $(STACK_SIZE-CPUINFO_sizeof),%esp
+        
         /* Reset EFLAGS (subsumes CLI and CLD). */
         pushl   $0
         popf
 
-        /* Set up FPU. */
-        fninit
+        lidt    idt_descr
 
-        /* Set up CR4, except global flag which Intel requires should be     */
-        /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
-        mov     mmu_cr4_features-__PAGE_OFFSET,%ecx
-        and     $0x7f,%cl   # CR4.PGE (global enable)
-        mov     %ecx,%cr4
+        test    %ebx,%ebx
+        jnz     start_secondary
 
-        cmp     $(SECONDARY_CPU_FLAG),%ebx
-        je      start_paging
-                
-        /* Check for Multiboot bootloader */
-        cmp     $0x2BADB002,%eax
-        jne     not_multiboot
-
-        /* Initialize BSS (no nasty surprises!) */
-        mov     $__bss_start-__PAGE_OFFSET,%edi
-        mov     $_end-__PAGE_OFFSET,%ecx
-        sub     %edi,%ecx
-        xor     %eax,%eax
-        rep     stosb
-
-        /* Save the Multiboot info structure for later use. */
-        add     $__PAGE_OFFSET,%ebx
-        push    %ebx
-
-#ifdef CONFIG_X86_PAE
-        /* Initialize low and high mappings of all memory with 2MB pages */
-        mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
-        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
-1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
-        stosl                                /* low mapping */
-        add     $4,%edi
-        add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
-        jne     1b
-1:      stosl   /* low mappings cover as much physmem as possible */
-        add     $4,%edi
-        add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
-        jne     1b
-#else
-        /* Initialize low and high mappings of all memory with 4MB pages */
-        mov     $idle_pg_table-__PAGE_OFFSET,%edi
-        mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
-1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
-        stosl                                /* low mapping */
-        add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
-        jne     1b
-1:      stosl   /* low mappings cover as much physmem as possible */
-        add     $(1<<L2_PAGETABLE_SHIFT),%eax
-        cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
-        jne     1b
-#endif
-        
         /* Initialise IDT with simple error defaults. */
         lea     ignore_int,%edx
         mov     $(__HYPERVISOR_CS << 16),%eax
         mov     %dx,%ax            /* selector = 0x0010 = cs */
         mov     $0x8E00,%dx        /* interrupt gate - dpl=0, present */
-        lea     idt_table-__PAGE_OFFSET,%edi
+        lea     idt_table,%edi
         mov     $256,%ecx
 1:      mov     %eax,(%edi)
         mov     %edx,4(%edi)
         add     $8,%edi
         loop    1b
-
-start_paging:
-#ifdef CONFIG_X86_PAE
-        /* Enable Execute-Disable (NX/XD) support if it is available. */
-        push    %ebx
-        mov     $0x80000000,%eax
-        cpuid
-        cmp     $0x80000000,%eax    /* Any function > 0x80000000? */
-        jbe     no_execute_disable
-        mov     $0x80000001,%eax
-        cpuid
-        bt      $20,%edx            /* Execute Disable? */
-        jnc     no_execute_disable
-        movl    $MSR_EFER,%ecx
-        rdmsr
-        bts     $_EFER_NX,%eax
-        wrmsr
-no_execute_disable:
-        pop     %ebx
-#endif
-        mov     $idle_pg_table-__PAGE_OFFSET,%eax
-        mov     %eax,%cr3
-        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
-        mov     %eax,%cr0
-        jmp     1f
-1:      /* Install relocated selectors (FS/GS unused). */
-        lgdt    gdt_descr
-        mov     $(__HYPERVISOR_DS),%ecx
-        mov     %ecx,%ds
-        mov     %ecx,%es
-        mov     %ecx,%ss
-        ljmp    $(__HYPERVISOR_CS),$1f
-1:      /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
-        mov     mmu_cr4_features,%ecx
-        mov     %ecx,%cr4
-        /* Relocate ESP */
-        add     $__PAGE_OFFSET,%esp
-
-        lidt    idt_descr
                 
-        cmp     $(SECONDARY_CPU_FLAG),%ebx
-        je      start_secondary
-
-        /* Call into main C routine. This should never return.*/
+        /* Pass off the Multiboot info structure to C land. */
+        mov     multiboot_ptr,%eax
+        add     $__PAGE_OFFSET,%eax
+        push    %eax
         call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
@@ -189,15 +49,14 @@ ignore_int:
         call    printk
 1:      jmp     1b
 
-/*** STACK LOCATION ***/
-        
 ENTRY(stack_start)
         .long cpu0_stack
-        .long __HYPERVISOR_DS
         
 /*** DESCRIPTOR TABLES ***/
 
         ALIGN
+multiboot_ptr:
+        .long   0
         
         .word   0    
 idt_descr:
@@ -212,8 +71,8 @@ gdt_descr:
         .word   0
 nopaging_gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-        
+        .long   SYM_PHYS(gdt_table) - FIRST_RESERVED_GDT_BYTE
+
         .align PAGE_SIZE, 0
 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
 /*     the machine->physical mapping table. Ring 0 can access all memory.    */
@@ -231,25 +90,11 @@ ENTRY(gdt_table)
         .quad 0x0000000000000000     /* unused                           */
         .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
 
-        .align PAGE_SIZE, 0
-
 #ifdef CONFIG_X86_PAE
+        .align 32
 ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_l3)
-        .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-.section ".bss.page_aligned","w"
-ENTRY(idle_pg_table_l2)
-        .fill 4*PAGE_SIZE,1,0
-#else
-.section ".bss.page_aligned","w"
-ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_l2)
-        .fill 1*PAGE_SIZE,1,0
+        .long SYM_PHYS(idle_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(idle_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(idle_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(idle_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
 #endif
-
-.section ".bss.stack_aligned","w"
-ENTRY(cpu0_stack)
-        .fill STACK_SIZE,1,0
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/boot/x86_64.S        Thu May 10 18:02:55 2007 +0100
@@ -1,122 +1,13 @@
-#include <xen/config.h>
-#include <xen/multiboot.h>
-#include <public/xen.h>
-#include <asm/asm_defns.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-#include <asm/msr.h>
+        .code64
 
-#define  SECONDARY_CPU_FLAG 0xA5A5A5A5
-                
-        .text
-        .code32
-
-#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
-
-ENTRY(start)
-        jmp __start
-
-        .org    0x004
-/*** MULTIBOOT HEADER ****/
-#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
-                                MULTIBOOT_HEADER_WANT_MEMORY)
-        /* Magic number indicating a Multiboot header. */
-        .long   MULTIBOOT_HEADER_MAGIC
-        /* Flags to bootloader (see Multiboot spec). */
-        .long   MULTIBOOT_HEADER_FLAGS
-        /* Checksum: must be the negated sum of the first two fields. */
-        .long   -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
-
-.Lbad_cpu_msg: .asciz "ERR: Not a 64-bit CPU!"
-.Lbad_ldr_msg: .asciz "ERR: Not a Multiboot bootloader!"
-
-bad_cpu:
-        mov     $(SYM_PHYS(.Lbad_cpu_msg)),%esi # Error message
-        jmp     print_err
-not_multiboot:
-        mov     $(SYM_PHYS(.Lbad_ldr_msg)),%esi # Error message
-print_err:
-        mov     $0xB8000,%edi  # VGA framebuffer
-1:      mov     (%esi),%bl
-        test    %bl,%bl        # Terminate on '\0' sentinel
-2:      je      2b
-        mov     $0x3f8+5,%dx   # UART Line Status Register
-3:      in      %dx,%al
-        test    $0x20,%al      # Test THR Empty flag
-        je      3b
-        mov     $0x3f8+0,%dx   # UART Transmit Holding Register
-        mov     %bl,%al
-        out     %al,%dx        # Send a character over the serial line
-        movsb                  # Write a character to the VGA framebuffer
-        mov     $7,%al
-        stosb                  # Write an attribute to the VGA framebuffer
-        jmp     1b
-
-__start:
-        cld
-        cli
-
-        /* Set up a few descriptors: on entry only CS is guaranteed good. */
-        lgdt    %cs:SYM_PHYS(nopaging_gdt_descr)
-        mov     $(__HYPERVISOR_DS32),%ecx
+        /* Install relocated data selectors. */
+        lgdt    gdt_descr(%rip)
+        mov     $(__HYPERVISOR_DS64),%ecx
         mov     %ecx,%ds
         mov     %ecx,%es
-
-        cmp     $(SECONDARY_CPU_FLAG),%ebx
-        je      skip_boot_checks
-
-        /* Check for Multiboot bootloader */
-        cmp     $0x2BADB002,%eax
-        jne     not_multiboot
-
-        /* Save the Multiboot info structure for later use. */
-        mov     %ebx,SYM_PHYS(multiboot_ptr)
-
-        /* We begin by interrogating the CPU for the presence of long mode. */
-        mov     $0x80000000,%eax
-        cpuid
-        cmp     $0x80000000,%eax    # any function > 0x80000000?
-        jbe     bad_cpu
-        mov     $0x80000001,%eax
-        cpuid
-        bt      $29,%edx            # Long mode feature?
-        jnc     bad_cpu
-        mov     %edx,SYM_PHYS(cpuid_ext_features)
-skip_boot_checks:
-
-        /* Set up FPU. */
-        fninit
-        
-        /* Enable PAE in CR4. */
-        mov     $0x20,%ecx # X86_CR4_PAE
-        mov     %ecx,%cr4
-
-        /* Load pagetable base register. */
-        mov     $SYM_PHYS(idle_pg_table),%eax
-        mov     %eax,%cr3
-
-        /* Set up EFER (Extended Feature Enable Register). */
-        movl    $MSR_EFER,%ecx
-        rdmsr
-        btsl    $_EFER_LME,%eax /* Long Mode      */
-        btsl    $_EFER_SCE,%eax /* SYSCALL/SYSRET */
-        mov     SYM_PHYS(cpuid_ext_features),%edi
-        btl     $20,%edi        /* CPUID 0x80000001, EDX[20] */
-        jnc     1f
-        btsl    $_EFER_NX,%eax  /* No-Execute     */
-1:      wrmsr
-
-        mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
-        mov     %eax,%cr0
-        jmp     1f
-
-1:      /* Now in compatibility mode. Long-jump into 64-bit mode. */
-        ljmp    $(__HYPERVISOR_CS64),$SYM_PHYS(start64)
-        
-        .code64
-start64:
-        /* Install relocated selectors (FS/GS unused). */
-        lgdt    gdt_descr(%rip)
+        mov     %ecx,%fs
+        mov     %ecx,%gs
+        mov     %ecx,%ss
 
         /* Enable full CR4 features. */
         mov     mmu_cr4_features(%rip),%rcx
@@ -129,30 +20,15 @@ start64:
         pushq   $0
         popf
 
-        /* Jump to high mappings. */
-        mov     high_start(%rip),%rax
-        push    %rax
-        ret
-__high_start:
-        
-        mov     $(__HYPERVISOR_DS64),%ecx
-        mov     %ecx,%ds
-        mov     %ecx,%es
-        mov     %ecx,%fs
-        mov     %ecx,%gs
-        mov     %ecx,%ss
+        /* Reload code selector. */
+        pushq   $(__HYPERVISOR_CS64)
+        leaq    1f(%rip),%rax
+        pushq   %rax
+        lretq
+1:      lidt    idt_descr(%rip)
 
-        lidt    idt_descr(%rip)
-
-        cmp     $(SECONDARY_CPU_FLAG),%ebx
-        je      start_secondary
-
-        /* Initialize BSS (no nasty surprises!) */
-        lea     __bss_start(%rip),%rdi
-        lea     _end(%rip),%rcx
-        sub     %rdi,%rcx
-        xor     %rax,%rax
-        rep     stosb
+        test    %ebx,%ebx
+        jnz     start_secondary
 
         /* Initialise IDT with simple error defaults. */
         leaq    ignore_int(%rip),%rcx
@@ -198,14 +74,6 @@ multiboot_ptr:
         .long   0
 
         .word   0
-nopaging_gdt_descr:
-        .word   LAST_RESERVED_GDT_BYTE
-        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-
-cpuid_ext_features:
-        .long   0
-        
-        .word   0
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
         .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
@@ -217,9 +85,6 @@ idt_descr:
 
 ENTRY(stack_start)
         .quad   cpu0_stack
-
-high_start:
-        .quad   __high_start
 
         .align PAGE_SIZE, 0
 ENTRY(gdt_table)
@@ -234,7 +99,6 @@ ENTRY(gdt_table)
         .org gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
         .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
 
-#ifdef CONFIG_COMPAT
         .align PAGE_SIZE, 0
 /* NB. Even rings != 0 get access to the full 4Gb, as only the            */
 /*     (compatibility) machine->physical mapping table lives there.       */
@@ -249,37 +113,3 @@ ENTRY(compat_gdt_table)
         .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
         .org compat_gdt_table - FIRST_RESERVED_GDT_BYTE + __TSS(0) * 8
         .fill 4*NR_CPUS,8,0          /* space for TSS and LDT per CPU     */
-# undef LIMIT
-#endif
-
-/* Initial PML4 -- level-4 page table. */
-        .align PAGE_SIZE, 0
-ENTRY(idle_pg_table)
-ENTRY(idle_pg_table_4)
-        .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[0]
-        .fill 261,8,0
-        .quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]
-
-/* Initial PDP -- level-3 page table. */
-        .align PAGE_SIZE, 0
-ENTRY(idle_pg_table_l3)
-        .quad idle_pg_table_l2 - __PAGE_OFFSET + 7
-
-/* Initial PDE -- level-2 page table. Maps first 1GB physical memory. */
-        .align PAGE_SIZE, 0
-ENTRY(idle_pg_table_l2)
-        .macro identmap from=0, count=512
-        .if \count-1
-        identmap "(\from+0)","(\count/2)"
-        identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
-        .else
-        .quad 0x00000000000001e3 + \from
-        .endif
-        .endm
-        identmap
-
-        .align PAGE_SIZE, 0
-
-.section ".bss.stack_aligned","w"
-ENTRY(cpu0_stack)
-        .fill STACK_SIZE,1,0
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c       Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/e820.c       Thu May 10 18:02:55 2007 +0100
@@ -32,7 +32,7 @@ static void __init add_memory_region(uns
     }
 } /* add_memory_region */
 
-static void __init print_e820_memory_map(struct e820entry *map, int entries)
+/*static*/ void __init print_e820_memory_map(struct e820entry *map, int 
entries)
 {
     int i;
 
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/mm.c Thu May 10 18:02:55 2007 +0100
@@ -3037,7 +3037,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
         if ( mfn_valid(prev_mfn) )
         {
-            if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
+            if ( is_xen_heap_frame(mfn_to_page(prev_mfn)) )
                 /* Xen heap frames are simply unhooked from this phys slot. */
                 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
             else
@@ -3487,8 +3487,17 @@ void memguard_init(void)
 void memguard_init(void)
 {
     map_pages_to_xen(
-        PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
+        (unsigned long)__va(xen_phys_start),
+        xen_phys_start >> PAGE_SHIFT,
+        (xenheap_phys_end - xen_phys_start) >> PAGE_SHIFT,
         __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+#ifdef __x86_64__
+    map_pages_to_xen(
+        XEN_VIRT_START,
+        xen_phys_start >> PAGE_SHIFT,
+        (__pa(&_end) + PAGE_SIZE - 1 - xen_phys_start) >> PAGE_SHIFT,
+        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+#endif
 }
 
 static void __memguard_change_range(void *p, unsigned long l, int guard)
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/setup.c      Thu May 10 18:02:55 2007 +0100
@@ -34,6 +34,14 @@
 #include <acm/acm_hooks.h>
 #include <xen/kexec.h>
 
+#if defined(CONFIG_X86_64)
+#define BOOTSTRAP_DIRECTMAP_END (1UL << 32)
+#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
+#else
+#define BOOTSTRAP_DIRECTMAP_END HYPERVISOR_VIRT_START
+#define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
+#endif
+
 extern void dmi_scan_machine(void);
 extern void generic_apic_probe(void);
 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
@@ -82,6 +90,8 @@ int early_boot = 1;
 
 cpumask_t cpu_present_map;
 
+unsigned long xen_phys_start;
+
 /* Limits of Xen heap, used to initialise the allocator. */
 unsigned long xenheap_phys_start, xenheap_phys_end;
 
@@ -93,7 +103,7 @@ extern void early_cpu_init(void);
 
 struct tss_struct init_tss[NR_CPUS];
 
-extern unsigned long cpu0_stack[];
+char __attribute__ ((__section__(".bss.page_aligned"))) cpu0_stack[STACK_SIZE];
 
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 
@@ -108,7 +118,7 @@ int acpi_disabled;
 
 int acpi_force;
 char acpi_param[10] = "";
-static void parse_acpi_param(char *s)
+static void __init parse_acpi_param(char *s)
 {
     /* Save the parameter so it can be propagated to domain0. */
     safe_strcpy(acpi_param, s);
@@ -147,20 +157,23 @@ static void __init do_initcalls(void)
         (*call)();
 }
 
-#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
-
-static struct e820entry e820_raw[E820MAX];
-
-static unsigned long initial_images_start, initial_images_end;
-
-unsigned long initial_images_nrpages(void)
+#define EARLY_FAIL(f, a...) do {                \
+    printk( f , ## a );                         \
+    for ( ; ; ) __asm__ __volatile__ ( "hlt" ); \
+} while (0)
+
+static struct e820entry __initdata e820_raw[E820MAX];
+
+static unsigned long __initdata initial_images_start, initial_images_end;
+
+unsigned long __init initial_images_nrpages(void)
 {
     unsigned long s = initial_images_start + PAGE_SIZE - 1;
     unsigned long e = initial_images_end;
     return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
 }
 
-void discard_initial_images(void)
+void __init discard_initial_images(void)
 {
     init_domheap_pages(initial_images_start, initial_images_end);
 }
@@ -170,33 +183,15 @@ static void __init percpu_init_areas(voi
 static void __init percpu_init_areas(void)
 {
     unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
+    unsigned int first_unused;
 
     BUG_ON(data_size > PERCPU_SIZE);
 
-    for_each_cpu ( i )
-    {
-        memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT),
-                               1 << PERCPU_SHIFT);
-        if ( i != 0 )
-            memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
-                   __per_cpu_start,
-                   data_size);
-    }
-}
-
-static void __init percpu_guard_areas(void)
-{
-    memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start);
-}
-
-static void __init percpu_free_unused_areas(void)
-{
-    unsigned int i, first_unused;
-
-    /* Find first unused CPU number. */
-    for ( i = 0; i < NR_CPUS; i++ )
-        if ( !cpu_possible(i) )
-            break;
+    /* Initialise per-cpu data area for all possible secondary CPUs. */
+    for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
+        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
+               __per_cpu_start,
+               data_size);
     first_unused = i;
 
     /* Check that there are no holes in cpu_possible_map. */
@@ -210,7 +205,7 @@ static void __init percpu_free_unused_ar
 }
 
 /* Fetch acm policy module from multiboot modules. */
-static void extract_acm_policy(
+static void __init extract_acm_policy(
     multiboot_info_t *mbi,
     unsigned int *initrdidx,
     char **_policy_start,
@@ -228,11 +223,7 @@ static void extract_acm_policy(
     for ( i = mbi->mods_count-1; i >= 1; i-- )
     {
         start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
-#if defined(__i386__)
-        policy_start = (char *)start;
-#elif defined(__x86_64__)
-        policy_start = __va(start);
-#endif
+        policy_start = maddr_to_bootstrap_virt(start);
         policy_len   = mod[i].mod_end - mod[i].mod_start;
         if ( acm_is_policy(policy_start, policy_len) )
         {
@@ -264,7 +255,7 @@ static void __init init_idle_domain(void
     setup_idle_pagetable();
 }
 
-static void srat_detect_node(int cpu)
+static void __init srat_detect_node(int cpu)
 {
     unsigned node;
     u8 apicid = x86_cpu_to_apicid[cpu];
@@ -278,18 +269,45 @@ static void srat_detect_node(int cpu)
         printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
 }
 
-void __init move_memory(unsigned long dst,
-                          unsigned long src_start, unsigned long src_end)
-{
-#if defined(CONFIG_X86_32)
-    memmove((void *)dst,            /* use low mapping */
-            (void *)src_start,      /* use low mapping */
+static void __init move_memory(
+    unsigned long dst, unsigned long src_start, unsigned long src_end)
+{
+    memmove(maddr_to_bootstrap_virt(dst),
+            maddr_to_bootstrap_virt(src_start),
             src_end - src_start);
-#elif defined(CONFIG_X86_64)
-    memmove(__va(dst),
-            __va(src_start),
-            src_end - src_start);
-#endif
+}
+
+/* A temporary copy of the e820 map that we can mess with during bootstrap. */
+static struct e820map __initdata boot_e820;
+
+/* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
+static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
+{
+    unsigned long rs, re;
+    int i;
+
+    for ( i = 0; i < boot_e820.nr_map; i++ )
+    {
+        /* Have we found the e820 region that includes the specified range? */
+        rs = boot_e820.map[i].addr;
+        re = boot_e820.map[i].addr + boot_e820.map[i].size;
+        if ( (s < rs) || (e > re) )
+            continue;
+
+        /* Start fragment. */
+        boot_e820.map[i].size = s - rs;
+
+        /* End fragment. */
+        if ( e < re )
+        {
+            memmove(&boot_e820.map[i+1], &boot_e820.map[i],
+                    (boot_e820.nr_map-i) * sizeof(boot_e820.map[0]));
+            boot_e820.nr_map++;
+            i++;
+            boot_e820.map[i].addr = e;
+            boot_e820.map[i].size = re - e;
+        }
+    }
 }
 
 void __init __start_xen(multiboot_info_t *mbi)
@@ -301,7 +319,6 @@ void __init __start_xen(multiboot_info_t
     unsigned long _policy_len = 0;
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length;
-    paddr_t s, e;
     int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
         .data_bits = 8,
@@ -338,17 +355,11 @@ void __init __start_xen(multiboot_info_t
 
     /* Check that we have at least one Multiboot module. */
     if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
-    {
-        printk("FATAL ERROR: dom0 kernel not specified."
-               " Check bootloader configuration.\n");
-        EARLY_FAIL();
-    }
+        EARLY_FAIL("dom0 kernel not specified. "
+                   "Check bootloader configuration.\n");
 
     if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
-    {
-        printk("FATAL ERROR: Misaligned CPU0 stack.\n");
-        EARLY_FAIL();
-    }
+        EARLY_FAIL("Misaligned CPU0 stack.\n");
 
     /*
      * Since there are some stubs getting built on the stacks which use
@@ -357,7 +368,6 @@ void __init __start_xen(multiboot_info_t
      */
     if ( opt_xenheap_megabytes > 2048 )
         opt_xenheap_megabytes = 2048;
-    xenheap_phys_end = opt_xenheap_megabytes << 20;
 
     if ( mbi->flags & MBI_MEMMAP )
     {
@@ -403,8 +413,7 @@ void __init __start_xen(multiboot_info_t
     }
     else
     {
-        printk("FATAL ERROR: Bootloader provided no memory information.\n");
-        for ( ; ; ) ;
+        EARLY_FAIL("Bootloader provided no memory information.\n");
     }
 
     if ( e820_warn )
@@ -430,80 +439,190 @@ void __init __start_xen(multiboot_info_t
     /* Sanitise the raw E820 map to produce a final clean version. */
     max_page = init_e820(e820_raw, &e820_raw_nr);
 
+    /*
+     * Create a temporary copy of the E820 map. Truncate it to above 16MB
+     * as anything below that is already mapped and has a statically-allocated
+     * purpose.
+     */
+    memcpy(&boot_e820, &e820, sizeof(e820));
+    for ( i = 0; i < boot_e820.nr_map; i++ )
+    {
+        uint64_t s, e, min = 16 << 20; /* 16MB */
+        s = boot_e820.map[i].addr;
+        e = boot_e820.map[i].addr + boot_e820.map[i].size;
+        if ( s >= min )
+            continue;
+        if ( e > min )
+        {
+            boot_e820.map[i].addr = min;
+            boot_e820.map[i].size = e - min;
+        }
+        else
+            boot_e820.map[i].type = E820_RESERVED;
+    }
+
+    /*
+     * Iterate over all superpage-aligned RAM regions.
+     * 
+     * We require superpage alignment because the boot allocator is not yet
+     * initialised. Hence we can only map superpages in the address range
+     * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
+     * dynamic allocation of pagetables.
+     * 
+     * As well as mapping superpages in that range, in preparation for
+     * initialising the boot allocator, we also look for a region to which
+     * we can relocate the dom0 kernel and other multiboot modules. Also, on
+     * x86/64, we relocate Xen to higher memory.
+     */
     modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
-
-    /* Find a large enough RAM extent to stash the DOM0 modules. */
-    for ( i = 0; ; i++ )
-    {
-        if ( i == e820.nr_map )
-        {
-            printk("Not enough memory to stash the DOM0 kernel image.\n");
-            for ( ; ; ) ;
-        }
-
-        if ( (e820.map[i].type == E820_RAM) &&
-             (e820.map[i].size >= modules_length) &&
-             ((e820.map[i].addr + e820.map[i].size) >=
-              (xenheap_phys_end + modules_length)) )
-            break;
-    }
-
-    /* Stash as near as possible to the beginning of the RAM extent. */
-    initial_images_start = e820.map[i].addr;
-    if ( initial_images_start < xenheap_phys_end )
-        initial_images_start = xenheap_phys_end;
-    initial_images_end = initial_images_start + modules_length;
-
-    move_memory(initial_images_start, 
-                mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
-
-    /* Initialise boot-time allocator with all RAM situated after modules. */
+    for ( i = 0; i < boot_e820.nr_map; i++ )
+    {
+        uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
+
+        /* Superpage-aligned chunks up to BOOTSTRAP_DIRECTMAP_END, please. */
+        s = (boot_e820.map[i].addr + mask) & ~mask;
+        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
+        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
+            continue;
+
+        /* Map the chunk. No memory will need to be allocated to do this. */
+        map_pages_to_xen(
+            (unsigned long)maddr_to_bootstrap_virt(s),
+            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+
+        /* Is the region suitable for relocating the multiboot modules? */
+        if ( !initial_images_start && ((e-s) >= modules_length) )
+        {
+            e -= modules_length;
+            e &= ~mask;
+            initial_images_start = e;
+            initial_images_end = initial_images_start + modules_length;
+            move_memory(initial_images_start, 
+                        mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
+            if ( s >= e )
+                continue;
+        }
+
+#if defined(CONFIG_X86_64)
+        /* Is the region suitable for relocating Xen? */
+        if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
+        {
+            extern l2_pgentry_t l2_xenmap[];
+            l4_pgentry_t *pl4e;
+            l3_pgentry_t *pl3e;
+            l2_pgentry_t *pl2e;
+            int i, j;
+
+            /* Select relocation address. */
+            e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
+            xen_phys_start = e;
+            boot_trampoline_va(trampoline_xen_phys_start) = e;
+
+            /*
+             * Perform relocation to new physical address.
+             * Before doing so we must sync static/global data with main memory
+             * with a barrier(). After this we must *not* modify static/global
+             * data until after we have switched to the relocated pagetables!
+             */
+            barrier();
+            move_memory(e, 0, __pa(&_end) - xen_phys_start);
+
+            /* Walk initial pagetables, relocating page directory entries. */
+            pl4e = __va(__pa(idle_pg_table));
+            for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
+            {
+                if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
+                    continue;
+                *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
+                                        xen_phys_start);
+                pl3e = l4e_to_l3e(*pl4e);
+                for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
+                {
+                    /* Not present or already relocated? */
+                    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
+                         (l3e_get_pfn(*pl3e) > 0x1000) )
+                        continue;
+                    *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
+                                            xen_phys_start);
+                }
+            }
+
+            /* The only data mappings to be relocated are in the Xen area. */
+            pl2e = __va(__pa(l2_xenmap));
+            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
+            {
+                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+                    continue;
+                *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
+                                        xen_phys_start);
+            }
+
+            /* Re-sync the stack and then switch to relocated pagetables. */
+            asm volatile (
+                "rep movsb        ; " /* re-sync the stack */
+                "movq %%cr4,%%rsi ; "
+                "andb $0x7f,%%sil ; "
+                "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
+                "movq %0,%%cr3    ; " /* CR3 == new pagetables */
+                "orb $0x80,%%sil  ; "
+                "movq %%rsi,%%cr4   " /* CR4.PGE == 1 */
+                : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
+                "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
+        }
+#endif
+    }
+
+    if ( !initial_images_start )
+        EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
+    reserve_in_boot_e820(initial_images_start, initial_images_end);
+
+    /*
+     * With modules (and Xen itself, on x86/64) relocated out of the way, we
+     * can now initialise the boot allocator with some memory.
+     */
     xenheap_phys_start = init_boot_allocator(__pa(&_end));
-    nr_pages = 0;
-    for ( i = 0; i < e820.nr_map; i++ )
-    {
-        if ( e820.map[i].type != E820_RAM )
+    xenheap_phys_end   = opt_xenheap_megabytes << 20;
+#if defined(CONFIG_X86_64)
+    if ( !xen_phys_start )
+        EARLY_FAIL("Not enough memory to relocate Xen.\n");
+    xenheap_phys_end += xen_phys_start;
+    reserve_in_boot_e820(xen_phys_start,
+                         xen_phys_start + (opt_xenheap_megabytes<<20));
+    init_boot_pages(1<<20, 16<<20); /* Initial seed: 15MB */
+#else
+    init_boot_pages(xenheap_phys_end, 16<<20); /* Initial seed: 4MB */
+#endif
+
+    /*
+     * With the boot allocator now seeded, we can walk every RAM region and
+     * map it in its entirety (on x86/64, at least) and notify it to the
+     * boot allocator.
+     */
+    for ( i = 0; i < boot_e820.nr_map; i++ )
+    {
+        uint64_t s, e, map_e, mask = PAGE_SIZE - 1;
+
+        /* Only page alignment required now. */
+        s = (boot_e820.map[i].addr + mask) & ~mask;
+        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
             continue;
 
-        nr_pages += e820.map[i].size >> PAGE_SHIFT;
-
-        /* Initialise boot heap, skipping Xen heap and dom0 modules. */
-        s = e820.map[i].addr;
-        e = s + e820.map[i].size;
-        if ( s < xenheap_phys_end )
-            s = xenheap_phys_end;
-        if ( (s < initial_images_end) && (e > initial_images_start) )
-            s = initial_images_end;
+        /* Perform the mapping (truncated in 32-bit mode). */
+        map_e = e;
+#if defined(CONFIG_X86_32)
+        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
+#endif
+        if ( s < map_e )
+            map_pages_to_xen(
+                (unsigned long)maddr_to_bootstrap_virt(s),
+                s >> PAGE_SHIFT, (map_e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
+
         init_boot_pages(s, e);
-
-#if defined(CONFIG_X86_64)
-        /*
-         * x86/64 maps all registered RAM. Points to note:
-         *  1. The initial pagetable already maps low 1GB, so skip that.
-         *  2. We must map *only* RAM areas, taking care to avoid I/O holes.
-         *     Failure to do this can cause coherency problems and deadlocks
-         *     due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
-         */
-        {
-            /* Calculate page-frame range, discarding partial frames. */
-            unsigned long start, end;
-            unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
-            start = PFN_UP(e820.map[i].addr);
-            end   = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-            /* Clip the range to exclude what the bootstrapper initialised. */
-            if ( start < init_mapped )
-                start = init_mapped;
-            if ( end <= start )
-                continue;
-            /* Request the mapping. */
-            map_pages_to_xen(
-                PAGE_OFFSET + (start << PAGE_SHIFT),
-                start, end-start, PAGE_HYPERVISOR);
-        }
-#endif
-    }
-
-    if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0)
+    }
+
+    if ( (kexec_crash_area.size > 0) && (kexec_crash_area.start > 0) )
     {
         unsigned long kdump_start, kdump_size, k;
 
@@ -534,7 +653,7 @@ void __init __start_xen(multiboot_info_t
 
 #if defined(CONFIG_X86_32)
         /* Must allocate within bootstrap 1:1 limits. */
-        k = alloc_boot_low_pages(k, 1); /* 0x0 - HYPERVISOR_VIRT_START */
+        k = alloc_boot_low_pages(k, 1); /* 0x0 - BOOTSTRAP_DIRECTMAP_END */
 #else
         k = alloc_boot_pages(k, 1);
 #endif
@@ -549,8 +668,11 @@ void __init __start_xen(multiboot_info_t
     }
 
     memguard_init();
-    percpu_guard_areas();
-
+
+    nr_pages = 0;
+    for ( i = 0; i < e820.nr_map; i++ )
+        if ( e820.map[i].type == E820_RAM )
+            nr_pages += e820.map[i].size >> PAGE_SHIFT;
     printk("System RAM: %luMB (%lukB)\n",
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
@@ -592,26 +714,13 @@ void __init __start_xen(multiboot_info_t
     numa_initmem_init(0, max_page);
 
     /* Initialise the Xen heap, skipping RAM holes. */
-    nr_pages = 0;
-    for ( i = 0; i < e820.nr_map; i++ )
-    {
-        if ( e820.map[i].type != E820_RAM )
-            continue;
-
-        s = e820.map[i].addr;
-        e = s + e820.map[i].size;
-        if ( s < xenheap_phys_start )
-            s = xenheap_phys_start;
-        if ( e > xenheap_phys_end )
-            e = xenheap_phys_end;
- 
-        if ( s < e )
-        {
-            nr_pages += (e - s) >> PAGE_SHIFT;
-            init_xenheap_pages(s, e);
-        }
-    }
-
+    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
+    nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
+#ifdef __x86_64__
+    init_xenheap_pages(xen_phys_start, __pa(&_start));
+    nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
+#endif
+    xenheap_phys_start = xen_phys_start;
     printk("Xen heap: %luMB (%lukB)\n", 
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
@@ -635,8 +744,6 @@ void __init __start_xen(multiboot_info_t
     sort_exception_tables();
 
     find_smp_config();
-
-    smp_alloc_memory();
 
     dmi_scan_machine();
 
@@ -710,8 +817,6 @@ void __init __start_xen(multiboot_info_t
 
     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
     smp_cpus_done(max_cpus);
-
-    percpu_free_unused_areas();
 
     initialise_gdb(); /* could be moved earlier */
 
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/smpboot.c    Thu May 10 18:02:55 2007 +0100
@@ -54,8 +54,8 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
-static inline int set_kernel_exec(unsigned long x, int y) { return 0; }
-#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
+#define set_kernel_exec(x, y) (0)
+#define setup_trampoline()    (boot_trampoline_pa(trampoline_realmode_entry))
 
 /* Set if we find a B stepping CPU */
 static int __devinitdata smp_b_stepping;
@@ -109,50 +109,7 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mos
                        { [0 ... NR_CPUS-1] = 0xff };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end  [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
 static void map_cpu_to_logical_apicid(void);
-
-/* State of each CPU. */
-/*DEFINE_PER_CPU(int, cpu_state) = { 0 };*/
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __devinit setup_trampoline(void)
-{
-       memcpy(trampoline_base, trampoline_data, trampoline_end - 
trampoline_data);
-       return virt_to_maddr(trampoline_base);
-}
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
-       /*
-        * Has to be in very low memory so we can execute
-        * real-mode AP code.
-        */
-       if (__pa(trampoline_base) >= 0x9F000)
-               BUG();
-       /*
-        * Make the SMP trampoline executable:
-        */
-       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-}
 
 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -950,9 +907,9 @@ static int __devinit do_boot_cpu(int api
                        print_cpu_info(&cpu_data[cpu]);
                        Dprintk("CPU has booted.\n");
                } else {
-                       boot_error= 1;
-                       if (*((volatile unsigned char *)trampoline_base)
-                                       == 0xA5)
+                       boot_error = 1;
+                       mb();
+                       if (boot_trampoline_va(trampoline_cpu_started) == 0xA5)
                                /* trampoline started but...? */
                                printk("Stuck ??\n");
                        else
@@ -974,7 +931,8 @@ static int __devinit do_boot_cpu(int api
        }
 
        /* mark "stuck" area as not stuck */
-       *((volatile unsigned long *)trampoline_base) = 0;
+       boot_trampoline_va(trampoline_cpu_started) = 0;
+       mb();
 
        return boot_error;
 }
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/trampoline.S
--- a/xen/arch/x86/trampoline.S Thu May 10 16:22:27 2007 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-/*
- *
- *     Trampoline.S    Derived from Setup.S by Linus Torvalds
- *
- *     4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- *     Entry: CS:IP point to the start of our code, we are 
- *     in real mode with no stack, but the rest of the 
- *     trampoline page to make our stack and everything else
- *     is a mystery.
- *
- *     On entry to trampoline_data, the processor is in real mode
- *     with 16-bit addressing and 16-bit data.  CS has some value
- *     and IP is zero.  Thus, data addresses need to be absolute
- *     (no relocation) and are taken with regard to r_base.
- */
-
-#include <xen/config.h>
-#include <public/xen.h>
-#include <asm/desc.h>
-#include <asm/page.h>
-
-#ifdef CONFIG_SMP
-        
-.data
-
-.code16
-
-ENTRY(trampoline_data)
-r_base = .
-        mov    %cs, %ax        # Code and data in the same place
-       mov     %ax, %ds
-
-       movl    $0xA5A5A5A5, %ebx # Flag an SMP trampoline
-       cli                     # We should be safe anyway
-
-       movl    $0xA5A5A5A5, trampoline_data - r_base
-
-       lidt    idt_48 - r_base # load idt with 0, 0
-       lgdt    gdt_48 - r_base # load gdt with whatever is appropriate
-
-       xor     %ax, %ax
-       inc     %ax             # protected mode (PE) bit
-       lmsw    %ax             # into protected mode
-       jmp     flush_instr
-flush_instr:
-#if defined(__x86_64__)
-       ljmpl   $__HYPERVISOR_CS32, $0x100000 # 1MB
-#else        
-       ljmpl   $__HYPERVISOR_CS,   $0x100000 # 1MB
-#endif
-
-idt_48:
-       .word   0                       # idt limit = 0
-       .word   0, 0                    # idt base = 0L
-
-gdt_48:
-       .word   LAST_RESERVED_GDT_BYTE
-#ifdef __i386__
-       .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
-#else
-       .long   0x101000 - FIRST_RESERVED_GDT_BYTE
-#endif
-
-ENTRY(trampoline_end)
-
-#endif /* CONFIG_SMP */
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/traps.c      Thu May 10 18:02:55 2007 +0100
@@ -1413,20 +1413,30 @@ static int emulate_privileged_op(struct 
      * GPR context. This is needed for some systems which (ab)use IN/OUT
      * to communicate with BIOS code in system-management mode.
      */
+#ifdef __x86_64__
+    /* movq $host_to_guest_gpr_switch,%rcx */
+    io_emul_stub[0] = 0x48;
+    io_emul_stub[1] = 0xb9;
+    *(void **)&io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
+    /* callq *%rcx */
+    io_emul_stub[10] = 0xff;
+    io_emul_stub[11] = 0xd1;
+#else
     /* call host_to_guest_gpr_switch */
     io_emul_stub[0] = 0xe8;
     *(s32 *)&io_emul_stub[1] =
         (char *)host_to_guest_gpr_switch - &io_emul_stub[5];
+    /* 7 x nop */
+    memset(&io_emul_stub[5], 0x90, 7);
+#endif
     /* data16 or nop */
-    io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66;
+    io_emul_stub[12] = (op_bytes != 2) ? 0x90 : 0x66;
     /* <io-access opcode> */
-    io_emul_stub[6] = opcode;
+    io_emul_stub[13] = opcode;
     /* imm8 or nop */
-    io_emul_stub[7] = 0x90;
-    /* jmp guest_to_host_gpr_switch */
-    io_emul_stub[8] = 0xe9;
-    *(s32 *)&io_emul_stub[9] =
-        (char *)guest_to_host_gpr_switch - &io_emul_stub[13];
+    io_emul_stub[14] = 0x90;
+    /* ret (jumps to guest_to_host_gpr_switch) */
+    io_emul_stub[15] = 0xc3;
 
     /* Handy function-typed pointer to the stub. */
     io_emul = (void *)io_emul_stub;
@@ -1438,7 +1448,7 @@ static int emulate_privileged_op(struct 
         op_bytes = 1;
     case 0xe5: /* IN imm8,%eax */
         port = insn_fetch(u8, code_base, eip, code_limit);
-        io_emul_stub[7] = port; /* imm8 */
+        io_emul_stub[14] = port; /* imm8 */
     exec_in:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
@@ -1480,7 +1490,7 @@ static int emulate_privileged_op(struct 
         op_bytes = 1;
     case 0xe7: /* OUT %eax,imm8 */
         port = insn_fetch(u8, code_base, eip, code_limit);
-        io_emul_stub[7] = port; /* imm8 */
+        io_emul_stub[14] = port; /* imm8 */
     exec_out:
         if ( !guest_io_okay(port, op_bytes, v, regs) )
             goto fail;
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_32/gpr_switch.S
--- a/xen/arch/x86/x86_32/gpr_switch.S  Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_32/gpr_switch.S  Thu May 10 18:02:55 2007 +0100
@@ -20,6 +20,7 @@ ENTRY(host_to_guest_gpr_switch)
         movl  UREGS_esi(%eax), %esi
         pushl %edi
         movl  UREGS_edi(%eax), %edi
+        pushl $guest_to_host_gpr_switch
         pushl %ecx
         movl  UREGS_ecx(%eax), %ecx
         movl  UREGS_eax(%eax), %eax
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Thu May 10 18:02:55 2007 +0100
@@ -30,6 +30,14 @@
 #include <asm/fixmap.h>
 #include <public/memory.h>
 
+#ifdef CONFIG_X86_PAE
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table_l2[4 * L2_PAGETABLE_ENTRIES];
+#else
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table_l2[L2_PAGETABLE_ENTRIES];
+#endif
+
 unsigned int PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
 unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
 
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_64/compat_kexec.S
--- a/xen/arch/x86/x86_64/compat_kexec.S        Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_64/compat_kexec.S        Thu May 10 18:02:55 2007 +0100
@@ -8,7 +8,9 @@
 #include <asm/msr.h>
 #include <asm/page.h>
 
-.text
+#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
+
+        .text
 
         .code64
 
@@ -29,21 +31,19 @@ 1:      dec %r9
         test %r9,%r9
         jnz 1b
 
-        movq %rbx,%rdx
-        mov $__PAGE_OFFSET,%rbx
-        sub %rbx, %rdx
+        mov $SYM_PHYS(compat_page_list),%rdx
 
         /*
          * Setup an identity mapped region in PML4[0] of idle page
          * table.
          */
-        lea idle_pg_table_l3(%rip),%rax
+        lea l3_identmap(%rip),%rax
         sub %rbx,%rax
         or  $0x63,%rax
         mov %rax, idle_pg_table(%rip)
 
         /* Switch to idle page table. */
-        movq $(idle_pg_table - __PAGE_OFFSET), %rax
+        movq $SYM_PHYS(idle_pg_table), %rax
         movq %rax, %cr3
 
         /* Jump to low identity mapping in compatibility mode. */
@@ -51,7 +51,7 @@ 1:      dec %r9
         ud2
 
 compatibility_mode_far:
-        .long compatibility_mode - __PAGE_OFFSET
+        .long SYM_PHYS(compatibility_mode)
         .long __HYPERVISOR_CS32
 
         .code32
@@ -78,7 +78,7 @@ compatibility_mode:
         movl %eax, %cr0
 
         /* Switch to 32 bit page table. */
-        movl  $compat_pg_table - __PAGE_OFFSET, %eax
+        movl  $SYM_PHYS(compat_pg_table), %eax
         movl  %eax, %cr3
 
         /* Clear MSR_EFER[LME], disabling long mode */
@@ -106,10 +106,10 @@ compat_page_list:
          * first 4G of the physical address space.
          */
 compat_pg_table:
-        .long compat_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long compat_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long compat_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
-        .long compat_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
+        .long SYM_PHYS(compat_pg_table_l2) + 0*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(compat_pg_table_l2) + 1*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(compat_pg_table_l2) + 2*PAGE_SIZE + 0x01, 0
+        .long SYM_PHYS(compat_pg_table_l2) + 3*PAGE_SIZE + 0x01, 0
 
         .align 4096,0
 
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_64/gpr_switch.S
--- a/xen/arch/x86/x86_64/gpr_switch.S  Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_64/gpr_switch.S  Thu May 10 18:02:55 2007 +0100
@@ -30,7 +30,10 @@ ENTRY(host_to_guest_gpr_switch)
         pushq %r15
         movq  UREGS_r11(%rdi), %r11
         movq  UREGS_r15(%rdi), %r15
+        pushq %rcx /* dummy push, filled by guest_to_host_gpr_switch pointer */
         pushq %rcx
+        leaq  guest_to_host_gpr_switch(%rip),%rcx
+        movq  %rcx,8(%rsp)
         movq  UREGS_rcx(%rdi), %rcx
         movq  UREGS_rdi(%rdi), %rdi
         ret
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Thu May 10 18:02:55 2007 +0100
@@ -35,6 +35,22 @@
 #ifdef CONFIG_COMPAT
 unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
 #endif
+
+/* Top-level master (and idle-domain) page directory. */
+l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    idle_pg_table[L4_PAGETABLE_ENTRIES];
+
+/* Enough page directories to map bottom 4GB of the memory map. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l3_identmap[L3_PAGETABLE_ENTRIES];
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l2_identmap[4*L2_PAGETABLE_ENTRIES];
+
+/* Enough page directories to map the Xen text and static data. */
+l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l3_xenmap[L3_PAGETABLE_ENTRIES];
+l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
+    l2_xenmap[L2_PAGETABLE_ENTRIES];
 
 void *alloc_xen_pagetable(void)
 {
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Thu May 10 18:02:55 2007 +0100
@@ -19,8 +19,11 @@
 #include <asm/shared.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
-
 #include <public/callback.h>
+
+asmlinkage void syscall_enter(void);
+asmlinkage void compat_hypercall(void);
+asmlinkage void int80_direct_trap(void);
 
 static void print_xen_info(void)
 {
@@ -246,9 +249,42 @@ unsigned long do_iret(void)
     return 0;
 }
 
-asmlinkage void syscall_enter(void);
-asmlinkage void compat_hypercall(void);
-asmlinkage void int80_direct_trap(void);
+static int write_stack_trampoline(
+    char *stack, char *stack_bottom, uint16_t cs_seg)
+{
+    /* movq %rsp, saversp(%rip) */
+    stack[0] = 0x48;
+    stack[1] = 0x89;
+    stack[2] = 0x25;
+    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
+
+    /* leaq saversp(%rip), %rsp */
+    stack[7] = 0x48;
+    stack[8] = 0x8d;
+    stack[9] = 0x25;
+    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
+
+    /* pushq %r11 */
+    stack[14] = 0x41;
+    stack[15] = 0x53;
+
+    /* pushq $<cs_seg> */
+    stack[16] = 0x68;
+    *(u32 *)&stack[17] = cs_seg;
+
+    /* movq $syscall_enter,%r11 */
+    stack[21] = 0x49;
+    stack[22] = 0xbb;
+    *(void **)&stack[23] = (void *)syscall_enter;
+
+    /* jmpq *%r11 */
+    stack[31] = 0x41;
+    stack[32] = 0xff;
+    stack[33] = 0xe3;
+
+    return 34;
+}
+
 void __init percpu_traps_init(void)
 {
     char *stack_bottom, *stack;
@@ -280,74 +316,16 @@ void __init percpu_traps_init(void)
     /* NMI handler has its own per-CPU 1kB stack. */
     init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
 
-    /*
-     * Trampoline for SYSCALL entry from long mode.
-     */
-
-    /* Skip the NMI and DF stacks. */
-    stack = &stack[3072];
+    /* Trampoline for SYSCALL entry from long mode. */
+    stack = &stack[3072]; /* Skip the NMI and DF stacks. */
     wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
-
-    /* movq %rsp, saversp(%rip) */
-    stack[0] = 0x48;
-    stack[1] = 0x89;
-    stack[2] = 0x25;
-    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
-
-    /* leaq saversp(%rip), %rsp */
-    stack[7] = 0x48;
-    stack[8] = 0x8d;
-    stack[9] = 0x25;
-    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
-
-    /* pushq %r11 */
-    stack[14] = 0x41;
-    stack[15] = 0x53;
-
-    /* pushq $FLAT_KERNEL_CS64 */
-    stack[16] = 0x68;
-    *(u32 *)&stack[17] = FLAT_KERNEL_CS64;
-
-    /* jmp syscall_enter */
-    stack[21] = 0xe9;
-    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
-
-    /*
-     * Trampoline for SYSCALL entry from compatibility mode.
-     */
-
-    /* Skip the long-mode entry trampoline. */
-    stack = &stack[26];
+    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
+
+    /* Trampoline for SYSCALL entry from compatibility mode. */
     wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
-
-    /* movq %rsp, saversp(%rip) */
-    stack[0] = 0x48;
-    stack[1] = 0x89;
-    stack[2] = 0x25;
-    *(u32 *)&stack[3] = (stack_bottom - &stack[7]) - 16;
-
-    /* leaq saversp(%rip), %rsp */
-    stack[7] = 0x48;
-    stack[8] = 0x8d;
-    stack[9] = 0x25;
-    *(u32 *)&stack[10] = (stack_bottom - &stack[14]) - 16;
-
-    /* pushq %r11 */
-    stack[14] = 0x41;
-    stack[15] = 0x53;
-
-    /* pushq $FLAT_KERNEL_CS32 */
-    stack[16] = 0x68;
-    *(u32 *)&stack[17] = FLAT_KERNEL_CS32;
-
-    /* jmp syscall_enter */
-    stack[21] = 0xe9;
-    *(u32 *)&stack[22] = (char *)syscall_enter - &stack[26];
-
-    /*
-     * Common SYSCALL parameters.
-     */
-
+    stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
+
+    /* Common SYSCALL parameters. */
     wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
     wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
 }
diff -r 07b1e917c9d8 -r 23c4790512db xen/arch/x86/x86_64/xen.lds.S
--- a/xen/arch/x86/x86_64/xen.lds.S     Thu May 10 16:22:27 2007 +0100
+++ b/xen/arch/x86/x86_64/xen.lds.S     Thu May 10 18:02:55 2007 +0100
@@ -16,7 +16,7 @@ PHDRS
 }
 SECTIONS
 {
-  . = 0xFFFF830000100000;
+  . = __XEN_VIRT_START + 0x100000;
   _start = .;
   _stext = .;                  /* Text and read-only data */
   .text : {
diff -r 07b1e917c9d8 -r 23c4790512db xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu May 10 16:22:27 2007 +0100
+++ b/xen/common/grant_table.c  Thu May 10 18:02:55 2007 +0100
@@ -833,7 +833,7 @@ gnttab_transfer(
         }
 
         page = mfn_to_page(mfn);
-        if ( unlikely(IS_XEN_HEAP_FRAME(page)) )
+        if ( unlikely(is_xen_heap_frame(page)) )
         { 
             gdprintk(XENLOG_INFO, "gnttab_transfer: xen frame %lx\n",
                     (unsigned long)gop.mfn);
diff -r 07b1e917c9d8 -r 23c4790512db xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Thu May 10 16:22:27 2007 +0100
+++ b/xen/common/page_alloc.c   Thu May 10 18:02:55 2007 +0100
@@ -585,18 +585,20 @@ static unsigned long avail_heap_pages(
     return free_pages;
 }
 
+#define avail_for_domheap(mfn) \
+    (!allocated_in_map(mfn) && !is_xen_heap_frame(mfn_to_page(mfn)))
 void end_boot_allocator(void)
 {
     unsigned long i;
     int curr_free, next_free;
 
     /* Pages that are free now go to the domain sub-allocator. */
-    if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
+    if ( (curr_free = next_free = avail_for_domheap(first_valid_mfn)) )
         map_alloc(first_valid_mfn, 1);
     for ( i = first_valid_mfn; i < max_page; i++ )
     {
         curr_free = next_free;
-        next_free = !allocated_in_map(i+1);
+        next_free = avail_for_domheap(i+1);
         if ( next_free )
             map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
         if ( curr_free )
@@ -605,6 +607,7 @@ void end_boot_allocator(void)
 
     printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
 }
+#undef avail_for_domheap
 
 /*
  * Scrub all unallocated pages in all heap zones. This function is more
@@ -635,7 +638,7 @@ void scrub_heap_pages(void)
         /* Re-check page status with lock held. */
         if ( !allocated_in_map(mfn) )
         {
-            if ( IS_XEN_HEAP_FRAME(mfn_to_page(mfn)) )
+            if ( is_xen_heap_frame(mfn_to_page(mfn)) )
             {
                 p = page_to_virt(mfn_to_page(mfn));
                 memguard_unguard_range(p, PAGE_SIZE);
@@ -675,7 +678,9 @@ void init_xenheap_pages(paddr_t ps, padd
      * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
      * prevent merging of power-of-two blocks across the zone boundary.
      */
-    if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
+    if ( ps && !is_xen_heap_frame(maddr_to_page(ps)-1) )
+        ps += PAGE_SIZE;
+    if ( !is_xen_heap_frame(maddr_to_page(pe)) )
         pe -= PAGE_SIZE;
 
     init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
@@ -856,7 +861,7 @@ void free_domheap_pages(struct page_info
 
     ASSERT(!in_irq());
 
-    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
+    if ( unlikely(is_xen_heap_frame(pg)) )
     {
         /* NB. May recursively lock from relinquish_memory(). */
         spin_lock_recursive(&d->page_alloc_lock);
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-ia64/mm.h Thu May 10 18:02:55 2007 +0100
@@ -115,8 +115,8 @@ struct page_info
  /* 29-bit count of references to this frame. */
 #define PGC_count_mask      ((1UL<<29)-1)
 
-#define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \
-                                && (page_to_maddr(_pfn) >= xen_pstart))
+#define is_xen_heap_frame(pfn) ((page_to_maddr(pfn) < xenheap_phys_end) \
+                                && (page_to_maddr(pfn) >= xen_pstart))
 
 extern void *xen_heap_start;
 #define __pickle(a)    ((unsigned long)a - (unsigned long)xen_heap_start)
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-powerpc/mm.h      Thu May 10 18:02:55 2007 +0100
@@ -112,7 +112,7 @@ struct page_info
  /* 29-bit count of references to this frame. */
 #define PGC_count_mask      ((1UL<<28)-1)
 
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+#define is_xen_heap_frame(pfn) (page_to_maddr(pfn) < xenheap_phys_end)
 
 static inline struct domain *unpickle_domptr(u32 _domain)
 { return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-x86/config.h      Thu May 10 18:02:55 2007 +0100
@@ -84,6 +84,19 @@
 
 #define CONFIG_DMA_BITSIZE 32
 
+#define BOOT_TRAMPOLINE 0x90000
+#define boot_trampoline_pa(sym)                                 \
+    (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
+#define boot_trampoline_va(sym)                                 \
+    (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))),            \
+                 BOOT_TRAMPOLINE-__pa(trampoline_start)))
+#ifndef __ASSEMBLY__
+extern char trampoline_start[], trampoline_end[];
+extern char trampoline_realmode_entry[];
+extern unsigned int trampoline_xen_phys_start;
+extern unsigned char trampoline_cpu_started;
+#endif
+
 #if defined(__x86_64__)
 
 #define CONFIG_X86_64 1
@@ -116,7 +129,7 @@
  *  0xffff804000000000 - 0xffff807fffffffff [256GB, 2^38 bytes, PML4:256]
  *    Reserved for future shared info with the guest OS (GUEST ACCESSIBLE).
  *  0xffff808000000000 - 0xffff80ffffffffff [512GB, 2^39 bytes, PML4:257]
- *    Read-only guest linear page table (GUEST ACCESSIBLE).
+ *    Reserved for future use.
  *  0xffff810000000000 - 0xffff817fffffffff [512GB, 2^39 bytes, PML4:258]
  *    Guest linear page table.
  *  0xffff818000000000 - 0xffff81ffffffffff [512GB, 2^39 bytes, PML4:259]
@@ -133,10 +146,12 @@
  *    Compatibility machine-to-phys translation table.
  *  0xffff828c40000000 - 0xffff828c7fffffff [1GB,   2^30 bytes, PML4:261]
  *    High read-only compatibility machine-to-phys translation table.
- *  0xffff828c80000000 - 0xffff82ffffffffff [462GB,             PML4:261]
+ *  0xffff828c80000000 - 0xffff828cbfffffff [1GB,   2^30 bytes, PML4:261]
+ *    Xen text, static data, bss.
+ *  0xffff828cc0000000 - 0xffff82ffffffffff [461GB,             PML4:261]
  *    Reserved for future use.
  *  0xffff830000000000 - 0xffff83ffffffffff [1TB,   2^40 bytes, PML4:262-263]
- *    1:1 direct mapping of all physical memory. Xen and its heap live here.
+ *    1:1 direct mapping of all physical memory.
  *  0xffff840000000000 - 0xffff87ffffffffff [4TB,   2^42 bytes, PML4:264-271]
  *    Reserved for future use.
  *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
@@ -167,14 +182,6 @@
 /* Slot 256: read-only guest-accessible machine-to-phys translation table. */
 #define RO_MPT_VIRT_START       (PML4_ADDR(256))
 #define RO_MPT_VIRT_END         (RO_MPT_VIRT_START + PML4_ENTRY_BYTES/2)
-
-// current unused?
-#if 0
-/* Slot 257: read-only guest-accessible linear page table. */
-#define RO_LINEAR_PT_VIRT_START (PML4_ADDR(257))
-#define RO_LINEAR_PT_VIRT_END   (RO_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
-#endif
-
 /* Slot 258: linear page table (guest table). */
 #define LINEAR_PT_VIRT_START    (PML4_ADDR(258))
 #define LINEAR_PT_VIRT_END      (LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
@@ -197,9 +204,12 @@
 /* Slot 261: compatibility machine-to-phys conversion table (1GB). */
 #define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END
 #define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + (1UL << 30))
-/* Slot 261: high read-only compatibility machine-to-phys conversion table (1GB). */
+/* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
 #define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
 #define HIRO_COMPAT_MPT_VIRT_END (HIRO_COMPAT_MPT_VIRT_START + (1UL << 30))
+/* Slot 261: xen text, static data and bss (1GB). */
+#define XEN_VIRT_START          (HIRO_COMPAT_MPT_VIRT_END)
+#define XEN_VIRT_END            (XEN_VIRT_START + (1UL << 30))
 /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
 #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
 #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
@@ -340,7 +350,7 @@
 #endif /* __i386__ */
 
 #ifndef __ASSEMBLY__
-extern unsigned long xenheap_phys_end; /* user-configurable */
+extern unsigned long xen_phys_start, xenheap_phys_start, xenheap_phys_end;
 #endif
 
 /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-x86/mm.h  Thu May 10 18:02:55 2007 +0100
@@ -104,7 +104,10 @@ struct page_info
 #define PageSetSlab(page)   ((void)0)
 #define PageClearSlab(page) ((void)0)
 
-#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
+#define is_xen_heap_frame(pfn) ({                                       \
+    paddr_t maddr = page_to_maddr(pfn);                                 \
+    ((maddr >= xenheap_phys_start) && (maddr < xenheap_phys_end));      \
+})
 
 #if defined(__i386__)
 #define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-x86/page.h        Thu May 10 18:02:55 2007 +0100
@@ -223,10 +223,6 @@ typedef struct { u64 pfn; } pagetable_t;
 #define mfn_valid(mfn)      ((mfn) < max_page)
 
 /* Convert between Xen-heap virtual addresses and machine addresses. */
-#define PAGE_OFFSET         ((unsigned long)__PAGE_OFFSET)
-#define virt_to_maddr(va)   ((unsigned long)(va)-PAGE_OFFSET)
-#define maddr_to_virt(ma)   ((void *)((unsigned long)(ma)+PAGE_OFFSET))
-/* Shorthand versions of the above functions. */
 #define __pa(x)             (virt_to_maddr(x))
 #define __va(x)             (maddr_to_virt(x))
 
@@ -280,21 +276,19 @@ typedef struct { u64 pfn; } pagetable_t;
 
 
 #ifndef __ASSEMBLY__
+extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
 #if CONFIG_PAGING_LEVELS == 3
-extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
-extern l3_pgentry_t   idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
-extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
-#else
-extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
-extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
-#ifdef CONFIG_COMPAT
+extern l2_pgentry_t   idle_pg_table_l2[
+    ROOT_PAGETABLE_ENTRIES * L2_PAGETABLE_ENTRIES];
+#elif CONFIG_PAGING_LEVELS == 2
+#define idle_pg_table_l2 idle_pg_table
+#elif CONFIG_PAGING_LEVELS == 4
 extern l2_pgentry_t  *compat_idle_pg_table_l2;
 extern unsigned int   m2p_compat_vstart;
 #endif
-#endif
 void paging_init(void);
 void setup_idle_pagetable(void);
-#endif
+#endif /* !defined(__ASSEMBLY__) */
 
 #define __pge_off()                                                     \
     do {                                                                \
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-x86/x86_32/page.h Thu May 10 18:02:55 2007 +0100
@@ -3,6 +3,10 @@
 #define __X86_32_PAGE_H__
 
 #define __PAGE_OFFSET           (0xFF000000)
+#define __XEN_VIRT_START        __PAGE_OFFSET
+
+#define virt_to_maddr(va) ((unsigned long)(va)-DIRECTMAP_VIRT_START)
+#define maddr_to_virt(ma) ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
 
 #define VADDR_BITS              32
 #define VADDR_MASK              (~0UL)
diff -r 07b1e917c9d8 -r 23c4790512db xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Thu May 10 16:22:27 2007 +0100
+++ b/xen/include/asm-x86/x86_64/page.h Thu May 10 18:02:55 2007 +0100
@@ -17,6 +17,7 @@
 #define ROOT_PAGETABLE_ENTRIES  L4_PAGETABLE_ENTRIES
 
 #define __PAGE_OFFSET           (0xFFFF830000000000)
+#define __XEN_VIRT_START        (0xFFFF828C80000000)
 
 /* These are architectural limits. Current CPUs support only 40-bit phys. */
 #define PADDR_BITS              52
@@ -30,6 +31,23 @@
 
 #include <xen/config.h>
 #include <asm/types.h>
+
+/* Physical address where Xen was relocated to. */
+extern unsigned long xen_phys_start;
+
+static inline unsigned long __virt_to_maddr(unsigned long va)
+{
+    ASSERT(va >= XEN_VIRT_START);
+    ASSERT(va < DIRECTMAP_VIRT_END);
+    ASSERT((va < XEN_VIRT_END) || (va >= DIRECTMAP_VIRT_START));
+    if ( va > DIRECTMAP_VIRT_START )
+        return va - DIRECTMAP_VIRT_START;
+    return va - XEN_VIRT_START + xen_phys_start;
+}
+#define virt_to_maddr(va)       \
+    (__virt_to_maddr((unsigned long)(va)))
+#define maddr_to_virt(ma)       \
+    ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START))
 
 /* read access (should only be used for debug printk's) */
 typedef u64 intpte_t;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] xen: Big changes to x86 start-of-day:, Xen patchbot-unstable <=