By doing so, we no longer need to be able to place all boot loader modules into the low 1Gb/4Gb (32-/64-bit) of memory, nor is there any longer a dependency on where the boot loader places the modules. We're also no longer restricted to copying the modules into a place below 4Gb, nor to putting them all together into a single piece of memory. Further, it allows even the 32-bit Dom0 kernel to be loaded anywhere in physical memory (except if it doesn't support PAE-above-4G). Signed-off-by: Jan Beulich --- 2010-11-09.orig/xen/arch/x86/boot/head.S 2010-08-06 08:44:33.000000000 +0200 +++ 2010-11-09/xen/arch/x86/boot/head.S 2010-11-09 10:30:06.000000000 +0100 @@ -110,12 +110,15 @@ __start: /* Initialise L2 identity-map and xen page table entries (16MB). */ mov $sym_phys(l2_identmap),%edi mov $sym_phys(l2_xenmap),%esi + mov $sym_phys(l2_bootmap),%edx mov $0x1e3,%eax /* PRESENT+RW+A+D+2MB+GLOBAL */ mov $8,%ecx 1: mov %eax,(%edi) add $8,%edi mov %eax,(%esi) add $8,%esi + mov %eax,(%edx) + add $8,%edx add $(1<>18) --- 2010-11-09.orig/xen/arch/x86/boot/Makefile 2010-04-22 14:43:25.000000000 +0200 +++ 2010-11-09/xen/arch/x86/boot/Makefile 2010-11-09 10:30:06.000000000 +0100 @@ -4,6 +4,6 @@ head.o: reloc.S BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h) %.S: %.c - RELOC=$(BOOT_TRAMPOLINE) XEN_BITSPERLONG=$(patsubst x86_%,%,$(TARGET_SUBARCH)) $(MAKE) -f build32.mk $@ + RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@ reloc.S: $(BASEDIR)/include/asm-x86/config.h --- 2010-11-09.orig/xen/arch/x86/boot/build32.mk 2010-08-06 08:44:33.000000000 +0200 +++ 2010-11-09/xen/arch/x86/boot/build32.mk 2010-11-09 10:30:06.000000000 +0100 @@ -19,6 +19,6 @@ CFLAGS += -Werror -fno-builtin -msoft-fl $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(RELOC) -o $@ $< %.o: %.c - $(CC) $(CFLAGS) -DXEN_BITSPERLONG=$(XEN_BITSPERLONG) -c $< -o $@ + $(CC) $(CFLAGS) -c $< -o $@ reloc.o: $(BASEDIR)/include/asm-x86/config.h --- 
2010-11-09.orig/xen/arch/x86/boot/reloc.c 2010-08-06 08:44:33.000000000 +0200 +++ 2010-11-09/xen/arch/x86/boot/reloc.c 2010-11-09 10:30:06.000000000 +0100 @@ -68,7 +68,6 @@ multiboot_info_t *reloc(multiboot_info_t { module_t *mods = reloc_mbi_struct( (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t)); - u32 max_addr = 0; mbi->mods_addr = (u32)mods; @@ -76,29 +75,6 @@ multiboot_info_t *reloc(multiboot_info_t { if ( mods[i].string ) mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string); - if ( mods[i].mod_end > max_addr ) - max_addr = mods[i].mod_end; - } - - /* - * 32-bit Xen only maps bottom 1GB of memory at boot time. Relocate - * modules which extend beyond this (GRUB2 in particular likes to - * place modules as high as possible below 4GB). - */ -#define BOOTMAP_END (1ul<<30) /* 1GB */ - if ( (XEN_BITSPERLONG == 32) && (max_addr > BOOTMAP_END) ) - { - char *mod_alloc = (char *)BOOTMAP_END; - for ( i = 0; i < mbi->mods_count; i++ ) - mod_alloc -= mods[i].mod_end - mods[i].mod_start; - for ( i = 0; i < mbi->mods_count; i++ ) - { - u32 mod_len = mods[i].mod_end - mods[i].mod_start; - mods[i].mod_start = (u32)memcpy( - mod_alloc, (char *)mods[i].mod_start, mod_len); - mods[i].mod_end = mods[i].mod_start + mod_len; - mod_alloc += mod_len; - } } } --- 2010-11-09.orig/xen/arch/x86/domain_build.c 2010-08-06 08:44:33.000000000 +0200 +++ 2010-11-09/xen/arch/x86/domain_build.c 2010-11-09 10:30:06.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include /* for bzimage_parse */ #include @@ -284,9 +285,9 @@ static void __init process_dom0_ioports_ int __init construct_dom0( struct domain *d, - unsigned long _image_base, - unsigned long _image_start, unsigned long image_len, - unsigned long _initrd_start, unsigned long initrd_len, + const module_t *image, unsigned long image_headroom, + const module_t *initrd, + void *(*bootstrap_map)(const module_t *), char *cmdline) { int i, cpu, rc, compatible, compat32, order, machine; @@ -301,16 
+302,14 @@ int __init construct_dom0( start_info_t *si; struct vcpu *v = d->vcpu[0]; unsigned long long value; -#if defined(__i386__) - char *image_base = (char *)_image_base; /* use lowmem mappings */ - char *image_start = (char *)_image_start; /* use lowmem mappings */ - char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */ -#elif defined(__x86_64__) - char *image_base = __va(_image_base); - char *image_start = __va(_image_start); - char *initrd_start = __va(_initrd_start); -#endif -#if CONFIG_PAGING_LEVELS >= 4 + char *image_base = bootstrap_map(image); + unsigned long image_len = image->mod_end; + char *image_start = image_base + image_headroom; + unsigned long initrd_len = initrd ? initrd->mod_end : 0; +#if CONFIG_PAGING_LEVELS < 4 + module_t mpt; + void *mpt_ptr; +#else l4_pgentry_t *l4tab = NULL, *l4start = NULL; #endif l3_pgentry_t *l3tab = NULL, *l3start = NULL; @@ -340,7 +339,7 @@ int __init construct_dom0( unsigned long v_end; /* Machine address of next candidate page-table page. */ - unsigned long mpt_alloc; + paddr_t mpt_alloc; /* Sanity! */ BUG_ON(d->domain_id != 0); @@ -495,17 +494,17 @@ int __init construct_dom0( if ( (1UL << order) > nr_pages ) panic("Domain 0 allocation is too small for kernel image.\n"); -#ifdef __i386__ - /* Ensure that our low-memory 1:1 mapping covers the allocation. 
*/ - page = alloc_domheap_pages(d, order, MEMF_bits(30)); -#else if ( parms.p2m_base != UNSET_ADDR ) { vphysmap_start = parms.p2m_base; vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long); } - page = alloc_domheap_pages(d, order, 0); +#ifdef __i386__ + if ( !test_bit(XENFEAT_pae_pgdir_above_4gb, parms.f_supported) ) + page = alloc_domheap_pages(d, order, MEMF_bits(32)); + else #endif + page = alloc_domheap_pages(d, order, 0); if ( page == NULL ) panic("Not enough RAM for domain 0 allocation.\n"); alloc_spfn = page_to_mfn(page); @@ -534,8 +533,7 @@ int __init construct_dom0( _p(v_start), _p(v_end)); printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry)); - mpt_alloc = (vpt_start - v_start) + - (unsigned long)pfn_to_paddr(alloc_spfn); + mpt_alloc = (vpt_start - v_start) + pfn_to_paddr(alloc_spfn); #if defined(__i386__) /* @@ -548,17 +546,25 @@ int __init construct_dom0( return -EINVAL; } + mpt.mod_start = mpt_alloc >> PAGE_SHIFT; + mpt.mod_end = vpt_end - vpt_start; + mpt_ptr = bootstrap_map(&mpt); +#define MPT_ALLOC(n) (mpt_ptr += (n)*PAGE_SIZE, mpt_alloc += (n)*PAGE_SIZE) + /* WARNING: The new domain must have its 'processor' field filled in! 
*/ - l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; - l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE; + l3start = l3tab = mpt_ptr; MPT_ALLOC(1); + l2start = l2tab = mpt_ptr; MPT_ALLOC(4); for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) { - copy_page(l2tab + i * L2_PAGETABLE_ENTRIES, - idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES); - l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT); + if ( i < 3 ) + clear_page(l2tab + i * L2_PAGETABLE_ENTRIES); + else + copy_page(l2tab + i * L2_PAGETABLE_ENTRIES, + idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES); + l3tab[i] = l3e_from_pfn(mpt.mod_start + 1 + i, L3_PROT); l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] = - l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR); + l2e_from_pfn(mpt.mod_start + 1 + i, __PAGE_HYPERVISOR); } - v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start); + v->arch.guest_table = pagetable_from_pfn(mpt.mod_start); for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) l2tab[l2_linear_offset(PERDOMAIN_VIRT_START) + i] = @@ -570,9 +576,9 @@ int __init construct_dom0( { if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) { - l1start = l1tab = (l1_pgentry_t *)mpt_alloc; - mpt_alloc += PAGE_SIZE; - *l2tab = l2e_from_paddr((unsigned long)l1start, L2_PROT); + l1tab = mpt_ptr; + *l2tab = l2e_from_paddr(mpt_alloc, L2_PROT); + MPT_ALLOC(1); l2tab++; clear_page(l1tab); if ( count == 0 ) @@ -587,11 +593,14 @@ int __init construct_dom0( mfn++; } +#undef MPT_ALLOC /* Pages that are part of page tables must be read only. 
*/ + mpt_alloc = (paddr_t)mpt.mod_start << PAGE_SHIFT; + mpt_ptr = l3start; l2tab = l2start + l2_linear_offset(vpt_start); - l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*l2tab); - l1tab += l1_table_offset(vpt_start); + l1start = mpt_ptr + (l2e_get_paddr(*l2tab) - mpt_alloc); + l1tab = l1start + l1_table_offset(vpt_start); for ( count = 0; count < nr_pt_pages; count++ ) { page = mfn_to_page(l1e_get_pfn(*l1tab)); @@ -627,9 +636,15 @@ int __init construct_dom0( break; } if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) ) - l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_paddr(*++l2tab); + l1tab = mpt_ptr + (l2e_get_paddr(*++l2tab) - mpt_alloc); } + /* + * Put Xen's first L3 entry into Dom0's page tables so that updates + * through bootstrap_map() will affect the page tables we will run on. + */ + l3start[0] = l3e_from_paddr(__pa(idle_pg_table_l2), L3_PROT); + #elif defined(__x86_64__) /* Overlap with Xen protected area? */ @@ -807,6 +822,7 @@ int __init construct_dom0( /* Copy the OS image and free temporary buffer. */ elf.dest = (void*)vkern_start; elf_load_binary(&elf); + bootstrap_map(NULL); if ( UNSET_ADDR != parms.virt_hypercall ) { @@ -823,7 +839,12 @@ int __init construct_dom0( /* Copy the initial ramdisk. */ if ( initrd_len != 0 ) + { + char *initrd_start = bootstrap_map(initrd); + memcpy((void *)vinitrd_start, initrd_start, initrd_len); + bootstrap_map(NULL); + } /* Free temporary buffers. */ discard_initial_images(); @@ -1033,7 +1054,22 @@ int __init construct_dom0( write_ptbase(current); #if defined(__i386__) - /* Destroy low mappings - they were only for our convenience. */ + /* Restore Dom0's first L3 entry. 
*/ + mpt.mod_end = 5 * PAGE_SIZE; + l3start = mpt_ptr = bootstrap_map(&mpt); + l2start = mpt_ptr + PAGE_SIZE; + l3start[0] = l3e_from_pfn(mpt.mod_start + 1, L3_PROT); + + /* Re-setup CR3 */ + if ( paging_mode_enabled(d) ) + paging_update_paging_modes(v); + else + update_cr3(v); + + /* + * Destroy low mappings - they were only for our convenience. Note + * that zap_low_mappings() exceeds what bootstrap_map(NULL) would do. + */ zap_low_mappings(l2start); #endif --- 2010-11-09.orig/xen/arch/x86/setup.c 2010-08-12 08:17:22.000000000 +0200 +++ 2010-11-09/xen/arch/x86/setup.c 2010-11-09 10:30:06.000000000 +0100 @@ -45,14 +45,6 @@ #include #include -#if defined(CONFIG_X86_64) -#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */ -#define maddr_to_bootstrap_virt(m) maddr_to_virt(m) -#else -#define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */ -#define maddr_to_bootstrap_virt(m) ((void *)(long)(m)) -#endif - extern u16 boot_edid_caps; extern u8 boot_edid_info[128]; extern struct boot_video_info boot_vid_info; @@ -152,21 +144,34 @@ static void __init parse_acpi_param(char for ( ; ; ) halt(); \ } while (0) -static unsigned long __initdata initial_images_base; -static unsigned long __initdata initial_images_start; -static unsigned long __initdata initial_images_end; +static const module_t *__initdata initial_images; +static unsigned int __initdata nr_initial_images; unsigned long __init initial_images_nrpages(void) { - ASSERT(!(initial_images_base & ~PAGE_MASK)); - ASSERT(!(initial_images_end & ~PAGE_MASK)); - return ((initial_images_end >> PAGE_SHIFT) - - (initial_images_base >> PAGE_SHIFT)); + unsigned long nr; + unsigned int i; + + for ( nr = i = 0; i < nr_initial_images; ++i ) + nr += PFN_UP(initial_images[i].mod_end); + + return nr; } void __init discard_initial_images(void) { - init_domheap_pages(initial_images_base, initial_images_end); + unsigned int i; + + for ( i = 0; i < nr_initial_images; ++i ) + { + uint64_t start = (uint64_t)initial_images[i].mod_start << 
PAGE_SHIFT; + + init_domheap_pages(start, + start + PAGE_ALIGN(initial_images[i].mod_end)); + } + + nr_initial_images = 0; + initial_images = NULL; } static void free_xen_data(char *s, char *e) @@ -257,33 +262,128 @@ static void __init normalise_cpu_order(v } } +#define BOOTSTRAP_MAP_BASE (16UL << 20) +#define BOOTSTRAP_MAP_LIMIT (1UL << L3_PAGETABLE_SHIFT) + /* * Ensure a given physical memory range is present in the bootstrap mappings. * Use superpage mappings to ensure that pagetable memory needn't be allocated. */ -static void __init bootstrap_map(unsigned long start, unsigned long end) +static void *__init bootstrap_map(const module_t *mod) { - unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; - start = max_t(unsigned long, start & ~mask, 16UL << 20); - end = (end + mask) & ~mask; + static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE; + uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1; + void *ret; + +#ifdef __x86_64__ + if ( !early_boot ) + return mod ? mfn_to_virt(mod->mod_start) : NULL; +#endif + + if ( !mod ) + { + destroy_xen_mappings(BOOTSTRAP_MAP_BASE, BOOTSTRAP_MAP_LIMIT); + map_cur = BOOTSTRAP_MAP_BASE; + return NULL; + } + + start = (uint64_t)mod->mod_start << PAGE_SHIFT; + end = start + mod->mod_end; if ( start >= end ) - return; - if ( end > BOOTSTRAP_DIRECTMAP_END ) - panic("Cannot access memory beyond end of " - "bootstrap direct-map area\n"); - map_pages_to_xen( - (unsigned long)maddr_to_bootstrap_virt(start), - start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR); + return NULL; + + if ( end <= BOOTSTRAP_MAP_BASE ) + return (void *)(unsigned long)start; + + ret = (void *)(map_cur + (unsigned long)(start & mask)); + start &= ~mask; + end = (end + mask) & ~mask; + if ( end - start > BOOTSTRAP_MAP_LIMIT - map_cur ) + return NULL; + + map_pages_to_xen(map_cur, start >> PAGE_SHIFT, + (end - start) >> PAGE_SHIFT, PAGE_HYPERVISOR); + map_cur += end - start; + return ret; } -static void __init move_memory( - unsigned 
long dst, unsigned long src_start, unsigned long src_end) +static void *__init move_memory( + uint64_t dst, uint64_t src, unsigned int size, bool_t keep) { - bootstrap_map(src_start, src_end); - bootstrap_map(dst, dst + src_end - src_start); - memmove(maddr_to_bootstrap_virt(dst), - maddr_to_bootstrap_virt(src_start), - src_end - src_start); + unsigned int blksz = BOOTSTRAP_MAP_LIMIT - BOOTSTRAP_MAP_BASE; + unsigned int mask = (1L << L2_PAGETABLE_SHIFT) - 1; + + if ( src + size > BOOTSTRAP_MAP_BASE ) + blksz >>= 1; + + while ( size ) + { + module_t mod; + unsigned int soffs = src & mask; + unsigned int doffs = dst & mask; + unsigned int sz; + void *d, *s; + + mod.mod_start = (src - soffs) >> PAGE_SHIFT; + mod.mod_end = soffs + size; + if ( mod.mod_end > blksz ) + mod.mod_end = blksz; + sz = mod.mod_end - soffs; + s = bootstrap_map(&mod); + + mod.mod_start = (dst - doffs) >> PAGE_SHIFT; + mod.mod_end = doffs + size; + if ( mod.mod_end > blksz ) + mod.mod_end = blksz; + if ( sz > mod.mod_end - doffs ) + sz = mod.mod_end - doffs; + d = bootstrap_map(&mod); + + memmove(d + doffs, s + soffs, sz); + + dst += sz; + src += sz; + size -= sz; + + if ( keep ) + return size ? 
NULL : d + doffs; + + bootstrap_map(NULL); + } + + return NULL; +} + +static uint64_t __init consider_modules( + uint64_t s, uint64_t e, uint32_t size, const module_t *mod, + unsigned int nr_mods, unsigned int this_mod) +{ + unsigned int i; + + if ( s > e || e - s < size ) + return 0; + + for ( i = 0; i < nr_mods ; ++i ) + { + uint64_t start = (uint64_t)mod[i].mod_start << PAGE_SHIFT; + uint64_t end = start + PAGE_ALIGN(mod[i].mod_end); + + if ( i == this_mod ) + continue; + + if ( s < end && start < e ) + { + end = consider_modules(end, e, size, mod + i + 1, + nr_mods - i - 1, this_mod - i - 1); + if ( end ) + return end; + + return consider_modules(s, start, size, mod + i + 1, + nr_mods - i - 1, this_mod - i - 1); + } + } + + return e; } static void __init setup_max_pdx(void) @@ -447,11 +547,10 @@ void __init __start_xen(unsigned long mb { char *memmap_type = NULL; char *cmdline, *kextra, *loader; - unsigned long _initrd_start = 0, _initrd_len = 0; unsigned int initrdidx = 1; multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); - unsigned long nr_pages, modules_length, modules_headroom; + unsigned long nr_pages, modules_headroom; int i, j, e820_warn = 0, bytes = 0; bool_t acpi_boot_table_init_done = 0; struct ns16550_defaults ns16550 = { @@ -647,6 +746,9 @@ void __init __start_xen(unsigned long mb set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT); kexec_reserve_area(&boot_e820); + initial_images = mod; + nr_initial_images = mbi->mods_count; + /* * Iterate backwards over all superpage-aligned RAM regions. * @@ -660,48 +762,64 @@ void __init __start_xen(unsigned long mb * we can relocate the dom0 kernel and other multiboot modules. Also, on * x86/64, we relocate Xen to higher memory. 
*/ - modules_length = 0; for ( i = 0; i < mbi->mods_count; i++ ) - modules_length += mod[i].mod_end - mod[i].mod_start; + { + if ( mod[i].mod_start & (PAGE_SIZE - 1) ) + EARLY_FAIL("Bootloader didn't honor module alignment request.\n"); + mod[i].mod_end -= mod[i].mod_start; + mod[i].mod_start >>= PAGE_SHIFT; + mod[i].reserved = 0; + } - /* ensure mod[0] is mapped before parsing */ - bootstrap_map(mod[0].mod_start, mod[0].mod_end); - modules_headroom = bzimage_headroom( - (char *)(unsigned long)mod[0].mod_start, - (unsigned long)(mod[0].mod_end - mod[0].mod_start)); + modules_headroom = bzimage_headroom(bootstrap_map(mod), mod->mod_end); + bootstrap_map(NULL); for ( i = boot_e820.nr_map-1; i >= 0; i-- ) { uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; + uint64_t end, limit = ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT; - /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */ + /* Superpage-aligned chunks from BOOTSTRAP_MAP_BASE. */ s = (boot_e820.map[i].addr + mask) & ~mask; e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask; - s = max_t(uint64_t, s, 16 << 20); - e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END); + s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE); if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) ) continue; - set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT); - - /* Map the chunk. No memory will need to be allocated to do this. */ - map_pages_to_xen( - (unsigned long)maddr_to_bootstrap_virt(s), - s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR); + if ( s < limit ) + { + end = min(e, limit); + set_pdx_range(s >> PAGE_SHIFT, end >> PAGE_SHIFT); +#ifdef CONFIG_X86_64 + map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT, + (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR); +#endif + } #if defined(CONFIG_X86_64) + e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32)); #define reloc_size ((__pa(&_end) + mask) & ~mask) /* Is the region suitable for relocating Xen? 
*/ - if ( !xen_phys_start && ((e-s) >= reloc_size) ) + if ( !xen_phys_start && e <= limit ) + { + /* Don't overlap with modules. */ + end = consider_modules(s, e, reloc_size + mask, + mod, mbi->mods_count, -1); + end &= ~mask; + } + else + end = 0; + if ( end > s ) { extern l2_pgentry_t l2_xenmap[]; l4_pgentry_t *pl4e; l3_pgentry_t *pl3e; l2_pgentry_t *pl2e; int i, j, k; + void *dst; /* Select relocation address. */ - e -= reloc_size; + e = end - reloc_size; xen_phys_start = e; bootsym(trampoline_xen_phys_start) = e; @@ -712,10 +830,10 @@ void __init __start_xen(unsigned long mb * data until after we have switched to the relocated pagetables! */ barrier(); - move_memory(e, 0, __pa(&_end) - xen_phys_start); + dst = move_memory(e, 0, (unsigned long)&_end - XEN_VIRT_START, 1); /* Poison low 1MB to detect stray pointers to physical 0-1MB. */ - memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20); + memset(dst, 0x55, 1U << 20); /* Walk initial pagetables, relocating page directory entries. */ pl4e = __va(__pa(idle_pg_table)); @@ -772,38 +890,58 @@ void __init __start_xen(unsigned long mb "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */ : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack), "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" ); + + bootstrap_map(NULL); } #endif /* Is the region suitable for relocating the multiboot modules? */ - if ( !initial_images_start && (s < e) && - ((e-s) >= (modules_length+modules_headroom)) ) + for ( j = mbi->mods_count - 1; j >= 0; j-- ) { - initial_images_end = e; - initial_images_start = initial_images_end - modules_length; - initial_images_base = initial_images_start - modules_headroom; - initial_images_base &= PAGE_MASK; - for ( j = mbi->mods_count-1; j >= 0; j-- ) + unsigned long headroom = j ? 0 : modules_headroom; + unsigned long size = PAGE_ALIGN(headroom + mod[j].mod_end); + + if ( mod[j].reserved ) + continue; + + /* Don't overlap with other modules. 
*/ + end = consider_modules(s, e, size, mod, mbi->mods_count, j); + + if ( s < end && + (headroom || + ((end - size) >> PAGE_SHIFT) > mod[j].mod_start) ) { - e -= mod[j].mod_end - mod[j].mod_start; - move_memory(e, mod[j].mod_start, mod[j].mod_end); - mod[j].mod_end += e - mod[j].mod_start; - mod[j].mod_start = e; + move_memory(end - size + headroom, + (uint64_t)mod[j].mod_start << PAGE_SHIFT, + mod[j].mod_end, 0); + mod[j].mod_start = (end - size) >> PAGE_SHIFT; + mod[j].mod_end += headroom; + mod[j].reserved = 1; } - e = initial_images_base; } - if ( !kexec_crash_area.start && (s < e) && - ((e-s) >= kexec_crash_area.size) ) +#ifdef CONFIG_X86_32 + /* Confine the kexec area to below 4Gb. */ + e = min_t(uint64_t, e, 1ULL << 32); +#endif + /* Don't overlap with modules. */ + e = consider_modules(s, e, PAGE_ALIGN(kexec_crash_area.size), + mod, mbi->mods_count, -1); + if ( !kexec_crash_area.start && (s < e) ) { e = (e - kexec_crash_area.size) & PAGE_MASK; kexec_crash_area.start = e; } } - if ( !initial_images_start ) + if ( modules_headroom && !mod->reserved ) EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n"); - reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end); + for ( i = 0; i < mbi->mods_count; ++i ) + { + uint64_t s = (uint64_t)mod[i].mod_start << PAGE_SHIFT; + + reserve_e820_ram(&boot_e820, s, s + PAGE_ALIGN(mod[i].mod_end)); + } #if defined(CONFIG_X86_32) xenheap_initial_phys_start = (PFN_UP(__pa(&_end)) + 1) << PAGE_SHIFT; @@ -827,7 +965,10 @@ void __init __start_xen(unsigned long mb */ for ( i = 0; i < boot_e820.nr_map; i++ ) { - uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1; + uint64_t s, e, mask = PAGE_SIZE - 1; +#ifdef CONFIG_X86_64 + uint64_t map_s, map_e; +#endif /* Only page alignment required now. 
*/ s = (boot_e820.map[i].addr + mask) & ~mask; @@ -842,7 +983,7 @@ void __init __start_xen(unsigned long mb #ifdef __x86_64__ if ( !acpi_boot_table_init_done && - s >= BOOTSTRAP_DIRECTMAP_END && + s >= (1ULL << 32) && !acpi_boot_table_init() ) { acpi_boot_table_init_done = 1; @@ -881,26 +1022,60 @@ void __init __start_xen(unsigned long mb set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT); - /* Need to create mappings above 16MB. */ - map_s = max_t(uint64_t, s, 16<<20); - map_e = e; -#if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */ - map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END); -#endif +#ifdef CONFIG_X86_64 + /* Need to create mappings above BOOTSTRAP_MAP_BASE. */ + map_s = max_t(uint64_t, s, BOOTSTRAP_MAP_BASE); + map_e = min_t(uint64_t, e, + ARRAY_SIZE(l2_identmap) << L2_PAGETABLE_SHIFT); /* Pass mapped memory to allocator /before/ creating new mappings. */ - init_boot_pages(s, min_t(uint64_t, map_s, e)); + init_boot_pages(s, min(map_s, e)); + s = map_s; + if ( s < map_e ) + { + uint64_t mask = (1UL << L2_PAGETABLE_SHIFT) - 1; + + map_s = (s + mask) & ~mask; + map_e &= ~mask; + init_boot_pages(map_s, map_e); + } + + if ( map_s > map_e ) + map_s = map_e = s; /* Create new mappings /before/ passing memory to the allocator. */ - if ( map_s < map_e ) - map_pages_to_xen( - (unsigned long)maddr_to_bootstrap_virt(map_s), - map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT, - PAGE_HYPERVISOR); + if ( map_e < e ) + { + map_pages_to_xen((unsigned long)__va(map_e), map_e >> PAGE_SHIFT, + (e - map_e) >> PAGE_SHIFT, PAGE_HYPERVISOR); + init_boot_pages(map_e, e); + } + if ( s < map_s ) + { + map_pages_to_xen((unsigned long)__va(s), s >> PAGE_SHIFT, + (map_s - s) >> PAGE_SHIFT, PAGE_HYPERVISOR); + init_boot_pages(s, map_s); + } +#else + init_boot_pages(s, e); +#endif + } - /* Pass remainder of this memory chunk to the allocator. 
*/ - init_boot_pages(map_s, e); + for ( i = 0; i < mbi->mods_count; ++i ) + { + set_pdx_range(mod[i].mod_start, + mod[i].mod_start + PFN_UP(mod[i].mod_end)); +#ifdef CONFIG_X86_64 + map_pages_to_xen((unsigned long)mfn_to_virt(mod[i].mod_start), + mod[i].mod_start, + PFN_UP(mod[i].mod_end), PAGE_HYPERVISOR); +#endif } +#ifdef CONFIG_X86_64 + map_pages_to_xen((unsigned long)__va(kexec_crash_area.start), + kexec_crash_area.start >> PAGE_SHIFT, + PFN_UP(kexec_crash_area.size), PAGE_HYPERVISOR); +#endif memguard_init(); @@ -1023,7 +1198,7 @@ void __init __start_xen(unsigned long mb init_IRQ(); - xsm_init(&initrdidx, mbi, initial_images_start); + xsm_init(&initrdidx, mbi, bootstrap_map); timer_init(); @@ -1135,12 +1310,6 @@ void __init __start_xen(unsigned long mb cmdline = dom0_cmdline; } - if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) ) - { - _initrd_start = mod[initrdidx].mod_start; - _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; - } - if ( xen_cpuidle ) xen_processor_pmbits |= XEN_PROCESSOR_PM_CX; @@ -1148,13 +1317,10 @@ void __init __start_xen(unsigned long mb * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. */ - if ( construct_dom0(dom0, - initial_images_base, - initial_images_start, - mod[0].mod_end-mod[0].mod_start, - _initrd_start, - _initrd_len, - cmdline) != 0) + if ( construct_dom0(dom0, mod, modules_headroom, + (initrdidx > 0) && (initrdidx < mbi->mods_count) + ? mod + initrdidx : NULL, + bootstrap_map, cmdline) != 0) panic("Could not set up DOM0 guest OS\n"); /* Scrub RAM that is still free and so may go to an unprivileged domain. 
*/ --- 2010-11-09.orig/xen/arch/x86/x86_64/mm.c 2010-09-06 08:21:15.000000000 +0200 +++ 2010-11-09/xen/arch/x86/x86_64/mm.c 2010-11-09 10:30:06.000000000 +0100 @@ -65,6 +65,12 @@ l3_pgentry_t __attribute__ ((__section__ l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned"))) l2_xenmap[L2_PAGETABLE_ENTRIES]; +/* Enough page directories to map into the bottom 1GB. */ +l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned"))) + l3_bootmap[L3_PAGETABLE_ENTRIES]; +l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned"))) + l2_bootmap[L2_PAGETABLE_ENTRIES]; + int __mfn_valid(unsigned long mfn) { return likely(mfn < max_page) && --- 2010-11-09.orig/xen/include/asm-x86/domain.h 2010-11-05 09:22:58.000000000 +0100 +++ 2010-11-09/xen/include/asm-x86/domain.h 2010-11-09 10:30:06.000000000 +0100 @@ -466,16 +466,6 @@ void domain_cpuid(struct domain *d, unsigned int *ecx, unsigned int *edx); -int construct_dom0( - struct domain *d, - unsigned long image_base, - unsigned long image_start, unsigned long image_len, - unsigned long initrd_start, unsigned long initrd_len, - char *cmdline); - -extern unsigned long initial_images_nrpages(void); -extern void discard_initial_images(void); - #endif /* __ASM_DOMAIN_H__ */ /* --- 2010-11-09.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 2010-06-01 13:39:57.000000000 +0200 +++ 2010-11-09/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h 2010-11-09 10:30:06.000000000 +0100 @@ -30,7 +30,6 @@ &amd_iommu_head, list) #define DMA_32BIT_MASK 0x00000000ffffffffULL -#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) extern int amd_iommu_debug; extern int amd_iommu_perdev_intremap; --- 2010-11-09.orig/xen/include/asm-x86/page.h 2010-05-28 13:59:16.000000000 +0200 +++ 2010-11-09/xen/include/asm-x86/page.h 2010-11-09 10:30:06.000000000 +0100 @@ -302,6 +302,7 @@ extern l2_pgentry_t idle_pg_table_l2[ extern l2_pgentry_t *compat_idle_pg_table_l2; extern unsigned int m2p_compat_vstart; #endif +extern l2_pgentry_t 
l2_identmap[4*L2_PAGETABLE_ENTRIES]; void paging_init(void); void setup_idle_pagetable(void); #endif /* !defined(__ASSEMBLY__) */ @@ -397,6 +398,7 @@ static inline uint32_t cacheattr_to_pte_ #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK) #endif /* __X86_PAGE_H__ */ --- 2010-11-09.orig/xen/include/asm-x86/setup.h 2010-05-20 09:59:27.000000000 +0200 +++ 2010-11-09/xen/include/asm-x86/setup.h 2010-11-09 10:30:06.000000000 +0100 @@ -1,6 +1,8 @@ #ifndef __X86_SETUP_H_ #define __X86_SETUP_H_ +#include + extern int early_boot; extern unsigned long xenheap_initial_phys_start; @@ -25,4 +27,14 @@ void init_IRQ(void); void vesa_init(void); void vesa_mtrr_init(void); +int construct_dom0( + struct domain *d, + const module_t *kernel, unsigned long kernel_headroom, + const module_t *initrd, + void *(*bootstrap_map)(const module_t *), + char *cmdline); + +unsigned long initial_images_nrpages(void); +void discard_initial_images(void); + #endif --- 2010-11-09.orig/xen/include/xsm/xsm.h 2010-11-09 08:25:42.000000000 +0100 +++ 2010-11-09/xen/include/xsm/xsm.h 2010-11-09 10:30:06.000000000 +0100 @@ -432,14 +432,15 @@ static inline long __do_xsm_op (XEN_GUES #ifdef XSM_ENABLE extern int xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi, - unsigned long initial_images_start); + void *(*bootstrap_map)(const module_t *)); extern int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi, - unsigned long initial_images_start); + void *(*bootstrap_map)(const module_t *)); extern int register_xsm(struct xsm_operations *ops); extern int unregister_xsm(struct xsm_operations *ops); #else static inline int xsm_init (unsigned int *initrdidx, - const multiboot_info_t *mbi, unsigned long initial_images_start) + const multiboot_info_t *mbi, + void *(*bootstrap_map)(const module_t *)) { return 0; } --- 2010-11-09.orig/xen/xsm/xsm_core.c 2009-10-07 13:31:36.000000000 
+0200 +++ 2010-11-09/xen/xsm/xsm_core.c 2010-11-09 10:30:06.000000000 +0100 @@ -47,7 +47,7 @@ static void __init do_xsm_initcalls(void } int __init xsm_init(unsigned int *initrdidx, const multiboot_info_t *mbi, - unsigned long initial_images_start) + void *(*bootstrap_map)(const module_t *)) { int ret = 0; @@ -55,9 +55,10 @@ int __init xsm_init(unsigned int *initrd if ( XSM_MAGIC ) { - ret = xsm_policy_init(initrdidx, mbi, initial_images_start); + ret = xsm_policy_init(initrdidx, mbi, bootstrap_map); if ( ret ) { + bootstrap_map(NULL); printk("%s: Error initializing policy.\n", __FUNCTION__); return -EINVAL; } @@ -65,6 +66,7 @@ int __init xsm_init(unsigned int *initrd if ( verify(&dummy_xsm_ops) ) { + bootstrap_map(NULL); printk("%s could not verify " "dummy_xsm_ops structure.\n", __FUNCTION__); return -EIO; @@ -72,6 +74,7 @@ int __init xsm_init(unsigned int *initrd xsm_ops = &dummy_xsm_ops; do_xsm_initcalls(); + bootstrap_map(NULL); return 0; } --- 2010-11-09.orig/xen/xsm/xsm_policy.c 2007-11-02 17:25:59.000000000 +0100 +++ 2010-11-09/xen/xsm/xsm_policy.c 2010-11-09 10:30:06.000000000 +0100 @@ -22,11 +22,11 @@ #include #include -char *policy_buffer = NULL; -u32 policy_size = 0; +char *__initdata policy_buffer = NULL; +u32 __initdata policy_size = 0; int xsm_policy_init(unsigned int *initrdidx, const multiboot_info_t *mbi, - unsigned long initial_images_start) + void *(*bootstrap_map)(const module_t *)) { int i; module_t *mod = (module_t *)__va(mbi->mods_addr); @@ -40,15 +40,8 @@ int xsm_policy_init(unsigned int *initrd */ for ( i = mbi->mods_count-1; i >= 1; i-- ) { - start = initial_images_start + (mod[i].mod_start-mod[0].mod_start); -#if defined(__i386__) - _policy_start = (u32 *)start; -#elif defined(__x86_64__) - _policy_start = maddr_to_virt(start); -#else - _policy_start = NULL; -#endif - _policy_len = mod[i].mod_end - mod[i].mod_start; + _policy_start = bootstrap_map(mod + i); + _policy_len = mod[i].mod_end; if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC ) 
{ @@ -63,6 +56,8 @@ int xsm_policy_init(unsigned int *initrd break; } + + bootstrap_map(NULL); } return rc;