# HG changeset patch
# User Ian.Campbell@xxxxxxxxxxxxx
# Node ID 8fa46042348c33429245312e58ab71c437bf9920
# Parent d61211a6c273de817af91944c666ffb6406c6798
Convert x86/64 Linux to use the new memory map hypercall.
This change removes several of the differences between the bare-metal
and Xen versions.
Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
---
linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 218 ++++++++------------
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 70 +++---
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 26 +-
linux-2.6-xen-sparse/include/asm-x86_64/e820.h | 4
4 files changed, 154 insertions(+), 164 deletions(-)
diff -r d61211a6c273 -r 8fa46042348c linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Mon May 22 09:23:03 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Mon May 22 09:23:15 2006 +0100
@@ -26,8 +26,6 @@
#include <asm/sections.h>
#include <xen/interface/memory.h>
-unsigned long pci_mem_start = 0xaeedbabe;
-
/*
* PFN of last memory page.
*/
@@ -47,15 +45,15 @@ unsigned long end_user_pfn = MAXMEM>>PAG
unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
#ifndef CONFIG_XEN
-
-
extern struct resource code_resource, data_resource;
+#endif
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
{
unsigned long addr = *addrp, last = addr + size;
+#ifndef CONFIG_XEN
/* various gunk below that needed for SMP startup */
if (addr < 0x8000) {
*addrp = 0x8000;
@@ -83,9 +81,16 @@ static inline int bad_addr(unsigned long
return 1;
}
/* XXX ramdisk image here? */
+#else
+ if (last < (table_end<<PAGE_SHIFT)) {
+ *addrp = table_end << PAGE_SHIFT;
+ return 1;
+ }
+#endif
return 0;
}
+#ifndef CONFIG_XEN
int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
@@ -99,6 +104,7 @@ int __init e820_mapped(unsigned long sta
}
return 0;
}
+#endif
/*
* Find a free area in a specific range.
@@ -229,22 +235,23 @@ e820_hole_size(unsigned long start_pfn,
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
-void __init e820_reserve_resources(void)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
+void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
+{
+ int i;
+ for (i = 0; i < nr_map; i++) {
struct resource *res;
res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
+ switch (e820[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
+ res->start = e820[i].addr;
+ res->end = res->start + e820[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
+#ifndef CONFIG_XEN
if (e820.map[i].type == E820_RAM) {
/*
* We don't know which RAM region contains kernel data,
@@ -257,74 +264,9 @@ void __init e820_reserve_resources(void)
request_resource(res, &crashk_res);
#endif
}
- }
-}
-#else
-void __init e820_reserve_resources(void)
-{
- dom0_op_t op;
- struct dom0_memory_map_entry *map;
- unsigned long gapstart, gapsize, round, last;
- int i, found = 0;
-
- if (!(xen_start_info->flags & SIF_INITDOMAIN))
- return;
-
- map = alloc_bootmem_low_pages(PAGE_SIZE);
- op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
- set_xen_guest_handle(op.u.physical_memory_map.memory_map, map);
- op.u.physical_memory_map.max_map_entries =
- PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
- BUG_ON(HYPERVISOR_dom0_op(&op));
-
- last = 0x100000000ULL;
- gapstart = 0x10000000;
- gapsize = 0x400000;
-
- for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
- struct resource *res;
-
- if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
- gapsize = last - map[i].end;
- gapstart = map[i].end;
- found = 1;
- }
- if (map[i].start < last)
- last = map[i].start;
-
- if (map[i].end > 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- res->name = map[i].is_ram ? "System RAM" : "reserved";
- res->start = map[i].start;
- res->end = map[i].end - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- }
-
- free_bootmem(__pa(map), PAGE_SIZE);
-
- if (!found) {
- gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
- gapstart = (gapstart << PAGE_SHIFT) + 1024*1024;
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
- }
-
- /*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
- */
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
-
- printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
-}
-#endif /* CONFIG_XEN */
+#endif
+ }
+}
/*
* Add a memory region to the kernel e820 map.
@@ -370,7 +312,6 @@ void __init e820_print_map(char *who)
}
}
-#ifndef CONFIG_XEN
/*
* Sanitize the BIOS e820 map.
*
@@ -557,9 +498,13 @@ static int __init sanitize_e820_map(stru
*/
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
+#ifndef CONFIG_XEN
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
+#else
+ BUG_ON(nr_map < 1);
+#endif
do {
unsigned long start = biosmap->addr;
@@ -571,6 +516,7 @@ static int __init copy_e820_map(struct e
if (start > end)
return -1;
+#ifndef CONFIG_XEN
/*
* Some BIOSes claim RAM in the 640k - 1M region.
* Not right. Fix it up.
@@ -589,12 +535,14 @@ static int __init copy_e820_map(struct e
size = end - start;
}
}
+#endif
add_memory_region(start, size, type);
} while (biosmap++,--nr_map);
return 0;
}
+#ifndef CONFIG_XEN
void __init setup_memory_region(void)
{
char *who = "BIOS-e820";
@@ -628,39 +576,63 @@ void __init setup_memory_region(void)
#else /* CONFIG_XEN */
-extern unsigned long xen_override_max_pfn;
-extern union xen_start_info_union xen_start_info_union;
-
-unsigned long __init e820_end_of_ram(void)
-{
- unsigned long max_end_pfn;
-
- if (xen_override_max_pfn == 0) {
- max_end_pfn = xen_start_info->nr_pages;
- /* Default 8MB slack (to balance backend allocations). */
- max_end_pfn += 8 << (20 - PAGE_SHIFT);
- } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
- max_end_pfn = xen_override_max_pfn;
- } else {
- max_end_pfn = xen_start_info->nr_pages;
- }
-
- return max_end_pfn;
-}
-
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
- return 0;
-}
-
+void __init setup_memory_region(void)
+{ /* Xen variant: fetch the memory map from the hypervisor and load it into the kernel e820 map. */
+ int rc;
+ struct xen_memory_map memmap;
+ /*
+ * map[] (E820MAX entries) is rather large for a stack variable, but this
+ * early in the boot process we know we have plenty of slack stack space.
+ */
+ struct e820entry map[E820MAX];
+
+ memmap.nr_entries = E820MAX; /* number of entries map[] can hold */
+ set_xen_guest_handle(memmap.buffer, map);
+
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if ( rc == -ENOSYS ) { /* presumably a hypervisor without this op; fake one RAM region starting at 0 */
+ memmap.nr_entries = 1;
+ map[0].addr = 0ULL;
+ map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+ /* Add 8MB of slack beyond nr_pages (to balance backend allocations). */
+ map[0].size += 8 << 20;
+ map[0].type = E820_RAM;
+ rc = 0;
+ }
+ BUG_ON(rc); /* any other non-zero result from the hypercall is fatal */
+
+ sanitize_e820_map(map, (char *)&memmap.nr_entries); /* NOTE(review): cast passes only the first byte of nr_entries - confirm callee contract */
+
+ BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); /* copy_e820_map() returns -1 on a bad entry */
+
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ e820_print_map("Xen");
+}
#endif
void __init parse_memopt(char *p, char **from)
{
+ int i;
+ unsigned long current_end;
+ unsigned long end;
+
end_user_pfn = memparse(p, from);
end_user_pfn >>= PAGE_SHIFT;
- xen_override_max_pfn = (unsigned long) end_user_pfn;
+
+ end = end_user_pfn<<PAGE_SHIFT;
+ i = e820.nr_map-1;
+ current_end = e820.map[i].addr + e820.map[i].size;
+
+ if (current_end < end) {
+ /*
+ * The e820 map ends before our requested size so
+ * extend the final entry to the requested address.
+ */
+ if (e820.map[i].type == E820_RAM)
+ e820.map[i].size = end - e820.map[i].addr;
+ else
+ add_memory_region(current_end, end - current_end, E820_RAM);
+ }
}
void __init parse_memmapopt(char *p, char **from)
@@ -684,16 +656,17 @@ void __init parse_memmapopt(char *p, cha
p = *from;
}
+unsigned long pci_mem_start = 0xaeedbabe;
+
/*
* Search for the biggest gap in the low 32 bits of the e820
* memory space. We pass this space to PCI to assign MMIO resources
* for hotplug or unconfigured devices in.
* Hopefully the BIOS let enough space left.
*/
-__init void e820_setup_gap(void)
-{
-#ifndef CONFIG_XEN
- unsigned long gapstart, gapsize;
+__init void e820_setup_gap(struct e820entry *e820, int nr_map)
+{
+ unsigned long gapstart, gapsize, round;
unsigned long last;
int i;
int found = 0;
@@ -701,10 +674,10 @@ __init void e820_setup_gap(void)
last = 0x100000000ull;
gapstart = 0x10000000;
gapsize = 0x400000;
- i = e820.nr_map;
+ i = nr_map;
while (--i >= 0) {
- unsigned long long start = e820.map[i].addr;
- unsigned long long end = start + e820.map[i].size;
+ unsigned long long start = e820[i].addr;
+ unsigned long long end = start + e820[i].size;
/*
* Since "last" is at most 4GB, we know we'll
@@ -730,16 +703,15 @@ __init void e820_setup_gap(void)
}
/*
- * Start allocating dynamic PCI memory a bit into the gap,
- * aligned up to the nearest megabyte.
- *
- * Question: should we try to pad it up a bit (do something
- * like " + (gapsize >> 3)" in there too?). We now have the
- * technology.
+ * See how much we want to round up: start off with
+ * rounding to the next 1MB area.
*/
- pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
+ round = 0x100000;
+ while ((gapsize >> 4) > round)
+ round += round;
+ /* Fun with two's complement */
+ pci_mem_start = (gapstart + round) & -round;
printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
pci_mem_start, gapstart, gapsize);
-#endif
-}
+}
diff -r d61211a6c273 -r 8fa46042348c linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon May 22 09:23:03 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon May 22 09:23:15 2006 +0100
@@ -76,8 +76,8 @@
#include <xen/features.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
-#define end_pfn_map end_pfn
#include <asm/mach-xen/setup_arch_post.h>
+#include <xen/interface/memory.h>
extern unsigned long start_pfn;
extern struct edid_info edid_info;
@@ -490,19 +490,6 @@ static __init void parse_cmdline_early (
}
#ifndef CONFIG_NUMA
-#ifdef CONFIG_XEN
-static void __init
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long bootmap_size;
-
- bootmap_size = init_bootmem(start_pfn, end_pfn);
- free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT);
- reserve_bootmem(HIGH_MEMORY,
- (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1)
- - HIGH_MEMORY);
-}
-#else
static void __init
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n",bootmap_size);
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
+#ifdef CONFIG_XEN
+ e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
+#else
e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+#endif
reserve_bootmem(bootmap, bootmap_size);
}
-#endif /* !CONFIG_XEN */
#endif
/* Use inline assembly to define this because the nops are defined
@@ -637,6 +627,9 @@ void __init setup_arch(char **cmdline_p)
unsigned long kernel_end;
#ifdef CONFIG_XEN
+ struct e820entry *machine_e820;
+ struct xen_memory_map memmap;
+
/* Register a call for panic conditions. */
notifier_chain_register(&panic_notifier_list, &xen_panic_block);
@@ -693,20 +686,18 @@ void __init setup_arch(char **cmdline_p)
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
+#endif /* !CONFIG_XEN */
setup_memory_region();
copy_edd();
-#endif /* !CONFIG_XEN */
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
-#ifdef CONFIG_XEN
- init_mm.brk = start_pfn << PAGE_SHIFT;
-#else
- init_mm.brk = (unsigned long) &_end;
-
+ init_mm.brk = (unsigned long) &_end;
+
+#ifndef CONFIG_XEN
code_resource.start = virt_to_phys(&_text);
code_resource.end = virt_to_phys(&_etext)-1;
data_resource.start = virt_to_phys(&_etext);
@@ -735,12 +726,11 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_NUMA
- numa_initmem_init(start_pfn, end_pfn);
+ numa_initmem_init(0, end_pfn);
#else
- contig_initmem_init(start_pfn, end_pfn);
-#endif
-
-#ifndef CONFIG_XEN
+ contig_initmem_init(0, end_pfn);
+#endif
+
/* Reserve direct mapping */
reserve_bootmem_generic(table_start << PAGE_SHIFT,
(table_end - table_start) << PAGE_SHIFT);
@@ -749,6 +739,10 @@ void __init setup_arch(char **cmdline_p)
kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
+#ifdef CONFIG_XEN
+ /* reserve physmap, start info and initial page tables: [kernel_end, table_start); 2nd arg is a size, not an end address */
+ reserve_bootmem(kernel_end, (table_start<<PAGE_SHIFT) - kernel_end);
+#else
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
@@ -933,13 +927,24 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
#endif
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
probe_roms();
- e820_reserve_resources();
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, machine_e820);
+
+ BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+
+ e820_reserve_resources(machine_e820, memmap.nr_entries);
+ }
+#elif !defined(CONFIG_XEN)
+ e820_reserve_resources(e820.map, e820.nr_map);
#endif
request_resource(&iomem_resource, &video_ram_resource);
@@ -951,7 +956,14 @@ void __init setup_arch(char **cmdline_p)
request_resource(&ioport_resource, &standard_io_resources[i]);
}
- e820_setup_gap();
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+ if (xen_start_info->flags & SIF_INITDOMAIN) {
+ e820_setup_gap(machine_e820, memmap.nr_entries);
+ free_bootmem(__pa(machine_e820), PAGE_SIZE);
+ }
+#elif !defined(CONFIG_XEN)
+ e820_setup_gap(e820.map, e820.nr_map);
+#endif
#ifdef CONFIG_GART_IOMMU
iommu_hole_init();
diff -r d61211a6c273 -r 8fa46042348c linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Mon May 22 09:23:03 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Mon May 22 09:23:15 2006 +0100
@@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addre
set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}
-unsigned long __initdata table_start, tables_space;
+unsigned long __initdata table_start, table_end;
unsigned long get_machine_pfn(unsigned long addr)
{
@@ -409,11 +409,17 @@ static inline int make_readonly(unsigned
{
int readonly = 0;
- /* Make old and new page tables read-only. */
+ /* Make new page tables read-only. */
+ if (!xen_feature(XENFEAT_writable_page_tables)
+ && (paddr >= (table_start << PAGE_SHIFT))
+ && (paddr < (table_end << PAGE_SHIFT)))
+ readonly = 1;
+ /* Make old page tables read-only. */
if (!xen_feature(XENFEAT_writable_page_tables)
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
- && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+ && (paddr < (start_pfn << PAGE_SHIFT)))
readonly = 1;
+
/*
* No need for writable mapping of kernel image. This also ensures that
* page and descriptor tables embedded inside don't have writable
@@ -544,7 +550,7 @@ void __init xen_init_pt(void)
mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}
-void __init extend_init_mapping(void)
+void __init extend_init_mapping(unsigned long tables_space)
{
unsigned long va = __START_KERNEL_map;
unsigned long phys, addr, *pte_page;
@@ -599,23 +605,23 @@ void __init extend_init_mapping(void)
static void __init find_early_table_space(unsigned long end)
{
- unsigned long puds, pmds, ptes;
+ unsigned long puds, pmds, ptes, tables;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
- tables_space =
- round_up(puds * 8, PAGE_SIZE) +
+ tables = round_up(puds * 8, PAGE_SIZE) +
round_up(pmds * 8, PAGE_SIZE) +
round_up(ptes * 8, PAGE_SIZE);
- extend_init_mapping();
+ extend_init_mapping(tables);
table_start = start_pfn;
+ table_end = table_start + (tables>>PAGE_SHIFT);
early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT);
+ end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsig
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
}
- BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> PAGE_SHIFT));
+ BUG_ON(!after_bootmem && start_pfn != table_end);
__flush_tlb_all();
}
diff -r d61211a6c273 -r 8fa46042348c linux-2.6-xen-sparse/include/asm-x86_64/e820.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h Mon May 22 09:23:03 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/e820.h Mon May 22 09:23:15 2006 +0100
@@ -45,12 +45,12 @@ extern void setup_memory_region(void);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
extern void e820_print_map(char *who);
extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
-extern void e820_setup_gap(void);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
extern unsigned long e820_hole_size(unsigned long start_pfn,
unsigned long end_pfn);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|