# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID f0bf239844a66f1cd0b8bbf112e1acd73996cae1
# Parent b043928b08738e714900eef21cd8932934d09c45
Fix x86/64 pagetable initialisation to not waste several
megabytes of memory.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
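
The old scheme reserved a fixed MAX_LOW_PAGES (0x20) array of page-aligned
pages in the kernel image for early pagetables, and afterwards advanced
start_pfn by the separately tracked current_size + extended_size, which
appears to overshoot the pages actually consumed by several megabytes.
With this patch all early pagetable pages come from a single bump
allocator over start_pfn, so the cursor moves by exactly one page per
allocation, and the new BUG_ON in init_memory_mapping() checks that
exactly tables_space bytes were used. A minimal userspace sketch of the
allocation pattern follows (the malloc-backed "RAM" and the main()
harness are illustrative only, not from the tree):

    /*
     * Userspace sketch of the bump-allocator pattern the patch adopts:
     * each early pagetable page is carved directly off the boot
     * allocation cursor (start_pfn) and zeroed, so the cursor advances
     * by exactly one page per allocation.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    static unsigned char *ram;      /* stands in for the direct map */
    static unsigned long start_pfn; /* boot-time allocation cursor  */

    static void *alloc_static_page(unsigned long *phys)
    {
            void *va = ram + (start_pfn << PAGE_SHIFT);
            *phys = start_pfn << PAGE_SHIFT;
            start_pfn++;
            memset(va, 0, PAGE_SIZE); /* pagetable pages start zeroed */
            return va;
    }

    int main(void)
    {
            unsigned long phys, table_start;

            ram = calloc(16, PAGE_SIZE);
            start_pfn = 4;          /* pretend pfns 0-3 hold the kernel */
            table_start = start_pfn;

            void *pud = alloc_static_page(&phys);
            void *pmd = alloc_static_page(&phys);
            printf("pud=%p pmd=%p, used %lu pages\n",
                   pud, pmd, start_pfn - table_start);

            free(ram);
            return 0;
    }

The BUG_ON added to init_memory_mapping() is the kernel-side analogue of
the "used pages" accounting above: start_pfn must end up exactly at
table_start + (tables_space >> PAGE_SHIFT).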
diff -r b043928b0873 -r f0bf239844a6 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 30 16:15:27 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 30 16:19:07 2005
@@ -62,14 +62,16 @@
* available in init_memory_mapping().
*/
-#define addr_to_page(addr, page) \
- (addr) &= PHYSICAL_PAGE_MASK; \
- (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
+#define addr_to_page(addr, page) \
+ (addr) &= PHYSICAL_PAGE_MASK; \
+ (page) = ((unsigned long *) ((unsigned long) \
+ (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \
+ __START_KERNEL_map)))
static void __make_page_readonly(unsigned long va)
{
- unsigned long addr;
- pte_t pte, *ptep;
+ unsigned long addr;
+ pte_t pte, *ptep;
unsigned long *page = (unsigned long *) init_level4_pgt;
addr = (unsigned long) page[pgd_index(va)];
@@ -89,22 +91,22 @@
static void __make_page_writable(unsigned long va)
{
- unsigned long addr;
- pte_t pte, *ptep;
- unsigned long *page = (unsigned long *) init_level4_pgt;
-
- addr = (unsigned long) page[pgd_index(va)];
- addr_to_page(addr, page);
-
- addr = page[pud_index(va)];
- addr_to_page(addr, page);
-
- addr = page[pmd_index(va)];
- addr_to_page(addr, page);
-
- ptep = (pte_t *) &page[pte_index(va)];
+ unsigned long addr;
+ pte_t pte, *ptep;
+ unsigned long *page = (unsigned long *) init_level4_pgt;
+
+ addr = (unsigned long) page[pgd_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pud_index(va)];
+ addr_to_page(addr, page);
+
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, page);
+
+ ptep = (pte_t *) &page[pte_index(va)];
pte.pte = (ptep->pte | _PAGE_RW);
- xen_l1_entry_update(ptep, pte);
+ xen_l1_entry_update(ptep, pte);
__flush_tlb_one(addr);
}
@@ -115,55 +117,55 @@
void make_page_readonly(void *va)
{
pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
- unsigned long addr = (unsigned long) va;
-
- if (!init_mapping_done) {
- __make_page_readonly(addr);
- return;
- }
-
- pgd = pgd_offset_k(addr);
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
- ptep = pte_offset_kernel(pmd, addr);
+ unsigned long addr = (unsigned long) va;
+
+ if (!init_mapping_done) {
+ __make_page_readonly(addr);
+ return;
+ }
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ ptep = pte_offset_kernel(pmd, addr);
pte.pte = (ptep->pte & ~_PAGE_RW);
- xen_l1_entry_update(ptep, pte);
+ xen_l1_entry_update(ptep, pte);
__flush_tlb_one(addr);
}
void make_page_writable(void *va)
{
- pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
- unsigned long addr = (unsigned long) va;
-
- if (!init_mapping_done) {
- __make_page_writable(addr);
- return;
- }
-
- pgd = pgd_offset_k(addr);
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
- ptep = pte_offset_kernel(pmd, addr);
+ pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep;
+ unsigned long addr = (unsigned long) va;
+
+ if (!init_mapping_done) {
+ __make_page_writable(addr);
+ return;
+ }
+
+ pgd = pgd_offset_k(addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ ptep = pte_offset_kernel(pmd, addr);
pte.pte = (ptep->pte | _PAGE_RW);
- xen_l1_entry_update(ptep, pte);
+ xen_l1_entry_update(ptep, pte);
__flush_tlb_one(addr);
}
void make_pages_readonly(void* va, unsigned nr)
{
- while ( nr-- != 0 ) {
- make_page_readonly(va);
- va = (void*)((unsigned long)va + PAGE_SIZE);
- }
+ while (nr-- != 0) {
+ make_page_readonly(va);
+ va = (void*)((unsigned long)va + PAGE_SIZE);
+ }
}
void make_pages_writable(void* va, unsigned nr)
{
- while ( nr-- != 0 ) {
- make_page_writable(va);
- va = (void*)((unsigned long)va + PAGE_SIZE);
- }
+ while (nr-- != 0) {
+ make_page_writable(va);
+ va = (void*)((unsigned long)va + PAGE_SIZE);
+ }
}
/*
@@ -389,7 +391,7 @@
set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}
-unsigned long __initdata table_start, table_end, tables_space;
+unsigned long __initdata table_start, tables_space;
unsigned long get_machine_pfn(unsigned long addr)
{
@@ -400,40 +402,15 @@
return pte_mfn(*pte);
}
-#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
-#define MAX_LOW_PAGES 0x20
-static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K;
-static int __init_pgt_index;
-
-/*
- * We start using from start_pfn
- */
static __init void *alloc_static_page(unsigned long *phys)
{
- int i = __init_pgt_index++;
-
- if (__init_pgt_index >= MAX_LOW_PAGES) {
- printk("Need to increase MAX_LOW_PAGES");
- BUG();
- }
-
- *phys = __pa(__init_pgt[i]);
-
- return (void *) __init_pgt[i];
+ unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
+ *phys = start_pfn << PAGE_SHIFT;
+ start_pfn++;
+ memset((void *)va, 0, PAGE_SIZE);
+ return (void *)va;
}
-/*
- * Get RO page
- */
-static void __init *alloc_low_page(unsigned long *phys)
-{
- unsigned long pfn = table_end++;
-
- *phys = (pfn << PAGE_SHIFT);
- memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
- return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
-}
-
#define PTE_SIZE PAGE_SIZE
static inline void __set_pte(pte_t *dst, pte_t val)
@@ -443,27 +420,21 @@
static inline int make_readonly(unsigned long paddr)
{
- int readonly = 0;
-
- /* Make new page tables read-only. */
- if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
- (paddr >= (table_start << PAGE_SHIFT)))
- readonly = 1;
-
- /* Make old page tables read-only. */
- if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
- (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
- (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
- readonly = 1;
-
- /*
- * No need for writable mapping of kernel image. This also ensures that
- * page and descriptor tables embedded inside don't have writable mappings.
- */
- if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
- readonly = 1;
-
- return readonly;
+ int readonly = 0;
+
+ /* Make old and new page tables read-only. */
+ if ((paddr >= (xen_start_info.pt_base - __START_KERNEL_map))
+ && (paddr < ((table_start << PAGE_SHIFT) + tables_space)))
+ readonly = 1;
+ /*
+ * No need for writable mapping of kernel image. This also ensures that
+ * page and descriptor tables embedded inside don't have writable
+ * mappings.
+ */
+ if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+ readonly = 1;
+
+ return readonly;
}
static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
@@ -485,7 +456,7 @@
break;
}
- pmd = alloc_low_page(&pmd_phys);
+ pmd = alloc_static_page(&pmd_phys);
make_page_readonly(pmd);
xen_pmd_pin(pmd_phys);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
@@ -499,7 +470,7 @@
set_pmd(pmd, __pmd(0));
break;
}
- pte = alloc_low_page(&pte_phys);
+ pte = alloc_static_page(&pte_phys);
pte_save = pte;
for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
if ((paddr >= end) ||
@@ -526,15 +497,16 @@
static void __init find_early_table_space(unsigned long end)
{
- unsigned long puds, pmds, ptes;
+ unsigned long puds, pmds, ptes;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
-
- tables_space = round_up(puds * 8, PAGE_SIZE) +
- round_up(pmds * 8, PAGE_SIZE) +
- round_up(ptes * 8, PAGE_SIZE);
+ ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
+
+ tables_space =
+ round_up(puds * 8, PAGE_SIZE) +
+ round_up(pmds * 8, PAGE_SIZE) +
+ round_up(ptes * 8, PAGE_SIZE);
}
void __init xen_init_pt(void)
@@ -580,65 +552,58 @@
mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}
-/*
- * Extend kernel mapping to access pages for page tables. The initial
- * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
- * mapping for early initialization.
- */
-static unsigned long current_size, extended_size;
-
void __init extend_init_mapping(void)
{
unsigned long va = __START_KERNEL_map;
unsigned long phys, addr, *pte_page;
- pmd_t *pmd;
+ pmd_t *pmd;
pte_t *pte, new_pte;
- unsigned long *page = (unsigned long *) init_level4_pgt;
- int i;
+ unsigned long *page = (unsigned long *)init_level4_pgt;
addr = page[pgd_index(va)];
addr_to_page(addr, page);
addr = page[pud_index(va)];
addr_to_page(addr, page);
- for (;;) {
+ /* Kill mapping of low 1MB. */
+ while (va < (unsigned long)&_text) {
+ HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
+ va += PAGE_SIZE;
+ }
+
+ /* Ensure init mappings cover kernel text/data and initial tables. */
+ while (va < (__START_KERNEL_map
+ + (start_pfn << PAGE_SHIFT)
+ + tables_space)) {
pmd = (pmd_t *)&page[pmd_index(va)];
- if (!pmd_present(*pmd))
- break;
- addr = page[pmd_index(va)];
- addr_to_page(addr, pte_page);
- for (i = 0; i < PTRS_PER_PTE; i++) {
- pte = (pte_t *) &pte_page[pte_index(va)];
- if (!pte_present(*pte))
- break;
- va += PAGE_SIZE;
- current_size += PAGE_SIZE;
+ if (pmd_none(*pmd)) {
+ pte_page = alloc_static_page(&phys);
+ make_page_readonly(pte_page);
+ xen_pte_pin(phys);
+ set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+ } else {
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, pte_page);
}
- }
-
- while (va < __START_KERNEL_map + current_size + tables_space) {
- pmd = (pmd_t *) &page[pmd_index(va)];
- if (!pmd_none(*pmd))
- continue;
- pte_page = (unsigned long *) alloc_static_page(&phys);
- make_page_readonly(pte_page);
- xen_pte_pin(phys);
- set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
- for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
+ pte = (pte_t *)&pte_page[pte_index(va)];
+ if (pte_none(*pte)) {
new_pte = pfn_pte(
(va - __START_KERNEL_map) >> PAGE_SHIFT,
__pgprot(_KERNPG_TABLE | _PAGE_USER));
- pte = (pte_t *)&pte_page[pte_index(va)];
xen_l1_entry_update(pte, new_pte);
- extended_size += PAGE_SIZE;
}
- }
-
- /* Kill mapping of low 1MB. */
- for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += PAGE_SIZE)
+ va += PAGE_SIZE;
+ }
+
+ /* Finally, blow away any spurious initial mappings. */
+ while (1) {
+ pmd = (pmd_t *)&page[pmd_index(va)];
+ if (pmd_none(*pmd))
+ break;
HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
-}
-
+ va += PAGE_SIZE;
+ }
+}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
This runs before bootmem is initialized and gets pages directly from the
@@ -651,34 +616,31 @@
find_early_table_space(end);
extend_init_mapping();
- start_pfn = current_size >> PAGE_SHIFT;
table_start = start_pfn;
- table_end = table_start;
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
for (; start < end; start = next) {
unsigned long pud_phys;
- pud_t *pud = alloc_low_page(&pud_phys);
- make_page_readonly(pud);
- xen_pud_pin(pud_phys);
+ pud_t *pud = alloc_static_page(&pud_phys);
+ make_page_readonly(pud);
+ xen_pud_pin(pud_phys);
next = start + PGDIR_SIZE;
if (next > end)
next = end;
phys_pud_init(pud, __pa(start), __pa(next));
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
- }
-
- printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
- table_start<<PAGE_SHIFT,
- table_end<<PAGE_SHIFT);
-
- start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
+ }
+
+ printk("kernel direct mapping tables upto %lx @ %lx-%lx\n",
+ __pa(end), table_start<<PAGE_SHIFT, start_pfn<<PAGE_SHIFT);
+
+ BUG_ON(start_pfn != (table_start + (tables_space >> PAGE_SHIFT)));
__flush_tlb_all();
- init_mapping_done = 1;
+ init_mapping_done = 1;
}
extern struct x8664_pda cpu_pda[NR_CPUS];
@@ -1003,3 +965,13 @@
{
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */