diff -r 31adb5c972d0 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu Jan 24 14:41:26 2008 +0000 +++ b/tools/libxc/xc_hvm_build.c Thu Jan 24 09:59:38 2008 -0600 @@ -155,7 +155,9 @@ static int setup_guest(int xc_handle, char *image, unsigned long image_size) { xen_pfn_t *page_array = NULL; + xen_pfn_t *page_array_super = NULL; unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT); + unsigned long nr_pages_super; unsigned long shared_page_nr, entry_eip; struct xen_add_to_physmap xatp; struct shared_info *shared_info; @@ -164,7 +166,9 @@ static int setup_guest(int xc_handle, uint64_t v_start, v_end; int rc; xen_capabilities_info_t caps; - + int super_pages_shift; + int super_pages_order; + /* An HVM guest must be initialised with at least 2MB memory. */ if ( memsize < 2 ) goto error_out; @@ -186,7 +190,21 @@ static int setup_guest(int xc_handle, PERROR("Guest OS must load to a page boundary.\n"); goto error_out; } - + /* check for PAE support, and set super page size shifting appropriately */ + if ( strstr(caps, "x86_32p") ) + super_pages_shift = 1; + else + super_pages_shift = 2; + + nr_pages_super = (unsigned long)memsize >> super_pages_shift; + super_pages_order = 9 + (super_pages_shift - 1); + + IPRINTF("SUPER PAGE MEMORY:\n" + " Number of SuperPages: %lu\n" + " SuperPage Size: %cMB\n", + nr_pages_super, + ((super_pages_shift == 1) ? 
'2' : '4') ); + IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n" " Loader: %016"PRIx64"->%016"PRIx64"\n" " TOTAL: %016"PRIx64"->%016"PRIx64"\n" @@ -195,7 +213,9 @@ static int setup_guest(int xc_handle, v_start, v_end, elf_uval(&elf, elf.ehdr, e_entry)); - if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL || + (page_array_super = + malloc(nr_pages_super * sizeof(xen_pfn_t))) == NULL ) { PERROR("Could not allocate memory.\n"); goto error_out; @@ -203,15 +223,37 @@ static int setup_guest(int xc_handle, for ( i = 0; i < nr_pages; i++ ) page_array[i] = i; + for ( i = 0; i < nr_pages_super; i++ ) + page_array_super[i] = i << super_pages_order; for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ ) page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; - - /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ + for ( i = HVM_BELOW_4G_RAM_END >> (PAGE_SHIFT + super_pages_order); + i < nr_pages_super; i++ ) + page_array_super[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; + + /* Note: We try to request super page allocations (currently either 2MB/4MB) + * at this point. Hypervisor will fall back to 4KB allocation if it + * cannot satisfy these requests. + * + * Allocate memory for HVM guest from 0 - 2MB/4MB space using 4KB pages, + * skipping VGA hole 0xA0000-0xC0000. 
+ */ rc = xc_domain_memory_populate_physmap( xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]); if ( rc == 0 ) rc = xc_domain_memory_populate_physmap( - xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]); + xc_handle, dom, (0x200<<(super_pages_shift-1)) - 0xc0, 0, 0, + &page_array[0xc0]); + /* Allocate memory for HVM guest beyond 2MB/4MB space using 2MB/4MB pages */ + if ( rc == 0 ) + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, nr_pages_super - 1, super_pages_order, 0, + &page_array_super[1]); + /* Handle the case of odd number physical memory size */ + if ( rc == 0 ) + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, nr_pages - (nr_pages_super << super_pages_order), 0, + 0, &page_array[nr_pages_super << super_pages_order]); if ( rc != 0 ) { PERROR("Could not allocate memory for HVM guest.\n"); @@ -283,10 +325,12 @@ static int setup_guest(int xc_handle, } free(page_array); + free(page_array_super); return 0; error_out: free(page_array); + free(page_array_super); return -1; } diff -r 31adb5c972d0 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/arch/ia64/xen/mm.c Thu Jan 24 09:59:38 2008 -0600 @@ -2416,7 +2416,7 @@ steal_page(struct domain *d, struct page int guest_physmap_add_page(struct domain *d, unsigned long gpfn, - unsigned long mfn) + unsigned long mfn, unsigned int page_order) { BUG_ON(!mfn_valid(mfn)); BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1)); @@ -2433,7 +2433,7 @@ guest_physmap_add_page(struct domain *d, void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, - unsigned long mfn) + unsigned long mfn, unsigned int page_order) { BUG_ON(mfn == 0);//XXX zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn); @@ -2839,7 +2839,8 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( if (prev_mfn && mfn_valid(prev_mfn)) { if (is_xen_heap_mfn(prev_mfn)) /* Xen heap frames are simply unhooked from this phys slot. 
*/ - guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, + NORMAL_PAGE_ORDER); else /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); @@ -2848,10 +2849,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); if (gpfn != INVALID_M2P_ENTRY) - guest_physmap_remove_page(d, gpfn, mfn); + guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER); /* Map at new location. */ - guest_physmap_add_page(d, xatp.gpfn, mfn); + guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER); out: UNLOCK_BIGLOCK(d); diff -r 31adb5c972d0 xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/arch/powerpc/mm.c Thu Jan 24 09:59:38 2008 -0600 @@ -350,7 +350,7 @@ uint allocate_extents(struct domain *d, /* Build p2m mapping for newly allocated extent. */ mfn = page_to_mfn(pg); for (i = 0; i < (1 << ext_order); i++) - guest_physmap_add_page(d, gpfn + i, mfn + i); + guest_physmap_add_page(d, gpfn + i, mfn + i, NORMAL_PAGE_ORDER); /* Bump starting PFN by extent size pages. */ gpfn += ext_nrpages; @@ -395,7 +395,7 @@ int allocate_rma(struct domain *d, unsig clear_page((void *)page_to_maddr(&d->arch.rma_page[i])); /* Set up p2m mapping for RMA. 
*/ - guest_physmap_add_page(d, i, mfn+i); + guest_physmap_add_page(d, i, mfn+i, NORMAL_PAGE_ORDER); } /* shared_info uses last page of RMA */ @@ -563,7 +563,8 @@ int guest_physmap_max_mem_pages(struct d } void guest_physmap_add_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) + struct domain *d, unsigned long gpfn, unsigned long mfn, + unsigned int page_order) { if (page_get_owner(mfn_to_page(mfn)) != d) { printk("Won't map foreign MFN 0x%lx for DOM%d\n", mfn, d->domain_id); @@ -591,7 +592,8 @@ void guest_physmap_add_page( } void guest_physmap_remove_page( - struct domain *d, unsigned long gpfn, unsigned long mfn) + struct domain *d, unsigned long gpfn, unsigned long mfn, + unsigned int page_order) { if (page_get_owner(mfn_to_page(mfn)) != d) { printk("Won't unmap foreign MFN 0x%lx for DOM%d\n", mfn, d->domain_id); diff -r 31adb5c972d0 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/arch/x86/mm.c Thu Jan 24 09:59:38 2008 -0600 @@ -3277,7 +3277,8 @@ long arch_memory_op(int op, XEN_GUEST_HA { if ( is_xen_heap_mfn(prev_mfn) ) /* Xen heap frames are simply unhooked from this phys slot. */ - guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, + NORMAL_PAGE_ORDER); else /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); @@ -3286,10 +3287,10 @@ long arch_memory_op(int op, XEN_GUEST_HA /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); if ( gpfn != INVALID_M2P_ENTRY ) - guest_physmap_remove_page(d, gpfn, mfn); + guest_physmap_remove_page(d, gpfn, mfn, NORMAL_PAGE_ORDER); /* Map at new location. 
*/ - guest_physmap_add_page(d, xatp.gpfn, mfn); + guest_physmap_add_page(d, xatp.gpfn, mfn, NORMAL_PAGE_ORDER); UNLOCK_BIGLOCK(d); diff -r 31adb5c972d0 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/arch/x86/mm/p2m.c Thu Jan 24 09:59:38 2008 -0600 @@ -149,9 +149,11 @@ p2m_next_level(struct domain *d, mfn_t * unsigned long *gfn_remainder, unsigned long gfn, u32 shift, u32 max, unsigned long type) { + l1_pgentry_t *l1_entry; l1_pgentry_t *p2m_entry; l1_pgentry_t new_entry; void *next; + int i; ASSERT(d->arch.p2m.alloc_page); if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, @@ -192,6 +194,36 @@ p2m_next_level(struct domain *d, mfn_t * break; } } + + ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT); + + /* split single large page into 4KB page in P2M table */ + if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) + { + struct page_info *pg = d->arch.p2m.alloc_page(d); + if ( pg == NULL ) + return 0; + list_add_tail(&pg->list, &d->arch.p2m.pages); + pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated; + pg->count_info = 1; + + l1_entry = map_domain_page(mfn_x(page_to_mfn(pg))); + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + { + mfn_t mfn = _mfn(l1e_get_pfn(*p2m_entry) + i); + new_entry = l1e_from_pfn(mfn_x(mfn), + __PAGE_HYPERVISOR|_PAGE_USER); + paging_write_p2m_entry(d, gfn, + l1_entry+i, *table_mfn, new_entry, 1); + } + unmap_domain_page(l1_entry); + + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), + __PAGE_HYPERVISOR|_PAGE_USER); + paging_write_p2m_entry(d, gfn, + p2m_entry, *table_mfn, new_entry, 2); + } + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); next = map_domain_page(mfn_x(*table_mfn)); unmap_domain_page(*table); @@ -202,7 +234,8 @@ p2m_next_level(struct domain *d, mfn_t * // Returns 0 on error (out of memory) static int -set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt) +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + 
unsigned int page_order, p2m_type_t p2mt) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); @@ -210,6 +243,7 @@ set_p2m_entry(struct domain *d, unsigned unsigned long gfn_remainder = gfn; l1_pgentry_t *p2m_entry; l1_pgentry_t entry_content; + l2_pgentry_t l2e_content; int rv=0; #if CONFIG_PAGING_LEVELS >= 4 @@ -234,29 +268,57 @@ set_p2m_entry(struct domain *d, unsigned PGT_l2_page_table) ) goto out; #endif - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L2_PAGETABLE_SHIFT - PAGE_SHIFT, - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) - goto out; - - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, - 0, L1_PAGETABLE_ENTRIES); - ASSERT(p2m_entry); + if ( page_order == NORMAL_PAGE_ORDER ) + { + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) + goto out; + + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + 0, L1_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ) + entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt)); + else + entry_content = l1e_empty(); + + /* level 1 entry */ + paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1); + } + else + { + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && + !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) + { + P2M_ERROR("configure P2M table 4KB L2 entry with large page\n"); + domain_crash(d); + goto out; + } + + if ( mfn_valid(mfn) ) + l2e_content = l2e_from_pfn(mfn_x(mfn), + p2m_type_to_flags(p2mt) | _PAGE_PSE); + else + l2e_content = l2e_empty(); + + entry_content.l1 = l2e_content.l2; + + paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2); + } + + if ( vtd_enabled && (p2mt == p2m_mmio_direct) && 
is_hvm_domain(d) ) + iommu_flush(d, gfn, (u64*)p2m_entry); /* Track the highest gfn for which we have ever had a valid mapping */ if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) d->arch.p2m.max_mapped_pfn = gfn; - - if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ) - entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt)); - else - entry_content = l1e_empty(); - - /* level 1 entry */ - paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1); - - if ( vtd_enabled && (p2mt == p2m_mmio_direct) && is_hvm_domain(d) ) - iommu_flush(d, gfn, (u64*)p2m_entry); /* Success */ rv = 1; @@ -334,7 +396,8 @@ int p2m_alloc_table(struct domain *d, P2M_PRINTK("populating p2m table\n"); /* Initialise physmap tables for slot zero. Other code assumes this. */ - if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) ) + if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, + p2m_invalid) ) goto error; /* Copy all existing mappings from the page list and m2p */ @@ -353,7 +416,7 @@ int p2m_alloc_table(struct domain *d, (gfn != 0x55555555L) #endif && gfn != INVALID_M2P_ENTRY - && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) ) + && !set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_ram_rw) ) goto error; } @@ -458,6 +521,16 @@ gfn_to_mfn_foreign(struct domain *d, uns unmap_domain_page(l2e); return _mfn(INVALID_MFN); } + else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) + { + mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); + *t = p2m_flags_to_type(l2e_get_flags(*l2e)); + unmap_domain_page(l2e); + + ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); + return (p2m_is_valid(*t)) ? 
mfn : _mfn(INVALID_MFN); + } + mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); @@ -614,6 +687,29 @@ static void audit_p2m(struct domain *d) gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } + + /* check for large page */ + if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE ) + { + mfn = l2e_get_pfn(l2e[i2]); + ASSERT(mfn_valid(_mfn(mfn))); + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++) + { + m2pfn = get_gpfn_from_mfn(mfn+i1); + if ( m2pfn != (gfn + i) ) + { + pmbad++; + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" + " -> gfn %#lx\n", gfn+i, mfn+i, + m2pfn); + BUG(); + } + } + + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) @@ -667,35 +763,40 @@ static void audit_p2m(struct domain *d) static void -p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn, + unsigned int page_order) +{ + int i; if ( !paging_mode_translate(d) ) return; P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); - set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid); + for (i = 0; i < (1UL << page_order); i++ ) + set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); } void guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn) + unsigned long mfn, unsigned int page_order) { p2m_lock(d); audit_p2m(d); - p2m_remove_page(d, gfn, mfn); + p2m_remove_page(d, gfn, mfn, page_order); audit_p2m(d); p2m_unlock(d); } int guest_physmap_add_entry(struct domain *d, unsigned long gfn, - unsigned long mfn, p2m_type_t t) + unsigned long mfn, unsigned int page_order, + p2m_type_t t) { unsigned long ogfn; p2m_type_t ot; mfn_t omfn; int rc = 0; + int i; if ( !paging_mode_translate(d) ) return -EINVAL; @@ -725,7 +826,8 @@ guest_physmap_add_entry(struct domain 
*d if ( p2m_is_ram(ot) ) { ASSERT(mfn_valid(omfn)); - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + for ( i = 0; i < (1UL << page_order); i++ ) + set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY); } ogfn = mfn_to_gfn(d, _mfn(mfn)); @@ -748,21 +850,23 @@ guest_physmap_add_entry(struct domain *d P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", ogfn , mfn_x(omfn)); if ( mfn_x(omfn) == mfn ) - p2m_remove_page(d, ogfn, mfn); + p2m_remove_page(d, ogfn, mfn, page_order); } } if ( mfn_valid(_mfn(mfn)) ) { - if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) ) + if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) ) rc = -EINVAL; - set_gpfn_from_mfn(mfn, gfn); + for ( i = 0; i < (1UL << page_order); i++) + set_gpfn_from_mfn(mfn+i, gfn+i); } else { gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n", gfn, mfn); - if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) ) + if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, + p2m_invalid) ) rc = -EINVAL; } @@ -781,7 +885,7 @@ void p2m_change_type_global(struct domai l1_pgentry_t l1e_content; l1_pgentry_t *l1e; l2_pgentry_t *l2e; - mfn_t l1mfn; + mfn_t l1mfn, l2mfn; int i1, i2; #if CONFIG_PAGING_LEVELS >= 3 l3_pgentry_t *l3e; @@ -806,6 +910,7 @@ void p2m_change_type_global(struct domai l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); #else /* CONFIG_PAGING_LEVELS == 2 */ l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); + l2mfn = mfn_x(pagetable_get_mfn(d->arch.phys_table)); #endif #if CONFIG_PAGING_LEVELS >= 3 @@ -826,12 +931,27 @@ void p2m_change_type_global(struct domai { continue; } + l2mfn = _mfn(l3e_get_pfn(l3e[i3])); l2e = map_domain_page(l3e_get_pfn(l3e[i3])); #endif /* all levels... 
*/ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) { if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) { + continue; + } + + if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) ) + { + flags = l2e_get_flags(l2e[i2]); + if ( p2m_flags_to_type(flags) != ot ) + continue; + mfn = l2e_get_pfn(l2e[i2]); + gfn = get_gpfn_from_mfn(mfn); + flags = p2m_flags_to_type(nt); + l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE); + paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2], + l2mfn, l1e_content, 2); continue; } @@ -885,7 +1005,7 @@ p2m_type_t p2m_change_type(struct domain mfn = gfn_to_mfn(d, gfn, &pt); if ( pt == ot ) - set_p2m_entry(d, gfn, mfn, nt); + set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, nt); p2m_unlock(d); @@ -909,7 +1029,7 @@ set_mmio_p2m_entry(struct domain *d, uns set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } - rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct); + rc = set_p2m_entry(d, gfn, mfn, NORMAL_PAGE_ORDER, p2m_mmio_direct); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", @@ -933,7 +1053,7 @@ clear_mmio_p2m_entry(struct domain *d, u "clear_mmio_p2m_entry: gfn_to_mfn failed! 
gfn=%08lx\n", gfn); return 0; } - rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0); + rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), NORMAL_PAGE_ORDER, 0); return rc; } diff -r 31adb5c972d0 xen/common/grant_table.c --- a/xen/common/grant_table.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/common/grant_table.c Thu Jan 24 09:59:38 2008 -0600 @@ -1156,7 +1156,7 @@ gnttab_transfer( spin_lock(&e->grant_table->lock); sha = &shared_entry(e->grant_table, gop.ref); - guest_physmap_add_page(e, sha->frame, mfn); + guest_physmap_add_page(e, sha->frame, mfn, NORMAL_PAGE_ORDER); sha->frame = mfn; wmb(); sha->flags |= GTF_transfer_completed; diff -r 31adb5c972d0 xen/common/memory.c --- a/xen/common/memory.c Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/common/memory.c Thu Jan 24 09:59:38 2008 -0600 @@ -117,34 +117,60 @@ static void populate_physmap(struct memo goto out; page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags); - if ( unlikely(page == NULL) ) - { - gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: " - "id=%d memflags=%x (%ld of %d)\n", - a->extent_order, d->domain_id, a->memflags, - i, a->nr_extents); - goto out; - } - - mfn = page_to_mfn(page); - - if ( unlikely(paging_mode_translate(d)) ) - { + if ( unlikely(page == NULL) ) + { + /* fail if it is not translate mode */ + if ( !paging_mode_translate(d) ) + { + gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:" + "id=%d memflags=%x (%ld of %d)\n", + a->extent_order, d->domain_id, a->memflags, + i, a->nr_extents); + goto out; + } + + /* try to do allocation using 4KB page instead */ for ( j = 0; j < (1 << a->extent_order); j++ ) - if ( guest_physmap_add_page(d, gpfn + j, mfn + j) ) + { + page = __alloc_domheap_pages(d, cpu, 0, a->memflags); + if ( page == NULL ) + { + gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:" + "id=%d memflags=%x (%ld of %d)\n", + 0, d->domain_id, a->memflags, + i, a->nr_extents); goto out; - } - else - { - for ( j = 0; j < (1 << a->extent_order); j++ ) - 
set_gpfn_from_mfn(mfn + j, gpfn + j); - - /* Inform the domain of the new page's machine address. */ - if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) - goto out; - } - } - + } + + mfn = page_to_mfn(page); + + if ( guest_physmap_add_page(d, gpfn+j, mfn, + NORMAL_PAGE_ORDER) ) + goto out; + } + } + else /* successful in allocating page of extent_order */ + { + mfn = page_to_mfn(page); + + if ( unlikely(paging_mode_translate(d)) ) + { + if ( guest_physmap_add_page(d, gpfn, mfn, a->extent_order) ) + goto out; + } + else + { + for ( j = 0; j < (1 << a->extent_order); j++ ) + set_gpfn_from_mfn(mfn + j, gpfn + j); + + /* Inform the domain of the new page's machine address. */ + if ( unlikely(__copy_to_guest_offset(a->extent_list, i, + &mfn, 1)) ) + goto out; + } + } + } + out: a->nr_done = i; } @@ -175,7 +201,7 @@ int guest_remove_page(struct domain *d, if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); - guest_physmap_remove_page(d, gmfn, mfn); + guest_physmap_remove_page(d, gmfn, mfn, NORMAL_PAGE_ORDER); put_page(page); @@ -421,7 +447,8 @@ static long memory_exchange(XEN_GUEST_HA if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) ) BUG(); mfn = page_to_mfn(page); - guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn); + guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, + NORMAL_PAGE_ORDER); put_page(page); } @@ -443,8 +470,8 @@ static long memory_exchange(XEN_GUEST_HA if ( unlikely(paging_mode_translate(d)) ) { /* Ignore failure here. There's nothing we can do. 
*/ - for ( k = 0; k < (1UL << exch.out.extent_order); k++ ) - (void)guest_physmap_add_page(d, gpfn + k, mfn + k); + (void)guest_physmap_add_page(d, gpfn, mfn, + exch.out.extent_order); } else { diff -r 31adb5c972d0 xen/include/asm-ia64/shadow.h --- a/xen/include/asm-ia64/shadow.h Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/include/asm-ia64/shadow.h Thu Jan 24 09:59:38 2008 -0600 @@ -40,8 +40,10 @@ * Utilities to change relationship of gpfn->mfn for designated domain, * which is required by gnttab transfer, balloon, device model and etc. */ -int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn); -void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn); +int guest_physmap_add_page(struct domain *d, unsigned long gpfn, + unsigned long mfn, unsigned int page_order); +void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, + unsigned long mfn, unsigned int page_order); static inline int shadow_mode_enabled(struct domain *d) diff -r 31adb5c972d0 xen/include/asm-powerpc/mm.h --- a/xen/include/asm-powerpc/mm.h Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/include/asm-powerpc/mm.h Thu Jan 24 09:59:38 2008 -0600 @@ -278,9 +278,11 @@ extern int guest_physmap_max_mem_pages(s extern int guest_physmap_max_mem_pages(struct domain *d, unsigned long new_max); extern void guest_physmap_add_page( - struct domain *d, unsigned long gpfn, unsigned long mfn); + struct domain *d, unsigned long gpfn, unsigned long mfn, + unsigned int page_order); extern void guest_physmap_remove_page( - struct domain *d, unsigned long gpfn, unsigned long mfn); + struct domain *d, unsigned long gpfn, unsigned long mfn, + unsigned int page_order); #endif diff -r 31adb5c972d0 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/include/asm-x86/mm.h Thu Jan 24 10:00:06 2008 -0600 @@ -129,6 +129,14 @@ static inline u32 pickle_domptr(struct d #define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 
32-bit on PAE/64 */ #endif +/* The order of contiguously allocated page frames */ +#define NORMAL_PAGE_ORDER 0 /* 4KB page */ +#if CONFIG_PAGING_LEVELS == 2 +#define SUPER_PAGE_ORDER 10 /* 4MB page */ +#else +#define SUPER_PAGE_ORDER 9 /* 2MB page */ +#endif + #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) diff -r 31adb5c972d0 xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/include/asm-x86/p2m.h Thu Jan 24 09:59:38 2008 -0600 @@ -98,6 +98,7 @@ static inline mfn_t gfn_to_mfn_current(u { mfn_t mfn = _mfn(INVALID_MFN); p2m_type_t p2mt = p2m_mmio_dm; + paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m @@ -106,25 +107,43 @@ static inline mfn_t gfn_to_mfn_current(u if ( gfn <= current->domain->arch.p2m.max_mapped_pfn ) { l1_pgentry_t l1e = l1e_empty(); + l2_pgentry_t l2e = l2e_empty(); int ret; ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); - /* Need to __copy_from_user because the p2m is sparse and this - * part might not exist */ - ret = __copy_from_user(&l1e, - &phys_to_machine_mapping[gfn], - sizeof(l1e)); - - if ( ret == 0 ) { - p2mt = p2m_flags_to_type(l1e_get_flags(l1e)); - ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt)); + ret = __copy_from_user(&l2e, + &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + + l2_linear_offset(addr)], + sizeof(l2e)); + if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) && + (l2e_get_flags(l2e) & _PAGE_PSE) ) + { + p2mt = p2m_flags_to_type(l2e_get_flags(l2e)); + ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt)); if ( p2m_is_valid(p2mt) ) - mfn = _mfn(l1e_get_pfn(l1e)); - else - /* XXX see above */ + mfn = _mfn(l2e_get_pfn(l2e) + 
l1_table_offset(addr)); + else p2mt = p2m_mmio_dm; + } + else + { + /* Need to __copy_from_user because the p2m is sparse and this + * part might not exist */ + ret = __copy_from_user(&l1e, + &phys_to_machine_mapping[gfn], + sizeof(l1e)); + + if ( ret == 0 ) { + p2mt = p2m_flags_to_type(l1e_get_flags(l1e)); + ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt)); + if ( p2m_is_valid(p2mt) ) + mfn = _mfn(l1e_get_pfn(l1e)); + else + /* XXX see above */ + p2mt = p2m_mmio_dm; + } } } @@ -202,21 +221,23 @@ void p2m_teardown(struct domain *d); /* Add a page to a domain's p2m table */ int guest_physmap_add_entry(struct domain *d, unsigned long gfn, - unsigned long mfn, p2m_type_t t); + unsigned long mfn, unsigned int page_order, + p2m_type_t t); /* Untyped version for RAM only, for compatibility * * Return 0 for success */ static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw); + unsigned long mfn, + unsigned int page_order) +{ + return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw); } /* Remove a page from a domain's p2m table */ void guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); + unsigned long mfn, unsigned int page_order); /* Change types across all p2m entries in a domain */ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt); diff -r 31adb5c972d0 xen/include/xen/paging.h --- a/xen/include/xen/paging.h Thu Jan 24 14:41:26 2008 +0000 +++ b/xen/include/xen/paging.h Thu Jan 24 09:59:38 2008 -0600 @@ -17,9 +17,9 @@ #else -#define paging_mode_translate(d) (0) -#define guest_physmap_add_page(d, p, m) (0) -#define guest_physmap_remove_page(d, p, m) ((void)0) +#define paging_mode_translate(d) (0) +#define guest_physmap_add_page(d, p, m, o) (0) +#define guest_physmap_remove_page(d, p, m, o) ((void)0) #endif