# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1210846156 -3600
# Node ID 8d18e52a1b23c40cb81ae51df8c703175819c504
# Parent 29dc52031954896a407a97cba167c197f8e1c0ed
x86, hvm: Support 2MB superpage allocations for HVM guests.

Signed-off-by: Xin Xiaohui <xiaohui.xin@xxxxxxxxx>
Signed-off-by: Li Xin, B <xin.b.li@xxxxxxxxx>
Signed-off-by: Wei Huang <wei.huang2@xxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
tools/libxc/xc_hvm_build.c | 50 ++++++-
xen/arch/ia64/xen/mm.c | 41 +++---
xen/arch/x86/mm.c | 6
xen/arch/x86/mm/hap/p2m-ept.c | 208 +++++++++++++++++++++++-------
xen/arch/x86/mm/p2m.c | 251 +++++++++++++++++++++++++++++--------
xen/common/grant_table.c | 2
xen/common/memory.c | 12 -
xen/include/asm-ia64/grant_table.h | 2
xen/include/asm-ia64/shadow.h | 6
xen/include/asm-x86/p2m.h | 15 +-
xen/include/xen/paging.h | 4
11 files changed, 451 insertions(+), 146 deletions(-)
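
For orientation before the hunks: guest memory is now populated in order-9
(512-page, 2MB) extents wherever the guest-frame range is suitably aligned,
falling back to order-0 (4kB) extents elsewhere, and the p2m/EPT code learns
to install and later splinter the resulting 2MB mappings.  A minimal,
standalone sketch of the alignment test (illustrative only; just the two
SUPERPAGE_* constants come from the patch):

    #include <stdio.h>

    #define SUPERPAGE_PFN_SHIFT 9                        /* 2MB = 512 x 4kB */
    #define SUPERPAGE_NR_PFNS   (1UL << SUPERPAGE_PFN_SHIFT)

    /* A batch can be allocated as 2MB extents only if both its starting PFN
     * and its length are multiples of 512 pages. */
    static int superpage_aligned(unsigned long start_pfn, unsigned long count)
    {
        return ((start_pfn | count) & (SUPERPAGE_NR_PFNS - 1)) == 0;
    }

    int main(void)
    {
        printf("%d\n", superpage_aligned(0x200, 0x400));  /* 1: aligned      */
        printf("%d\n", superpage_aligned(0x0c0, 0x400));  /* 0: start is not */
        return 0;
    }
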
diff -r 29dc52031954 -r 8d18e52a1b23 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Thu May 15 09:38:00 2008 +0100
+++ b/tools/libxc/xc_hvm_build.c Thu May 15 11:09:16 2008 +0100
@@ -18,6 +18,9 @@
#include "xc_e820.h"
#include <xen/libelf.h>
+
+#define SUPERPAGE_PFN_SHIFT 9
+#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
#define SCRATCH_PFN 0xFFFFF
@@ -211,7 +214,7 @@ static int setup_guest(int xc_handle,
/*
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
- * We allocate pages in batches of no more than 2048 to ensure that
+ * We allocate pages in batches of no more than 8MB to ensure that
* we can be preempted and hence dom0 remains responsive.
*/
rc = xc_domain_memory_populate_physmap(
@@ -219,13 +222,50 @@ static int setup_guest(int xc_handle,
cur_pages = 0xc0;
while ( (rc == 0) && (nr_pages > cur_pages) )
{
+ /* Clip count to maximum 8MB extent. */
unsigned long count = nr_pages - cur_pages;
if ( count > 2048 )
count = 2048;
- rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
- cur_pages += count;
- }
+
+ /* Clip partial superpage extents to superpage boundaries. */
+ if ( ((cur_pages & (SUPERPAGE_NR_PFNS-1)) != 0) &&
+ (count > (-cur_pages & (SUPERPAGE_NR_PFNS-1))) )
+ count = -cur_pages & (SUPERPAGE_NR_PFNS-1); /* clip s.p. tail */
+ else if ( ((count & (SUPERPAGE_NR_PFNS-1)) != 0) &&
+ (count > SUPERPAGE_NR_PFNS) )
+ count &= ~(SUPERPAGE_NR_PFNS - 1); /* clip non-s.p. tail */
+
+ /* Attempt to allocate superpage extents. */
+ if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 )
+ {
+ long done;
+ xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT];
+ struct xen_memory_reservation sp_req = {
+ .nr_extents = count >> SUPERPAGE_PFN_SHIFT,
+ .extent_order = SUPERPAGE_PFN_SHIFT,
+ .domid = dom
+ };
+ set_xen_guest_handle(sp_req.extent_start, sp_extents);
+ for ( i = 0; i < sp_req.nr_extents; i++ )
+ sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_PFN_SHIFT)];
+ done = xc_memory_op(xc_handle, XENMEM_populate_physmap, &sp_req);
+ if ( done > 0 )
+ {
+ done <<= SUPERPAGE_PFN_SHIFT;
+ cur_pages += done;
+ count -= done;
+ }
+ }
+
+ /* Fall back to 4kB extents. */
+ if ( count != 0 )
+ {
+ rc = xc_domain_memory_populate_physmap(
+ xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
+ cur_pages += count;
+ }
+ }
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
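
The clipping above leans on two's-complement arithmetic: for an unsigned
cur_pages, (-cur_pages & (SUPERPAGE_NR_PFNS-1)) is the number of pages left
before the next 2MB boundary.  The same logic restated as a standalone helper
(a sketch, not part of the patch; the helper name is hypothetical and
SUPERPAGE_NR_PFNS is the constant added above):

    /* First trim a batch that starts mid-superpage back to the next 2MB
     * boundary; then drop any partial superpage from the tail of an
     * already-aligned batch; otherwise leave the count unchanged. */
    static unsigned long clip_batch(unsigned long cur_pages, unsigned long count)
    {
        unsigned long to_boundary = -cur_pages & (SUPERPAGE_NR_PFNS - 1);

        if ( ((cur_pages & (SUPERPAGE_NR_PFNS - 1)) != 0) && (count > to_boundary) )
            return to_boundary;                      /* reach the boundary   */
        if ( ((count & (SUPERPAGE_NR_PFNS - 1)) != 0) && (count > SUPERPAGE_NR_PFNS) )
            return count & ~(SUPERPAGE_NR_PFNS - 1); /* drop the 4kB tail    */
        return count;
    }
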
diff -r 29dc52031954 -r 8d18e52a1b23 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/arch/ia64/xen/mm.c Thu May 15 11:09:16 2008 +0100
@@ -2415,16 +2415,20 @@ steal_page(struct domain *d, struct page
int
guest_physmap_add_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
-{
- BUG_ON(!mfn_valid(mfn));
- BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
- set_gpfn_from_mfn(mfn, gpfn);
- smp_mb();
- assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
- ASSIGN_writable | ASSIGN_pgc_allocated);
-
- //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
+ unsigned long mfn, unsigned int page_order)
+{
+ unsigned long i;
+
+ for (i = 0; i < (1UL << page_order); i++) {
+ BUG_ON(!mfn_valid(mfn));
+ BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
+ set_gpfn_from_mfn(mfn, gpfn);
+ smp_mb();
+ assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
+ ASSIGN_writable | ASSIGN_pgc_allocated);
+ mfn++;
+ gpfn++;
+ }
perfc_incr(guest_physmap_add_page);
return 0;
@@ -2432,10 +2436,15 @@ guest_physmap_add_page(struct domain *d,
void
guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
- unsigned long mfn)
-{
+ unsigned long mfn, unsigned int page_order)
+{
+ unsigned long i;
+
BUG_ON(mfn == 0);//XXX
- zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
+
+ for (i = 0; i < (1UL << page_order); i++) {
+ zap_domain_page_one(d, (gpfn+i) << PAGE_SHIFT, 0, mfn+i);
+
perfc_incr(guest_physmap_remove_page);
}
@@ -2838,7 +2847,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
if (prev_mfn && mfn_valid(prev_mfn)) {
if (is_xen_heap_mfn(prev_mfn))
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -2847,10 +2856,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if (gpfn != INVALID_M2P_ENTRY)
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
out:
domain_unlock(d);
diff -r 29dc52031954 -r 8d18e52a1b23 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/arch/x86/mm.c Thu May 15 11:09:16 2008 +0100
@@ -3297,7 +3297,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
{
if ( is_xen_heap_mfn(prev_mfn) )
/* Xen heap frames are simply unhooked from this phys slot. */
- guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
+ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
/* Normal domain memory is freed, to avoid leaking memory. */
guest_remove_page(d, xatp.gpfn);
@@ -3306,10 +3306,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Unmap from old location, if any. */
gpfn = get_gpfn_from_mfn(mfn);
if ( gpfn != INVALID_M2P_ENTRY )
- guest_physmap_remove_page(d, gpfn, mfn);
+ guest_physmap_remove_page(d, gpfn, mfn, 0);
/* Map at new location. */
- guest_physmap_add_page(d, xatp.gpfn, mfn);
+ guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
domain_unlock(d);
diff -r 29dc52031954 -r 8d18e52a1b23 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Thu May 15 11:09:16 2008 +0100
@@ -20,6 +20,7 @@
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <asm/paging.h>
#include <asm/types.h>
#include <asm/domain.h>
#include <asm/p2m.h>
@@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
}
}
+#define GUEST_TABLE_NORMAL_PAGE 1
+#define GUEST_TABLE_SUPER_PAGE 2
+
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
u32 shift)
@@ -54,7 +58,6 @@ static int ept_next_level(struct domain
u32 index;
index = *gfn_remainder >> shift;
- *gfn_remainder &= (1UL << shift) - 1;
ept_entry = (*table) + index;
@@ -83,31 +86,53 @@ static int ept_next_level(struct domain
ept_entry->r = ept_entry->w = ept_entry->x = 1;
}
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
-
- return 1;
+ if ( !ept_entry->sp_avail )
+ {
+ *gfn_remainder &= (1UL << shift) - 1;
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+ return GUEST_TABLE_NORMAL_PAGE;
+ }
+ else
+ return GUEST_TABLE_SUPER_PAGE;
}
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- ept_entry_t *table =
- map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
- unsigned long gfn_remainder = gfn;
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ ept_entry_t *table = NULL;
+ unsigned long gfn_remainder = gfn, offset = 0;
ept_entry_t *ept_entry = NULL;
u32 index;
- int i, rv = 0;
+ int i, rv = 0, ret = 0;
+ int walk_level = order / EPT_TABLE_ORDER;
/* Should check if gfn obeys GAW here */
- for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 0, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ if ( order != 0 )
+ if ( (gfn & ((1UL << order) - 1)) )
+ return 1;
+
+ table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+
+ ASSERT(table != NULL);
+
+ for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
+ {
+ ret = ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
+ walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
+ offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+
ept_entry = table + index;
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
@@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = EPT_DEFAULT_MT;
- ept_entry->sp_avail = 0;
+ ept_entry->sp_avail = walk_level ? 1 : 0;
+
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->avail1 == p2m_ram_logdirty &&
+ p2mt == p2m_ram_rw )
+ for ( i = 0; i < 512; i++ )
+ paging_mark_dirty(d, mfn_x(mfn)-offset+i);
+ }
+ else
+ ept_entry->mfn = mfn_x(mfn);
+
ept_entry->avail1 = p2mt;
- ept_entry->mfn = mfn_x(mfn);
ept_entry->rsvd = 0;
ept_entry->avail2 = 0;
/* last step */
@@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
/* Success */
rv = 1;
- out:
+out:
unmap_domain_page(table);
ept_sync_domain(d);
+ /* Now the p2m table is not shared with vt-d page table */
+
+ if ( iommu_enabled && is_hvm_domain(d) )
+ {
+ if ( p2mt == p2m_ram_rw )
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
+ }
+ else if ( ret )
+ iommu_map_page(d, gfn, mfn_x(mfn));
+ }
+ else
+ {
+ if ( ret == GUEST_TABLE_SUPER_PAGE )
+ {
+ for ( i = 0; i < 512; i++ )
+ iommu_unmap_page(d, gfn-offset+i);
+ }
+ else if ( ret )
+ iommu_unmap_page(d, gfn);
+ }
+ }
+
+#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
/* If p2m table is shared with vtd page-table. */
if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
iommu_flush(d, gfn, (u64*)ept_entry);
+#endif
return rv;
}
@@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
unsigned long gfn_remainder = gfn;
ept_entry_t *ept_entry;
u32 index;
- int i;
+ int i, ret=0;
mfn_t mfn = _mfn(INVALID_MFN);
*t = p2m_mmio_dm;
@@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
/* Should check if gfn obeys GAW here. */
for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
- if ( !ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER) )
+ {
+ ret = ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER);
+ if ( !ret )
goto out;
-
- index = gfn_remainder;
+ else if ( ret == GUEST_TABLE_SUPER_PAGE )
+ break;
+ }
+
+ index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
ept_entry = table + index;
if ( ept_entry->avail1 != p2m_invalid )
{
*t = ept_entry->avail1;
mfn = _mfn(ept_entry->mfn);
+ if ( i )
+ {
+ /* we may meet super pages, and to split into 4k pages
+ * to emulate p2m table
+ */
+ unsigned long split_mfn =
+ mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
+ mfn = _mfn(split_mfn);
+ }
}
out:
@@ -205,33 +283,63 @@ static void ept_change_entry_type_global
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
{
- if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ if ( !l4e[i4].epte )
continue;
- l3e = map_domain_page(l4e[i4].mfn);
- for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
- {
- if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ if ( !l4e[i4].sp_avail )
+ {
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte )
+ continue;
+ if ( !l3e[i3].sp_avail )
+ {
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte )
+ continue;
+ if ( !l2e[i2].sp_avail )
+ {
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ else
+ {
+ if ( l2e[i2].avail1 != ot )
+ continue;
+ l2e[i2].avail1 = nt;
+ ept_p2m_type_to_flags(l2e+i2, nt);
+ }
+ }
+ unmap_domain_page(l2e);
+ }
+ else
+ {
+ if ( l3e[i3].avail1 != ot )
+ continue;
+ l3e[i3].avail1 = nt;
+ ept_p2m_type_to_flags(l3e+i3, nt);
+ }
+ }
+ unmap_domain_page(l3e);
+ }
+ else
+ {
+ if ( l4e[i4].avail1 != ot )
continue;
- l2e = map_domain_page(l3e[i3].mfn);
- for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
- {
- if ( !l2e[i2].epte || l2e[i2].sp_avail )
- continue;
- l1e = map_domain_page(l2e[i2].mfn);
- for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
- {
- if ( !l1e[i1].epte )
- continue;
- if ( l1e[i1].avail1 != ot )
- continue;
- l1e[i1].avail1 = nt;
- ept_p2m_type_to_flags(l1e+i1, nt);
- }
- unmap_domain_page(l1e);
- }
- unmap_domain_page(l2e);
- }
- unmap_domain_page(l3e);
+ l4e[i4].avail1 = nt;
+ ept_p2m_type_to_flags(l4e+i4, nt);
+ }
}
unmap_domain_page(l4e);
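
In the EPT walker above, ept_next_level() now stops early and reports
GUEST_TABLE_SUPER_PAGE when it meets an entry with sp_avail set; on the
lookup path the 4kB frame inside the 2MB mapping is then recovered from the
remaining GFN bits.  A hedged sketch of that offset calculation (illustrative
only; EPT_TABLE_ORDER is 9 in the real headers, the helper name is made up):

    #ifndef EPT_TABLE_ORDER
    #define EPT_TABLE_ORDER 9        /* 512 entries per EPT table level */
    #endif

    /* When the walk stops at level i (i == 1 for a 2MB mapping), the low
     * i*EPT_TABLE_ORDER bits of the remaining GFN select the page inside
     * the superpage, exactly as ept_get_entry() does above. */
    static unsigned long ept_superpage_mfn(unsigned long leaf_mfn,
                                           unsigned long gfn_remainder, int i)
    {
        return leaf_mfn + (gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1));
    }
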
diff -r 29dc52031954 -r 8d18e52a1b23 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/arch/x86/mm/p2m.c Thu May 15 11:09:16 2008 +0100
@@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
u32 max, unsigned long type)
{
+ l1_pgentry_t *l1_entry;
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
+ int i;
ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
@@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
break;
}
}
+
+ ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+
+ /* split single large page into 4KB page in P2M table */
+ if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ unsigned long flags, pfn;
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+ pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
+ pg->count_info = 1;
+
+ /* New splintered mappings inherit the flags of the old superpage,
+ * with a little reorganisation for the _PAGE_PSE_PAT bit. */
+ flags = l1e_get_flags(*p2m_entry);
+ pfn = l1e_get_pfn(*p2m_entry);
+ if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
+ pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
+ else
+ flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
+
+ l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ new_entry = l1e_from_pfn(pfn + i, flags);
+ paging_write_p2m_entry(d, gfn,
+ l1_entry+i, *table_mfn, new_entry, 1);
+ }
+ unmap_domain_page(l1_entry);
+
+ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
+ __PAGE_HYPERVISOR|_PAGE_USER);
+ paging_write_p2m_entry(d, gfn,
+ p2m_entry, *table_mfn, new_entry, 2);
+ }
+
*table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
next = map_domain_page(mfn_x(*table_mfn));
unmap_domain_page(*table);
@@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
// Returns 0 on error (out of memory)
static int
-p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
unsigned long gfn_remainder = gfn;
l1_pgentry_t *p2m_entry;
l1_pgentry_t entry_content;
+ l2_pgentry_t l2e_content;
int rv=0;
#if CONFIG_PAGING_LEVELS >= 4
@@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned
PGT_l2_page_table) )
goto out;
- if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
- L2_PAGETABLE_SHIFT - PAGE_SHIFT,
- L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
- goto out;
-
- p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
+ if ( page_order == 0 )
+ {
+ if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+ goto out;
+
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ 0, L1_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
+ entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
+ else
+ entry_content = l1e_empty();
+
+ /* level 1 entry */
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
+ }
+ else
+ {
+ p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+ L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+ L2_PAGETABLE_ENTRIES);
+ ASSERT(p2m_entry);
+
+ if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
+ !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ {
+ P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
+ domain_crash(d);
+ goto out;
+ }
+
+ if ( mfn_valid(mfn) )
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2mt) | _PAGE_PSE);
+ else
+ l2e_content = l2e_empty();
+
+ entry_content.l1 = l2e_content.l2;
+ paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
+ }
/* Track the highest gfn for which we have ever had a valid mapping */
if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn;
-
- if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
- entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
- else
- entry_content = l1e_empty();
-
- /* level 1 entry */
- paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
if ( iommu_enabled && is_hvm_domain(d) )
{
@@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
+ else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
+ {
+ mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
+ *t = p2m_flags_to_type(l2e_get_flags(*l2e));
+ unmap_domain_page(l2e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ }
+
mfn = _mfn(l2e_get_pfn(*l2e));
unmap_domain_page(l2e);
@@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
/* XXX This is for compatibility with the old model, where anything not
* XXX marked as RAM was considered to be emulated MMIO space.
* XXX Once we start explicitly registering MMIO regions in the p2m
@@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
l1_pgentry_t l1e = l1e_empty();
+ l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ ret = __copy_from_user(&l2e,
+ &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ sizeof(l2e));
+
+ if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+ ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
+ mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+ else
p2mt = p2m_mmio_dm;
+ }
+ else
+ {
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
}
}
@@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
}
static inline
-int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
-{
- return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
}
// Allocate a new p2m table for a domain.
@@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
P2M_PRINTK("populating p2m table\n");
/* Initialise physmap tables for slot zero. Other code assumes this. */
- if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
+ p2m_invalid) )
goto error;
/* Copy all existing mappings from the page list and m2p */
@@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
+ && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
goto error;
}
@@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
+
+ /* check for super page */
+ if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
+ {
+ mfn = l2e_get_pfn(l2e[i2]);
+ ASSERT(mfn_valid(_mfn(mfn)));
+ for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
+ {
+ m2pfn = get_gpfn_from_mfn(mfn+i1);
+ if ( m2pfn != (gfn + i) )
+ {
+ pmbad++;
+ P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn+i, mfn+i,
+ m2pfn);
+ BUG();
+ }
+ }
+ gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+ continue;
+ }
+
l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
@@ -737,32 +860,38 @@ static void audit_p2m(struct domain *d)
static void
-p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
-{
+p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
+ unsigned int page_order)
+{
+ unsigned long i;
+
if ( !paging_mode_translate(d) )
return;
+
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
}
void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
+ unsigned long mfn, unsigned int page_order)
{
p2m_lock(d->arch.p2m);
audit_p2m(d);
- p2m_remove_page(d, gfn, mfn);
+ p2m_remove_page(d, gfn, mfn, page_order);
audit_p2m(d);
p2m_unlock(d->arch.p2m);
}
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t)
-{
- unsigned long ogfn;
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t)
+{
+ unsigned long i, ogfn;
p2m_type_t ot;
mfn_t omfn;
int rc = 0;
@@ -795,7 +924,8 @@ guest_physmap_add_entry(struct domain *d
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
}
ogfn = mfn_to_gfn(d, _mfn(mfn));
@@ -818,21 +948,23 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn);
+ p2m_remove_page(d, ogfn, mfn, page_order);
}
}
if ( mfn_valid(_mfn(mfn)) )
{
- if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
+ if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
rc = -EINVAL;
- set_gpfn_from_mfn(mfn, gfn);
+ for ( i = 0; i < (1UL << page_order); i++ )
+ set_gpfn_from_mfn(mfn+i, gfn+i);
}
else
{
gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
gfn, mfn);
- if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
+ if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
+ p2m_invalid) )
rc = -EINVAL;
}
@@ -851,7 +983,7 @@ void p2m_change_type_global(struct domai
l1_pgentry_t l1e_content;
l1_pgentry_t *l1e;
l2_pgentry_t *l2e;
- mfn_t l1mfn;
+ mfn_t l1mfn, l2mfn;
int i1, i2;
l3_pgentry_t *l3e;
int i3;
@@ -891,11 +1023,26 @@ void p2m_change_type_global(struct domai
{
continue;
}
+ l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ continue;
+ }
+
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
+ {
+ flags = l2e_get_flags(l2e[i2]);
+ if ( p2m_flags_to_type(flags) != ot )
+ continue;
+ mfn = l2e_get_pfn(l2e[i2]);
+ gfn = get_gpfn_from_mfn(mfn);
+ flags = p2m_flags_to_type(nt);
+ l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
+ paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
+ l2mfn, l1e_content, 2);
continue;
}
@@ -944,7 +1091,7 @@ p2m_type_t p2m_change_type(struct domain
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
- set_p2m_entry(d, gfn, mfn, nt);
+ set_p2m_entry(d, gfn, mfn, 0, nt);
p2m_unlock(d->arch.p2m);
@@ -968,7 +1115,7 @@ set_mmio_p2m_entry(struct domain *d, uns
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
- rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
+ rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
"set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
@@ -992,7 +1139,7 @@ clear_mmio_p2m_entry(struct domain *d, u
"clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
return 0;
}
- rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+ rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
return rc;
}
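
The p2m changes above both install 2MB PSE entries and, in p2m_next_level(),
splinter one back into 512 4kB entries when a finer-grained mapping is
needed.  The PAT fix-up during splintering can be restated as a small helper
(a sketch, not part of the patch; the helper name is hypothetical and the
fallback #define is only for self-containment):

    #ifndef _PAGE_PSE
    #define _PAGE_PSE 0x080UL        /* x86 PSE bit; doubles as PAT in 4kB PTEs */
    #endif

    /* In a PSE (2MB) entry, bit 0 of the PFN field is _PAGE_PSE_PAT; in the
     * resulting 4kB entries the same attribute is carried by _PAGE_PAT,
     * which shares its bit position with _PAGE_PSE. */
    static void splinter_superpage_flags(unsigned long *pfn, unsigned long *flags)
    {
        if ( *pfn & 1 )             /* _PAGE_PSE_PAT was set                 */
            *pfn -= 1;              /* clear it; _PAGE_PSE becomes _PAGE_PAT */
        else
            *flags &= ~_PAGE_PSE;   /* clear _PAGE_PSE (== _PAGE_PAT)        */
    }
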
diff -r 29dc52031954 -r 8d18e52a1b23 xen/common/grant_table.c
--- a/xen/common/grant_table.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/common/grant_table.c Thu May 15 11:09:16 2008 +0100
@@ -1159,7 +1159,7 @@ gnttab_transfer(
spin_lock(&e->grant_table->lock);
sha = &shared_entry(e->grant_table, gop.ref);
- guest_physmap_add_page(e, sha->frame, mfn);
+ guest_physmap_add_page(e, sha->frame, mfn, 0);
sha->frame = mfn;
wmb();
sha->flags |= GTF_transfer_completed;
diff -r 29dc52031954 -r 8d18e52a1b23 xen/common/memory.c
--- a/xen/common/memory.c Thu May 15 09:38:00 2008 +0100
+++ b/xen/common/memory.c Thu May 15 11:09:16 2008 +0100
@@ -127,9 +127,7 @@ static void populate_physmap(struct memo
if ( unlikely(paging_mode_translate(d)) )
{
- for ( j = 0; j < (1 << a->extent_order); j++ )
- if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
- goto out;
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
}
else
{
@@ -172,7 +170,7 @@ int guest_remove_page(struct domain *d,
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
- guest_physmap_remove_page(d, gmfn, mfn);
+ guest_physmap_remove_page(d, gmfn, mfn, 0);
put_page(page);
@@ -419,7 +417,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
- guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
+ guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
put_page(page);
}
@@ -440,9 +438,7 @@ static long memory_exchange(XEN_GUEST_HA
mfn = page_to_mfn(page);
if ( unlikely(paging_mode_translate(d)) )
{
- /* Ignore failure here. There's nothing we can do. */
- for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
- (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
+ guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
}
else
{
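
With guest_physmap_add_page() taking a page order, the common handlers above
hand a whole extent to the p2m code in one call instead of looping over
individual frames.  A sketch of the calling convention (hypothetical caller,
not taken from the patch):

    /* Order 9 enters one 2MB extent (512 contiguous frames starting at gpfn,
     * backed by mfn) into the p2m; order 0 keeps the old per-frame behaviour. */
    if ( paging_mode_translate(d) )
        guest_physmap_add_page(d, gpfn, mfn, extent_order);
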
diff -r 29dc52031954 -r 8d18e52a1b23 xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h Thu May 15 09:38:00 2008 +0100
+++ b/xen/include/asm-ia64/grant_table.h Thu May 15 11:09:16 2008 +0100
@@ -13,7 +13,7 @@ int replace_grant_host_mapping(unsigned
int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn,
unsigned long new_gpaddr, unsigned int flags);
// for grant transfer
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, int order);
/* XXX
* somewhere appropriate
diff -r 29dc52031954 -r 8d18e52a1b23 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Thu May 15 09:38:00 2008 +0100
+++ b/xen/include/asm-ia64/shadow.h Thu May 15 11:09:16 2008 +0100
@@ -40,8 +40,10 @@
* Utilities to change relationship of gpfn->mfn for designated domain,
* which is required by gnttab transfer, balloon, device model and etc.
*/
-int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
-void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
+int guest_physmap_add_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
+void guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
+ unsigned long mfn, unsigned int page_order);
static inline int
shadow_mode_enabled(struct domain *d)
diff -r 29dc52031954 -r 8d18e52a1b23 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu May 15 09:38:00 2008 +0100
+++ b/xen/include/asm-x86/p2m.h Thu May 15 11:09:16 2008 +0100
@@ -102,7 +102,8 @@ struct p2m_domain {
void (*free_page )(struct domain *d,
struct page_info *pg);
int (*set_entry )(struct domain *d, unsigned long gfn,
- mfn_t mfn, p2m_type_t p2mt);
+ mfn_t mfn, unsigned int page_order,
+ p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
p2m_type_t *p2mt);
mfn_t (*get_entry_current)(unsigned long gfn,
@@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
- unsigned long mfn, p2m_type_t t);
+ unsigned long mfn, unsigned int page_order,
+ p2m_type_t t);
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
*/
static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
- unsigned long mfn)
-{
- return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
+ unsigned long mfn,
+ unsigned int page_order)
+{
+ return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
}
/* Remove a page from a domain's p2m table */
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
- unsigned long mfn);
+ unsigned long mfn, unsigned int page_order);
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
diff -r 29dc52031954 -r 8d18e52a1b23 xen/include/xen/paging.h
--- a/xen/include/xen/paging.h Thu May 15 09:38:00 2008 +0100
+++ b/xen/include/xen/paging.h Thu May 15 11:09:16 2008 +0100
@@ -18,8 +18,8 @@
#else
#define paging_mode_translate(d) (0)
-#define guest_physmap_add_page(d, p, m) (0)
-#define guest_physmap_remove_page(d, p, m) ((void)0)
+#define guest_physmap_add_page(d, p, m, o) (0)
+#define guest_physmap_remove_page(d, p, m, o) ((void)0)
#endif