# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID bfd00b317815f2d1c8989b55a4cfd174da043e43
# Parent 6f36370e373a4a75d0c3a6695a3cef7f1adb8ce6
[XEN] Revert changeset 11438. Needs fixing for PAE.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
xen/arch/ia64/xen/mm.c | 95 ++++++----
xen/arch/powerpc/mm.c | 82 +++++---
xen/arch/x86/domain_build.c | 8
xen/arch/x86/mm.c | 285 ++++++++++++++++++++-----------
xen/arch/x86/mm/shadow/common.c | 10 -
xen/arch/x86/mm/shadow/multi.c | 2
xen/include/asm-ia64/mm.h | 8
xen/include/asm-powerpc/mm.h | 8
xen/include/asm-x86/mm.h | 50 +++++
xen/include/asm-x86/x86_32/page-3level.h | 2
10 files changed, 392 insertions(+), 158 deletions(-)
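For context (not part of the changeset itself): the revert restores the older scheme in which a page-table page's type_info word carries a virtual-address back pointer in the PGT_va_* field, rather than the single PGT_pae_xen_l2 flag that the diff removes. The sketch below shows how that field is encoded and decoded on 32-bit non-PAE x86, following the encoding done in get_page_from_l2e() in the xen/arch/x86/mm.c hunk. The PGT_va_* values are copied from the asm-x86/mm.h hunk; the L2_PAGETABLE_SHIFT value and the helper names are assumptions made for this illustration only.

/*
 * Illustrative sketch only -- not code from the changeset.  PGT_va_* values
 * are taken from the 32-bit (non-PAE) x86 hunk of xen/include/asm-x86/mm.h;
 * L2_PAGETABLE_SHIFT and the helper names are assumptions for this example.
 */
#include <stdio.h>

#define PGT_va_shift        16
#define PGT_va_mask         (((1U << 11) - 1) << PGT_va_shift)
#define PGT_va_mutable      (((1U << 11) - 1) << PGT_va_shift)  /* back pointer not fixed yet */
#define PGT_va_unknown      (((1U << 11) - 2) << PGT_va_shift)  /* mapped at multiple VAs */

#define L2_PAGETABLE_SHIFT  22    /* assumed: one L2 slot covers 4MB on 2-level x86 */

/* Encode the VA at which an L1 table is mapped, as get_page_from_l2e() does. */
static unsigned long va_backptr_from_vaddr(unsigned long vaddr)
{
    return (vaddr >> L2_PAGETABLE_SHIFT) << PGT_va_shift;
}

/* Recover that VA from a page's type_info (the inverse of the encoding above). */
static unsigned long vaddr_from_type_info(unsigned long type_info)
{
    return ((type_info & PGT_va_mask) >> PGT_va_shift) << L2_PAGETABLE_SHIFT;
}

int main(void)
{
    unsigned long vaddr = 0xc0400000UL;            /* example guest virtual address */
    unsigned long backptr = va_backptr_from_vaddr(vaddr);

    printf("backptr field = %#lx\n", backptr);                        /* 0x3010000 */
    printf("recovered VA  = %#lx\n", vaddr_from_type_info(backptr));  /* 0xc0400000 */
    printf("mutable? %d  unknown? %d\n",
           (backptr & PGT_va_mask) == PGT_va_mutable,
           (backptr & PGT_va_mask) == PGT_va_unknown);                /* 0 0 */
    return 0;
}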
diff -r 6f36370e373a -r bfd00b317815 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/ia64/xen/mm.c Mon Sep 11 01:55:03 2006 +0100
@@ -1624,6 +1624,13 @@ void put_page_type(struct page_info *pag
nx &= ~PGT_validated;
}
}
+ else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
+ (PGT_pinned | 1)) &&
+ ((nx & PGT_type_mask) != PGT_writable_page)) )
+ {
+ /* Page is now only pinned. Make the back pointer mutable again. */
+ nx |= PGT_va_mutable;
+ }
}
while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x)
);
}
@@ -1632,8 +1639,6 @@ int get_page_type(struct page_info *page
int get_page_type(struct page_info *page, u32 type)
{
u32 nx, x, y = page->u.inuse.type_info;
-
- ASSERT(!(type & ~PGT_type_mask));
again:
do {
@@ -1646,25 +1651,29 @@ int get_page_type(struct page_info *page
}
else if ( unlikely((x & PGT_count_mask) == 0) )
{
- if ( (x & PGT_type_mask) != type )
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
{
- /*
- * On type change we check to flush stale TLB entries. This
- * may be unnecessary (e.g., page was GDT/LDT) but those
- * circumstances should be very rare.
- */
- cpumask_t mask =
- page_get_owner(page)->domain_dirty_cpumask;
- tlbflush_filter(mask, page->tlbflush_timestamp);
-
- if ( unlikely(!cpus_empty(mask)) )
+ if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
{
- perfc_incrc(need_flush_tlb_flush);
- flush_tlb_mask(mask);
+ /*
+ * On type change we check to flush stale TLB
+ * entries. This may be unnecessary (e.g., page
+ * was GDT/LDT) but those circumstances should be
+ * very rare.
+ */
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
+ tlbflush_filter(mask, page->tlbflush_timestamp);
+
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
}
/* We lose existing type, back pointer, and validity. */
- nx &= ~(PGT_type_mask | PGT_validated);
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
nx |= type;
/* No special validation needed for writable pages. */
@@ -1673,22 +1682,46 @@ int get_page_type(struct page_info *page
nx |= PGT_validated;
}
}
- else if ( unlikely((x & PGT_type_mask) != type) )
+ else
{
- if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
- (type != PGT_l1_page_table) )
- MEM_LOG("Bad type (saw %08x != exp %08x) "
- "for mfn %016lx (pfn %016lx)",
- x, type, page_to_mfn(page),
- get_gpfn_from_mfn(page_to_mfn(page)));
- return 0;
- }
- else if ( unlikely(!(x & PGT_validated)) )
- {
- /* Someone else is updating validation of this page. Wait... */
- while ( (y = page->u.inuse.type_info) == x )
- cpu_relax();
- goto again;
+ if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+ {
+ if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+ {
+ if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+ ((type & PGT_type_mask) != PGT_l1_page_table) )
+ MEM_LOG("Bad type (saw %08x != exp %08x) "
+ "for mfn %016lx (pfn %016lx)",
+ x, type, page_to_mfn(page),
+ get_gpfn_from_mfn(page_to_mfn(page)));
+ return 0;
+ }
+ else if ( (x & PGT_va_mask) == PGT_va_mutable )
+ {
+ /* The va backpointer is mutable, hence we update it. */
+ nx &= ~PGT_va_mask;
+ nx |= type; /* we know the actual type is correct */
+ }
+ else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
+ ((type & PGT_va_mask) != (x & PGT_va_mask)) )
+ {
+#ifdef CONFIG_X86_PAE
+ /* We use backptr as extra typing. Cannot be unknown. */
+ if ( (type & PGT_type_mask) == PGT_l2_page_table )
+ return 0;
+#endif
+ /* This table is possibly mapped at multiple locations. */
+ nx &= ~PGT_va_mask;
+ nx |= PGT_va_unknown;
+ }
+ }
+ if ( unlikely(!(x & PGT_validated)) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) == x )
+ cpu_relax();
+ goto again;
+ }
}
}
while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x)
);
diff -r 6f36370e373a -r bfd00b317815 xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/powerpc/mm.c Mon Sep 11 01:55:03 2006 +0100
@@ -87,6 +87,12 @@ void put_page_type(struct page_info *pag
/* Record TLB information for flush later. */
page->tlbflush_timestamp = tlbflush_current_time();
}
+ else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) ==
+ (PGT_pinned | 1)) )
+ {
+ /* Page is now only pinned. Make the back pointer mutable again. */
+ nx |= PGT_va_mutable;
+ }
}
while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
}
@@ -95,8 +101,6 @@ int get_page_type(struct page_info *page
int get_page_type(struct page_info *page, unsigned long type)
{
unsigned long nx, x, y = page->u.inuse.type_info;
-
- ASSERT(!(type & ~PGT_type_mask));
again:
do {
@@ -109,25 +113,29 @@ int get_page_type(struct page_info *page
}
else if ( unlikely((x & PGT_count_mask) == 0) )
{
- if ( (x & PGT_type_mask) != type )
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
{
- /*
- * On type change we check to flush stale TLB entries. This
- * may be unnecessary (e.g., page was GDT/LDT) but those
- * circumstances should be very rare.
- */
- cpumask_t mask =
- page_get_owner(page)->domain_dirty_cpumask;
- tlbflush_filter(mask, page->tlbflush_timestamp);
-
- if ( unlikely(!cpus_empty(mask)) )
- {
- perfc_incrc(need_flush_tlb_flush);
- flush_tlb_mask(mask);
+ if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
+ {
+ /*
+ * On type change we check to flush stale TLB
+ * entries. This may be unnecessary (e.g., page
+ * was GDT/LDT) but those circumstances should be
+ * very rare.
+ */
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
+ tlbflush_filter(mask, page->tlbflush_timestamp);
+
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
}
/* We lose existing type, back pointer, and validity. */
- nx &= ~(PGT_type_mask | PGT_validated);
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
nx |= type;
/* No special validation needed for writable pages. */
@@ -136,16 +144,36 @@ int get_page_type(struct page_info *page
nx |= PGT_validated;
}
}
- else if ( unlikely((x & PGT_type_mask) != type) )
- {
- return 0;
- }
- if ( unlikely(!(x & PGT_validated)) )
- {
- /* Someone else is updating validation of this page. Wait... */
- while ( (y = page->u.inuse.type_info) == x )
- cpu_relax();
- goto again;
+ else
+ {
+ if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+ {
+ if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+ {
+ return 0;
+ }
+ else if ( (x & PGT_va_mask) == PGT_va_mutable )
+ {
+ /* The va backpointer is mutable, hence we update it. */
+ nx &= ~PGT_va_mask;
+ nx |= type; /* we know the actual type is correct */
+ }
+ else if ( (type & PGT_va_mask) != PGT_va_mutable )
+ {
+ ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+
+ /* This table is possibly mapped at multiple locations. */
+ nx &= ~PGT_va_mask;
+ nx |= PGT_va_unknown;
+ }
+ }
+ if ( unlikely(!(x & PGT_validated)) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) == x )
+ cpu_relax();
+ goto again;
+ }
}
}
while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/domain_build.c Mon Sep 11 01:55:03 2006 +0100
@@ -510,13 +510,15 @@ int construct_dom0(struct domain *d,
case 1 ... 4:
page->u.inuse.type_info &= ~PGT_type_mask;
page->u.inuse.type_info |= PGT_l2_page_table;
- if ( count == 4 )
- page->u.inuse.type_info |= PGT_pae_xen_l2;
+ page->u.inuse.type_info |=
+ (count-1) << PGT_va_shift;
get_page(page, d); /* an extra ref because of readable mapping */
break;
default:
page->u.inuse.type_info &= ~PGT_type_mask;
page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
get_page(page, d); /* an extra ref because of readable mapping */
break;
}
@@ -542,6 +544,8 @@ int construct_dom0(struct domain *d,
{
page->u.inuse.type_info &= ~PGT_type_mask;
page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
/*
* No longer writable: decrement the type_count.
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm.c Mon Sep 11 01:55:03 2006 +0100
@@ -625,7 +625,8 @@ get_page_from_l1e(
/* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
static int
get_page_from_l2e(
- l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
+ l2_pgentry_t l2e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -638,7 +639,10 @@ get_page_from_l2e(
return 0;
}
- rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
+ vaddr >>= L2_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
#if CONFIG_PAGING_LEVELS == 2
if ( unlikely(!rc) )
rc = get_linear_pagetable(l2e, pfn, d);
@@ -650,7 +654,8 @@ get_page_from_l2e(
#if CONFIG_PAGING_LEVELS >= 3
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+ l3_pgentry_t l3e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -663,7 +668,11 @@ get_page_from_l3e(
return 0;
}
- rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
+ vaddr >>= L3_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l3e_get_pfn(l3e),
+ PGT_l2_page_table | vaddr, d);
return rc;
}
#endif /* 3 level */
@@ -671,7 +680,8 @@ get_page_from_l3e(
#if CONFIG_PAGING_LEVELS >= 4
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
+ l4_pgentry_t l4e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -684,7 +694,11 @@ get_page_from_l4e(
return 0;
}
- rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
+ vaddr >>= L4_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l4e_get_pfn(l4e),
+ PGT_l3_page_table | vaddr, d);
if ( unlikely(!rc) )
rc = get_linear_pagetable(l4e, pfn, d);
@@ -863,8 +877,8 @@ static int create_pae_xen_mappings(l3_pg
/*
* The Xen-private mappings include linear mappings. The L2 thus cannot
* be shared by multiple L3 tables. The test here is adequate because:
- * 1. Cannot appear in slots != 3 because get_page_type() checks the
- * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
+ * 1. Cannot appear in slots != 3 because the page would then have an
+ * unknown va backpointer, which get_page_type() explicitly disallows.
* 2. Cannot appear in another page table's L3:
* a. alloc_l3_table() calls this function and this check will fail
* b. mod_l3_entry() disallows updates to slot 3 in an existing table
@@ -874,7 +888,6 @@ static int create_pae_xen_mappings(l3_pg
page = l3e_get_page(l3e3);
BUG_ON(page->u.inuse.type_info & PGT_pinned);
BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
- BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
{
MEM_LOG("PAE L3 3rd slot is shared");
@@ -936,17 +949,61 @@ static void pae_flush_pgd(
flush_tlb_mask(d->domain_dirty_cpumask);
}
+static inline int l1_backptr(
+ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+ unsigned long l2_backptr = l2_type & PGT_va_mask;
+ ASSERT(l2_backptr != PGT_va_unknown);
+ ASSERT(l2_backptr != PGT_va_mutable);
+ *backptr =
+ ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
+ (offset_in_l2 << L2_PAGETABLE_SHIFT);
+ return 1;
+}
+
#elif CONFIG_X86_64
# define create_pae_xen_mappings(pl3e) (1)
# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
+
+static inline int l1_backptr(
+ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
+{
+ unsigned long l2_backptr = l2_type & PGT_va_mask;
+ ASSERT(l2_backptr != PGT_va_unknown);
+ ASSERT(l2_backptr != PGT_va_mutable);
+ *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
+ (offset_in_l2 << L2_PAGETABLE_SHIFT);
+ return 1;
+}
+
+static inline int l2_backptr(
+ unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type)
+{
+ unsigned long l3_backptr = l3_type & PGT_va_mask;
+ ASSERT(l3_backptr != PGT_va_unknown);
+ ASSERT(l3_backptr != PGT_va_mutable);
+ *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) |
+ (offset_in_l3 << L3_PAGETABLE_SHIFT);
+ return 1;
+}
+
+static inline int l3_backptr(
+ unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type)
+{
+ *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT);
+ return 1;
+}
#else
# define create_pae_xen_mappings(pl3e) (1)
+# define l1_backptr(bp,l2o,l2t) \
+ ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
#endif
static int alloc_l2_table(struct page_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
+ unsigned long vaddr;
l2_pgentry_t *pl2e;
int i;
@@ -956,8 +1013,10 @@ static int alloc_l2_table(struct page_in
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
+ if ( !l1_backptr(&vaddr, i, type) )
+ goto fail;
if ( is_guest_l2_slot(type, i) &&
- unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
+ unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
goto fail;
adjust_guest_l2e(pl2e[i]);
@@ -992,10 +1051,11 @@ static int alloc_l2_table(struct page_in
#if CONFIG_PAGING_LEVELS >= 3
-static int alloc_l3_table(struct page_info *page)
+static int alloc_l3_table(struct page_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
+ unsigned long vaddr;
l3_pgentry_t *pl3e;
int i;
@@ -1019,21 +1079,14 @@ static int alloc_l3_table(struct page_in
pl3e = map_domain_page(pfn);
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
{
-#ifdef CONFIG_X86_PAE
- if ( i == 3 )
- {
- if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
- (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) ||
- !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
- PGT_l2_page_table |
- PGT_pae_xen_l2,
- d) )
- goto fail;
- }
- else
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( !l2_backptr(&vaddr, i, type) )
+ goto fail;
+#else
+ vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
#endif
if ( is_guest_l3_slot(i) &&
- unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+ unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
adjust_guest_l3e(pl3e[i]);
@@ -1055,23 +1108,27 @@ static int alloc_l3_table(struct page_in
return 0;
}
#else
-#define alloc_l3_table(page) (0)
+#define alloc_l3_table(page, type) (0)
#endif
#if CONFIG_PAGING_LEVELS >= 4
-static int alloc_l4_table(struct page_info *page)
+static int alloc_l4_table(struct page_info *page, unsigned long type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
+ unsigned long vaddr;
int i;
ASSERT(!shadow_mode_refcounts(d));
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
{
+ if ( !l3_backptr(&vaddr, i, type) )
+ goto fail;
+
if ( is_guest_l4_slot(i) &&
- unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+ unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
goto fail;
adjust_guest_l4e(pl4e[i]);
@@ -1099,7 +1156,7 @@ static int alloc_l4_table(struct page_in
return 0;
}
#else
-#define alloc_l4_table(page) (0)
+#define alloc_l4_table(page, type) (0)
#endif
@@ -1133,8 +1190,6 @@ static void free_l2_table(struct page_in
put_page_from_l2e(pl2e[i], pfn);
unmap_domain_page(pl2e);
-
- page->u.inuse.type_info &= ~PGT_pae_xen_l2;
}
@@ -1302,6 +1357,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
unsigned long type)
{
l2_pgentry_t ol2e;
+ unsigned long vaddr = 0;
if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
{
@@ -1327,7 +1383,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain)) )
+ if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) ||
+ unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) )
@@ -1350,9 +1407,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
static int mod_l3_entry(l3_pgentry_t *pl3e,
l3_pgentry_t nl3e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned long type)
{
l3_pgentry_t ol3e;
+ unsigned long vaddr;
int okay;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1388,8 +1447,16 @@ static int mod_l3_entry(l3_pgentry_t *pl
if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
+#if CONFIG_PAGING_LEVELS >= 4
+ if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
+ unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
return 0;
+#else
+ vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
+ << L3_PAGETABLE_SHIFT;
+ if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
+ return 0;
+#endif
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) )
{
@@ -1418,9 +1485,11 @@ static int mod_l3_entry(l3_pgentry_t *pl
/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
static int mod_l4_entry(l4_pgentry_t *pl4e,
l4_pgentry_t nl4e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned long type)
{
l4_pgentry_t ol4e;
+ unsigned long vaddr;
if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
{
@@ -1446,7 +1515,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
- if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
+ if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
+ unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) )
@@ -1480,9 +1550,9 @@ int alloc_page_type(struct page_info *pa
case PGT_l2_page_table:
return alloc_l2_table(page, type);
case PGT_l3_page_table:
- return alloc_l3_table(page);
+ return alloc_l3_table(page, type);
case PGT_l4_page_table:
- return alloc_l4_table(page);
+ return alloc_l4_table(page, type);
case PGT_gdt_page:
case PGT_ldt_page:
return alloc_segdesc_page(page);
@@ -1602,6 +1672,12 @@ void put_page_type(struct page_info *pag
/* Record TLB information for flush later. */
page->tlbflush_timestamp = tlbflush_current_time();
}
+ else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) ==
+ (PGT_pinned|PGT_l1_page_table|1)) )
+ {
+ /* Page is now only pinned. Make the back pointer mutable again. */
+ nx |= PGT_va_mutable;
+ }
}
while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
}
@@ -1610,8 +1686,6 @@ int get_page_type(struct page_info *page
int get_page_type(struct page_info *page, unsigned long type)
{
unsigned long nx, x, y = page->u.inuse.type_info;
-
- ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
again:
do {
@@ -1624,26 +1698,29 @@ int get_page_type(struct page_info *page
}
else if ( unlikely((x & PGT_count_mask) == 0) )
{
- ASSERT(!(x & PGT_pae_xen_l2));
- if ( (x & PGT_type_mask) != type )
+ if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
{
- /*
- * On type change we check to flush stale TLB entries. This
- * may be unnecessary (e.g., page was GDT/LDT) but those
- * circumstances should be very rare.
- */
- cpumask_t mask =
- page_get_owner(page)->domain_dirty_cpumask;
- tlbflush_filter(mask, page->tlbflush_timestamp);
-
- if ( unlikely(!cpus_empty(mask)) )
+ if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
{
- perfc_incrc(need_flush_tlb_flush);
- flush_tlb_mask(mask);
+ /*
+ * On type change we check to flush stale TLB
+ * entries. This may be unnecessary (e.g., page
+ * was GDT/LDT) but those circumstances should be
+ * very rare.
+ */
+ cpumask_t mask =
+ page_get_owner(page)->domain_dirty_cpumask;
+ tlbflush_filter(mask, page->tlbflush_timestamp);
+
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
}
/* We lose existing type, back pointer, and validity. */
- nx &= ~(PGT_type_mask | PGT_validated);
+ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
nx |= type;
/* No special validation needed for writable pages. */
@@ -1652,23 +1729,51 @@ int get_page_type(struct page_info *page
nx |= PGT_validated;
}
}
- else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
- {
- if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
- (type != PGT_l1_page_table) )
- MEM_LOG("Bad type (saw %" PRtype_info
- " != exp %" PRtype_info ") "
- "for mfn %lx (pfn %lx)",
- x, type, page_to_mfn(page),
- get_gpfn_from_mfn(page_to_mfn(page)));
- return 0;
- }
- else if ( unlikely(!(x & PGT_validated)) )
- {
- /* Someone else is updating validation of this page. Wait... */
- while ( (y = page->u.inuse.type_info) == x )
- cpu_relax();
- goto again;
+ else
+ {
+ if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
+ {
+ if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
+ {
+ if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
+ ((type & PGT_type_mask) != PGT_l1_page_table) )
+ MEM_LOG("Bad type (saw %" PRtype_info
+ " != exp %" PRtype_info ") "
+ "for mfn %lx (pfn %lx)",
+ x, type, page_to_mfn(page),
+ get_gpfn_from_mfn(page_to_mfn(page)));
+ return 0;
+ }
+ else if ( (x & PGT_va_mask) == PGT_va_mutable )
+ {
+ /* The va backpointer is mutable, hence we update it. */
+ nx &= ~PGT_va_mask;
+ nx |= type; /* we know the actual type is correct */
+ }
+ else if ( (type & PGT_va_mask) != PGT_va_mutable )
+ {
+ ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
+#ifdef CONFIG_X86_PAE
+ /* We use backptr as extra typing. Cannot be unknown. */
+ if ( (type & PGT_type_mask) == PGT_l2_page_table )
+ return 0;
+#endif
+ /* Fixme: add code to propagate va_unknown to subtables. */
+ if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
+ !shadow_mode_refcounts(page_get_owner(page)) )
+ return 0;
+ /* This table is possibly mapped at multiple locations. */
+ nx &= ~PGT_va_mask;
+ nx |= PGT_va_unknown;
+ }
+ }
+ if ( unlikely(!(x & PGT_validated)) )
+ {
+ /* Someone else is updating validation of this page. Wait... */
+ while ( (y = page->u.inuse.type_info) == x )
+ cpu_relax();
+ goto again;
+ }
}
}
while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
@@ -1925,25 +2030,19 @@ int do_mmuext_op(
switch ( op.cmd )
{
case MMUEXT_PIN_L1_TABLE:
- type = PGT_l1_page_table;
+ type = PGT_l1_page_table | PGT_va_mutable;
goto pin_page;
case MMUEXT_PIN_L2_TABLE:
- type = PGT_l2_page_table;
- goto pin_page;
-
case MMUEXT_PIN_L3_TABLE:
- type = PGT_l3_page_table;
- goto pin_page;
-
case MMUEXT_PIN_L4_TABLE:
- type = PGT_l4_page_table;
+ /* Ignore pinning of subdirectories. */
+ if ( (op.cmd - MMUEXT_PIN_L1_TABLE) != (CONFIG_PAGING_LEVELS - 1) )
+ break;
+
+ type = PGT_root_page_table;
pin_page:
- /* Ignore pinning of invalid paging levels. */
- if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
- break;
-
if ( shadow_mode_refcounts(FOREIGNDOM) )
break;
@@ -2227,7 +2326,7 @@ int do_mmu_update(
}
if ( unlikely(!get_page_type(
- page, type_info & PGT_type_mask)) )
+ page, type_info & (PGT_type_mask|PGT_va_mask))) )
goto not_a_pt;
switch ( type_info & PGT_type_mask )
@@ -2249,7 +2348,7 @@ int do_mmu_update(
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
- okay = mod_l3_entry(va, l3e, mfn);
+ okay = mod_l3_entry(va, l3e, mfn, type_info);
}
break;
#endif
@@ -2257,7 +2356,7 @@ int do_mmu_update(
case PGT_l4_page_table:
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
- okay = mod_l4_entry(va, l4e, mfn);
+ okay = mod_l4_entry(va, l4e, mfn, type_info);
}
break;
#endif
@@ -2355,7 +2454,7 @@ static int create_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
+ u32 type_info;
l1_pgentry_t ol1e;
struct domain *d = v->domain;
@@ -2376,8 +2475,9 @@ static int create_grant_pte_mapping(
va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ type_info = page->u.inuse.type_info;
+ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+ !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
{
MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
@@ -2411,7 +2511,7 @@ static int destroy_grant_pte_mapping(
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
+ u32 type_info;
l1_pgentry_t ol1e;
gmfn = addr >> PAGE_SHIFT;
@@ -2427,8 +2527,9 @@ static int destroy_grant_pte_mapping(
va = (void *)((unsigned long)va + (addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ type_info = page->u.inuse.type_info;
+ if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
+ !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
{
MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Mon Sep 11 01:55:03 2006 +0100
@@ -21,6 +21,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define SHADOW 1
+
#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
@@ -223,6 +225,7 @@ struct x86_emulate_ops shadow_emulator_o
.cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
};
+
/**************************************************************************/
/* Code for "promoting" a guest page to the point where the shadow code is
* willing to let it be treated as a guest page table. This generally
@@ -232,6 +235,7 @@ void shadow_promote(struct vcpu *v, mfn_
void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
{
struct page_info *page = mfn_to_page(gmfn);
+ unsigned long type_info;
ASSERT(valid_mfn(gmfn));
@@ -247,8 +251,10 @@ void shadow_promote(struct vcpu *v, mfn_
// vcpu or not, or even what kind of type we get; we just want the type
// count to be > 0.
//
- while ( !get_page_type(page, page->u.inuse.type_info & PGT_type_mask) )
- continue;
+ do {
+ type_info =
+ page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
+ } while ( !get_page_type(page, type_info) );
// Now that the type ref is non-zero, we can safely use the
// shadow_flags.
diff -r 6f36370e373a -r bfd00b317815 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Mon Sep 11 01:55:03 2006 +0100
@@ -34,6 +34,8 @@
// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
// space for both PV and HVM guests.
//
+
+#define SHADOW 1
#include <xen/config.h>
#include <xen/types.h>
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-ia64/mm.h Mon Sep 11 01:55:03 2006 +0100
@@ -102,6 +102,14 @@ struct page_info
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 32
+#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-powerpc/mm.h Mon Sep 11 01:55:03 2006 +0100
@@ -101,6 +101,14 @@ struct page_extents {
/* Has this page been validated for use as its current type? */
#define _PGT_validated 27
#define PGT_validated (1U<<_PGT_validated)
+
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 32
+#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-x86/mm.h Mon Sep 11 01:55:03 2006 +0100
@@ -75,6 +75,19 @@ struct page_info
#define PGT_gdt_page (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
+
+#ifndef SHADOW
+#define PGT_l1_shadow PGT_l1_page_table
+#define PGT_l2_shadow PGT_l2_page_table
+#define PGT_l3_shadow PGT_l3_page_table
+#define PGT_l4_shadow PGT_l4_page_table
+#define PGT_hl2_shadow (5U<<29)
+#define PGT_snapshot (6U<<29)
+#define PGT_writable_pred (7U<<29) /* predicted gpfn with writable ref */
+
+#define PGT_fl1_shadow (5U<<29)
+#endif
+
#define PGT_type_mask (7U<<29) /* Bits 29-31. */
/* Owning guest has pinned this page to its current type? */
@@ -83,12 +96,43 @@ struct page_info
/* Has this page been validated for use as its current type? */
#define _PGT_validated 27
#define PGT_validated (1U<<_PGT_validated)
- /* PAE only: is this an L2 page directory containing Xen-private mappings? */
-#define _PGT_pae_xen_l2 26
-#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
+#if defined(__i386__)
+ /* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 16
+#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
+#elif defined(__x86_64__)
+ /* The 27 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 32
+#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer still mutable (i.e. not fixed yet)? */
+#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
+ /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
+#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
+#endif
/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask ((1U<<16)-1)
+
+#ifndef SHADOW
+#ifdef __x86_64__
+#define PGT_high_mfn_shift 52
+#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
+#define PGT_mfn_mask (((1U<<27)-1) | PGT_high_mfn_mask)
+#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift)
+#else
+ /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
+#define PGT_mfn_mask ((1U<<23)-1)
+ /* NX for PAE xen is not supported yet */
+#define PGT_high_mfn_nx (1ULL << 63)
+
+#define PGT_score_shift 23
+#define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift)
+#endif
+#endif /* SHADOW */
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated 31
diff -r 6f36370e373a -r bfd00b317815 xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h Sat Sep 09 20:48:16 2006 +0100
+++ b/xen/include/asm-x86/x86_32/page-3level.h Mon Sep 11 01:55:03 2006 +0100
@@ -49,7 +49,7 @@ typedef l3_pgentry_t root_pgentry_t;
/* misc */
#define is_guest_l1_slot(s) (1)
#define is_guest_l2_slot(t,s) \
- ( !((t) & PGT_pae_xen_l2) || \
+ ( ((((t) & PGT_va_mask) >> PGT_va_shift) != 3) || \
((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
#define is_guest_l3_slot(s) (1)
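Taken together, the reverted hunks reinstate a small state machine on the va field of type_info: a pinned-but-unmapped page table has a mutable back pointer, taking a type reference with a concrete back pointer fixes it, a later mapping at a different virtual address demotes it to PGT_va_unknown, and put_page_type() makes it mutable again once only the pin reference remains (for L1 tables in the x86 hunk). The model below is an illustrative abstraction of those transitions, not hypervisor code; locking, TLB flushing, and the PAE L2 and shadow-mode special cases in get_page_type() are omitted, and all names here are invented for the example.

/*
 * Simplified model of the va back-pointer transitions reinstated in
 * get_page_type()/put_page_type().  Illustration only -- the real code also
 * handles reference counts, PAE L2 restrictions and shadow-mode refcounts.
 */
#include <assert.h>

enum { VA_SHIFT = 16, VA_BITS = 11 };
#define VA_MUTABLE  ((((unsigned long)1 << VA_BITS) - 1) << VA_SHIFT)  /* not fixed yet */
#define VA_UNKNOWN  ((((unsigned long)1 << VA_BITS) - 2) << VA_SHIFT)  /* multiple VAs */

/* Take a type reference with a requested back pointer; return the new va field. */
static unsigned long get_type_va(unsigned long cur_va, unsigned long want_va)
{
    if (cur_va == VA_MUTABLE)
        return want_va;             /* first concrete mapping fixes the pointer */
    if (want_va != VA_MUTABLE && want_va != cur_va)
        return VA_UNKNOWN;          /* table now mapped at more than one VA */
    return cur_va;                  /* otherwise unchanged */
}

/* Dropping the last non-pinned reference makes the pointer mutable again. */
static unsigned long put_type_va(unsigned long cur_va, int only_pin_left)
{
    return only_pin_left ? VA_MUTABLE : cur_va;
}

int main(void)
{
    unsigned long va = VA_MUTABLE;                 /* freshly pinned L1 table */
    va = get_type_va(va, 0x301UL << VA_SHIFT);     /* mapped at one L2 slot */
    assert(va == (0x301UL << VA_SHIFT));
    va = get_type_va(va, 0x302UL << VA_SHIFT);     /* mapped at a second slot */
    assert(va == VA_UNKNOWN);
    va = put_type_va(va, 1);                       /* only the pin remains */
    assert(va == VA_MUTABLE);
    return 0;
}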