# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1192628299 -3600
# Node ID ca2984b17fcf134cd675248499e8ed90125774ba
# Parent b4278beaf3549f410a5a6086dbd8af93c495aeac
x86: Tighten handling of page-type attributes and make
map_pages_to_xen() smarter and safer.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
xen/arch/x86/mm.c | 136 ++++++++++++++++++++++++++-----
xen/arch/x86/smp.c | 2
xen/include/asm-x86/mm.h | 1
xen/include/asm-x86/page.h | 7 -
xen/include/asm-x86/x86_32/page-3level.h | 2
xen/include/asm-x86/x86_32/page.h | 4
xen/include/asm-x86/x86_64/page.h | 6 -
7 files changed, 126 insertions(+), 32 deletions(-)
diff -r b4278beaf354 -r ca2984b17fcf xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/x86/mm.c Wed Oct 17 14:38:19 2007 +0100
@@ -149,6 +149,13 @@ unsigned long max_page;
unsigned long max_page;
unsigned long total_pages;
+#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
+
+#define l1_disallow_mask(d) \
+ ((rangeset_is_empty((d)->iomem_caps) && \
+ rangeset_is_empty((d)->arch.ioport_caps)) ? \
+ L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS))
+
#ifdef CONFIG_COMPAT
l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
#define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ? \
@@ -612,14 +619,17 @@ get_page_from_l1e(
{
unsigned long mfn = l1e_get_pfn(l1e);
struct page_info *page = mfn_to_page(mfn);
+ unsigned int disallow_mask;
int okay;
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
return 1;
- if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
- {
- MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
+ disallow_mask = l1_disallow_mask((d == dom_io) ? current->domain : d);
+ if ( unlikely(l1e_get_flags(l1e) & disallow_mask) )
+ {
+ MEM_LOG("Bad L1 flags %x",
+ l1e_get_flags(l1e) & disallow_mask);
return 0;
}
@@ -1367,10 +1377,10 @@ static int mod_l1_entry(l1_pgentry_t *pl
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
- if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
+ if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
{
MEM_LOG("Bad L1 flags %x",
- l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
+ l1e_get_flags(nl1e) & l1_disallow_mask(d));
return 0;
}
@@ -1574,7 +1584,7 @@ static int mod_l4_entry(struct domain *d
#endif
-int alloc_page_type(struct page_info *page, unsigned long type)
+static int alloc_page_type(struct page_info *page, unsigned long type)
{
struct domain *owner = page_get_owner(page);
@@ -3524,37 +3534,71 @@ void free_xen_pagetable(void *v)
free_domheap_page(virt_to_page(v));
}
+/* Convert to/from superpage-mapping flags for map_pages_to_xen(). */
+#define l1f_to_l2f(f) ((f) | _PAGE_PSE)
+#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE)
+
+/*
+ * map_pages_to_xen() can be called with interrupts disabled:
+ * * During early bootstrap; or
+ * * alloc_xenheap_pages() via memguard_guard_range
+ * In these cases it is safe to use flush_area_local():
+ * * Because only the local CPU is online; or
+ * * Because stale TLB entries do not matter for memguard_[un]guard_range().
+ */
+#define flush_area(v,f) (!local_irq_is_enabled() ? \
+ flush_area_local((const void *)v, f) : \
+ flush_area_all((const void *)v, f))
+
int map_pages_to_xen(
unsigned long virt,
unsigned long mfn,
unsigned long nr_mfns,
- unsigned long flags)
+ unsigned int flags)
{
l2_pgentry_t *pl2e, ol2e;
l1_pgentry_t *pl1e, ol1e;
unsigned int i;
- unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
- flags &= ~MAP_SMALL_PAGES;
-
while ( nr_mfns != 0 )
{
pl2e = virt_to_xen_l2e(virt);
if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
(nr_mfns >= (1<<PAGETABLE_ORDER)) &&
- !map_small_pages )
+ !(flags & (_PAGE_PAT|MAP_SMALL_PAGES)) )
{
/* Super-page mapping. */
ol2e = *pl2e;
- l2e_write_atomic(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
+ l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags)));
if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
{
- flush_area_local((const void *)virt,
- FLUSH_TLB_GLOBAL|FLUSH_LEVEL(2));
- if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
- free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
+ unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
+
+ if ( l2e_get_flags(ol2e) & _PAGE_PSE )
+ {
+ if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+ if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) &
+ l1f_to_l2f(PAGE_CACHE_ATTRS) )
+ flush_flags |= FLUSH_CACHE;
+ flush_area(virt, flush_flags);
+ }
+ else
+ {
+ pl1e = l2e_to_l1e(ol2e);
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
+ if ( l1e_get_flags(pl1e[i]) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+ if ( (l1e_get_flags(pl1e[i]) ^ flags) &
+ PAGE_CACHE_ATTRS )
+ flush_flags |= FLUSH_CACHE;
+ }
+ flush_area(virt, flush_flags);
+ free_xen_pagetable(pl1e);
+ }
}
virt += 1UL << L2_PAGETABLE_SHIFT;
@@ -3567,32 +3611,83 @@ int map_pages_to_xen(
if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
{
pl1e = alloc_xen_pagetable();
+ if ( pl1e == NULL )
+ return -ENOMEM;
clear_page(pl1e);
l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
__PAGE_HYPERVISOR));
}
else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
{
+ unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
+
+ /* Skip this PTE if there is no change. */
+ if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) +
+ l1_table_offset(virt)) == mfn) &&
+ (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
+ ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) )
+ {
+ virt += 1UL << L1_PAGETABLE_SHIFT;
+ mfn += 1UL;
+ nr_mfns -= 1UL;
+ continue;
+ }
+
pl1e = alloc_xen_pagetable();
+ if ( pl1e == NULL )
+ return -ENOMEM;
+
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
l1e_write(&pl1e[i],
l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
- l2e_get_flags(*pl2e) & ~_PAGE_PSE));
+ l2f_to_l1f(l2e_get_flags(*pl2e))));
+
+ if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+
l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
__PAGE_HYPERVISOR));
- flush_area_local((const void *)virt,
- FLUSH_TLB_GLOBAL|FLUSH_LEVEL(2));
+ flush_area(virt, flush_flags);
}
pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
ol1e = *pl1e;
l1e_write_atomic(pl1e, l1e_from_pfn(mfn, flags));
if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
- flush_tlb_one_local(virt);
+ {
+ unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(1);
+ if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+ if ( (l1e_get_flags(ol1e) ^ flags) & PAGE_CACHE_ATTRS )
+ flush_flags |= FLUSH_CACHE;
+ flush_area(virt, flush_flags);
+ }
virt += 1UL << L1_PAGETABLE_SHIFT;
mfn += 1UL;
nr_mfns -= 1UL;
+
+ if ( (flags == PAGE_HYPERVISOR) &&
+ ((nr_mfns == 0) ||
+ ((((virt >> PAGE_SHIFT) | mfn) &
+ ((1 << PAGETABLE_ORDER) - 1)) == 0)) )
+ {
+ unsigned long base_mfn;
+ pl1e = l2e_to_l1e(*pl2e);
+ base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1);
+ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ )
+ if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) ||
+ (l1e_get_flags(*pl1e) != flags) )
+ break;
+ if ( i == L1_PAGETABLE_ENTRIES )
+ {
+ ol2e = *pl2e;
+ l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
+ l1f_to_l2f(flags)));
+ flush_area(virt, FLUSH_TLB_GLOBAL | FLUSH_LEVEL(2));
+ free_xen_pagetable(l2e_to_l1e(ol2e));
+ }
+ }
}
}
@@ -3659,6 +3754,7 @@ void destroy_xen_mappings(unsigned long
{
/* Empty: zap the L2E and free the L1 page. */
l2e_write_atomic(pl2e, l2e_empty());
+ flush_all(FLUSH_TLB_GLOBAL); /* flush before free */
free_xen_pagetable(pl1e);
}
}
diff -r b4278beaf354 -r ca2984b17fcf xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/x86/smp.c Wed Oct 17 14:38:19 2007 +0100
@@ -182,7 +182,7 @@ void flush_area_mask(cpumask_t mask, con
void flush_area_mask(cpumask_t mask, const void *va, unsigned int flags)
{
ASSERT(local_irq_is_enabled());
-
+
if ( cpu_isset(smp_processor_id(), mask) )
{
flush_area_local(va, flags);
diff -r b4278beaf354 -r ca2984b17fcf xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/asm-x86/mm.h Wed Oct 17 14:38:19 2007 +0100
@@ -144,7 +144,6 @@ extern unsigned long total_pages;
extern unsigned long total_pages;
void init_frametable(void);
-int alloc_page_type(struct page_info *page, unsigned long type);
void free_page_type(struct page_info *page, unsigned long type);
int _shadow_mode_refcounts(struct domain *d);
diff -r b4278beaf354 -r ca2984b17fcf xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/asm-x86/page.h Wed Oct 17 14:38:19 2007 +0100
@@ -355,13 +355,12 @@ l2_pgentry_t *virt_to_xen_l2e(unsigned l
l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
/* Map machine page range in Xen virtual address space. */
-#define MAP_SMALL_PAGES (1UL<<16) /* don't use superpages for the mapping */
-int
-map_pages_to_xen(
+#define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */
+int map_pages_to_xen(
unsigned long virt,
unsigned long mfn,
unsigned long nr_mfns,
- unsigned long flags);
+ unsigned int flags);
void destroy_xen_mappings(unsigned long v, unsigned long e);
#endif /* !__ASSEMBLY__ */
diff -r b4278beaf354 -r ca2984b17fcf xen/include/asm-x86/x86_32/page-3level.h
--- a/xen/include/asm-x86/x86_32/page-3level.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/asm-x86/x86_32/page-3level.h Wed Oct 17 14:38:19 2007 +0100
@@ -85,6 +85,6 @@ typedef l3_pgentry_t root_pgentry_t;
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
-#define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
+#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
#endif /* __X86_32_PAGE_3LEVEL_H__ */
diff -r b4278beaf354 -r ca2984b17fcf xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/asm-x86/x86_32/page.h Wed Oct 17 14:38:19 2007 +0100
@@ -33,10 +33,10 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
* Permit the NX bit if the hardware supports it.
*/
-#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
diff -r b4278beaf354 -r ca2984b17fcf xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/asm-x86/x86_64/page.h Wed Oct 17 14:38:19 2007 +0100
@@ -105,18 +105,18 @@ typedef l4_pgentry_t root_pgentry_t;
#define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U)
/*
- * Disallow unused flag bits plus PAT, PSE and GLOBAL.
+ * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
* Permit the NX bit if the hardware supports it.
* Note that range [62:52] is available for software use on x86/64.
*/
-#define BASE_DISALLOW_MASK (0xFF800180U & ~_PAGE_NX)
+#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
-#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1E6U
+#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU
#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog