# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID fc5736e0a2eb749a242ec1a750b8d7bf35095792
# Parent d3a9bcf61c331b8985c32cd3d68e7ced25d4da64
[x86_64] Guests no longer set _PAGE_USER on kernel mappings.
This may allow guest kernels to be run outside ring 3 in future, and
also provides scope for optimisations today (e.g., using global bit on
user mappings).
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c                |    3
 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c                 |    6
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h |   21 +--
 tools/libxc/xc_linux_build.c                                   |    8 -
 xen/arch/x86/domain_build.c                                    |    8 -
 xen/arch/x86/mm.c                                              |   61 +++++++++--
 6 files changed, 74 insertions(+), 33 deletions(-)
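In brief: an x86-64 PV guest kernel executes in ring 3, so its kernel
mappings must be user-accessible at the hardware level. Rather than the
guest setting _PAGE_USER on its own kernel mappings, Xen now ORs the bit
into present entries when it validates them. A minimal sketch of the
semantics, using the standard architectural flag bit positions
(illustrative code, not part of the patch):

    #include <stdint.h>

    #define _PAGE_PRESENT  (1UL << 0)
    #define _PAGE_RW       (1UL << 1)
    #define _PAGE_USER     (1UL << 2)
    #define _PAGE_ACCESSED (1UL << 5)
    #define _PAGE_DIRTY    (1UL << 6)

    /* What the guest now writes for a kernel mapping: no _PAGE_USER. */
    static const uint64_t guest_kernel_flags =
        _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;

    /* What Xen installs after validating the entry: _PAGE_USER is added
     * behind the guest's back, since the guest kernel runs in ring 3. */
    static inline uint64_t xen_adjust(uint64_t pte)
    {
        return (pte & _PAGE_PRESENT) ? (pte | _PAGE_USER) : pte;
    }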
diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Aug 22 15:26:40 2006 +0100
@@ -282,9 +282,6 @@ void __iomem * __ioremap(unsigned long p
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
-#ifdef __x86_64__
- flags |= _PAGE_USER;
-#endif
if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
phys_addr>>PAGE_SHIFT,
size, __pgprot(flags), domid)) {
diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Aug 22 15:26:40 2006 +0100
@@ -529,7 +529,7 @@ void __init xen_init_pt(void)
mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
level3_kernel_pgt[pud_index(__START_KERNEL_map)] =
__pud(__pa_symbol(level2_kernel_pgt) |
- _KERNPG_TABLE | _PAGE_USER);
+ _KERNPG_TABLE);
memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
early_make_page_readonly(init_level4_pgt,
@@ -578,7 +578,7 @@ void __init extend_init_mapping(unsigned
pte_page = alloc_static_page(&phys);
early_make_page_readonly(
pte_page, XENFEAT_writable_page_tables);
- set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
+ set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
} else {
addr = page[pmd_index(va)];
addr_to_page(addr, pte_page);
@@ -587,7 +587,7 @@ void __init extend_init_mapping(unsigned
if (pte_none(*pte)) {
new_pte = pfn_pte(
(va - __START_KERNEL_map) >> PAGE_SHIFT,
- __pgprot(_KERNPG_TABLE | _PAGE_USER));
+ __pgprot(_KERNPG_TABLE));
xen_l1_entry_update(pte, new_pte);
}
va += PAGE_SIZE;
diff -r d3a9bcf61c33 -r fc5736e0a2eb linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Aug 22 15:13:07 2006 +0100
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Aug 22 15:26:40 2006 +0100
@@ -206,7 +206,7 @@ static inline pte_t ptep_get_and_clear_f
#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE _PAGE_TABLE
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -219,22 +219,21 @@ static inline pte_t ptep_get_and_clear_f
#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define __PAGE_KERNEL \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
#define __PAGE_KERNEL_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
#define __PAGE_KERNEL_RO \
- (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
#define __PAGE_KERNEL_VSYSCALL \
- (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define __PAGE_KERNEL_VSYSCALL_NOCACHE \
- (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER )
+ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
#define __PAGE_KERNEL_LARGE \
- (__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER )
+ (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC \
- (__PAGE_KERNEL_EXEC | _PAGE_PSE | _PAGE_USER )
-
+ (__PAGE_KERNEL_EXEC | _PAGE_PSE)
/*
* We don't support GLOBAL page in xenolinux64
@@ -423,7 +422,7 @@ static inline pud_t *pud_offset_k(pgd_t
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
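Why pmd_bad() gains the ~_PAGE_USER mask: once Xen may silently OR
_PAGE_USER into a validated entry, a perfectly good kernel pmd can read
back with that bit set, and the old check would have rejected it. A
self-contained sketch of the new check under the usual 4KB-page
assumption (pmd_is_bad and the constants below are illustrative
stand-ins, not the kernel's exact macros):

    #include <stdint.h>

    #define PAGE_MASK      (~0xfffUL)     /* 4KB pages */
    #define _PAGE_PRESENT  (1UL << 0)
    #define _PAGE_RW       (1UL << 1)
    #define _PAGE_USER     (1UL << 2)
    #define _PAGE_ACCESSED (1UL << 5)
    #define _PAGE_DIRTY    (1UL << 6)
    #define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

    static inline int pmd_is_bad(uint64_t pmd)
    {
        /* Ignore the physical address, the present bit, and (new in
         * this patch) _PAGE_USER; everything left over must match
         * _KERNPG_TABLE exactly. */
        return (pmd & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT))
               != (_KERNPG_TABLE & ~_PAGE_PRESENT);
    }

For example, an entry Xen adjusted to flags 0x67 (P|RW|USER|A|D) masks
down to 0x62 and passes, whereas the old mask left USER in place and
would have flagged the entry as bad.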
diff -r d3a9bcf61c33 -r fc5736e0a2eb tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c Tue Aug 22 15:13:07 2006 +0100
+++ b/tools/libxc/xc_linux_build.c Tue Aug 22 15:26:40 2006 +0100
@@ -16,15 +16,11 @@
/* Handy for printing out '0' prepended values at native pointer size */
#define _p(a) ((void *) ((ulong)a))
-#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#if defined(__i386__)
#define L3_PROT (_PAGE_PRESENT)
-#endif
-
-#if defined(__x86_64__)
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#elif defined(__x86_64__)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif
diff -r d3a9bcf61c33 -r fc5736e0a2eb xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Tue Aug 22 15:13:07 2006 +0100
+++ b/xen/arch/x86/domain_build.c Tue Aug 22 15:26:40 2006 +0100
@@ -66,15 +66,11 @@ static char opt_dom0_ioports_disable[200
static char opt_dom0_ioports_disable[200] = "";
string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
-#if defined(__i386__)
-/* No ring-3 access in initial leaf page tables. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#if CONFIG_PAGING_LEVELS == 3
#define L3_PROT (_PAGE_PRESENT)
-#elif defined(__x86_64__)
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#elif CONFIG_PAGING_LEVELS == 4
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif
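With _PAGE_USER dropped from the 64-bit definitions, L1_PROT and L2_PROT
are now identical on both architectures and move outside the conditional;
only the upper-level flags still depend on the paging depth. Note that
L2_PROT deliberately keeps _PAGE_USER: x86 combines permissions across
levels, so user access is denied at the leaf (L1) while the intermediate
levels stay permissive. A hedged sketch of how a builder might compose an
entry from these constants (make_l2_entry is illustrative, not the actual
builder code):

    #include <stdint.h>

    #define _PAGE_PRESENT  (1UL << 0)
    #define _PAGE_RW       (1UL << 1)
    #define _PAGE_USER     (1UL << 2)
    #define _PAGE_ACCESSED (1UL << 5)
    #define _PAGE_DIRTY    (1UL << 6)
    #define PAGE_SHIFT     12

    #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)

    /* An L2 entry pointing at the L1 page table in frame l1_mfn:
     * permissive at the directory level, restrictive at the leaf. */
    static inline uint64_t make_l2_entry(uint64_t l1_mfn)
    {
        return (l1_mfn << PAGE_SHIFT) | L2_PROT;
    }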
diff -r d3a9bcf61c33 -r fc5736e0a2eb xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Aug 22 15:13:07 2006 +0100
+++ b/xen/arch/x86/mm.c Tue Aug 22 15:26:40 2006 +0100
@@ -707,6 +707,35 @@ get_page_from_l4e(
}
#endif /* 4 level */
+#ifdef __x86_64__
+#define adjust_guest_l1e(pl1e) \
+ do { \
+ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \
+ l1e_add_flags((pl1e), _PAGE_USER); \
+ } while ( 0 )
+
+#define adjust_guest_l2e(pl2e) \
+ do { \
+ if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \
+ l2e_add_flags((pl2e), _PAGE_USER); \
+ } while ( 0 )
+
+#define adjust_guest_l3e(pl3e) \
+ do { \
+ if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
+ l3e_add_flags((pl3e), _PAGE_USER); \
+ } while ( 0 )
+
+#define adjust_guest_l4e(pl4e) \
+ do { \
+ if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \
+ l4e_add_flags((pl4e), _PAGE_USER); \
+ } while ( 0 )
+#else
+#define adjust_guest_l1e(_p) ((void)0)
+#define adjust_guest_l2e(_p) ((void)0)
+#define adjust_guest_l3e(_p) ((void)0)
+#endif
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
{
@@ -806,9 +835,13 @@ static int alloc_l1_table(struct page_in
pl1e = map_domain_page(pfn);
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ {
if ( is_guest_l1_slot(i) &&
unlikely(!get_page_from_l1e(pl1e[i], d)) )
goto fail;
+
+ adjust_guest_l1e(pl1e[i]);
+ }
unmap_domain_page(pl1e);
return 1;
@@ -985,6 +1018,8 @@ static int alloc_l2_table(struct page_in
if ( is_guest_l2_slot(type, i) &&
unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
goto fail;
+
+ adjust_guest_l2e(pl2e[i]);
}
#if CONFIG_PAGING_LEVELS == 2
@@ -1053,6 +1088,8 @@ static int alloc_l3_table(struct page_in
if ( is_guest_l3_slot(i) &&
unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
+
+ adjust_guest_l3e(pl3e[i]);
}
if ( !create_pae_xen_mappings(pl3e) )
@@ -1093,6 +1130,8 @@ static int alloc_l4_table(struct page_in
if ( is_guest_l4_slot(i) &&
unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
goto fail;
+
+ adjust_guest_l4e(pl4e[i]);
}
/* Xen private mappings. */
@@ -1254,6 +1293,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
return 0;
}
+
+ adjust_guest_l1e(nl1e);
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
@@ -1336,6 +1377,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
return 0;
}
+ adjust_guest_l2e(nl2e);
+
/* Fast path for identical mapping and presence. */
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
@@ -1397,6 +1440,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
return 0;
}
+
+ adjust_guest_l3e(nl3e);
/* Fast path for identical mapping and presence. */
if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
@@ -1463,6 +1508,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return 0;
}
+
+ adjust_guest_l4e(nl4e);
/* Fast path for identical mapping and presence. */
if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
@@ -2402,7 +2449,7 @@ int do_mmu_update(
static int create_grant_pte_mapping(
- unsigned long pte_addr, l1_pgentry_t _nl1e, struct vcpu *v)
+ unsigned long pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
{
int rc = GNTST_okay;
void *va;
@@ -2414,6 +2461,8 @@ static int create_grant_pte_mapping(
ASSERT(spin_is_locked(&d->big_lock));
+ adjust_guest_l1e(nl1e);
+
gmfn = pte_addr >> PAGE_SHIFT;
mfn = gmfn_to_mfn(d, gmfn);
@@ -2437,7 +2486,7 @@ static int create_grant_pte_mapping(
}
ol1e = *(l1_pgentry_t *)va;
- if ( !update_l1e(va, ol1e, _nl1e, mfn, v) )
+ if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
{
put_page_type(page);
rc = GNTST_general_error;
@@ -2526,17 +2575,19 @@ static int destroy_grant_pte_mapping(
static int create_grant_va_mapping(
- unsigned long va, l1_pgentry_t _nl1e, struct vcpu *v)
+ unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
{
l1_pgentry_t *pl1e, ol1e;
struct domain *d = v->domain;
ASSERT(spin_is_locked(&d->big_lock));
+ adjust_guest_l1e(nl1e);
+
pl1e = &linear_pg_table[l1_linear_offset(va)];
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
- !update_l1e(pl1e, ol1e, _nl1e,
+ !update_l1e(pl1e, ol1e, nl1e,
l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
return GNTST_general_error;
@@ -3139,6 +3190,8 @@ static int ptwr_emulated_update(
}
}
+ adjust_guest_l1e(nl1e);
+
/* Checked successfully: do the update (write or cmpxchg). */
pl1e = map_domain_page(page_to_mfn(page));
pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
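All the adjust_guest_l*e() helpers introduced above share one shape; the
standalone sketch below captures the pattern (the pgentry_t type and the
adjust_guest_pgentry name are simplified stand-ins for Xen's real lNe
types and per-level macros):

    #include <stdint.h>

    #define _PAGE_PRESENT (1UL << 0)
    #define _PAGE_USER    (1UL << 2)
    #define likely(x)     __builtin_expect(!!(x), 1)

    typedef struct { uint64_t v; } pgentry_t;

    #ifdef __x86_64__
    /* Only present entries are touched; a not-present entry may hold
     * guest-defined data (e.g. swap metadata), so its bits must be
     * left exactly as written. */
    #define adjust_guest_pgentry(e)                 \
        do {                                        \
            if ( likely((e).v & _PAGE_PRESENT) )    \
                (e).v |= _PAGE_USER;                \
        } while ( 0 )
    #else
    /* 32-bit builds need no adjustment (and never compile the L4
     * paths at all), so it collapses to a no-op. */
    #define adjust_guest_pgentry(e) ((void)0)
    #endif

The adjustment is applied at every point where a guest-supplied entry
becomes live: table allocation (alloc_lN_table), entry updates
(mod_lN_entry), grant-table mappings, and writable-pagetable emulation.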