diff -r 0e32095a7b46 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Wed Aug 9 20:34:27 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Fri Aug 18 09:39:44 2006 @@ -282,9 +282,6 @@ area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; -#ifdef __x86_64__ - flags |= _PAGE_USER; -#endif if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, phys_addr>>PAGE_SHIFT, size, __pgprot(flags), domid)) { diff -r 0e32095a7b46 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Wed Aug 9 20:34:27 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Fri Aug 18 09:39:44 2006 @@ -529,7 +529,7 @@ mk_kernel_pgd(__pa_symbol(level3_kernel_pgt)); level3_kernel_pgt[pud_index(__START_KERNEL_map)] = __pud(__pa_symbol(level2_kernel_pgt) | - _KERNPG_TABLE | _PAGE_USER); + _KERNPG_TABLE); memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE); early_make_page_readonly(init_level4_pgt, @@ -578,7 +578,7 @@ pte_page = alloc_static_page(&phys); early_make_page_readonly( pte_page, XENFEAT_writable_page_tables); - set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER)); + set_pmd(pmd, __pmd(phys | _KERNPG_TABLE)); } else { addr = page[pmd_index(va)]; addr_to_page(addr, pte_page); @@ -587,7 +587,7 @@ if (pte_none(*pte)) { new_pte = pfn_pte( (va - __START_KERNEL_map) >> PAGE_SHIFT, - __pgprot(_KERNPG_TABLE | _PAGE_USER)); + __pgprot(_KERNPG_TABLE)); xen_l1_entry_update(pte, new_pte); } va += PAGE_SIZE; diff -r 0e32095a7b46 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Wed Aug 9 20:34:27 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Fri Aug 18 09:39:44 2006 @@ -206,7 +206,7 @@ #define _PAGE_NX (1UL<<_PAGE_BIT_NX) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE _PAGE_TABLE +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -219,22 +219,21 @@ #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) #define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX) #define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) #define __PAGE_KERNEL_VSYSCALL \ - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define __PAGE_KERNEL_VSYSCALL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER ) + (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD) #define __PAGE_KERNEL_LARGE \ - (__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER ) + (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC \ - (__PAGE_KERNEL_EXEC | _PAGE_PSE | _PAGE_USER ) - + (__PAGE_KERNEL_EXEC | _PAGE_PSE) /* * We don't support GLOBAL page in xenolinux64 @@ -424,7 +423,7 @@ can temporarily clear it. */ #define pmd_present(x) (pmd_val(x)) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) diff -r 0e32095a7b46 xen/arch/x86/flushtlb.c --- a/xen/arch/x86/flushtlb.c Wed Aug 9 20:34:27 2006 +++ b/xen/arch/x86/flushtlb.c Fri Aug 18 09:39:44 2006 @@ -22,7 +22,7 @@ u32 tlbflush_clock = 1U; DEFINE_PER_CPU(u32, tlbflush_time); -void write_cr3(unsigned long cr3) +void write_cr3(unsigned long cr3, const int flush_only) { u32 t, t1, t2; unsigned long flags; @@ -57,7 +57,14 @@ */ skip_clocktick: +#ifdef __x86_64__ + __pge_off(); + if ( flush_only != TLB_FLUSH_ONLY ) + __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); + __pge_on(); +#else __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); +#endif /* * STEP 3. Update this CPU's timestamp. Note that this happens *after* diff -r 0e32095a7b46 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Aug 9 20:34:27 2006 +++ b/xen/arch/x86/mm.c Fri Aug 18 09:39:44 2006 @@ -305,7 +305,7 @@ /* Fast path 1: does this mfn need a shadow at all? */ if ( !l3tab_needs_shadow(mfn) ) { - write_cr3(mfn << PAGE_SHIFT); + write_cr3(mfn << PAGE_SHIFT, TLB_UPDATE_CR3); /* Cache is no longer in use or valid (/after/ write to %cr3). */ cache->high_mfn = 0; return; @@ -317,7 +317,7 @@ /* Fast path 2: is this mfn already cached? */ if ( cache->high_mfn == mfn ) { - write_cr3(__pa(cache->table[cache->inuse_idx])); + write_cr3(__pa(cache->table[cache->inuse_idx]), TLB_UPDATE_CR3); return; } @@ -335,7 +335,7 @@ *(fix_pae_highmem_pl1e - cpu) = l1e_empty(); /* Install the low-memory L3 table in CR3. */ - write_cr3(__pa(lowmem_l3tab)); + write_cr3(__pa(lowmem_l3tab), TLB_UPDATE_CR3); spin_unlock(&cache->lock); } @@ -344,7 +344,7 @@ static void __write_ptbase(unsigned long mfn) { - write_cr3(mfn << PAGE_SHIFT); + write_cr3(mfn << PAGE_SHIFT, TLB_UPDATE_CR3); } #endif /* !CONFIG_X86_PAE */ @@ -692,6 +692,43 @@ } #endif /* 4 level */ +#ifdef __x86_64__ +#define adjust_l1e(pl1e) \ + do { \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + { \ + if (l1e_get_flags((pl1e)) & _PAGE_USER) \ + l1e_add_flags((pl1e), _PAGE_GLOBAL); \ + else \ + { \ + l1e_remove_flags((pl1e), _PAGE_GLOBAL); \ + l1e_add_flags((pl1e), _PAGE_USER); \ + } \ + } \ + } while ( 0 ) + +#define adjust_l2e(pl2e) \ + do { \ + if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \ + l2e_add_flags((pl2e), _PAGE_USER); \ + } while ( 0 ) + +#define adjust_l3e(pl3e) \ + do { \ + if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ + l3e_add_flags((pl3e), _PAGE_USER); \ + } while ( 0 ) + +#define adjust_l4e(pl4e) \ + do { \ + if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \ + l4e_add_flags((pl4e), _PAGE_USER); \ + } while ( 0 ) +#else +#define adjust_l1e(_p) ((void)0) +#define adjust_l2e(_p) ((void)0) +#define adjust_l3e(_p) ((void)0) +#endif void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) { @@ -788,9 +825,13 @@ pl1e = map_domain_page(pfn); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + { if ( is_guest_l1_slot(i) && unlikely(!get_page_from_l1e(pl1e[i], d)) ) goto fail; + + adjust_l1e(pl1e[i]); + } unmap_domain_page(pl1e); return 1; @@ -969,6 +1010,8 @@ if ( is_guest_l2_slot(type, i) && unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) ) goto fail; + + adjust_l2e(pl2e[i]); } #if CONFIG_PAGING_LEVELS == 2 @@ -1041,6 +1084,8 @@ if ( is_guest_l3_slot(i) && unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) ) goto fail; + + adjust_l3e(pl3e[i]); } if ( !create_pae_xen_mappings(pl3e) ) @@ -1085,6 +1130,8 @@ if ( is_guest_l4_slot(i) && unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) ) goto fail; + + adjust_l4e(pl4e[i]); } /* Xen private mappings. */ @@ -1236,6 +1283,8 @@ l1e_get_flags(nl1e) & L1_DISALLOW_MASK); return 0; } + + adjust_l1e(nl1e); /* Fast path for identical mapping, r/w and presence. */ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT)) @@ -1306,6 +1355,8 @@ return 0; } + adjust_l2e(nl2e); + /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e); @@ -1368,6 +1419,8 @@ l3e_get_flags(nl3e) & L3_DISALLOW_MASK); return 0; } + + adjust_l3e(nl3e); /* Fast path for identical mapping and presence. */ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) @@ -1434,6 +1487,8 @@ l4e_get_flags(nl4e) & L4_DISALLOW_MASK); return 0; } + + adjust_l4e(nl4e); /* Fast path for identical mapping and presence. */ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) @@ -1770,7 +1825,7 @@ old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_null(); update_pagetables(v); - write_cr3(__pa(idle_pg_table)); + write_cr3(__pa(idle_pg_table), TLB_UPDATE_CR3); if ( old_base_mfn != 0 ) put_page_and_type(mfn_to_page(old_base_mfn)); @@ -3270,7 +3325,9 @@ domain_crash(d); break; } - + + adjust_l1e(l1page[i]); + put_page_from_l1e(ol1e, d); } diff -r 0e32095a7b46 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Wed Aug 9 20:34:27 2006 +++ b/xen/arch/x86/x86_64/traps.c Fri Aug 18 09:39:44 2006 @@ -163,7 +163,10 @@ v->arch.flags ^= TF_kernel_mode; __asm__ __volatile__ ( "swapgs" ); update_pagetables(v); - write_ptbase(v); + + __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" + (pagetable_get_paddr(v->arch.monitor_table)) + : "memory" ); } unsigned long do_iret(void) diff -r 0e32095a7b46 xen/include/asm-x86/flushtlb.h --- a/xen/include/asm-x86/flushtlb.h Wed Aug 9 20:34:27 2006 +++ b/xen/include/asm-x86/flushtlb.h Fri Aug 18 09:39:44 2006 @@ -69,12 +69,15 @@ } /* Write pagetable base and implicitly tick the tlbflush clock. */ -extern void write_cr3(unsigned long cr3); +#define TLB_FLUSH_ONLY 1 +#define TLB_UPDATE_CR3 0 + +extern void write_cr3(unsigned long cr3, const int flush_only); #define local_flush_tlb() \ do { \ unsigned long cr3 = read_cr3(); \ - write_cr3(cr3); \ + write_cr3(cr3, TLB_FLUSH_ONLY); \ } while ( 0 ) #define local_flush_tlb_pge() \ diff -r 0e32095a7b46 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Wed Aug 9 20:34:27 2006 +++ b/xen/include/asm-x86/shadow.h Fri Aug 18 09:39:44 2006 @@ -1752,10 +1752,22 @@ if ( hvm_guest(v) ) paging_enabled = hvm_paging_enabled(v); else + { + if ( !shadow_mode_enabled(d) ) + { +#if CONFIG_PAGING_LEVELS == 4 + if ( !(v->arch.flags & TF_kernel_mode) ) + v->arch.monitor_table = v->arch.guest_table_user; + else +#endif + v->arch.monitor_table = v->arch.guest_table; + return; + } // HACK ALERT: there's currently no easy way to figure out if a domU // has set its arch.guest_table to zero, vs not yet initialized it. // paging_enabled = !!pagetable_get_paddr(v->arch.guest_table); + } /* * We don't call __update_pagetables() when hvm guest paging is @@ -1774,13 +1786,6 @@ { if ( shadow_mode_enabled(d) ) v->arch.monitor_table = v->arch.shadow_table; - else -#if CONFIG_PAGING_LEVELS == 4 - if ( !(v->arch.flags & TF_kernel_mode) ) - v->arch.monitor_table = v->arch.guest_table_user; - else -#endif - v->arch.monitor_table = v->arch.guest_table; } } diff -r 0e32095a7b46 xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Wed Aug 9 20:34:27 2006 +++ b/xen/include/asm-x86/x86_64/page.h Fri Aug 18 09:39:44 2006 @@ -72,7 +72,7 @@ /* Bit 23 of a 24-bit flag mask. This corresponds to bit 63 of a pte.*/ #define _PAGE_NX (cpu_has_nx ? (1U<<23) : 0U) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_GLOBAL) #define L2_DISALLOW_MASK BASE_DISALLOW_MASK #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)