diff -r fb427d7167fd xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/arch/x86/domain.c Wed Jan 24 06:38:40 2007 -0600 @@ -1483,7 +1483,7 @@ void arch_dump_domain_info(struct domain if ( shadow_mode_enabled(d) ) { printk(" shadow mode: "); - if ( d->arch.paging.shadow.mode & SHM2_enable ) + if ( shadow_mode_enabled(d) ) printk("enabled "); if ( shadow_mode_refcounts(d) ) printk("refcounts "); diff -r fb427d7167fd xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/arch/x86/mm/shadow/common.c Wed Jan 24 11:43:30 2007 -0600 @@ -1102,6 +1102,13 @@ shadow_alloc_p2m_page(struct domain *d) return mfn; } +void +shadow_free_p2m_page(struct domain *d, struct page_info *pg) +{ + page_set_owner(pg, NULL); + free_domheap_pages(pg, 0); +} + #if CONFIG_PAGING_LEVELS == 3 static void p2m_install_entry_in_monitors(struct domain *d, l3_pgentry_t *l3e) @@ -1147,342 +1154,10 @@ static void p2m_install_entry_in_monitor } #endif -// Find the next level's P2M entry, checking for out-of-range gfn's... -// Returns NULL on error. -// -static l1_pgentry_t * -p2m_find_entry(void *table, unsigned long *gfn_remainder, - unsigned long gfn, u32 shift, u32 max) -{ - u32 index; - - index = *gfn_remainder >> shift; - if ( index >= max ) - { - SHADOW_DEBUG(P2M, "gfn=0x%lx out of range " - "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", - gfn, *gfn_remainder, shift, index, max); - return NULL; - } - *gfn_remainder &= (1 << shift) - 1; - return (l1_pgentry_t *)table + index; -} - -// Walk one level of the P2M table, allocating a new table if required. -// Returns 0 on error. 
-// -static int -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, - unsigned long *gfn_remainder, unsigned long gfn, u32 shift, - u32 max, unsigned long type) -{ - l1_pgentry_t *p2m_entry; - void *next; - - if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, - shift, max)) ) - return 0; - - if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) - { - mfn_t mfn = shadow_alloc_p2m_page(d); - if ( mfn_x(mfn) == 0 ) - return 0; - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated; - mfn_to_page(mfn)->count_info = 1; -#if CONFIG_PAGING_LEVELS == 3 - if (type == PGT_l2_page_table) - { - struct vcpu *v; - /* We have written to the p2m l3: need to sync the per-vcpu - * copies of it in the monitor tables */ - p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); - /* Also, any vcpus running on shadows of the p2m need to - * reload their CR3s so the change propagates to the shadow */ - ASSERT(shadow_locked_by_me(d)); - for_each_vcpu(d, v) - { - if ( pagetable_get_pfn(v->arch.guest_table) - == pagetable_get_pfn(d->arch.phys_table) - && v->arch.paging.mode != NULL ) - v->arch.paging.mode->update_cr3(v, 0); - } - } -#endif - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); - } - *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); - next = sh_map_domain_page(*table_mfn); - sh_unmap_domain_page(*table); - *table = next; - - return 1; -} - -// Returns 0 on error (out of memory) -int -shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) -{ - // XXX -- this might be able to be faster iff current->domain == d - mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); - void *table = sh_map_domain_page(table_mfn); - unsigned long gfn_remainder = gfn; - l1_pgentry_t *p2m_entry; - int rv=0; - -#if CONFIG_PAGING_LEVELS >= 4 - if ( 
!p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L4_PAGETABLE_SHIFT - PAGE_SHIFT, - L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) - goto out; -#endif -#if CONFIG_PAGING_LEVELS >= 3 - // When using PAE Xen, we only allow 33 bits of pseudo-physical - // address in translated guests (i.e. 8 GBytes). This restriction - // comes from wanting to map the P2M table into the 16MB RO_MPT hole - // in Xen's address space for translated PV guests. - // - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L3_PAGETABLE_SHIFT - PAGE_SHIFT, - (CONFIG_PAGING_LEVELS == 3 - ? 8 - : L3_PAGETABLE_ENTRIES), - PGT_l2_page_table) ) - goto out; -#endif - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L2_PAGETABLE_SHIFT - PAGE_SHIFT, - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) - goto out; - - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, - 0, L1_PAGETABLE_ENTRIES); - ASSERT(p2m_entry); - if ( mfn_valid(mfn) ) - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - else - *p2m_entry = l1e_empty(); - - /* Track the highest gfn for which we have ever had a valid mapping */ - if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) ) - d->arch.max_mapped_pfn = gfn; - - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, - p2m_entry, sizeof(*p2m_entry)); - - /* Success */ - rv = 1; - - out: - sh_unmap_domain_page(table); - return rv; -} - -// Allocate a new p2m table for a domain. -// -// The structure of the p2m table is that of a pagetable for xen (i.e. it is -// controlled by CONFIG_PAGING_LEVELS). 
-// -// Returns 0 if p2m table could not be initialized -// -static int -shadow_alloc_p2m_table(struct domain *d) -{ - mfn_t p2m_top, mfn; - struct list_head *entry; - struct page_info *page; - unsigned int page_count = 0; - unsigned long gfn; - - SHADOW_PRINTK("allocating p2m table\n"); - ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0); - - p2m_top = shadow_alloc_p2m_page(d); - mfn_to_page(p2m_top)->count_info = 1; - mfn_to_page(p2m_top)->u.inuse.type_info = -#if CONFIG_PAGING_LEVELS == 4 - PGT_l4_page_table -#elif CONFIG_PAGING_LEVELS == 3 - PGT_l3_page_table -#elif CONFIG_PAGING_LEVELS == 2 - PGT_l2_page_table -#endif - | 1 | PGT_validated; - - if ( mfn_x(p2m_top) == 0 ) - return 0; - - d->arch.phys_table = pagetable_from_mfn(p2m_top); - - SHADOW_PRINTK("populating p2m table\n"); - - /* Initialise physmap tables for slot zero. Other code assumes this. */ - gfn = 0; - mfn = _mfn(INVALID_MFN); - if ( !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - - /* Build a p2m map that matches the m2p entries for this domain's - * allocated pages. Skip any pages that have an explicitly invalid - * or obviously bogus m2p entry. 
*/ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = page_to_mfn(page); - gfn = get_gpfn_from_mfn(mfn_x(mfn)); - page_count++; - if ( -#ifdef __x86_64__ - (gfn != 0x5555555555555555L) -#else - (gfn != 0x55555555L) -#endif - && gfn != INVALID_M2P_ENTRY - && (gfn < - (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) - && !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - } - - SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count); - return 1; - - error: - SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" - SH_PRI_mfn "\n", gfn, mfn_x(mfn)); - return 0; -} - -mfn_t -sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) -/* Read another domain's p2m entries */ -{ - mfn_t mfn; - paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - - ASSERT(shadow_mode_translate(d)); - mfn = pagetable_get_mfn(d->arch.phys_table); - - - if ( gpfn > d->arch.max_mapped_pfn ) - /* This pfn is higher than the highest the p2m map currently holds */ - return _mfn(INVALID_MFN); - -#if CONFIG_PAGING_LEVELS >= 4 - { - l4_pgentry_t *l4e = sh_map_domain_page(mfn); - l4e += l4_table_offset(addr); - if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l4e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l4e_get_pfn(*l4e)); - sh_unmap_domain_page(l4e); - } -#endif -#if CONFIG_PAGING_LEVELS >= 3 - { - l3_pgentry_t *l3e = sh_map_domain_page(mfn); -#if CONFIG_PAGING_LEVELS == 3 - /* On PAE hosts the p2m has eight l3 entries, not four (see - * shadow_set_p2m_entry()) so we can't use l3_table_offset. - * Instead, just count the number of l3es from zero. It's safe - * to do this because we already checked that the gfn is within - * the bounds of the p2m. 
*/ - l3e += (addr >> L3_PAGETABLE_SHIFT); -#else - l3e += l3_table_offset(addr); -#endif - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l3e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l3e_get_pfn(*l3e)); - sh_unmap_domain_page(l3e); - } -#endif - - l2e = sh_map_domain_page(mfn); - l2e += l2_table_offset(addr); - if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l2e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l2e_get_pfn(*l2e)); - sh_unmap_domain_page(l2e); - - l1e = sh_map_domain_page(mfn); - l1e += l1_table_offset(addr); - if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l1e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l1e_get_pfn(*l1e)); - sh_unmap_domain_page(l1e); - - return mfn; -} - unsigned long shadow_gfn_to_mfn_foreign(unsigned long gpfn) { - return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn)); -} - - -static void shadow_p2m_teardown(struct domain *d) -/* Return all the p2m pages to Xen. - * We know we don't have any extra mappings to these pages */ -{ - struct list_head *entry, *n; - struct page_info *pg; - - d->arch.phys_table = pagetable_null(); - - list_for_each_safe(entry, n, &d->arch.paging.p2m_inuse) - { - pg = list_entry(entry, struct page_info, list); - list_del(entry); - /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) - { - SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", - pg->count_info, pg->u.inuse.type_info); - } - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation, since - * these pages were allocated without an owner. 
*/ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.paging.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - list_for_each_safe(entry, n, &d->arch.paging.p2m_freelist) - { - list_del(entry); - pg = list_entry(entry, struct page_info, list); - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation. */ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.paging.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - ASSERT(d->arch.paging.p2m_pages == 0); + return mfn_x(gfn_to_mfn_foreign(current->domain, gpfn)); } /* Set the pool of shadow pages to the required number of pages. @@ -2040,7 +1715,7 @@ int sh_remove_write_access(struct vcpu * GUESS(0xC0000000UL + (fault_addr >> 10), 1); /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = paging_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } @@ -2055,7 +1730,7 @@ int sh_remove_write_access(struct vcpu * } /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = paging_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } #if CONFIG_PAGING_LEVELS >= 4 @@ -2071,7 +1746,7 @@ int sh_remove_write_access(struct vcpu * /* 64bit Linux direct map at 0xffff810000000000; older kernels * had it at 0x0000010000000000UL */ - gfn = sh_mfn_to_gfn(v->domain, gmfn); + gfn = paging_mfn_to_gfn(v->domain, gmfn); GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); } @@ -2643,9 +2318,9 @@ static void sh_new_mode(struct domain *d ASSERT(shadow_locked_by_me(d)); ASSERT(d != current->domain); - d->arch.paging.shadow.mode = new_mode; + d->arch.paging.mode = new_mode; if ( new_mode & SHM2_translate ) - shadow_audit_p2m(d); + paging_audit_p2m(d); for_each_vcpu(d, v) sh_update_paging_modes(v); 
} @@ -2708,11 +2383,11 @@ int shadow_enable(struct domain *d, u32 /* Init the P2M table */ if ( mode & SHM2_translate ) - if ( !shadow_alloc_p2m_table(d) ) + if ( !paging_alloc_p2m_table(d) ) { shadow_hash_teardown(d); sh_set_allocation(d, old_pages, NULL); - shadow_p2m_teardown(d); + paging_p2m_teardown(d); rv = -ENOMEM; goto out; } @@ -2725,7 +2400,7 @@ int shadow_enable(struct domain *d, u32 /* Update the bits */ sh_new_mode(d, mode); - shadow_audit_p2m(d); + paging_audit_p2m(d); out: shadow_unlock(d); domain_unpause(d); @@ -2791,7 +2466,7 @@ void shadow_teardown(struct domain *d) /* We leave the "permanent" shadow modes enabled, but clear the * log-dirty mode bit. We don't want any more mark_dirty() * calls now that we've torn down the bitmap */ - d->arch.paging.shadow.mode &= ~SHM2_log_dirty; + d->arch.paging.mode &= ~SHM2_log_dirty; shadow_unlock(d); } @@ -2815,7 +2490,7 @@ void shadow_final_teardown(struct domain /* It is now safe to pull down the p2m map. */ if ( d->arch.paging.p2m_pages != 0 ) - shadow_p2m_teardown(d); + paging_p2m_teardown(d); SHADOW_PRINTK("dom %u final teardown done." 
" Shadow pages total = %u, free = %u, p2m=%u\n", @@ -2831,12 +2506,12 @@ static int shadow_one_bit_enable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || (d->arch.paging.shadow.mode & mode) ) + if ( d == current->domain || (d->arch.paging.mode & mode) ) { return -EINVAL; } - if ( d->arch.paging.shadow.mode == 0 ) + if ( d->arch.paging.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ if ( sh_set_allocation(d, 1, NULL) != 0 @@ -2848,7 +2523,7 @@ static int shadow_one_bit_enable(struct } /* Update the bits */ - sh_new_mode(d, d->arch.paging.shadow.mode | mode); + sh_new_mode(d, d->arch.paging.mode | mode); return 0; } @@ -2860,14 +2535,14 @@ static int shadow_one_bit_disable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || !(d->arch.paging.shadow.mode & mode) ) + if ( d == current->domain || !(d->arch.paging.mode & mode) ) { return -EINVAL; } /* Update the bits */ - sh_new_mode(d, d->arch.paging.shadow.mode & ~mode); - if ( d->arch.paging.shadow.mode == 0 ) + sh_new_mode(d, d->arch.paging.mode & ~mode); + if ( d->arch.paging.mode == 0 ) { /* Get this domain off shadows */ SHADOW_PRINTK("un-shadowing of domain %u starts." @@ -3034,113 +2709,53 @@ static int shadow_log_dirty_disable(stru /**************************************************************************/ /* P2M map manipulations */ -static void -sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ +/* shadow specific code which should be called when P2M table entry is updated + * with new content. It is responsible for update the entry, as well as other + * shadow processing jobs. 
+ */ +void +shadow_write_p2m_entry(struct domain *d, unsigned long gfn, l1_pgentry_t *p, + l1_pgentry_t new, unsigned int level) +{ + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); + mfn_t mfn; struct vcpu *v; - - if ( !shadow_mode_translate(d) ) - return; - + + /* handle physmap_add and physmap_remove */ v = current; if ( v->domain != d ) v = d->vcpu[0]; - - SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); - - ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn); - //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn); - - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( sh_remove_all_mappings(v, _mfn(mfn)) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - - shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); -} - -void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - shadow_lock(d); - shadow_audit_p2m(d); - sh_p2m_remove_page(d, gfn, mfn); - shadow_audit_p2m(d); - shadow_unlock(d); -} - -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - unsigned long ogfn; - mfn_t omfn; - - if ( !shadow_mode_translate(d) ) - return; - - shadow_lock(d); - shadow_audit_p2m(d); - - SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); - - omfn = sh_gfn_to_mfn(d, gfn); - if ( mfn_valid(omfn) ) - { - /* Get rid of the old mapping, especially any shadows */ - struct vcpu *v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, omfn); - if ( sh_remove_all_mappings(v, omfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); - } - - ogfn = sh_mfn_to_gfn(d, _mfn(mfn)); - if ( -#ifdef __x86_64__ - (ogfn != 0x5555555555555555L) -#else - (ogfn != 0x55555555L) -#endif - && (ogfn != INVALID_M2P_ENTRY) - && (ogfn != gfn) ) - { - /* This machine frame is already mapped at another physical address */ - 
SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", - mfn, ogfn, gfn); - if ( mfn_valid(omfn = sh_gfn_to_mfn(d, ogfn)) ) - { - SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", - ogfn , mfn_x(omfn)); - if ( mfn_x(omfn) == mfn ) - sh_p2m_remove_page(d, ogfn, mfn); - } - } - - shadow_set_p2m_entry(d, gfn, _mfn(mfn)); - set_gpfn_from_mfn(mfn, gfn); - -#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) - /* If we're doing FAST_FAULT_PATH, then shadow mode may have - cached the fact that this is an mmio region in the shadow - page tables. Blow the tables away to remove the cache. - This is pretty heavy handed, but this is a rare operation - (it might happen a dozen times during boot and then never - again), so it doesn't matter too much. */ - shadow_blow_tables(d); -#endif - - shadow_audit_p2m(d); - shadow_unlock(d); + mfn = paging_gfn_to_mfn(d, gfn); + if ( v != NULL && level == 1 && mfn_valid(mfn) ) { + sh_remove_all_shadows_and_parents(v, mfn); + if ( sh_remove_all_mappings(v, mfn) ) + flush_tlb_mask(d->domain_dirty_cpumask); + } + + /* update the entry with new content */ + *p = new; + + /* The P2M can be shadowed: keep the shadows synced */ + if ( d->vcpu[0] != NULL ) + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p)); + + /* install P2M in monitors for PAE Xen */ +#if CONFIG_PAGING_LEVELS == 3 + if ( level == 3 ) { + struct vcpu *v; + /* We have written to the p2m l3: need to sync the per-vcpu + * copies of it in the monitor tables */ + p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p); + /* Also, any vcpus running on shadows of the p2m need to + * reload their CR3s so the change propagates to the shadow */ + for_each_vcpu(d, v) { + if ( pagetable_get_pfn(v->arch.guest_table) + == pagetable_get_pfn(d->arch.phys_table) + && v->arch.paging.mode != NULL ) + v->arch.paging.mode->update_cr3(v, 0); + } + } +#endif } /**************************************************************************/ @@ -3312,7 +2927,7 @@ int 
shadow_domctl(struct domain *d, return rc; if ( is_hvm_domain(d) ) return -EINVAL; - if ( d->arch.paging.shadow.mode & SHM2_enable ) + if ( d->arch.paging.mode & SHM2_enable ) if ( (rc = shadow_test_disable(d)) != 0 ) return rc; return 0; @@ -3426,199 +3041,6 @@ void shadow_audit_tables(struct vcpu *v) #endif /* Shadow audit */ - -/**************************************************************************/ -/* Auditing p2m tables */ - -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M - -void shadow_audit_p2m(struct domain *d) -{ - struct list_head *entry; - struct page_info *page; - struct domain *od; - unsigned long mfn, gfn, m2pfn, lp2mfn = 0; - mfn_t p2mfn; - unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; - int test_linear; - - if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) ) - return; - - //SHADOW_PRINTK("p2m audit starts\n"); - - test_linear = ( (d == current->domain) - && !pagetable_is_null(current->arch.monitor_table) ); - if ( test_linear ) - local_flush_tlb(); - - /* Audit part one: walk the domain's page allocation list, checking - * the m2p entries. 
*/ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = mfn_x(page_to_mfn(page)); - - // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); - - od = page_get_owner(page); - - if ( od != d ) - { - SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", - mfn, od, (od?od->domain_id:-1), d, d->domain_id); - continue; - } - - gfn = get_gpfn_from_mfn(mfn); - if ( gfn == INVALID_M2P_ENTRY ) - { - orphans_i++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", - // mfn); - continue; - } - - if ( gfn == 0x55555555 ) - { - orphans_d++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", - // mfn); - continue; - } - - p2mfn = sh_gfn_to_mfn_foreign(d, gfn); - if ( mfn_x(p2mfn) != mfn ) - { - mpbad++; - SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" - " (-> gfn %#lx)\n", - mfn, gfn, mfn_x(p2mfn), - (mfn_valid(p2mfn) - ? get_gpfn_from_mfn(mfn_x(p2mfn)) - : -1u)); - /* This m2p entry is stale: the domain has another frame in - * this physical slot. No great disaster, but for neatness, - * blow away the m2p entry. */ - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); - } - - if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) - { - lp2mfn = gfn_to_mfn_current(gfn); - if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) - { - SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " - "(!= mfn %#lx)\n", gfn, - mfn_x(lp2mfn), mfn_x(p2mfn)); - } - } - - // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", - // mfn, gfn, p2mfn, lp2mfn); - } - - /* Audit part two: walk the domain's p2m table, checking the entries. 
*/ - if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) - { - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - int i1, i2; - -#if CONFIG_PAGING_LEVELS == 4 - l4_pgentry_t *l4e; - l3_pgentry_t *l3e; - int i3, i4; - l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#elif CONFIG_PAGING_LEVELS == 3 - l3_pgentry_t *l3e; - int i3; - l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#else /* CONFIG_PAGING_LEVELS == 2 */ - l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#endif - - gfn = 0; -#if CONFIG_PAGING_LEVELS >= 3 -#if CONFIG_PAGING_LEVELS >= 4 - for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) - { - if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4]))); -#endif /* now at levels 3 or 4... */ - for ( i3 = 0; - i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); - i3++ ) - { - if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3]))); -#endif /* all levels... 
*/ - for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) - { - if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2]))); - - for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) - { - if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) - continue; - mfn = l1e_get_pfn(l1e[i1]); - ASSERT(mfn_valid(_mfn(mfn))); - m2pfn = get_gpfn_from_mfn(mfn); - if ( m2pfn != gfn ) - { - pmbad++; - SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx" - " -> gfn %#lx\n", gfn, mfn, m2pfn); - BUG(); - } - } - sh_unmap_domain_page(l1e); - } -#if CONFIG_PAGING_LEVELS >= 3 - sh_unmap_domain_page(l2e); - } -#if CONFIG_PAGING_LEVELS >= 4 - sh_unmap_domain_page(l3e); - } -#endif -#endif - -#if CONFIG_PAGING_LEVELS == 4 - sh_unmap_domain_page(l4e); -#elif CONFIG_PAGING_LEVELS == 3 - sh_unmap_domain_page(l3e); -#else /* CONFIG_PAGING_LEVELS == 2 */ - sh_unmap_domain_page(l2e); -#endif - - } - - //SHADOW_PRINTK("p2m audit complete\n"); - //if ( orphans_i | orphans_d | mpbad | pmbad ) - // SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", - // orphans_i + orphans_d, orphans_i, orphans_d, - if ( mpbad | pmbad ) - SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", - pmbad, mpbad); -} - -#endif /* p2m audit */ - /* * Local variables: * mode: C diff -r fb427d7167fd xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/arch/x86/mm/shadow/private.h Wed Jan 24 08:34:08 2007 -0600 @@ -32,6 +32,7 @@ #include + /****************************************************************************** * Levels of self-test and paranoia */ @@ -151,13 +152,6 @@ extern void shadow_audit_tables(struct v #else #define shadow_audit_tables(_v) do {} while(0) #endif - -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M -extern void shadow_audit_p2m(struct domain *d); -#else -#define shadow_audit_p2m(_d) do {} while(0) -#endif - 
/****************************************************************************** * Macro for dealing with the naming of the internal names of the @@ -477,19 +471,6 @@ sh_unmap_domain_page_global(void *p) unmap_domain_page_global(p); } -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} - - /****************************************************************************** * Log-dirty mode bitmap handling */ diff -r fb427d7167fd xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/arch/x86/mm/shadow/types.h Wed Jan 24 08:00:26 2007 -0600 @@ -416,13 +416,13 @@ vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gf { if ( !paging_vcpu_mode_translate(v) ) return _mfn(gfn_x(gfn)); - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); + return paging_gfn_to_mfn(v->domain, gfn_x(gfn)); } static inline gfn_t mfn_to_gfn(struct domain *d, mfn_t mfn) { - return _gfn(sh_mfn_to_gfn(d, mfn)); + return _gfn(paging_mfn_to_gfn(d, mfn)); } static inline paddr_t diff -r fb427d7167fd xen/arch/x86/paging.c --- a/xen/arch/x86/paging.c Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/arch/x86/paging.c Wed Jan 24 11:45:07 2007 -0600 @@ -35,6 +35,12 @@ void paging_domain_init(struct domain *d d->arch.paging.domctl = &shadow_domctl; d->arch.paging.teardown = &shadow_teardown; d->arch.paging.final_teardown = &shadow_final_teardown; + /* P2M functions */ + d->arch.paging.alloc_p2m_page = &shadow_alloc_p2m_page; + d->arch.paging.free_p2m_page = &shadow_free_p2m_page; + d->arch.paging.write_p2m_entry = &shadow_write_p2m_entry; + + p2m_lock_init(d); /* generic P2M lock initialization */ } /* vcpu paging struct initialization goes here */ @@ -42,6 +48,601 @@ void paging_vcpu_init(struct vcpu *v) { shadow_vcpu_init(v); } + +/************************************************/ +/* P2M functions */ 
+/************************************************/
+
+// Find the entry for *gfn_remainder in this level's table, where the index
+// is *gfn_remainder >> shift; the consumed bits are then masked off.
+// Returns NULL if that index is >= max (gfn out of range).
+static l1_pgentry_t *
+p2m_find_entry(void *table, unsigned long *gfn_remainder,
+               unsigned long gfn, u32 shift, u32 max)
+{
+    u32 index;
+
+    index = *gfn_remainder >> shift;
+    if ( index >= max )
+    {
+        PAGING_DEBUG(P2M, "gfn=0x%lx out of range "
+                     "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
+                     gfn, *gfn_remainder, shift, index, max);
+        return NULL;
+    }
+    *gfn_remainder &= (1UL << shift) - 1; /* 1UL: build mask at operand width */
+    return (l1_pgentry_t *)table + index;
+}
+
+// Walk one level of the P2M table, allocating a new table if required.
+// On success *table and *table_mfn refer to the next-level table (the old
+// mapping is unmapped) and 1 is returned.  Returns 0 on error.
+static int
+p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
+               unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
+               u32 max, unsigned long type)
+{
+    l1_pgentry_t *p2m_entry;
+    l1_pgentry_t new_entry;
+    void *next;
+
+    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
+                                      shift, max)) )
+        return 0;
+
+    if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+    {
+        mfn_t mfn = paging_alloc_p2m_page(d);
+        if ( mfn_x(mfn) == 0 )
+            return 0;
+        mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated;
+        mfn_to_page(mfn)->count_info = 1;
+
+        new_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+        /* The level passed on is that of the entry written, not of 'type'. */
+        switch ( type ) {
+        case PGT_l3_page_table:
+            paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 4);
+            break;
+        case PGT_l2_page_table:
+            paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 3);
+            break;
+        case PGT_l1_page_table:
+            paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 2);
+            break;
+        default:
+            BUG();
+            break;
+        }
+    }
+    *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
+    next = map_domain_page(mfn_x(*table_mfn));
+    unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+// Point gfn at mfn (or clear it if mfn is invalid); returns 0 on error
+int
+paging_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    // XXX -- this might be able to be faster iff current->domain == d
+    mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
+    void *table = map_domain_page(mfn_x(table_mfn));
+    unsigned long gfn_remainder = gfn;
+    l1_pgentry_t *p2m_entry;
+    l1_pgentry_t entry_content;
+    int rv = 0;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
+        goto out;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    // When using PAE Xen, we only allow 33 bits of pseudo-physical
+    // address in translated guests (i.e. 8 GBytes). This restriction
+    // comes from wanting to map the P2M table into the 16MB RO_MPT hole
+    // in Xen's address space for translated PV guests.
+    //
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         (CONFIG_PAGING_LEVELS == 3
+                          ? 8
+                          : L3_PAGETABLE_ENTRIES),
+                         PGT_l2_page_table) )
+        goto out;
+#endif
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+        goto out;
+
+    p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                               0, L1_PAGETABLE_ENTRIES);
+    ASSERT(p2m_entry);
+
+    /* Track the highest gfn for which we have ever had a valid mapping */
+    if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) )
+        d->arch.max_mapped_pfn = gfn;
+
+    if ( mfn_valid(mfn) )
+        entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+    else
+        entry_content = l1e_empty();
+
+    /* level 1 entry */
+    paging_write_p2m_entry(d, gfn, p2m_entry, entry_content, 1);
+
+    /* Success */
+    rv = 1;
+
+ out:
+    unmap_domain_page(table);
+    return rv;
+}
+
+// Allocate a new p2m table for a domain.
+//
+// The structure of the p2m table is that of a pagetable for xen (i.e. it is
+// controlled by CONFIG_PAGING_LEVELS). 
+//
+// Returns 1 on success, 0 if the p2m table could not be initialized
+//
+int paging_alloc_p2m_table(struct domain *d)
+{
+    mfn_t p2m_top, mfn;
+    struct list_head *entry;
+    struct page_info *page;
+    unsigned int page_count = 0;
+    unsigned long gfn;
+
+    PAGING_PRINTK("allocating p2m table\n");
+    ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0);
+
+    p2m_top = paging_alloc_p2m_page(d);
+    if ( mfn_x(p2m_top) == 0 ) /* allocation failed */
+        return 0;
+    /* Only touch the page's metadata once the allocation is known good. */
+    mfn_to_page(p2m_top)->count_info = 1;
+    mfn_to_page(p2m_top)->u.inuse.type_info =
+#if CONFIG_PAGING_LEVELS == 4
+        PGT_l4_page_table
+#elif CONFIG_PAGING_LEVELS == 3
+        PGT_l3_page_table
+#elif CONFIG_PAGING_LEVELS == 2
+        PGT_l2_page_table
+#endif
+        | 1 | PGT_validated;
+
+    d->arch.phys_table = pagetable_from_mfn(p2m_top);
+
+    PAGING_PRINTK("populating p2m table\n");
+
+    /* Initialise physmap tables for slot zero. Other code assumes this. */
+    gfn = 0;
+    mfn = _mfn(INVALID_MFN);
+    if ( !paging_set_p2m_entry(d, gfn, mfn) )
+        goto error;
+    /* Mirror each listed page's m2p entry into the p2m; skip debug/invalid gfns */
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        page = list_entry(entry, struct page_info, list);
+        mfn = page_to_mfn(page);
+        gfn = get_gpfn_from_mfn(mfn_x(mfn));
+        page_count++;
+        if (
+#ifdef __x86_64__
+            (gfn != 0x5555555555555555L)
+#else
+            (gfn != 0x55555555L)
+#endif
+             && gfn != INVALID_M2P_ENTRY
+             && !paging_set_p2m_entry(d, gfn, mfn) )
+            goto error;
+    }
+
+    PAGING_PRINTK("p2m table initialised (%u pages)\n", page_count);
+    return 1;
+
+ error:
+    PAGING_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
+                  PG_PRI_mfn "\n", gfn, mfn_x(mfn));
+    return 0;
+}
+
+void paging_p2m_teardown(struct domain *d)
+/* Return all the p2m pages to Xen. 
+ * We know we don't have any extra mappings to these pages */ +{ + struct list_head *entry, *n; + struct page_info *pg; + + d->arch.phys_table = pagetable_null(); + + list_for_each_safe(entry, n, &d->arch.paging.p2m_inuse) + { + pg = list_entry(entry, struct page_info, list); + list_del(entry); + /* Should have just the one ref we gave it in alloc_p2m_page() */ + if ( (pg->count_info & PGC_count_mask) != 1 ) + { + PAGING_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", + pg->count_info, pg->u.inuse.type_info); + } + ASSERT(page_get_owner(pg) == d); + /* Free should not decrement domain's total allocation, since + * these pages were allocated without an owner. */ + paging_free_p2m_page(d, pg); + d->arch.paging.p2m_pages--; + perfc_decr(shadow_alloc_count); + } + list_for_each_safe(entry, n, &d->arch.paging.p2m_freelist) + { + list_del(entry); + pg = list_entry(entry, struct page_info, list); + ASSERT(page_get_owner(pg) == d); + /* Free should not decrement domain's total allocation. 
*/ + paging_free_p2m_page(d, pg); + d->arch.paging.p2m_pages--; + perfc_decr(shadow_alloc_count); + } + ASSERT(d->arch.paging.p2m_pages == 0); +} + +mfn_t +gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) +/* Read another domain's p2m entries */ +{ + mfn_t mfn; + paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + + ASSERT(paging_mode_translate(d)); + mfn = pagetable_get_mfn(d->arch.phys_table); + + + if ( gpfn > d->arch.max_mapped_pfn ) + /* This pfn is higher than the highest the p2m map currently holds */ + return _mfn(INVALID_MFN); + +#if CONFIG_PAGING_LEVELS >= 4 + { + l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); + l4e += l4_table_offset(addr); + if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l4e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l4e_get_pfn(*l4e)); + unmap_domain_page(l4e); + } +#endif +#if CONFIG_PAGING_LEVELS >= 3 + { + l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); +#if CONFIG_PAGING_LEVELS == 3 + /* On PAE hosts the p2m has eight l3 entries, not four (see + * shadow_set_p2m_entry()) so we can't use l3_table_offset. + * Instead, just count the number of l3es from zero. It's safe + * to do this because we already checked that the gfn is within + * the bounds of the p2m. 
*/ + l3e += (addr >> L3_PAGETABLE_SHIFT); +#else + l3e += l3_table_offset(addr); +#endif + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l3e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l3e_get_pfn(*l3e)); + unmap_domain_page(l3e); + } +#endif + + l2e = map_domain_page(mfn_x(mfn)); + l2e += l2_table_offset(addr); + if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l2e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l2e_get_pfn(*l2e)); + unmap_domain_page(l2e); + + l1e = map_domain_page(mfn_x(mfn)); + l1e += l1_table_offset(addr); + if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l1e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l1e_get_pfn(*l1e)); + unmap_domain_page(l1e); + + return mfn; +} + +#if PAGING_AUDIT_P2M +void paging_audit_p2m(struct domain *d) +{ + struct list_head *entry; + struct page_info *page; + struct domain *od; + unsigned long mfn, gfn, m2pfn, lp2mfn = 0; + mfn_t p2mfn; + unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; + int test_linear; + + if ( !shadow_mode_translate(d) ) + return; + + //PAGING_PRINTK("p2m audit starts\n"); + + test_linear = ( (d == current->domain) + && !pagetable_is_null(current->arch.monitor_table) ); + if ( test_linear ) + local_flush_tlb(); + + /* Audit part one: walk the domain's page allocation list, checking + * the m2p entries. 
*/ + for ( entry = d->page_list.next; + entry != &d->page_list; + entry = entry->next ) + { + page = list_entry(entry, struct page_info, list); + mfn = mfn_x(page_to_mfn(page)); + + // PAGING_PRINTK("auditing guest page, mfn=%#lx\n", mfn); + + od = page_get_owner(page); + + if ( od != d ) + { + PAGING_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", + mfn, od, (od?od->domain_id:-1), d, d->domain_id); + continue; + } + + gfn = get_gpfn_from_mfn(mfn); + if ( gfn == INVALID_M2P_ENTRY ) + { + orphans_i++; + //PAGING_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", + // mfn); + continue; + } + + if ( gfn == 0x55555555 ) + { + orphans_d++; + //PAGING_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", + // mfn); + continue; + } + + p2mfn = gfn_to_mfn_foreign(d, gfn); + if ( mfn_x(p2mfn) != mfn ) + { + mpbad++; + PAGING_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" + " (-> gfn %#lx)\n", + mfn, gfn, mfn_x(p2mfn), + (mfn_valid(p2mfn) + ? get_gpfn_from_mfn(mfn_x(p2mfn)) + : -1u)); + /* This m2p entry is stale: the domain has another frame in + * this physical slot. No great disaster, but for neatness, + * blow away the m2p entry. */ + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + } + + if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) + { + lp2mfn = gfn_to_mfn_current(gfn); + if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) + { + PAGING_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " + "(!= mfn %#lx)\n", gfn, + mfn_x(lp2mfn), mfn_x(p2mfn)); + } + } + + // PAGING_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", + // mfn, gfn, p2mfn, lp2mfn); + } + + /* Audit part two: walk the domain's p2m table, checking the entries. 
*/ + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) + { + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + int i1, i2; + +#if CONFIG_PAGING_LEVELS == 4 + l4_pgentry_t *l4e; + l3_pgentry_t *l3e; + int i3, i4; + l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#elif CONFIG_PAGING_LEVELS == 3 + l3_pgentry_t *l3e; + int i3; + l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#else /* CONFIG_PAGING_LEVELS == 2 */ + l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#endif + + gfn = 0; +#if CONFIG_PAGING_LEVELS >= 3 +#if CONFIG_PAGING_LEVELS >= 4 + for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) + { + if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); +#endif /* now at levels 3 or 4... */ + for ( i3 = 0; + i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); + i3++ ) + { + if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); +#endif /* all levels... 
*/ + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) + { + if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); + + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) + { + if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) + continue; + mfn = l1e_get_pfn(l1e[i1]); + ASSERT(mfn_valid(_mfn(mfn))); + m2pfn = get_gpfn_from_mfn(mfn); + if ( m2pfn != gfn ) + { + pmbad++; + PAGING_PRINTK("mismatch: gfn %#lx -> mfn %#lx" + " -> gfn %#lx\n", gfn, mfn, m2pfn); + BUG(); + } + } + unmap_domain_page(l1e); + } +#if CONFIG_PAGING_LEVELS >= 3 + unmap_domain_page(l2e); + } +#if CONFIG_PAGING_LEVELS >= 4 + unmap_domain_page(l3e); + } +#endif +#endif + +#if CONFIG_PAGING_LEVELS == 4 + unmap_domain_page(l4e); +#elif CONFIG_PAGING_LEVELS == 3 + unmap_domain_page(l3e); +#else /* CONFIG_PAGING_LEVELS == 2 */ + unmap_domain_page(l2e); +#endif + + } + + //PAGING_PRINTK("p2m audit complete\n"); + //if ( orphans_i | orphans_d | mpbad | pmbad ) + // PAGING_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", + // orphans_i + orphans_d, orphans_i, orphans_d, + if ( mpbad | pmbad ) + PAGING_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", + pmbad, mpbad); +} +#endif + +static void +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) +{ + if ( !paging_mode_translate(d) ) + return; + PAGING_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); + + ASSERT(mfn_x(paging_gfn_to_mfn(d, gfn)) == mfn); + //ASSERT(paging_mfn_to_gfn(d, mfn) == gfn); + + paging_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); +} + +void +paging_guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn) +{ + p2m_lock(d); + paging_audit_p2m(d); + p2m_remove_page(d, gfn, mfn); + paging_audit_p2m(d); + p2m_unlock(d); +} + +void +paging_guest_physmap_add_page(struct domain *d, unsigned long gfn, + 
unsigned long mfn) +{ + unsigned long ogfn; + mfn_t omfn; + + if ( !paging_mode_translate(d) ) + return; + + p2m_lock(d); + paging_audit_p2m(d); + + PAGING_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); + + omfn = paging_gfn_to_mfn(d, gfn); + if ( mfn_valid(omfn) ) + { + paging_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + } + + ogfn = paging_mfn_to_gfn(d, _mfn(mfn)); + if ( +#ifdef __x86_64__ + (ogfn != 0x5555555555555555L) +#else + (ogfn != 0x55555555L) +#endif + && (ogfn != INVALID_M2P_ENTRY) + && (ogfn != gfn) ) + { + /* This machine frame is already mapped at another physical address */ + PAGING_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", + mfn, ogfn, gfn); + if ( mfn_valid(omfn = paging_gfn_to_mfn(d, ogfn)) ) + { + PAGING_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", + ogfn , mfn_x(omfn)); + if ( mfn_x(omfn) == mfn ) + p2m_remove_page(d, ogfn, mfn); + } + } + + paging_set_p2m_entry(d, gfn, _mfn(mfn)); + set_gpfn_from_mfn(mfn, gfn); + +#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) + /* If we're doing FAST_FAULT_PATH, then shadow mode may have + cached the fact that this is an mmio region in the shadow + page tables. Blow the tables away to remove the cache. + This is pretty heavy handed, but this is a rare operation + (it might happen a dozen times during boot and then never + again), so it doesn't matter too much. 
*/ + shadow_blow_tables(d); +#endif + + paging_audit_p2m(d); + p2m_unlock(d); +} + /* * Local variables: diff -r fb427d7167fd xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/include/asm-x86/domain.h Wed Jan 24 10:48:13 2007 -0600 @@ -62,7 +62,6 @@ extern void hypercall_page_initialise(st /* shadow paging extension */ /************************************************/ struct shadow_domain { - u32 mode; /* flags to control shadow operation */ spinlock_t lock; /* shadow domain lock */ int locker; /* processor which holds the lock */ const char *locker_function; /* Func that took it */ @@ -96,17 +95,28 @@ struct shadow_vcpu { /* common paging data structure */ /************************************************/ struct paging_domain { + u32 mode; /* flags to control paging operation */ struct list_head freelists[SHADOW_MAX_ORDER + 1]; struct list_head p2m_freelist; struct list_head p2m_inuse; unsigned int total_pages; /* number of pages allocated */ unsigned int free_pages; /* number of pages on freelists */ unsigned int p2m_pages; /* number of pages in p2m map */ + spinlock_t p2m_lock; /* P2m domain lock */ + int p2m_locker; /* processor which holds the lock */ + const char *p2m_locker_function; /* Func that took it */ + int (*domctl )(struct domain *d, xen_domctl_shadow_op_t *sc, XEN_GUEST_HANDLE(void) u_domctl); void (*final_teardown )(struct domain *d); void (*teardown )(struct domain *d); + /* P2M table functions */ + mfn_t (*alloc_p2m_page )(struct domain *d); + void (*free_p2m_page )(struct domain *d, struct page_info *pg); + void (*write_p2m_entry )(struct domain *d, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level); /* extension for shadow paging support */ struct shadow_domain shadow; diff -r fb427d7167fd xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/include/asm-x86/mm.h Wed Jan 24 07:50:44 2007 -0600 @@ -7,6 +7,59 @@ #include 
#include #include + +/****************************************************************************** + * With shadow pagetables, the different kinds of address start + * to get confusing. + * + * Virtual addresses are what they usually are: the addresses that are used + * to access memory while the guest is running. The MMU translates from + * virtual addresses to machine addresses. + * + * (Pseudo-)physical addresses are the abstraction of physical memory the + * guest uses for allocation and so forth. For the purposes of this code, + * we can largely ignore them. + * + * Guest frame numbers (gfns) are the entries that the guest puts in its + * pagetables. For normal paravirtual guests, they are actual frame numbers, + * with the translation done by the guest. + * + * Machine frame numbers (mfns) are the entries that the hypervisor puts + * in the shadow page tables. + * + * Elsewhere in the xen code base, the name "gmfn" is generally used to refer + * to a "machine frame number, from the guest's perspective", or in other + * words, pseudo-physical frame numbers. However, in the shadow code, the + * term "gmfn" means "the mfn of a guest page"; this combines naturally with + * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a + * guest L2 page), etc... + */ + +/* With this defined, we do some ugly things to force the compiler to + * give us type safety between mfns and gfns and other integers. + * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions + * that translate between int and foo_t. + * + * It does have some performance cost because the types now have + * a different storage attribute, so we may not want it on all the time.
*/ + +#ifndef NDEBUG +#define TYPE_SAFETY 1 +#endif + +#ifdef TYPE_SAFETY +#define TYPE_SAFE(_type,_name) \ +typedef struct { _type _name; } _name##_t; \ +static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ +static inline _type _name##_x(_name##_t n) { return n._name; } +#else +#define TYPE_SAFE(_type,_name) \ +typedef _type _name##_t; \ +static inline _name##_t _##_name(_type n) { return n; } \ +static inline _type _name##_x(_name##_t n) { return n; } +#endif + +/* Declares mfn_t together with its _mfn() (wrap) and mfn_x() (unwrap) helpers. */ +TYPE_SAFE(unsigned long,mfn); /* * Per-page-frame information. @@ -275,7 +328,7 @@ int check_descriptor(const struct domain ? get_gpfn_from_mfn(mfn) \ : (mfn) ) -#define gmfn_to_mfn(_d, gpfn) mfn_x(sh_gfn_to_mfn(_d, gpfn)) +#define gmfn_to_mfn(_d, gpfn) mfn_x(paging_gfn_to_mfn(_d, gpfn)) #define INVALID_MFN (~0UL) diff -r fb427d7167fd xen/include/asm-x86/paging.h --- a/xen/include/asm-x86/paging.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/include/asm-x86/paging.h Wed Jan 24 11:48:15 2007 -0600 @@ -34,61 +34,66 @@ #include #include -/************************************************/ -/* type definition */ -/************************************************/ -/****************************************************************************** - * With shadow pagetables, the different kinds of address start - * to get get confusing. - * - * Virtual addresses are what they usually are: the addresses that are used - * to accessing memory while the guest is running. The MMU translates from - * virtual addresses to machine addresses. - * - * (Pseudo-)physical addresses are the abstraction of physical memory the - * guest uses for allocation and so forth. For the purposes of this code, - * we can largely ignore them. - * - * Guest frame numbers (gfns) are the entries that the guest puts in its - * pagetables. For normal paravirtual guests, they are actual frame numbers, - * with the translation done by the guest.
- * - * Machine frame numbers (mfns) are the entries that the hypervisor puts - * in the shadow page tables. - * - * Elsewhere in the xen code base, the name "gmfn" is generally used to refer - * to a "machine frame number, from the guest's perspective", or in other - * words, pseudo-physical frame numbers. However, in the shadow code, the - * term "gmfn" means "the mfn of a guest page"; this combines naturally with - * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a - * guest L2 page), etc... - */ - -/* With this defined, we do some ugly things to force the compiler to - * give us type safety between mfns and gfns and other integers. - * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions - * that translate beween int and foo_t. - * - * It does have some performance cost because the types now have - * a different storage attribute, so may not want it on all the time. */ - -#ifndef NDEBUG -#define TYPE_SAFETY 1 -#endif - -#ifdef TYPE_SAFETY -#define TYPE_SAFE(_type,_name) \ -typedef struct { _type _name; } _name##_t; \ -static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ -static inline _type _name##_x(_name##_t n) { return n._name; } -#else -#define TYPE_SAFE(_type,_name) \ -typedef _type _name##_t; \ -static inline _name##_t _##_name(_type n) { return n; } \ -static inline _type _name##_x(_name##_t n) { return n; } -#endif - -TYPE_SAFE(unsigned long,mfn); + +/* Logging helpers: every message is prefixed with the calling function's name. + * PAGING_DEBUG(flag, ...) only emits when PAGING_DEBUG_<flag> is non-zero. */ +#define PAGING_PRINTK(_f, _a...) \ + debugtrace_printk("pg: %s(): " _f, __func__, ##_a) +#define PAGING_ERROR(_f, _a...) \ + printk("pg error: %s(): " _f, __func__, ##_a) +#define PAGING_DEBUG(flag, _f, _a...)
\ + do { \ + if (PAGING_DEBUG_ ## flag) \ + debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ + } while (0) + +#define PG_PRI_mfn "05lx" + +/* The flags for use with paging debug */ +#define PAGING_AUDIT_P2M 0 +#define PAGING_DEBUG_P2M 0 +/***************************************************************************** + * Macros to tell which paging mode a domain is in */ + + +#define PG_SHM2_shift 20 +#define PG_HAP_shift 21 +/* Top-level mode bits: one per paging implementation (shadow "SHM2" or HAP) */ +#define PG_SHM2_enable (1U << PG_SHM2_shift) +#define PG_HAP_enable (1U << PG_HAP_shift) + +/* common paging mode bits */ +#define PG_shift 10 +/* Refcounts based on shadow tables instead of guest tables */ +#define PG_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_shift) +/* Enable log dirty mode */ +#define PG_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_shift) +/* Xen does p2m translation, not guest */ +#define PG_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_shift) +/* Xen does not steal address space from the domain for its own bookkeeping; + * requires VT or similar mechanisms */ +#define PG_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_shift) + +/* Predicates on (_d)->arch.paging.mode; each yields non-zero when the bit is set */ +#define paging_mode_enabled(_d) ((_d)->arch.paging.mode) +#define paging_mode_shadow(_d) ((_d)->arch.paging.mode & PG_SHM2_enable) +#define paging_mode_hap(_d) ((_d)->arch.paging.mode & PG_HAP_enable) + +#define paging_mode_refcounts(_d) ((_d)->arch.paging.mode & PG_refcounts) +#define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty) +#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate) +#define paging_mode_external(_d) ((_d)->arch.paging.mode & PG_external) +/************************************************/ +/* Misc Functions */ +/************************************************/ +/* Type-safe (mfn_t) wrappers around pagetable_get_pfn() / pagetable_from_pfn() */ +static inline mfn_t +pagetable_get_mfn(pagetable_t pt) +{ + return _mfn(pagetable_get_pfn(pt)); +} + +static inline pagetable_t +pagetable_from_mfn(mfn_t mfn) +{ + return pagetable_from_pfn(mfn_x(mfn)); +}
/************************************************/ /* shadow paging extension */ @@ -246,6 +251,158 @@ static inline void paging_final_teardown { d->arch.paging.final_teardown(d); } + +/************************************************/ +/* P2M table function */ +/************************************************/ +/* The p2m lock is a per-domain spinlock that also records which CPU and + * which function took it. p2m_lock() BUG()s if the calling CPU already + * holds the lock, so it must not be taken recursively. */ +#define p2m_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.paging.p2m_lock); \ + (_d)->arch.paging.p2m_locker = -1; \ + (_d)->arch.paging.p2m_locker_function = "nobody"; \ + } while (0) + +#define p2m_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.paging.p2m_locker == current->processor) )\ + { \ + printk("Error: p2m lock held by %s\n", \ + (_d)->arch.paging.p2m_locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.paging.p2m_lock); \ + ASSERT((_d)->arch.paging.p2m_locker == -1); \ + (_d)->arch.paging.p2m_locker = current->processor; \ + (_d)->arch.paging.p2m_locker_function = __func__; \ + } while (0) + +#define p2m_unlock(_d) \ + do { \ + ASSERT((_d)->arch.paging.p2m_locker == current->processor); \ + (_d)->arch.paging.p2m_locker = -1; \ + (_d)->arch.paging.p2m_locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.paging.p2m_lock); \ + } while (0) + + +#if PAGING_AUDIT_P2M +void paging_audit_p2m(struct domain *d); +#else +#define paging_audit_p2m(_d) do {} while(0) +#endif + +/* The three wrappers below dispatch through the per-domain function + * pointers held in d->arch.paging. */ +static inline mfn_t paging_alloc_p2m_page(struct domain *d) +{ + return d->arch.paging.alloc_p2m_page(d); +} + +static inline void paging_free_p2m_page(struct domain *d, struct page_info *pg) +{ + d->arch.paging.free_p2m_page(d, pg); +} + +static inline void paging_write_p2m_entry(struct domain *d, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int lev) +{ + d->arch.paging.write_p2m_entry(d, gfn, p, new, lev); +} + +int paging_alloc_p2m_table(struct domain *d); +void paging_p2m_teardown(struct domain *d); +int paging_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); + +/* The phys_to_machine_mapping is the reversed mapping of MPT for
full + * virtualization. It is only used by paging_mode_translate()==true + * guests, so we steal the address space that would have normally + * been used by the read-only MPT map. + */ +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) + +/* Read the current domain's P2M table. + * Returns INVALID_MFN if gfn is above max_mapped_pfn, if the read of the + * entry fails, or if the entry is not present. */ +static inline mfn_t gfn_to_mfn_current(unsigned long gfn) +{ + l1_pgentry_t l1e = l1e_empty(); + int ret; + + if ( gfn > current->domain->arch.max_mapped_pfn ) + return _mfn(INVALID_MFN); + + /* Don't read off the end of the p2m table */ + ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); + + ret = __copy_from_user(&l1e, + &phys_to_machine_mapping[gfn], + sizeof(l1e)); + + if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) + return _mfn(l1e_get_pfn(l1e)); + + return _mfn(INVALID_MFN); +} + +/* Read another domain's P2M table, mapping pages as we go */ +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); + +/* General conversion function from gfn to mfn: the identity for domains + * that are not in translate mode, otherwise a p2m lookup */ +static inline mfn_t +paging_gfn_to_mfn(struct domain *d, unsigned long gfn) +{ + if ( !paging_mode_translate(d) ) + return _mfn(gfn); + if ( likely(current->domain == d) ) + return gfn_to_mfn_current(gfn); + else + return gfn_to_mfn_foreign(d, gfn); +} + +/* General conversion function from mfn to gfn */ +static inline unsigned long +paging_mfn_to_gfn(struct domain *d, mfn_t mfn) +{ + if ( paging_mode_translate(d) ) + return get_gpfn_from_mfn(mfn_x(mfn)); + else + return mfn_x(mfn); +} + +/* Compatibility function for HVM code */ +static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) +{ + return mfn_x(gfn_to_mfn_current(pfn)); +} + +/* Is this guest address an mmio one? (i.e.
not defined in p2m map). + * The lookup goes through gfn_to_mfn_current(), i.e. the current domain's p2m. */ +static inline int +mmio_space(paddr_t gpa) +{ + unsigned long gfn = gpa >> PAGE_SHIFT; + return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn))); +} + +/* Translate the frame number held in an l1e from guest to machine */ +static inline l1_pgentry_t +gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) +{ + if ( unlikely(paging_mode_translate(d)) ) + l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), + l1e_get_flags(l1e)); + return l1e; +} + + +/**************************************************************************/ +/* Add a page to a domain's p2m table */ +void paging_guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn); +/* Remove a page from a domain's p2m table */ +void paging_guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn); + +/* Aliases, called from common code. */ +#define guest_physmap_add_page paging_guest_physmap_add_page +#define guest_physmap_remove_page paging_guest_physmap_remove_page + #endif /* XEN_PAGING_H */ /* diff -r fb427d7167fd xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Tue Jan 23 19:04:06 2007 -0600 +++ b/xen/include/asm-x86/shadow.h Wed Jan 24 11:46:43 2007 -0600 @@ -31,30 +31,29 @@ #include /***************************************************************************** - * Macros to tell which shadow paging mode a domain is in */ - -#define SHM2_shift 10 -/* We're in one of the shadow modes */ -#define SHM2_enable (1U << SHM2_shift) + * Macros to tell which shadow paging mode a domain is in*/ + +#define SHM2_shift PG_SHM2_shift +#define SHM2_enable PG_SHM2_enable /* Refcounts based on shadow tables instead of guest tables */ -#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift) +#define SHM2_refcounts PG_refcounts /* Enable log dirty mode */ -#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift) +#define SHM2_log_dirty PG_log_dirty /* Xen does p2m translation, not guest */ -#define
SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift) +#define SHM2_translate PG_translate /* Xen does not steal address space from the domain for its own booking; * requires VT or similar mechanisms */ -#define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift) - -#define shadow_mode_enabled(_d) ((_d)->arch.paging.shadow.mode) -#define shadow_mode_refcounts(_d) ((_d)->arch.paging.shadow.mode & \ - SHM2_refcounts) -#define shadow_mode_log_dirty(_d) ((_d)->arch.paging.shadow.mode & \ - SHM2_log_dirty) -#define shadow_mode_translate(_d) ((_d)->arch.paging.shadow.mode & \ - SHM2_translate) -#define shadow_mode_external(_d) ((_d)->arch.paging.shadow.mode & \ - SHM2_external) +#define SHM2_external PG_external + +#define shadow_mode_enabled(_d) paging_mode_shadow(_d) +#define shadow_mode_refcounts(_d) (paging_mode_shadow(_d) && \ + paging_mode_refcounts(_d)) +#define shadow_mode_log_dirty(_d) (paging_mode_shadow(_d) && \ + paging_mode_log_dirty(_d)) +#define shadow_mode_translate(_d) (paging_mode_shadow(_d) && \ + paging_mode_translate(_d)) +#define shadow_mode_external(_d) (paging_mode_shadow(_d) && \ + paging_mode_external(_d)) /* Xen traps & emulates all reads of all page table pages: * not yet supported */ @@ -125,7 +124,11 @@ static inline void mark_dirty(struct dom * has changed, and when bringing up a VCPU for the first time. 
*/ void shadow_update_paging_modes(struct vcpu *v); - +mfn_t shadow_alloc_p2m_page(struct domain *d); +void shadow_free_p2m_page(struct domain *d, struct page_info *pg); +void shadow_write_p2m_entry(struct domain *d, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level); /***************************************************************************** * Access to the guest pagetables */ @@ -223,99 +226,6 @@ static inline void shadow_remove_all_sha sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */); } -/**************************************************************************/ -/* Guest physmap (p2m) support - * - * The phys_to_machine_mapping is the reversed mapping of MPT for full - * virtualization. It is only used by shadow_mode_translate()==true - * guests, so we steal the address space that would have normally - * been used by the read-only MPT map. - */ -#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) - -/* Add a page to a domain's p2m table */ -void shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Remove a page from a domain's p2m table */ -void shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); - -/* Aliases, called from common code. */ -#define guest_physmap_add_page shadow_guest_physmap_add_page -#define guest_physmap_remove_page shadow_guest_physmap_remove_page - -/* Read the current domain's P2M table. 
*/ -static inline mfn_t sh_gfn_to_mfn_current(unsigned long gfn) -{ - l1_pgentry_t l1e = l1e_empty(); - int ret; - - if ( gfn > current->domain->arch.max_mapped_pfn ) - return _mfn(INVALID_MFN); - - /* Don't read off the end of the p2m table */ - ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); - - ret = __copy_from_user(&l1e, - &phys_to_machine_mapping[gfn], - sizeof(l1e)); - - if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) - return _mfn(l1e_get_pfn(l1e)); - - return _mfn(INVALID_MFN); -} - -/* Read another domain's P2M table, mapping pages as we go */ -mfn_t sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); - -/* General conversion function from gfn to mfn */ -static inline mfn_t -sh_gfn_to_mfn(struct domain *d, unsigned long gfn) -{ - if ( !shadow_mode_translate(d) ) - return _mfn(gfn); - if ( likely(current->domain == d) ) - return sh_gfn_to_mfn_current(gfn); - else - return sh_gfn_to_mfn_foreign(d, gfn); -} - -/* Compatibility function for HVM code */ -static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) -{ - return mfn_x(sh_gfn_to_mfn_current(pfn)); -} - -/* General conversion function from mfn to gfn */ -static inline unsigned long -sh_mfn_to_gfn(struct domain *d, mfn_t mfn) -{ - if ( shadow_mode_translate(d) ) - return get_gpfn_from_mfn(mfn_x(mfn)); - else - return mfn_x(mfn); -} - -/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ -static inline int -mmio_space(paddr_t gpa) -{ - unsigned long gfn = gpa >> PAGE_SHIFT; - return !mfn_valid(mfn_x(sh_gfn_to_mfn_current(gfn))); -} - -/* Translate the frame number held in an l1e from guest to machine */ -static inline l1_pgentry_t -gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) -{ - if ( unlikely(shadow_mode_translate(d)) ) - l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), - l1e_get_flags(l1e)); - return l1e; -} - #endif /* _XEN_SHADOW_H */ /*