# HG changeset patch
# User Tim Deegan
# Date 1282319533 -3600
# Node ID 1544aa105c624f8a49e16900b97e3f10aa30d0cd
# Parent  66abfa6bc671b9b67c1fd729ddb9292c969d6ca2
x86 shadow: explicitly link the pages of multipage shadows.

x86 shadow: explicitly link the pages of multipage shadows together
using their list headers.  Update the users of the pinned-shadows list
to expect l2_32 shadows to have four entries in the list, which must
be kept together during updates.

Signed-off-by: Tim Deegan

diff -r 66abfa6bc671 -r 1544aa105c62 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Fri Aug 20 16:52:12 2010 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Fri Aug 20 16:52:13 2010 +0100
@@ -1377,7 +1377,7 @@ static void _shadow_prealloc(
 
     /* Stage one: walk the list of pinned pages, unpinning them */
     perfc_incr(shadow_prealloc_1);
-    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    foreach_pinned_shadow(d, sp, t)
     {
         smfn = page_to_mfn(sp);
 
@@ -1445,7 +1445,7 @@ static void shadow_blow_tables(struct do
     ASSERT(v != NULL);
 
     /* Pass one: unpin all pinned pages */
-    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
+    foreach_pinned_shadow(d, sp, t)
    {
         smfn = page_to_mfn(sp);
         sh_unpin(v, smfn);
@@ -1527,6 +1527,7 @@ mfn_t shadow_alloc(struct domain *d,
 {
     struct page_info *sp = NULL;
     unsigned int order = shadow_order(shadow_type);
+    struct page_list_head tmp_list;
     cpumask_t mask;
     void *p;
     int i;
@@ -1572,6 +1573,11 @@ mfn_t shadow_alloc(struct domain *d,
         break;
     }
 
+    /* Page lists don't have pointers back to the head structure, so
+     * it's safe to use a head structure on the stack to link the pages
+     * together. */
+    INIT_PAGE_LIST_HEAD(&tmp_list);
+
     /* Init page info fields and clear the pages */
     for ( i = 0; i < 1<<order; i++ )
[...]
     ASSERT(sp->u.sh.type > 0);
     ASSERT(sp->u.sh.type < SH_type_max_shadow);
-    ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
-    ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
-    ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
-    ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
+    ASSERT(sh_type_has_up_pointer(v, sp->u.sh.type));
     if (sp->up == 0) return 0;
     pmfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2823,7 +2827,7 @@ void sh_remove_shadows(struct vcpu *v, m
         }                                                       \
         if ( sh_type_is_pinnable(v, t) )                        \
             sh_unpin(v, smfn);                                  \
-        else                                                    \
+        else if ( sh_type_has_up_pointer(v, t) )                \
             sh_remove_shadow_via_pointer(v, smfn);              \
         if( !fast                                               \
             && (pg->count_info & PGC_page_table)                \
diff -r 66abfa6bc671 -r 1544aa105c62 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Fri Aug 20 16:52:12 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Fri Aug 20 16:52:13 2010 +0100
@@ -1588,10 +1588,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
     SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
 
-    if ( shadow_type != SH_type_l2_32_shadow
-         && shadow_type != SH_type_l2_pae_shadow
-         && shadow_type != SH_type_l2h_pae_shadow
-         && shadow_type != SH_type_l4_64_shadow )
+    if ( sh_type_has_up_pointer(v, shadow_type) )
         /* Lower-level shadow, not yet linked form a higher level */
         mfn_to_page(smfn)->up = 0;
 
@@ -1622,7 +1619,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
         page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
         {
             if ( sp->u.sh.type == SH_type_l3_64_shadow )
+            {
                 sh_unpin(v, page_to_mfn(sp));
+                sp->up = 0;
+            }
         }
         v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
     }
@@ -2534,9 +2534,12 @@ int sh_safe_not_to_sync(struct vcpu *v,
     struct page_info *sp;
     mfn_t smfn;
 
+    if ( !sh_type_has_up_pointer(v, SH_type_l1_shadow) )
+        return 0;
+
     smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
     ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
-    
+
     /* Up to l2 */
     sp = mfn_to_page(smfn);
     if ( sp->u.sh.count != 1 || !sp->up )
@@ -2547,6 +2550,7 @@ int sh_safe_not_to_sync(struct vcpu *v,
 #if (SHADOW_PAGING_LEVELS == 4)
     /* up to l3 */
     sp = mfn_to_page(smfn);
+    ASSERT(sh_type_has_up_pointer(v, SH_type_l2_shadow));
     if ( sp->u.sh.count != 1 || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
@@ -2555,17 +2559,10 @@ int sh_safe_not_to_sync(struct vcpu *v,
     /* up to l4 */
     sp = mfn_to_page(smfn);
     if ( sp->u.sh.count != 1
-         || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
+         || !sh_type_has_up_pointer(v, SH_type_l3_64_shadow) || !sp->up )
         return 0;
     smfn = _mfn(sp->up >> PAGE_SHIFT);
     ASSERT(mfn_valid(smfn));
-#endif
-
-#if (GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS == 3)
-    /* In 2-on-3 shadow mode the up pointer contains the link to the
-     * shadow page, but the shadow_table contains only the first of the
-     * four pages that makes the PAE top shadow tables. */
-    smfn = _mfn(mfn_x(smfn) & ~0x3UL);
 #endif
 
     if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn)
diff -r 66abfa6bc671 -r 1544aa105c62 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Fri Aug 20 16:52:12 2010 +0100
+++ b/xen/arch/x86/mm/shadow/private.h	Fri Aug 20 16:52:13 2010 +0100
@@ -270,6 +270,17 @@ static inline int sh_type_is_pinnable(st
 
     /* Everything else is not pinnable, and can use the "up" pointer */
     return 0;
+}
+
+static inline int sh_type_has_up_pointer(struct vcpu *v, unsigned int t)
+{
+    /* Multi-page shadows don't have up-pointers */
+    if ( t == SH_type_l1_32_shadow
+         || t == SH_type_fl1_32_shadow
+         || t == SH_type_l2_32_shadow )
+        return 0;
+    /* Pinnable shadows don't have up-pointers either */
+    return !sh_type_is_pinnable(v, t);
 }
 
 /*
@@ -642,7 +653,7 @@ static inline int sh_get_ref(struct vcpu
 
     /* We remember the first shadow entry that points to each shadow. */
     if ( entry_pa != 0
-         && !sh_type_is_pinnable(v, sp->u.sh.type)
+         && sh_type_has_up_pointer(v, sp->u.sh.type)
         && sp->up == 0 )
         sp->up = entry_pa;
 
@@ -663,7 +674,7 @@ static inline void sh_put_ref(struct vcp
 
     /* If this is the entry in the up-pointer, remove it */
     if ( entry_pa != 0
-         && !sh_type_is_pinnable(v, sp->u.sh.type)
+         && sh_type_has_up_pointer(v, sp->u.sh.type)
        && sp->up == entry_pa )
         sp->up = 0;
 
@@ -685,21 +696,76 @@ static inline void sh_put_ref(struct vcp
 }
 
+/* Walk the list of pinned shadows, from the tail forwards,
+ * skipping the non-head-page entries */
+static inline struct page_info *
+prev_pinned_shadow(const struct page_info *page,
+                   const struct domain *d)
+{
+    struct page_info *p;
+
+    if ( page == d->arch.paging.shadow.pinned_shadows.next )
+        return NULL;
+
+    if ( page == NULL ) /* If no current place, start at the tail */
+        p = d->arch.paging.shadow.pinned_shadows.tail;
+    else
+        p = pdx_to_page(page->list.prev);
+    /* Skip over the non-tail parts of multi-page shadows */
+    if ( p && p->u.sh.type == SH_type_l2_32_shadow )
+    {
+        p = pdx_to_page(p->list.prev);
+        ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
+        p = pdx_to_page(p->list.prev);
+        ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
+        p = pdx_to_page(p->list.prev);
+        ASSERT(p && p->u.sh.type == SH_type_l2_32_shadow);
+    }
+    ASSERT(!p || p->u.sh.head);
+    return p;
+}
+
+#define foreach_pinned_shadow(dom, pos, tmp)                    \
+    for ( pos = prev_pinned_shadow(NULL, (dom));                \
+          pos ? (tmp = prev_pinned_shadow(pos, (dom)), 1) : 0;  \
+          pos = tmp )
+
 /* Pin a shadow page: take an extra refcount, set the pin bit,
  * and put the shadow at the head of the list of pinned shadows.
  * Returns 0 for failure, 1 for success. */
 static inline int sh_pin(struct vcpu *v, mfn_t smfn)
 {
     struct page_info *sp;
+    struct page_list_head h, *pin_list;
 
     ASSERT(mfn_valid(smfn));
     sp = mfn_to_page(smfn);
     ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+    ASSERT(sp->u.sh.head);
+
+    /* Treat the up-to-four pages of the shadow as a unit in the list ops */
+    h.next = h.tail = sp;
+    if ( sp->u.sh.type == SH_type_l2_32_shadow )
+    {
+        h.tail = pdx_to_page(h.tail->list.next);
+        h.tail = pdx_to_page(h.tail->list.next);
+        h.tail = pdx_to_page(h.tail->list.next);
+        ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
+    }
+    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+
     if ( sp->u.sh.pinned )
     {
         /* Already pinned: take it out of the pinned-list so it can go
          * at the front */
-        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
+        if ( pin_list->next == h.next )
+            return 1;
+        page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
+        if ( pin_list->tail == h.tail )
+            pin_list->tail = page_list_prev(h.next, pin_list);
+        else
+            page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+        h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
     }
     else
     {
@@ -707,9 +773,11 @@ static inline int sh_pin(struct vcpu *v,
         if ( !sh_get_ref(v, smfn, 0) )
             return 0;
         sp->u.sh.pinned = 1;
+        ASSERT(h.next->list.prev == PAGE_LIST_NULL);
+        ASSERT(h.tail->list.next == PAGE_LIST_NULL);
     }
 
     /* Put it at the head of the list of pinned shadows */
-    page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
+    page_list_splice(&h, pin_list);
     return 1;
 }
 
@@ -717,18 +785,47 @@ static inline int sh_pin(struct vcpu *v,
 /* Unpin a shadow page: unset the pin bit, take the shadow off the list
  * of pinned shadows, and release the extra ref. */
 static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
 {
+    struct page_list_head h, *pin_list;
     struct page_info *sp;
 
     ASSERT(mfn_valid(smfn));
     sp = mfn_to_page(smfn);
     ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
-    if ( sp->u.sh.pinned )
+    ASSERT(sp->u.sh.head);
+
+    /* Treat the up-to-four pages of the shadow as a unit in the list ops */
+    h.next = h.tail = sp;
+    if ( sp->u.sh.type == SH_type_l2_32_shadow )
     {
-        sp->u.sh.pinned = 0;
-        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
-        sp->up = 0; /* in case this stops being a pinnable type in future */
-        sh_put_ref(v, smfn, 0);
+        h.tail = pdx_to_page(h.tail->list.next);
+        h.tail = pdx_to_page(h.tail->list.next);
+        h.tail = pdx_to_page(h.tail->list.next);
+        ASSERT(h.tail->u.sh.type == SH_type_l2_32_shadow);
     }
+    pin_list = &v->domain->arch.paging.shadow.pinned_shadows;
+
+    if ( !sp->u.sh.pinned )
+        return;
+
+    sp->u.sh.pinned = 0;
+
+    /* Cut the sub-list out of the list of pinned shadows */
+    if ( pin_list->next == h.next && pin_list->tail == h.tail )
+        pin_list->next = pin_list->tail = NULL;
+    else
+    {
+        if ( pin_list->next == h.next )
+            pin_list->next = page_list_next(h.tail, pin_list);
+        else
+            page_list_prev(h.next, pin_list)->list.next = h.tail->list.next;
+        if ( pin_list->tail == h.tail )
+            pin_list->tail = page_list_prev(h.next, pin_list);
+        else
+            page_list_next(h.tail, pin_list)->list.prev = h.next->list.prev;
+    }
+    h.tail->list.next = h.next->list.prev = PAGE_LIST_NULL;
+
+    sh_put_ref(v, smfn, 0);
 }
diff -r 66abfa6bc671 -r 1544aa105c62 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h	Fri Aug 20 16:52:12 2010 +0100
+++ b/xen/include/asm-x86/mm.h	Fri Aug 20 16:52:13 2010 +0100
@@ -35,13 +35,18 @@ struct page_info
     union {
         /* Each frame can be threaded onto a doubly-linked list.
          *
-         * For unused shadow pages, a list of pages of this order; for
-         * pinnable shadows, if pinned, a list of other pinned shadows
-         * (see sh_type_is_pinnable() below for the definition of
-         * "pinnable" shadow types).
+         * For unused shadow pages, a list of pages of this order;
+         * for multi-page shadows, links to the other pages in this shadow;
+         * for pinnable shadows, if pinned, a list of all pinned shadows
+         * (see sh_type_is_pinnable() for the definition of "pinnable"
+         * shadow types). N.B. a shadow may be both pinnable and multi-page.
+         * In that case the pages are inserted in order in the list of
+         * pinned shadows and walkers of that list must be prepared
+         * to keep them all together during updates.
          */
         struct page_list_entry list;
-        /* For non-pinnable shadows, a higher entry that points at us. */
+        /* For non-pinnable single-page shadows, a higher entry that points
+         * at us. */
         paddr_t up;
         /* For shared/sharable pages the sharing handle */
         uint64_t shr_handle;
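As an illustration of the list surgery above, here is a stand-alone sketch. It is hypothetical code, not part of the patch: the node/list types and the splice_front()/cut_run() helpers are invented for the example. It models a pinned list in which one shadow spans four consecutive entries, and cuts the whole run out in one operation, following the same four cases as the sh_unpin() hunk: run is the whole list, run at the head, run at the tail, run in the middle.

#include <assert.h>
#include <stdio.h>

enum { PAGES_PER_L2_32 = 4 };   /* an l2_32 shadow spans four pages */

struct node {
    struct node *prev, *next;
    int shadow_id;              /* which shadow this page belongs to */
    int head;                   /* 1 for the first page of a shadow */
};

struct list {
    struct node *next, *tail;   /* like page_list_head: first and last */
};

/* Link the run pages[0..n-1] at the front of the list, the way
 * page_list_splice() attaches the stack-local head in sh_pin(). */
static void splice_front(struct list *l, struct node *pages, int n, int id)
{
    int i;
    for ( i = 0; i < n; i++ )
    {
        pages[i].shadow_id = id;
        pages[i].head = (i == 0);
        pages[i].prev = (i > 0) ? &pages[i - 1] : NULL;
        pages[i].next = (i < n - 1) ? &pages[i + 1] : NULL;
    }
    pages[n - 1].next = l->next;
    if ( l->next )
        l->next->prev = &pages[n - 1];
    else
        l->tail = &pages[n - 1];
    l->next = &pages[0];
}

/* Cut the inclusive run [first, last] out of the list in one operation,
 * following the same case analysis as the sh_unpin() hunk above. */
static void cut_run(struct list *l, struct node *first, struct node *last)
{
    if ( l->next == first && l->tail == last )
        l->next = l->tail = NULL;            /* run was the whole list */
    else
    {
        if ( l->next == first )
            l->next = last->next;            /* run was at the head... */
        else
            first->prev->next = last->next;
        if ( l->tail == last )
            l->tail = first->prev;           /* ...or at the tail */
        else
            last->next->prev = first->prev;
    }
    first->prev = last->next = NULL;         /* PAGE_LIST_NULL analogue */
}

int main(void)
{
    struct list pinned = { NULL, NULL };
    struct node l1[1], l2[PAGES_PER_L2_32];
    struct node *first, *last, *p;
    int i;

    splice_front(&pinned, l1, 1, 1);                /* single-page shadow */
    splice_front(&pinned, l2, PAGES_PER_L2_32, 2);  /* four-page shadow */

    /* "Unpin" shadow 2: find the last page of its run, then cut all
     * four pages out together. */
    first = &l2[0];
    last = first;
    for ( i = 1; i < PAGES_PER_L2_32; i++ )
        last = last->next;
    assert(last->shadow_id == first->shadow_id);
    cut_run(&pinned, first, last);

    for ( p = pinned.next; p; p = p->next )
        printf("shadow %d%s\n", p->shadow_id, p->head ? " (head)" : "");
    return 0;
}

The case analysis is needed because the list head keeps plain first/last pointers rather than a sentinel node, so removing a run that touches either end of the list must update the head structure itself, exactly as sh_pin() and sh_unpin() do with pin_list->next and pin_list->tail.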
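The walk that foreach_pinned_shadow() performs can be sketched the same way, reusing the node/list types from the example above. prev_head() and foreach_pinned() are again invented names, and the generic "skip until a head page" loop stands in for the patch's fixed three-step skip over SH_type_l2_32_shadow pages.

/* Head page of the pinned shadow before `cur`; start from the tail when
 * cur == NULL; NULL once the walk has passed the first entry. */
static struct node *prev_head(const struct list *l, const struct node *cur)
{
    struct node *p;

    if ( cur == l->next )          /* already at the first entry */
        return NULL;
    p = cur ? cur->prev : l->tail;
    while ( p && !p->head )        /* skip non-head pages of a run */
        p = p->prev;
    return p;
}

/* Safe reverse walk over shadow head pages: `tmp` is computed before the
 * body executes, so the body may cut pos's whole run out of the list. */
#define foreach_pinned(l, pos, tmp)                           \
    for ( (pos) = prev_head((l), NULL);                       \
          (pos) ? ((tmp) = prev_head((l), (pos)), 1) : 0;     \
          (pos) = (tmp) )

Fetching tmp before the loop body runs mirrors the page_list_for_each_safe_reverse() walk this macro replaces in _shadow_prealloc() and shadow_blow_tables(), where the body unpins, and therefore unlinks, the entry it is looking at.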