The hashtable mechanism used by the sharing code is a bottleneck. To remove complexity, this patch strips out the hashtable and adds the necessary shared gfn information directly into the page_info structure (without changing the size). The audit code is made to scan all MFNs instead of relying on the hashtable. Although this may take more time, this may be more robust since it will allow us to detect pages that are "shared" in type but for some reason we haven't accounted for correctly -- a different class of error. (Alternatively, a global list of shared pages could also be added to improve or enhance the audit code instead of the original hashtable -- but we'd still rather not have to search through it when sharing or unsharing a page.) diff -r 38135be750ed xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4230,12 +4230,19 @@ int page_make_sharable(struct domain *d, struct page_info *page, int expected_refcnt) { + struct list_head *shared_gfns = NULL; + + if((shared_gfns = xmalloc(struct list_head)) == NULL) + return -ENOMEM; + INIT_LIST_HEAD(shared_gfns); + spin_lock(&d->page_alloc_lock); /* Change page type and count atomically */ if ( !get_page_and_type(page, d, PGT_shared_page) ) { spin_unlock(&d->page_alloc_lock); + xfree(shared_gfns); return -EINVAL; } @@ -4244,6 +4251,7 @@ int page_make_sharable(struct domain *d, { put_page_and_type(page); spin_unlock(&d->page_alloc_lock); + xfree(shared_gfns); return -EEXIST; } @@ -4254,12 +4262,14 @@ int page_make_sharable(struct domain *d, /* Return type count back to zero */ put_page_and_type(page); spin_unlock(&d->page_alloc_lock); + xfree(shared_gfns); return -E2BIG; } page_set_owner(page, dom_cow); d->tot_pages--; page_list_del(page, &d->page_list); + page->shared_gfns = shared_gfns; spin_unlock(&d->page_alloc_lock); return 0; } @@ -4280,6 +4290,9 @@ int page_make_private(struct domain *d, return -EEXIST; } + /* Free the shared page info */ + xfree(page->shared_gfns); + /* Drop the final typecount */ put_page_and_type(page); diff -r 38135be750ed xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -39,8 +39,8 @@ #if MEM_SHARING_AUDIT static void mem_sharing_audit(void); -#define MEM_SHARING_DEBUG(_f, _a...) \ - debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a) +#define MEM_SHARING_DEBUG(_f, _a...) \ + debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a) #else # define mem_sharing_audit() do {} while(0) #endif /* MEM_SHARING_AUDIT */ @@ -55,19 +55,7 @@ static void mem_sharing_audit(void); #undef page_to_mfn #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) -static shr_handle_t next_handle = 1; -static atomic_t nr_saved_mfns = ATOMIC_INIT(0); - -typedef struct shr_hash_entry -{ - shr_handle_t handle; - mfn_t mfn; - struct shr_hash_entry *next; - struct list_head gfns; -} shr_hash_entry_t; - -#define SHR_HASH_LENGTH 1000 -static shr_hash_entry_t *shr_hash[SHR_HASH_LENGTH]; +static atomic_t nr_saved_mfns = ATOMIC_INIT(0); typedef struct gfn_info { @@ -84,28 +72,9 @@ static inline int list_has_one_entry(str return (head->next != head) && (head->next->next == head); } -static inline struct gfn_info* gfn_get_info(struct list_head *list) +static inline gfn_info_t* gfn_get_info(struct list_head *list) { - return list_entry(list->next, struct gfn_info, list); -} - -static void __init mem_sharing_hash_init(void) -{ - int i; - - mm_lock_init(&shr_lock); - for(i=0; inext, gfn_info_t, list); } static gfn_info_t *mem_sharing_gfn_alloc(void) @@ -130,125 +99,99 @@ static void mem_sharing_gfn_destroy(gfn_ xfree(gfn); } -static shr_hash_entry_t* mem_sharing_hash_lookup(shr_handle_t handle) +static struct page_info* mem_sharing_lookup(shr_handle_t handle) { - shr_hash_entry_t *e; - - e = shr_hash[handle % SHR_HASH_LENGTH]; - while(e != NULL) + if( mfn_valid(_mfn(handle)) ) { - if(e->handle == handle) - return e; - e = e->next; + struct page_info* page = mfn_to_page(_mfn(handle)); + if( page_get_owner(page) == dom_cow ) + return page; } return NULL; } -static shr_hash_entry_t* mem_sharing_hash_insert(shr_handle_t handle, mfn_t mfn) +#if MEM_SHARING_AUDIT +static int mem_sharing_audit(void) { - shr_hash_entry_t *e, **ee; - - e = mem_sharing_hash_alloc(); - if(e == NULL) return NULL; - e->handle = handle; - e->mfn = mfn; - ee = &shr_hash[handle % SHR_HASH_LENGTH]; - e->next = *ee; - *ee = e; - return e; -} - -static void mem_sharing_hash_delete(shr_handle_t handle) -{ - shr_hash_entry_t **pprev, *e; - - pprev = &shr_hash[handle % SHR_HASH_LENGTH]; - e = *pprev; - while(e != NULL) - { - if(e->handle == handle) - { - *pprev = e->next; - mem_sharing_hash_destroy(e); - return; - } - pprev = &e->next; - e = e->next; - } - printk("Could not find shr entry for handle %"PRIx64"\n", handle); - BUG(); -} - -#if MEM_SHARING_AUDIT -static void mem_sharing_audit(void) -{ - shr_hash_entry_t *e; - struct list_head *le; - gfn_info_t *g; - int bucket; - struct page_info *pg; + int errors = 0; + unsigned long count_found = 0; + unsigned long mfn; ASSERT(shr_locked_by_me()); - for(bucket=0; bucket < SHR_HASH_LENGTH; bucket++) + for( mfn = 0; mfn < max_page; mfn++ ) { - e = shr_hash[bucket]; - /* Loop over all shr_hash_entries */ - while(e != NULL) + unsigned long nr_gfns = 0; + struct page_info *pg; + struct list_head *le; + gfn_info_t *g; + + if( !mfn_valid(_mfn(mfn)) ) + continue; + + pg = mfn_to_page(_mfn(mfn)); + + /* Check if the MFN has correct type, owner and handle. */ + if((pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page) + continue; + + /* Count this page. */ + count_found++; + + /* Check the page owner. */ + if(page_get_owner(pg) != dom_cow) { - int nr_gfns=0; + MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%d)!\n", + mfn, page_get_owner(pg)->domain_id); + errors++; + } - /* Check if the MFN has correct type, owner and handle */ - pg = mfn_to_page(e->mfn); - if((pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page) - MEM_SHARING_DEBUG("mfn %lx not shared, but in the hash!\n", - mfn_x(e->mfn)); - if(page_get_owner(pg) != dom_cow) - MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%d)!\n", - mfn_x(e->mfn), - page_get_owner(pg)->domain_id); - if(e->handle != pg->shr_handle) - MEM_SHARING_DEBUG("mfn %lx shared, but wrong handle " - "(%ld != %ld)!\n", - mfn_x(e->mfn), pg->shr_handle, e->handle); - /* Check if all GFNs map to the MFN, and the p2m types */ - list_for_each(le, &e->gfns) + /* Check if all GFNs map to the MFN, and the p2m types */ + list_for_each(le, pg->shared_gfns) + { + struct domain *d; + p2m_type_t t; + mfn_t o_mfn; + + g = list_entry(le, gfn_info_t, list); + d = get_domain_by_id(g->domain); + if(d == NULL) { - struct domain *d; - p2m_type_t t; - mfn_t mfn; + MEM_SHARING_DEBUG("Unknown dom: %d, for PFN=%lx, MFN=%lx\n", + g->domain, g->gfn, mfn); + errors++; + continue; + } + o_mfn = gfn_to_mfn(d, g->gfn, &t); + if(mfn_x(o_mfn) != mfn) + { + MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx." + "Expecting MFN=%ld, got %ld\n", + g->domain, g->gfn, mfn, mfn_x(o_mfn)); + errors++; + } + if(t != p2m_ram_shared) + { + MEM_SHARING_DEBUG("Incorrect P2M type for d=%d, PFN=%lx." + "Expecting t=%d, got %d\n", + g->domain, g->gfn, mfn, p2m_ram_shared, t); + errors++; + } + put_domain(d); + nr_gfns++; + } - g = list_entry(le, struct gfn_info, list); - d = get_domain_by_id(g->domain); - if(d == NULL) - { - MEM_SHARING_DEBUG("Unknow dom: %d, for PFN=%lx, MFN=%lx\n", - g->domain, g->gfn, mfn_x(e->mfn)); - continue; - } - mfn = gfn_to_mfn(d, g->gfn, &t); - if(mfn_x(mfn) != mfn_x(e->mfn)) - MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx." - "Expecting MFN=%ld, got %ld\n", - g->domain, g->gfn, mfn_x(e->mfn), - mfn_x(mfn)); - if(t != p2m_ram_shared) - MEM_SHARING_DEBUG("Incorrect P2M type for d=%d, PFN=%lx." - "Expecting t=%d, got %d\n", - g->domain, g->gfn, mfn_x(e->mfn), - p2m_ram_shared, t); - put_domain(d); - nr_gfns++; - } - if(nr_gfns != (pg->u.inuse.type_info & PGT_count_mask)) - MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx." - "nr_gfns in hash %d, in type_info %d\n", - mfn_x(e->mfn), nr_gfns, - (pg->u.inuse.type_info & PGT_count_mask)); - e = e->next; + if(nr_gfns != (pg->u.inuse.type_info & PGT_count_mask)) + { + MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx." + "nr_gfns in hash %d, in type_info %d\n", + mfn, nr_gfns, (pg->u.inuse.type_info & PGT_count_mask)); + errors++; } } + + return errors; } #endif @@ -392,37 +335,6 @@ static int mem_sharing_gref_to_gfn(struc return -2; } -/* Account for a GFN being shared/unshared. - * When sharing this function needs to be called _before_ gfn lists are merged - * together, but _after_ gfn is removed from the list when unsharing. - */ -static int mem_sharing_gfn_account(struct gfn_info *gfn, int sharing) -{ - struct domain *d; - - /* A) When sharing: - * if the gfn being shared is in > 1 long list, its already been - * accounted for - * B) When unsharing: - * if the list is longer than > 1, we don't have to account yet. - */ - if(list_has_one_entry(&gfn->list)) - { - d = get_domain_by_id(gfn->domain); - BUG_ON(!d); - if(sharing) - atomic_inc(&d->shr_pages); - else - atomic_dec(&d->shr_pages); - put_domain(d); - - return 1; - } - mem_sharing_audit(); - - return 0; -} - int mem_sharing_debug_gref(struct domain *d, grant_ref_t ref) { grant_entry_header_t *shah; @@ -457,8 +369,6 @@ int mem_sharing_nominate_page(struct dom mfn_t mfn; struct page_info *page; int ret; - shr_handle_t handle; - shr_hash_entry_t *hash_entry; struct gfn_info *gfn_info; *phandle = 0UL; @@ -474,7 +384,7 @@ int mem_sharing_nominate_page(struct dom /* Return the handle if the page is already shared */ page = mfn_to_page(mfn); if (p2m_is_shared(p2mt)) { - *phandle = page->shr_handle; + *phandle = mfn_x(mfn); ret = 0; goto out; } @@ -490,16 +400,8 @@ int mem_sharing_nominate_page(struct dom /* Create the handle */ ret = -ENOMEM; - handle = next_handle++; - if((hash_entry = mem_sharing_hash_insert(handle, mfn)) == NULL) - { + if((gfn_info = mem_sharing_gfn_alloc()) == NULL) goto out; - } - if((gfn_info = mem_sharing_gfn_alloc()) == NULL) - { - mem_sharing_hash_destroy(hash_entry); - goto out; - } /* Change the p2m type */ if(p2m_change_type(d, gfn, p2mt, p2m_ram_shared) != p2mt) @@ -509,7 +411,6 @@ int mem_sharing_nominate_page(struct dom * The mfn needs to revert back to rw type. This should never fail, * since no-one knew that the mfn was temporarily sharable */ BUG_ON(page_make_private(d, page) != 0); - mem_sharing_hash_destroy(hash_entry); mem_sharing_gfn_destroy(gfn_info, 0); goto out; } @@ -517,13 +418,12 @@ int mem_sharing_nominate_page(struct dom /* Update m2p entry to SHARED_M2P_ENTRY */ set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY); - INIT_LIST_HEAD(&hash_entry->gfns); INIT_LIST_HEAD(&gfn_info->list); - list_add(&gfn_info->list, &hash_entry->gfns); + list_add(&gfn_info->list, page->shared_gfns); gfn_info->gfn = gfn; gfn_info->domain = d->domain_id; - page->shr_handle = handle; - *phandle = handle; + *phandle = mfn_x(mfn); + atomic_inc(&d->shr_pages); ret = 0; @@ -534,29 +434,25 @@ out: int mem_sharing_share_pages(shr_handle_t sh, shr_handle_t ch) { - shr_hash_entry_t *se, *ce; struct page_info *spage, *cpage; struct list_head *le, *te; - struct gfn_info *gfn; + gfn_info_t *gfn; struct domain *d; int ret; shr_lock(); ret = XEN_DOMCTL_MEM_SHARING_S_HANDLE_INVALID; - se = mem_sharing_hash_lookup(sh); - if(se == NULL) goto err_out; + spage = mem_sharing_lookup(sh); + if(spage == NULL) goto err_out; ret = XEN_DOMCTL_MEM_SHARING_C_HANDLE_INVALID; - ce = mem_sharing_hash_lookup(ch); - if(ce == NULL) goto err_out; - spage = mfn_to_page(se->mfn); - cpage = mfn_to_page(ce->mfn); - /* gfn lists always have at least one entry => save to call list_entry */ - mem_sharing_gfn_account(gfn_get_info(&ce->gfns), 1); - mem_sharing_gfn_account(gfn_get_info(&se->gfns), 1); - list_for_each_safe(le, te, &ce->gfns) + cpage = mem_sharing_lookup(ch); + if(cpage == NULL) goto err_out; + + /* Merge the lists together */ + list_for_each_safe(le, te, cpage->shared_gfns) { - gfn = list_entry(le, struct gfn_info, list); + gfn = list_entry(le, gfn_info_t, list); /* Get the source page and type, this should never fail * because we are under shr lock, and got non-null se */ BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page)); @@ -564,13 +460,12 @@ int mem_sharing_share_pages(shr_handle_t list_del(&gfn->list); d = get_domain_by_id(gfn->domain); BUG_ON(!d); - BUG_ON(set_shared_p2m_entry(d, gfn->gfn, se->mfn) == 0); + BUG_ON(set_shared_p2m_entry(d, gfn->gfn, _mfn(sh)) == 0); put_domain(d); - list_add(&gfn->list, &se->gfns); + list_add(&gfn->list, spage->shared_gfns); put_page_and_type(cpage); - } - ASSERT(list_empty(&ce->gfns)); - mem_sharing_hash_delete(ch); + } + ASSERT(list_empty(cpage->shared_gfns)); atomic_inc(&nr_saved_mfns); /* Free the client page */ if(test_and_clear_bit(_PGC_allocated, &cpage->count_info)) @@ -592,9 +487,7 @@ int mem_sharing_unshare_page(struct doma struct page_info *page, *old_page; void *s, *t; int ret, last_gfn; - shr_hash_entry_t *hash_entry; - struct gfn_info *gfn_info = NULL; - shr_handle_t handle; + gfn_info_t *gfn_info = NULL; struct list_head *le; shr_lock(); @@ -604,37 +497,40 @@ int mem_sharing_unshare_page(struct doma mfn = gfn_to_mfn(d, gfn, &p2mt); /* Has someone already unshared it? */ - if (!p2m_is_shared(p2mt)) { + if (!p2m_is_shared(p2mt)) + { shr_unlock(); return 0; } - page = mfn_to_page(mfn); - handle = page->shr_handle; - - hash_entry = mem_sharing_hash_lookup(handle); - list_for_each(le, &hash_entry->gfns) + page = mem_sharing_lookup((shr_handle_t)mfn_x(mfn)); + if (page == NULL || page_get_owner(page) != dom_cow) { - gfn_info = list_entry(le, struct gfn_info, list); + printk("Domain p2m is shared, but page is not: %lx\n", gfn); + BUG(); + } + + list_for_each(le, page->shared_gfns) + { + gfn_info = list_entry(le, gfn_info_t, list); if((gfn_info->gfn == gfn) && (gfn_info->domain == d->domain_id)) goto gfn_found; } printk("Could not find gfn_info for shared gfn: %lx\n", gfn); BUG(); + gfn_found: /* Delete gfn_info from the list, but hold on to it, until we've allocated * memory to make a copy */ list_del(&gfn_info->list); - last_gfn = list_empty(&hash_entry->gfns); + last_gfn = list_empty(page->shared_gfns); /* If the GFN is getting destroyed drop the references to MFN * (possibly freeing the page), and exit early */ if(flags & MEM_SHARING_DESTROY_GFN) { mem_sharing_gfn_destroy(gfn_info, !last_gfn); - if(last_gfn) - mem_sharing_hash_delete(handle); - else + if(!last_gfn) /* Even though we don't allocate a private page, we have to account * for the MFN that originally backed this PFN. */ atomic_dec(&nr_saved_mfns); @@ -656,7 +552,7 @@ gfn_found: { /* We've failed to obtain memory for private page. Need to re-add the * gfn_info to relevant list */ - list_add(&gfn_info->list, &hash_entry->gfns); + list_add(&gfn_info->list, page->shared_gfns); shr_unlock(); return -ENOMEM; } @@ -673,9 +569,7 @@ gfn_found: private_page_found: /* We've got a private page, we can commit the gfn destruction */ mem_sharing_gfn_destroy(gfn_info, !last_gfn); - if(last_gfn) - mem_sharing_hash_delete(handle); - else + if(!last_gfn) atomic_dec(&nr_saved_mfns); if(p2m_change_type(d, gfn, p2m_ram_shared, p2m_ram_rw) != @@ -786,6 +680,5 @@ int mem_sharing_domctl(struct domain *d, void __init mem_sharing_init(void) { printk("Initing memory sharing.\n"); - mem_sharing_hash_init(); + mm_lock_init(&shr_lock); } - diff -r 38135be750ed xen/include/asm-x86/mem_sharing.h --- a/xen/include/asm-x86/mem_sharing.h +++ b/xen/include/asm-x86/mem_sharing.h @@ -41,9 +41,10 @@ int mem_sharing_unshare_page(struct doma int mem_sharing_sharing_resume(struct domain *d); int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec); + void mem_sharing_init(void); -#else +#else #define mem_sharing_init() do { } while (0) diff -r 38135be750ed xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -49,8 +49,15 @@ struct page_info /* For non-pinnable single-page shadows, a higher entry that points * at us. */ paddr_t up; - /* For shared/sharable pages the sharing handle */ - uint64_t shr_handle; + /* For shared/sharable pages, we use a doubly-linked list + * of all the domains that map this page (w/ associated PFNs). + * Previously, this was a handle to a separate hashtable structure, + * however given that this is the only real information we need + * about a shared page, we can associate this directly with the + * page_info frame and use the mfn as the handle for usercode. + * This list is allocated and freed when a page is shared/unshared. + */ + struct list_head *shared_gfns; }; /* Reference count and various PGC_xxx flags and fields. */