I'll leave this alone for the moment, but let me try to explain the end
goal here:
1. We need to protect p2m entries on lookups (any lookups).
2. If performance becomes prohibitive, we then need to break up that lock.
3. Once the p2m lock is fine-grained, the PoD code's current locking
   assumptions break, so PoD will need its own lock.
4. Hence this patch.
Agree with you that it's ahead of the curve: it removes p2m_lock uses before
the p2m lock actually becomes fine-grained. So I'll leave this on the side
for now; a rough sketch of the intended lock ordering follows.
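
(A sketch only, under the assumption that the p2m lock eventually becomes a
fine-grained lookup lock; pod_lock/pod_unlock and
lock_page_alloc/unlock_page_alloc are the names used in the patch below,
while the function itself is made up for illustration.)

    static void pod_ordering_sketch(struct p2m_domain *p2m)
    {
        p2m_lock(p2m);          /* outermost: the p2m lock (eventually
                                 * fine-grained / per-range)               */
        pod_lock(p2m);          /* then the per-p2m PoD lock added here    */
        lock_page_alloc(p2m);   /* innermost: the domain page_alloc lock,
                                 * held only while moving pages on or off
                                 * the domain page list                    */
        /* ... move pages between the PoD lists and d->page_list ... */
        unlock_page_alloc(p2m);
        pod_unlock(p2m);
        p2m_unlock(p2m);
    }
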
Andres
> On Thu, Oct 27, 2011 at 1:33 PM, Andres Lagar-Cavilla
> <andres@xxxxxxxxxxxxxxxx> wrote:
>> xen/arch/x86/mm/mm-locks.h | 9 ++
>> xen/arch/x86/mm/p2m-pod.c | 145 +++++++++++++++++++++++++++------------------
>> xen/arch/x86/mm/p2m-pt.c | 3 +
>> xen/arch/x86/mm/p2m.c | 7 +-
>> xen/include/asm-x86/p2m.h | 25 ++-----
>> 5 files changed, 113 insertions(+), 76 deletions(-)
>>
>>
>> The PoD layer has a fragile locking discipline. It relies on the
>> p2m being globally locked, and it also relies on the page alloc
>> lock to protect some of its data structures. Replace this all by an
>> explicit pod lock: per p2m, order enforced.
>>
>> Two consequences:
>> - Critical sections in the pod code protected by the page alloc
>> lock are now reduced to modifications of the domain page list.
>> - When the p2m lock becomes fine-grained, there are no
>> assumptions broken in the PoD layer.
>>
>> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>>
>> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/mm-locks.h
>> --- a/xen/arch/x86/mm/mm-locks.h
>> +++ b/xen/arch/x86/mm/mm-locks.h
>> @@ -155,6 +155,15 @@ declare_mm_lock(p2m)
>> #define p2m_unlock(p) mm_unlock(&(p)->lock)
>> #define p2m_locked_by_me(p) mm_locked_by_me(&(p)->lock)
>>
>> +/* PoD lock (per-p2m-table)
>> + *
>> + * Protects private PoD data structs. */
>> +
>> +declare_mm_lock(pod)
>> +#define pod_lock(p) mm_lock(pod, &(p)->pod.lock)
>> +#define pod_unlock(p) mm_unlock(&(p)->pod.lock)
>> +#define pod_locked_by_me(p) mm_locked_by_me(&(p)->pod.lock)
>> +
>> /* Page alloc lock (per-domain)
>> *
>> * This is an external lock, not represented by an mm_lock_t. However,
>> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m-pod.c
>> --- a/xen/arch/x86/mm/p2m-pod.c
>> +++ b/xen/arch/x86/mm/p2m-pod.c
>> @@ -63,6 +63,7 @@ static inline void unlock_page_alloc(str
>> * Populate-on-demand functionality
>> */
>>
>> +/* PoD lock held on entry */
>> static int
>> p2m_pod_cache_add(struct p2m_domain *p2m,
>> struct page_info *page,
>> @@ -114,43 +115,42 @@ p2m_pod_cache_add(struct p2m_domain *p2m
>> unmap_domain_page(b);
>> }
>>
>> + /* First, take all pages off the domain list */
>> lock_page_alloc(p2m);
>> -
>> - /* First, take all pages off the domain list */
>> for(i=0; i < 1 << order ; i++)
>> {
>> p = page + i;
>> page_list_del(p, &d->page_list);
>> }
>>
>> - /* Then add the first one to the appropriate populate-on-demand list */
>> - switch(order)
>> - {
>> - case PAGE_ORDER_2M:
>> - page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */
>> - p2m->pod.count += 1 << order;
>> - break;
>> - case PAGE_ORDER_4K:
>> - page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */
>> - p2m->pod.count += 1;
>> - break;
>> - default:
>> - BUG();
>> - }
>> -
>> /* Ensure that the PoD cache has never been emptied.
>> * This may cause "zombie domains" since the page will never be freed. */
>> BUG_ON( d->arch.relmem != RELMEM_not_started );
>>
>> unlock_page_alloc(p2m);
>>
>> + /* Then add the first one to the appropriate populate-on-demand list */
>> + switch(order)
>> + {
>> + case PAGE_ORDER_2M:
>> + page_list_add_tail(page, &p2m->pod.super);
>> + p2m->pod.count += 1 << order;
>> + break;
>> + case PAGE_ORDER_4K:
>> + page_list_add_tail(page, &p2m->pod.single);
>> + p2m->pod.count += 1;
>> + break;
>> + default:
>> + BUG();
>> + }
>> +
>> return 0;
>> }
>>
>> /* Get a page of size order from the populate-on-demand cache. Will break
>> * down 2-meg pages into singleton pages automatically. Returns null if
>> - * a superpage is requested and no superpages are available. Must be called
>> - * with the d->page_lock held. */
>> + * a superpage is requested and no superpages are available. */
>> +/* PoD lock held on entry */
>> static struct page_info * p2m_pod_cache_get(struct p2m_domain *p2m,
>> unsigned long order)
>> {
>> @@ -185,7 +185,7 @@ static struct page_info * p2m_pod_cache_
>> case PAGE_ORDER_2M:
>> BUG_ON( page_list_empty(&p2m->pod.super) );
>> p = page_list_remove_head(&p2m->pod.super);
>> - p2m->pod.count -= 1 << order; /* Lock: page_alloc */
>> + p2m->pod.count -= 1 << order;
>> break;
>> case PAGE_ORDER_4K:
>> BUG_ON( page_list_empty(&p2m->pod.single) );
>> @@ -197,16 +197,19 @@ static struct page_info * p2m_pod_cache_
>> }
>>
>> /* Put the pages back on the domain page_list */
>> + lock_page_alloc(p2m);
>> for ( i = 0 ; i < (1 << order); i++ )
>> {
>> BUG_ON(page_get_owner(p + i) != p2m->domain);
>> page_list_add_tail(p + i, &p2m->domain->page_list);
>> }
>> + unlock_page_alloc(p2m);
>>
>> return p;
>> }
>>
>> /* Set the size of the cache, allocating or freeing as necessary. */
>> +/* PoD lock held on entry */
>> static int
>> p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target, int preemptible)
>> {
>> @@ -259,8 +262,6 @@ p2m_pod_set_cache_target(struct p2m_doma
>>
>> /* Grab the lock before checking that pod.super is empty, or the last
>> * entries may disappear before we grab the lock. */
>> - lock_page_alloc(p2m);
>> -
>> if ( (p2m->pod.count - pod_target) > SUPERPAGE_PAGES
>> && !page_list_empty(&p2m->pod.super) )
>> order = PAGE_ORDER_2M;
>> @@ -271,8 +272,6 @@ p2m_pod_set_cache_target(struct p2m_doma
>>
>> ASSERT(page != NULL);
>>
>> - unlock_page_alloc(p2m);
>> -
>> /* Then free them */
>> for ( i = 0 ; i < (1 << order) ; i++ )
>> {
>> @@ -348,7 +347,7 @@ p2m_pod_set_mem_target(struct domain *d,
>> int ret = 0;
>> unsigned long populated;
>>
>> - p2m_lock(p2m);
>> + pod_lock(p2m);
>>
>> /* P == B: Nothing to do. */
>> if ( p2m->pod.entry_count == 0 )
>> @@ -377,7 +376,7 @@ p2m_pod_set_mem_target(struct domain *d,
>> ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/);
>>
>> out:
>> - p2m_unlock(p2m);
>> + pod_unlock(p2m);
>>
>> return ret;
>> }
>> @@ -390,7 +389,7 @@ p2m_pod_empty_cache(struct domain *d)
>>
>> /* After this barrier no new PoD activities can happen. */
>> BUG_ON(!d->is_dying);
>> - spin_barrier(&p2m->lock.lock);
>> + spin_barrier(&p2m->pod.lock.lock);
>>
>> lock_page_alloc(p2m);
>>
>> @@ -431,7 +430,8 @@ p2m_pod_offline_or_broken_hit(struct pag
>> if ( !(d = page_get_owner(p)) || !(p2m = p2m_get_hostp2m(d)) )
>> return 0;
>>
>> - lock_page_alloc(p2m);
>> + pod_lock(p2m);
>> +
>> bmfn = mfn_x(page_to_mfn(p));
>> page_list_for_each_safe(q, tmp, &p2m->pod.super)
>> {
>> @@ -462,12 +462,14 @@ p2m_pod_offline_or_broken_hit(struct pag
>> }
>> }
>>
>> - unlock_page_alloc(p2m);
>> + pod_unlock(p2m);
>> return 0;
>>
>> pod_hit:
>> + lock_page_alloc(p2m);
>> page_list_add_tail(p, &d->arch.relmem_list);
>> unlock_page_alloc(p2m);
>> + pod_unlock(p2m);
>> return 1;
>> }
>>
>> @@ -486,9 +488,9 @@ p2m_pod_offline_or_broken_replace(struct
>> if ( unlikely(!p) )
>> return;
>>
>> - p2m_lock(p2m);
>> + pod_lock(p2m);
>> p2m_pod_cache_add(p2m, p, PAGE_ORDER_4K);
>> - p2m_unlock(p2m);
>> + pod_unlock(p2m);
>> return;
>> }
>>
>> @@ -512,6 +514,7 @@ p2m_pod_decrease_reservation(struct doma
>> int steal_for_cache = 0;
>> int pod = 0, nonpod = 0, ram = 0;
>>
>> + pod_lock(p2m);
>>
>> /* If we don't have any outstanding PoD entries, let things take their
>> * course */
>> @@ -521,11 +524,10 @@ p2m_pod_decrease_reservation(struct doma
>> /* Figure out if we need to steal some freed memory for our cache */
>> steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count );
>>
>> - p2m_lock(p2m);
>> audit_p2m(p2m, 1);
>>
>> if ( unlikely(d->is_dying) )
>> - goto out_unlock;
>> + goto out;
>>
>> /* See what's in here. */
>> /* FIXME: Add contiguous; query for PSE entries? */
>
> I don't think this can be quite right.
>
> The point of holding the p2m lock here is so that the p2m entries
> don't change between the gfn_to_mfn_query() here and the
> set_p2m_entry() below. The balloon driver racing with other vcpus
> populating pages is exactly the kind of race we expect to experience.
> And in any case, this change will cause set_p2m_entry() to ASSERT()
> because we're not holding the p2m lock.
>
> Or am I missing something?
>
> I haven't yet looked at the rest of the patch series, but it would
> definitely be better for people in the future looking back and trying
> to figure out why the code is the way that it is if even transitory
> changesets don't introduce "temporary" violations of invariants. :-)
>
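
Purely to spell out the window being described (illustration only, not part
of the patch; the set_p2m_entry() call is the one from the hunk below, and
its ASSERT() on the p2m lock is as described above):

    /* vcpu A, p2m_pod_decrease_reservation(), now holding only the pod lock: */
    /* 1. gfn_to_mfn_query() reads the entry for gpfn + i and sees a PoD type */
    /* 2. race window: vcpu B runs p2m_pod_demand_populate() here and turns   */
    /*    that same entry into a real p2m_ram_rw mapping                      */
    /* 3. vcpu A then clobbers the freshly populated entry based on the stale */
    /*    type it read in step 1:                                             */
    set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0,
                  p2m_invalid, p2m->default_access);
    /* 4. and set_p2m_entry() itself will ASSERT() because the p2m lock is    */
    /*    no longer held on this path                                         */
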
>> @@ -547,14 +549,14 @@ p2m_pod_decrease_reservation(struct doma
>>
>> /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
>> if(!pod && !steal_for_cache)
>> - goto out_unlock;
>> + goto out_audit;
>>
>> if ( !nonpod )
>> {
>> /* All PoD: Mark the whole region invalid and tell caller
>> * we're done. */
>> set_p2m_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid,
>> p2m->default_access);
>> - p2m->pod.entry_count-=(1<<order); /* Lock: p2m */
>> + p2m->pod.entry_count-=(1<<order);
>> BUG_ON(p2m->pod.entry_count < 0);
>> ret = 1;
>> goto out_entry_check;
>> @@ -577,7 +579,7 @@ p2m_pod_decrease_reservation(struct doma
>> if ( t == p2m_populate_on_demand )
>> {
>> set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0,
>> p2m_invalid, p2m->default_access);
>> - p2m->pod.entry_count--; /* Lock: p2m */
>> + p2m->pod.entry_count--;
>> BUG_ON(p2m->pod.entry_count < 0);
>> pod--;
>> }
>> @@ -613,11 +615,11 @@ out_entry_check:
>> p2m_pod_set_cache_target(p2m, p2m->pod.entry_count, 0/*can't preempt*/);
>> }
>>
>> -out_unlock:
>> +out_audit:
>> audit_p2m(p2m, 1);
>> - p2m_unlock(p2m);
>>
>> out:
>> + pod_unlock(p2m);
>> return ret;
>> }
>>
>> @@ -630,20 +632,24 @@ void p2m_pod_dump_data(struct domain *d)
>>
>>
>> /* Search for all-zero superpages to be reclaimed as superpages for the
>> - * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
>> -static int
>> + * PoD cache. Must be called w/ pod lock held, page_alloc lock not held. */
>> +static void
>
> For the same reason, this must be called with the p2m lock held: it
> calls gfn_to_mfn_query() and then calls set_p2m_entry(). As it
> happens, this always *is* called with the p2m lock held at the moment;
> but the comment still needs to reflect this. Similarly in
> p2m_pod_zero_check().
>
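
One way to make that requirement explicit in the code itself rather than
only in the comment (a sketch, not part of this patch; both helpers are the
ones declared in mm-locks.h at the top of the patch):

    static void p2m_pod_zero_check_preconditions(struct p2m_domain *p2m)
    {
        ASSERT(p2m_locked_by_me(p2m));   /* entries must not change between
                                          * gfn_to_mfn_query() and
                                          * set_p2m_entry()                 */
        ASSERT(pod_locked_by_me(p2m));   /* pod.count and the pod lists     */
    }
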
>> p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
>> {
>> mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
>> p2m_type_t type, type0 = 0;
>> unsigned long * map = NULL;
>> - int ret=0, reset = 0;
>> + int success = 0, reset = 0;
>> int i, j;
>> int max_ref = 1;
>> struct domain *d = p2m->domain;
>>
>> if ( !superpage_aligned(gfn) )
>> - goto out;
>> + return;
>> +
>> + /* If we were enforcing ordering against p2m locks, this is a place
>> + * to drop the PoD lock and re-acquire it once we're done mucking with
>> + * the p2m. */
>>
>> /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
>> if ( paging_mode_shadow(d) )
>> @@ -751,19 +757,24 @@ p2m_pod_zero_check_superpage(struct p2m_
>> __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
>> }
>>
>> - /* Finally! We've passed all the checks, and can add the mfn superpage
>> - * back on the PoD cache, and account for the new p2m PoD entries */
>> - p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
>> - p2m->pod.entry_count += SUPERPAGE_PAGES;
>> + success = 1;
>> +
>>
>> out_reset:
>> if ( reset )
>> set_p2m_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access);
>>
>> out:
>> - return ret;
>> + if ( success )
>> + {
>> + /* Finally! We've passed all the checks, and can add the mfn superpage
>> + * back on the PoD cache, and account for the new p2m PoD entries */
>> + p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
>> + p2m->pod.entry_count += SUPERPAGE_PAGES;
>> + }
>> }
>>
>> +/* On entry, PoD lock is held */
>> static void
>> p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
>> {
>> @@ -775,6 +786,8 @@ p2m_pod_zero_check(struct p2m_domain *p2
>> int i, j;
>> int max_ref = 1;
>>
>> + /* Also the right time to drop pod_lock if enforcing ordering against p2m_lock */
>> +
>> /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
>> if ( paging_mode_shadow(d) )
>> max_ref++;
>> @@ -841,7 +854,6 @@ p2m_pod_zero_check(struct p2m_domain *p2
>> if( *(map[i]+j) != 0 )
>> break;
>>
>> - unmap_domain_page(map[i]);
>>
>> /* See comment in p2m_pod_zero_check_superpage() re gnttab
>> * check timing. */
>> @@ -849,8 +861,15 @@ p2m_pod_zero_check(struct p2m_domain *p2
>> {
>> set_p2m_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
>> types[i], p2m->default_access);
>> + unmap_domain_page(map[i]);
>> + map[i] = NULL;
>> }
>> - else
>> + }
>> +
>> + /* Finally, add to cache */
>> + for ( i=0; i < count; i++ )
>> + {
>> + if ( map[i] )
>> {
>> if ( tb_init_done )
>> {
>> @@ -867,6 +886,8 @@ p2m_pod_zero_check(struct p2m_domain *p2
>> __trace_var(TRC_MEM_POD_ZERO_RECLAIM, 0, sizeof(t), &t);
>> }
>>
>> + unmap_domain_page(map[i]);
>> +
>> /* Add to cache, and account for the new p2m PoD entry */
>> p2m_pod_cache_add(p2m, mfn_to_page(mfns[i]), PAGE_ORDER_4K);
>> p2m->pod.entry_count++;
>> @@ -876,6 +897,7 @@ p2m_pod_zero_check(struct p2m_domain *p2
>> }
>>
>> #define POD_SWEEP_LIMIT 1024
>> +/* Only one CPU at a time is guaranteed to enter a sweep */
>> static void
>> p2m_pod_emergency_sweep_super(struct p2m_domain *p2m)
>> {
>> @@ -964,7 +986,8 @@ p2m_pod_demand_populate(struct p2m_domai
>>
>> ASSERT(p2m_locked_by_me(p2m));
>>
>> - /* This check is done with the p2m lock held. This will make sure that
>> + pod_lock(p2m);
>> + /* This check is done with the pod lock held. This will make sure that
>> * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
>> * won't start until we're done. */
>> if ( unlikely(d->is_dying) )
>> @@ -974,6 +997,7 @@ p2m_pod_demand_populate(struct p2m_domai
>> * 1GB region to 2MB chunks for a retry. */
>> if ( order == PAGE_ORDER_1G )
>> {
>> + pod_unlock(p2m);
>> gfn_aligned = (gfn >> order) << order;
>> /* Note that we are supposed to call set_p2m_entry() 512 times to
>> * split 1GB into 512 2MB pages here. But We only do once here because
>> @@ -983,6 +1007,7 @@ p2m_pod_demand_populate(struct p2m_domai
>> set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
>> p2m_populate_on_demand, p2m->default_access);
>> audit_p2m(p2m, 1);
>> + /* This is because the ept/pt caller locks the p2m recursively */
>> p2m_unlock(p2m);
>> return 0;
>> }
>> @@ -996,11 +1021,15 @@ p2m_pod_demand_populate(struct p2m_domai
>>
>> /* If we're low, start a sweep */
>> if ( order == PAGE_ORDER_2M && page_list_empty(&p2m->pod.super) )
>> + /* Note that sweeps scan other ranges in the p2m. In a scenario
>> + * in which p2m locks are order-enforced wrt pod lock and p2m
>> + * locks are fine grained, this will result in deadlock */
>> p2m_pod_emergency_sweep_super(p2m);
>>
>> if ( page_list_empty(&p2m->pod.single) &&
>> ( ( order == PAGE_ORDER_4K )
>> || (order == PAGE_ORDER_2M && page_list_empty(&p2m->pod.super) ) ) )
>> + /* Same comment regarding deadlock applies */
>> p2m_pod_emergency_sweep(p2m);
>> }
>>
>> @@ -1008,8 +1037,6 @@ p2m_pod_demand_populate(struct p2m_domai
>> if ( q == p2m_guest && gfn > p2m->pod.max_guest )
>> p2m->pod.max_guest = gfn;
>>
>> - lock_page_alloc(p2m);
>> -
>> if ( p2m->pod.count == 0 )
>> goto out_of_memory;
>>
>> @@ -1022,8 +1049,6 @@ p2m_pod_demand_populate(struct p2m_domai
>>
>> BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
>>
>> - unlock_page_alloc(p2m);
>> -
>> gfn_aligned = (gfn >> order) << order;
>>
>> set_p2m_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw,
>> p2m->default_access);
>> @@ -1034,8 +1059,9 @@ p2m_pod_demand_populate(struct p2m_domai
>> paging_mark_dirty(d, mfn_x(mfn) + i);
>> }
>>
>> - p2m->pod.entry_count -= (1 << order); /* Lock: p2m */
>> + p2m->pod.entry_count -= (1 << order);
>> BUG_ON(p2m->pod.entry_count < 0);
>> + pod_unlock(p2m);
>>
>> if ( tb_init_done )
>> {
>> @@ -1054,16 +1080,17 @@ p2m_pod_demand_populate(struct p2m_domai
>>
>> return 0;
>> out_of_memory:
>> - unlock_page_alloc(p2m);
>> + pod_unlock(p2m);
>>
>> printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 "
>> pod_entries %" PRIi32 "\n",
>> __func__, d->tot_pages, p2m->pod.entry_count);
>> domain_crash(d);
>> out_fail:
>> + pod_unlock(p2m);
>> return -1;
>> remap_and_retry:
>> BUG_ON(order != PAGE_ORDER_2M);
>> - unlock_page_alloc(p2m);
>> + pod_unlock(p2m);
>>
>> /* Remap this 2-meg region in singleton chunks */
>> gfn_aligned = (gfn>>order)<<order;
>> @@ -1133,9 +1160,11 @@ guest_physmap_mark_populate_on_demand(st
>> rc = -EINVAL;
>> else
>> {
>> - p2m->pod.entry_count += 1 << order; /* Lock: p2m */
>> + pod_lock(p2m);
>> + p2m->pod.entry_count += 1 << order;
>> p2m->pod.entry_count -= pod_count;
>> BUG_ON(p2m->pod.entry_count < 0);
>> + pod_unlock(p2m);
>> }
>>
>> audit_p2m(p2m, 1);
>> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m-pt.c
>> --- a/xen/arch/x86/mm/p2m-pt.c
>> +++ b/xen/arch/x86/mm/p2m-pt.c
>> @@ -1001,6 +1001,7 @@ void audit_p2m(struct p2m_domain *p2m, i
>> if ( !paging_mode_translate(d) )
>> return;
>>
>> + pod_lock(p2m);
>> //P2M_PRINTK("p2m audit starts\n");
>>
>> test_linear = ( (d == current->domain)
>> @@ -1247,6 +1248,8 @@ void audit_p2m(struct p2m_domain *p2m, i
>> pmbad, mpbad);
>> WARN();
>> }
>> +
>> + pod_unlock(p2m);
>> }
>> #endif /* P2M_AUDIT */
>>
>> diff -r 332775f72a30 -r 981073d78f7f xen/arch/x86/mm/p2m.c
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -72,6 +72,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
>> static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
>> {
>> mm_lock_init(&p2m->lock);
>> + mm_lock_init(&p2m->pod.lock);
>> INIT_LIST_HEAD(&p2m->np2m_list);
>> INIT_PAGE_LIST_HEAD(&p2m->pages);
>> INIT_PAGE_LIST_HEAD(&p2m->pod.super);
>> @@ -506,8 +507,10 @@ guest_physmap_add_entry(struct domain *d
>> rc = -EINVAL;
>> else
>> {
>> - p2m->pod.entry_count -= pod_count; /* Lock: p2m */
>> + pod_lock(p2m);
>> + p2m->pod.entry_count -= pod_count;
>> BUG_ON(p2m->pod.entry_count < 0);
>> + pod_unlock(p2m);
>> }
>> }
>>
>> @@ -1125,8 +1128,10 @@ p2m_flush_table(struct p2m_domain *p2m)
>> /* "Host" p2m tables can have shared entries &c that need a bit more
>> * care when discarding them */
>> ASSERT(p2m_is_nestedp2m(p2m));
>> + pod_lock(p2m);
>> ASSERT(page_list_empty(&p2m->pod.super));
>> ASSERT(page_list_empty(&p2m->pod.single));
>> + pod_unlock(p2m);
>>
>> /* This is no longer a valid nested p2m for any address space */
>> p2m->cr3 = CR3_EADDR;
>> diff -r 332775f72a30 -r 981073d78f7f xen/include/asm-x86/p2m.h
>> --- a/xen/include/asm-x86/p2m.h
>> +++ b/xen/include/asm-x86/p2m.h
>> @@ -257,24 +257,13 @@ struct p2m_domain {
>> unsigned long max_mapped_pfn;
>>
>> /* Populate-on-demand variables
>> - * NB on locking. {super,single,count} are
>> - * covered by d->page_alloc_lock, since they're almost always used in
>> - * conjunction with that functionality. {entry_count} is covered by
>> - * the domain p2m lock, since it's almost always used in conjunction
>> - * with changing the p2m tables.
>> *
>> - * At this point, both locks are held in two places. In both,
>> - * the order is [p2m,page_alloc]:
>> - * + p2m_pod_decrease_reservation() calls p2m_pod_cache_add(),
>> - * which grabs page_alloc
>> - * + p2m_pod_demand_populate() grabs both; the p2m lock to avoid
>> - * double-demand-populating of pages, the page_alloc lock to
>> - * protect moving stuff from the PoD cache to the domain page list.
>> - *
>> - * We enforce this lock ordering through a construct in mm-locks.h.
>> - * This demands, however, that we store the previous lock-ordering
>> - * level in effect before grabbing the page_alloc lock.
>> - */
>> + * All variables are protected with the pod lock. We cannot rely on
>> + * the p2m lock if it's turned into a fine-grained lock.
>> + * We only use the domain page_alloc lock for additions and
>> + * deletions to the domain's page list. Because we use it nested
>> + * within the PoD lock, we enforce its ordering (by remembering
>> + * the unlock level). */
>> struct {
>> struct page_list_head super, /* List of superpages */
>> single; /* Non-super lists */
>> @@ -283,6 +272,8 @@ struct p2m_domain {
>> unsigned reclaim_super; /* Last gpfn of a scan */
>> unsigned reclaim_single; /* Last gpfn of a scan */
>> unsigned max_guest; /* gpfn of max guest demand-populate */
>> + mm_lock_t lock; /* Locking of private pod structs, *
>> + * not relying on the p2m lock. */
>> int page_alloc_unlock_level; /* To enforce lock ordering */
>> } pod;
>> };
>>
>>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel