On Thu, Oct 27, 2011 at 1:33 PM, Andres Lagar-Cavilla
<andres@xxxxxxxxxxxxxxxx> wrote:
> xen/arch/x86/mm/hap/hap.c | 2 +-
> xen/arch/x86/mm/hap/nested_hap.c | 21 ++-
> xen/arch/x86/mm/p2m-ept.c | 26 +----
> xen/arch/x86/mm/p2m-pod.c | 42 +++++--
> xen/arch/x86/mm/p2m-pt.c | 20 +---
> xen/arch/x86/mm/p2m.c | 185 ++++++++++++++++++++++++--------------
> xen/include/asm-ia64/mm.h | 5 +
> xen/include/asm-x86/p2m.h | 45 +++++++++-
> 8 files changed, 217 insertions(+), 129 deletions(-)
>
>
> This patch only modifies code internal to the p2m, adding convenience
> macros, etc. It will yield a compiling code base but an incorrect
> hypervisor (external callers of queries into the p2m will not unlock).
> Next patch takes care of external callers, split done for the benefit
> of conciseness.
It's not obvious to me where in this patch to find a description of
what the new locking regime is. What does the _unlocked() suffix mean?
When do I have to call that variant rather than the plain one, and when
do I have to lock / unlock / drop things myself?

I think that should ideally be described both in the commit message (at
least a summary) and in a comment in a header somewhere. Perhaps it is
already in the patch somewhere, but a quick glance through didn't find
it...
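For instance, something along these lines in p2m.h would go a long way
(this is only my guess at the rules from reading the patch -- please
correct whatever I've got wrong):

    /*
     * Fine-grained p2m locking:
     *
     *  get_p2m_gfn(p2m, gfn) / put_p2m_gfn(p2m, gfn)
     *      Lock / unlock the range covering a single gfn.
     *  get_p2m(p2m, gfn, order) / put_p2m(p2m, gfn, order)
     *      Lock / unlock a 2^order range of gfns.
     *  get_p2m_global(p2m) / put_p2m_global(p2m)
     *      Take / release the whole p2m exclusively, for global
     *      operations (type changes, table setup and teardown, ...).
     *
     *  gfn_to_mfn_query() and the other gfn_to_mfn_* lookups return
     *  with the gfn locked and a reference held on the underlying
     *  page; the caller must release both with drop_p2m_gfn() (or
     *  drop_p2m_gfn_domain()), passing the mfn the lookup returned.
     *
     *  The *_unlocked() variants drop the lock and the reference
     *  before returning, so the result is only a snapshot of the
     *  entry.
     */
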
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/hap.c
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -861,7 +861,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
> old_flags = l1e_get_flags(*p);
>
> if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
> - && !p2m_get_hostp2m(d)->defer_nested_flush ) {
> + && !atomic_read(&(p2m_get_hostp2m(d)->defer_nested_flush)) ) {
> /* We are replacing a valid entry so we need to flush nested p2ms,
> * unless the only change is an increase in access rights. */
> mfn_t omfn = _mfn(l1e_get_pfn(*p));
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/nested_hap.c
> --- a/xen/arch/x86/mm/hap/nested_hap.c
> +++ b/xen/arch/x86/mm/hap/nested_hap.c
> @@ -105,8 +105,6 @@ nestedhap_fix_p2m(struct vcpu *v, struct
> ASSERT(p2m);
> ASSERT(p2m->set_entry);
>
> - p2m_lock(p2m);
> -
> /* If this p2m table has been flushed or recycled under our feet,
> * leave it alone. We'll pick up the right one as we try to
> * vmenter the guest. */
> @@ -122,11 +120,13 @@ nestedhap_fix_p2m(struct vcpu *v, struct
> gfn = (L2_gpa >> PAGE_SHIFT) & mask;
> mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
>
> + /* Not bumping refcount of pages underneath because we're getting
> + * rid of whatever was there */
> + get_p2m(p2m, gfn, page_order);
> rv = set_p2m_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
> + put_p2m(p2m, gfn, page_order);
> }
>
> - p2m_unlock(p2m);
> -
> if (rv == 0) {
> gdprintk(XENLOG_ERR,
> "failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
> @@ -146,19 +146,26 @@ nestedhap_walk_L0_p2m(struct p2m_domain
> mfn_t mfn;
> p2m_type_t p2mt;
> p2m_access_t p2ma;
> + int rc;
>
> /* walk L0 P2M table */
> mfn = gfn_to_mfn_type_p2m(p2m, L1_gpa >> PAGE_SHIFT, &p2mt, &p2ma,
> p2m_query, page_order);
>
> + rc = NESTEDHVM_PAGEFAULT_ERROR;
> if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
> - return NESTEDHVM_PAGEFAULT_ERROR;
> + goto out;
>
> + rc = NESTEDHVM_PAGEFAULT_ERROR;
> if ( !mfn_valid(mfn) )
> - return NESTEDHVM_PAGEFAULT_ERROR;
> + goto out;
>
> *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
> - return NESTEDHVM_PAGEFAULT_DONE;
> + rc = NESTEDHVM_PAGEFAULT_DONE;
> +
> +out:
> + drop_p2m_gfn(p2m, L1_gpa >> PAGE_SHIFT, mfn_x(mfn));
> + return rc;
> }
>
> /* This function uses L2_gpa to walk the P2M page table in L1. If the
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-ept.c
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -43,29 +43,16 @@
> #define is_epte_present(ept_entry) ((ept_entry)->epte & 0x7)
> #define is_epte_superpage(ept_entry) ((ept_entry)->sp)
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* Ept-specific check wrapper */
> static int ept_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
> ept_entry_t *entry, int order,
> p2m_query_t q)
> {
> - int r;
> -
> - /* This is called from the p2m lookups, which can happen with or
> - * without the lock hed. */
> - p2m_lock_recursive(p2m);
> -
> /* Check to make sure this is still PoD */
> if ( entry->sa_p2mt != p2m_populate_on_demand )
> - {
> - p2m_unlock(p2m);
> return 0;
> - }
>
> - r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> - p2m_unlock(p2m);
> -
> - return r;
> + return p2m_pod_demand_populate(p2m, gfn, order, q);
> }
>
> static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type,
> p2m_access_t access)
> @@ -265,9 +252,9 @@ static int ept_next_level(struct p2m_dom
>
> ept_entry = (*table) + index;
>
> - /* ept_next_level() is called (sometimes) without a lock. Read
> + /* ept_next_level() is called (never) without a lock. Read
> * the entry once, and act on the "cached" entry after that to
> - * avoid races. */
> + * avoid races. AAA */
> e = atomic_read_ept_entry(ept_entry);
>
> if ( !is_epte_present(&e) )
> @@ -733,7 +720,8 @@ void ept_change_entry_emt_with_range(str
> int order = 0;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> - p2m_lock(p2m);
> + /* This is a global operation, essentially */
> + get_p2m_global(p2m);
> for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
> {
> int level = 0;
> @@ -773,7 +761,7 @@ void ept_change_entry_emt_with_range(str
> ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access);
> }
> }
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> /*
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pod.c
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -102,8 +102,6 @@ p2m_pod_cache_add(struct p2m_domain *p2m
> }
> #endif
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> /*
> * Pages from domain_alloc and returned by the balloon driver aren't
> * guaranteed to be zero; but by reclaiming zero pages, we implicitly
> @@ -536,7 +534,7 @@ p2m_pod_decrease_reservation(struct doma
> {
> p2m_type_t t;
>
> - gfn_to_mfn_query(d, gpfn + i, &t);
> + gfn_to_mfn_query_unlocked(d, gpfn + i, &t);
>
> if ( t == p2m_populate_on_demand )
> pod++;
The rest of the code makes it seem like gfn_to_mfn_query() will grab
the p2m lock for a range (and take a page reference), but the
_unlocked() version will not. Is that correct?
If so, shouldn't this call remain as it was?
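If I've understood the new scheme, the difference is just who releases
the per-gfn lock and page reference -- roughly (my reading, not
something the patch spells out):

    p2m_type_t t;
    mfn_t mfn;

    /* Locked: returns with the gfn locked and a ref held; the caller
     * must pair it with drop_p2m_gfn_domain() using the returned mfn. */
    mfn = gfn_to_mfn_query(d, gpfn + i, &t);
    /* ... the entry can't change underneath us here ... */
    drop_p2m_gfn_domain(d, gpfn + i, mfn_x(mfn));

    /* Unlocked: lock and ref are dropped before returning, so the
     * entry may have changed by the time we look at the result. */
    gfn_to_mfn_query_unlocked(d, gpfn + i, &t);

If the counts gathered here need to stay accurate for the rest of the
function, the locked variant seems like the safer choice -- but maybe
I'm missing a reason the snapshot is good enough.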
> @@ -602,6 +600,7 @@ p2m_pod_decrease_reservation(struct doma
> nonpod--;
> ram--;
> }
> + drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));
> }
And how does this fit with the _query() call above?
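For what it's worth, the pairing I'd expect from the rest of the series
(just a sketch using this patch's names) is:

    mfn = gfn_to_mfn_query(d, gpfn + i, &t);    /* takes per-gfn lock + ref */
    /* ... adjust the PoD / ram accounting ... */
    drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));    /* releases both */

so it would help to spell out which lookup this drop_p2m_gfn() is
balancing.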
>
> /* If there are no more non-PoD entries, tell decrease_reservation() that
> @@ -661,12 +660,15 @@ p2m_pod_zero_check_superpage(struct p2m_
> for ( i=0; i<SUPERPAGE_PAGES; i++ )
> {
>
> - mfn = gfn_to_mfn_query(d, gfn + i, &type);
> -
> if ( i == 0 )
> {
> + /* Only lock the p2m entry the first time, that will lock
> + * server for the whole superpage */
> + mfn = gfn_to_mfn_query(d, gfn + i, &type);
> mfn0 = mfn;
> type0 = type;
> + } else {
> + mfn = gfn_to_mfn_query_unlocked(d, gfn + i, &type);
> }
>
> /* Conditions that must be met for superpage-superpage:
> @@ -773,6 +775,10 @@ out:
> p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
> p2m->pod.entry_count += SUPERPAGE_PAGES;
> }
> +
> + /* We got p2m locks once for the whole superpage, with the original
> + * mfn0. We drop it here. */
> + drop_p2m_gfn(p2m, gfn, mfn_x(mfn0));
> }
>
> /* On entry, PoD lock is held */
> @@ -894,6 +900,12 @@ p2m_pod_zero_check(struct p2m_domain *p2
> p2m->pod.entry_count++;
> }
> }
> +
> + /* Drop all p2m locks and references */
> + for ( i=0; i<count; i++ )
> + {
> + drop_p2m_gfn(p2m, gfns[i], mfn_x(mfns[i]));
> + }
>
> }
>
> @@ -928,7 +940,9 @@ p2m_pod_emergency_sweep_super(struct p2m
> p2m->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
> }
>
> -#define POD_SWEEP_STRIDE 16
> +/* Note that spinlock recursion counters have 4 bits, so 16 or higher
> + * will overflow a single 2M spinlock in a zero check. */
> +#define POD_SWEEP_STRIDE 15
> static void
> p2m_pod_emergency_sweep(struct p2m_domain *p2m)
> {
> @@ -946,7 +960,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
> /* FIXME: Figure out how to avoid superpages */
> for ( i=p2m->pod.reclaim_single; i > 0 ; i-- )
> {
> - gfn_to_mfn_query(p2m->domain, i, &t );
> + gfn_to_mfn_query_unlocked(p2m->domain, i, &t );
> if ( p2m_is_ram(t) )
> {
> gfns[j] = i;
> @@ -974,6 +988,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
>
> }
>
> +/* The gfn and order need to be locked in the p2m before you walk in here */
> int
> p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
> unsigned int order,
> @@ -985,8 +1000,6 @@ p2m_pod_demand_populate(struct p2m_domai
> mfn_t mfn;
> int i;
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> pod_lock(p2m);
> /* This check is done with the pod lock held. This will make sure that
> * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
> @@ -1008,8 +1021,6 @@ p2m_pod_demand_populate(struct p2m_domai
> set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
> p2m_populate_on_demand, p2m->default_access);
> audit_p2m(p2m, 1);
> - /* This is because the ept/pt caller locks the p2m recursively */
> - p2m_unlock(p2m);
> return 0;
> }
>
> @@ -1132,7 +1143,9 @@ guest_physmap_mark_populate_on_demand(st
> if ( rc != 0 )
> return rc;
>
> - p2m_lock(p2m);
> + /* Pre-lock all the p2m entries. We don't take refs to the
> + * pages, because there shouldn't be any pages underneath. */
> + get_p2m(p2m, gfn, order);
> audit_p2m(p2m, 1);
>
> P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
> @@ -1140,7 +1153,8 @@ guest_physmap_mark_populate_on_demand(st
> /* Make sure all gpfns are unused */
> for ( i = 0; i < (1UL << order); i++ )
> {
> - omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> + p2m_access_t a;
> + omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
> if ( p2m_is_ram(ot) )
> {
> printk("%s: gfn_to_mfn returned type %d!\n",
> @@ -1169,9 +1183,9 @@ guest_physmap_mark_populate_on_demand(st
> }
>
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
>
> out:
> + put_p2m(p2m, gfn, order);
> return rc;
> }
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -487,31 +487,16 @@ out:
> }
>
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* PT-specific check wrapper */
> static int p2m_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
> l1_pgentry_t *p2m_entry, int order,
> p2m_query_t q)
> {
> - int r;
> -
> - /* This is called from the p2m lookups, which can happen with or
> - * without the lock hed. */
> - p2m_lock_recursive(p2m);
> - audit_p2m(p2m, 1);
> -
> /* Check to make sure this is still PoD */
> if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) !=
> p2m_populate_on_demand )
> - {
> - p2m_unlock(p2m);
> return 0;
> - }
>
> - r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> - audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> -
> - return r;
> + return p2m_pod_demand_populate(p2m, gfn, order, q);
> }
>
> /* Read the current domain's p2m table (through the linear mapping). */
> @@ -894,6 +879,7 @@ static void p2m_change_type_global(struc
> if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
> return;
>
> + /* Checks for exclusive lock */
> ASSERT(p2m_locked_by_me(p2m));
>
> #if CONFIG_PAGING_LEVELS == 4
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -143,9 +143,9 @@ void p2m_change_entry_type_global(struct
> p2m_type_t ot, p2m_type_t nt)
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> p2m->change_entry_type_global(p2m, ot, nt);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> mfn_t gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
> @@ -162,12 +162,17 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
> return _mfn(gfn);
> }
>
> + /* We take the lock for this single gfn. The caller has to put this lock */
> + get_p2m_gfn(p2m, gfn);
> +
> mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
>
> #ifdef __x86_64__
> if ( q == p2m_unshare && p2m_is_shared(*t) )
> {
> ASSERT(!p2m_is_nestedp2m(p2m));
> + /* p2m locking is recursive, so we won't deadlock going
> + * into the sharing code */
> mem_sharing_unshare_page(p2m->domain, gfn, 0);
> mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
> }
> @@ -179,13 +184,28 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
> /* Return invalid_mfn to avoid caller's access */
> mfn = _mfn(INVALID_MFN);
> if (q == p2m_guest)
> + {
> + put_p2m_gfn(p2m, gfn);
> domain_crash(p2m->domain);
> + }
> }
> #endif
>
> + /* Take an extra reference to the page. It won't disappear beneath us */
> + if ( mfn_valid(mfn) )
> + {
> + /* Use this because we don't necessarily know who owns the page */
> + if ( !page_get_owner_and_reference(mfn_to_page(mfn)) )
> + {
> + mfn = _mfn(INVALID_MFN);
> + }
> + }
> +
> + /* We leave holding the p2m lock for this gfn */
> return mfn;
> }
>
> +/* Appropriate locks held on entry */
> int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
> unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
> {
> @@ -194,8 +214,6 @@ int set_p2m_entry(struct p2m_domain *p2m
> unsigned int order;
> int rc = 1;
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> while ( todo )
> {
> if ( hap_enabled(d) )
> @@ -217,6 +235,18 @@ int set_p2m_entry(struct p2m_domain *p2m
> return rc;
> }
>
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> + unsigned long frame)
> +{
> + mfn_t mfn = _mfn(frame);
> + /* For non-translated domains, locks are never taken */
> + if ( !p2m || !paging_mode_translate(p2m->domain) )
> + return;
> + if ( mfn_valid(mfn) )
> + put_page(mfn_to_page(mfn));
> + put_p2m_gfn(p2m, gfn);
> +}
> +
> struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
> {
> struct page_info *pg;
> @@ -262,12 +292,12 @@ int p2m_alloc_table(struct p2m_domain *p
> unsigned long gfn = -1UL;
> struct domain *d = p2m->domain;
>
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
>
> if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
> {
> P2M_ERROR("p2m already allocated for this domain\n");
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -EINVAL;
> }
>
> @@ -283,7 +313,7 @@ int p2m_alloc_table(struct p2m_domain *p
>
> if ( p2m_top == NULL )
> {
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -ENOMEM;
> }
>
> @@ -295,7 +325,7 @@ int p2m_alloc_table(struct p2m_domain *p
> P2M_PRINTK("populating p2m table\n");
>
> /* Initialise physmap tables for slot zero. Other code assumes this. */
> - p2m->defer_nested_flush = 1;
> + atomic_set(&p2m->defer_nested_flush, 1);
> if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), 0,
> p2m_invalid, p2m->default_access) )
> goto error;
> @@ -323,10 +353,10 @@ int p2m_alloc_table(struct p2m_domain *p
> }
> spin_unlock(&p2m->domain->page_alloc_lock);
> }
> - p2m->defer_nested_flush = 0;
> + atomic_set(&p2m->defer_nested_flush, 0);
>
> P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return 0;
>
> error_unlock:
> @@ -334,7 +364,7 @@ error_unlock:
> error:
> P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
> PRI_mfn "\n", gfn, mfn_x(mfn));
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -ENOMEM;
> }
>
> @@ -354,26 +384,28 @@ void p2m_teardown(struct p2m_domain *p2m
> if (p2m == NULL)
> return;
>
> + get_p2m_global(p2m);
> +
> #ifdef __x86_64__
> for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
> {
> - mfn = gfn_to_mfn_type_p2m(p2m, gfn, &t, &a, p2m_query, NULL);
> + mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
> if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
> {
> ASSERT(!p2m_is_nestedp2m(p2m));
> + /* The p2m allows an exclusive global holder to recursively
> + * lock sub-ranges. For this. */
> BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
> }
>
> }
> #endif
>
> - p2m_lock(p2m);
> -
> p2m->phys_table = pagetable_null();
>
> while ( (pg = page_list_remove_head(&p2m->pages)) )
> d->arch.paging.free_page(d, pg);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> static void p2m_teardown_nestedp2m(struct domain *d)
> @@ -401,6 +433,7 @@ void p2m_final_teardown(struct domain *d
> }
>
>
> +/* Locks held on entry */
> static void
> p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
> unsigned int page_order)
> @@ -438,11 +471,11 @@ guest_physmap_remove_page(struct domain
> unsigned long mfn, unsigned int page_order)
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> - p2m_lock(p2m);
> + get_p2m(p2m, gfn, page_order);
> audit_p2m(p2m, 1);
> p2m_remove_page(p2m, gfn, mfn, page_order);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m(p2m, gfn, page_order);
> }
>
> int
> @@ -480,7 +513,7 @@ guest_physmap_add_entry(struct domain *d
> if ( rc != 0 )
> return rc;
>
> - p2m_lock(p2m);
> + get_p2m(p2m, gfn, page_order);
> audit_p2m(p2m, 0);
>
> P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
> @@ -488,12 +521,13 @@ guest_physmap_add_entry(struct domain *d
> /* First, remove m->p mappings for existing p->m mappings */
> for ( i = 0; i < (1UL << page_order); i++ )
> {
> - omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> + p2m_access_t a;
> + omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
> if ( p2m_is_grant(ot) )
> {
> /* Really shouldn't be unmapping grant maps this way */
> + put_p2m(p2m, gfn, page_order);
> domain_crash(d);
> - p2m_unlock(p2m);
> return -EINVAL;
> }
> else if ( p2m_is_ram(ot) )
> @@ -523,11 +557,12 @@ guest_physmap_add_entry(struct domain *d
> && (ogfn != INVALID_M2P_ENTRY)
> && (ogfn != gfn + i) )
> {
> + p2m_access_t a;
> /* This machine frame is already mapped at another physical
> * address */
> P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
> mfn + i, ogfn, gfn + i);
> - omfn = gfn_to_mfn_query(d, ogfn, &ot);
> + omfn = p2m->get_entry(p2m, ogfn, &ot, &a, p2m_query, NULL);
> if ( p2m_is_ram(ot) )
> {
> ASSERT(mfn_valid(omfn));
> @@ -567,7 +602,7 @@ guest_physmap_add_entry(struct domain *d
> }
>
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m(p2m, gfn, page_order);
>
> return rc;
> }
> @@ -579,18 +614,19 @@ p2m_type_t p2m_change_type(struct domain
> p2m_type_t ot, p2m_type_t nt)
> {
> p2m_type_t pt;
> + p2m_access_t a;
> mfn_t mfn;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> - mfn = gfn_to_mfn_query(d, gfn, &pt);
> + mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
> if ( pt == ot )
> set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
>
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> return pt;
> }
> @@ -608,20 +644,23 @@ void p2m_change_type_range(struct domain
>
> BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> - p2m_lock(p2m);
> - p2m->defer_nested_flush = 1;
> + atomic_set(&p2m->defer_nested_flush, 1);
>
> + /* We've been given a number instead of an order, so lock each
> + * gfn individually */
> for ( gfn = start; gfn < end; gfn++ )
> {
> - mfn = gfn_to_mfn_query(d, gfn, &pt);
> + p2m_access_t a;
> + get_p2m_gfn(p2m, gfn);
> + mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
> if ( pt == ot )
> set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
> + put_p2m_gfn(p2m, gfn);
> }
>
> - p2m->defer_nested_flush = 0;
> + atomic_set(&p2m->defer_nested_flush, 0);
> if ( nestedhvm_enabled(d) )
> p2m_flush_nestedp2m(d);
> - p2m_unlock(p2m);
> }
>
>
> @@ -631,17 +670,18 @@ set_mmio_p2m_entry(struct domain *d, uns
> {
> int rc = 0;
> p2m_type_t ot;
> + p2m_access_t a;
> mfn_t omfn;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - p2m_lock(p2m);
> - omfn = gfn_to_mfn_query(d, gfn, &ot);
> + get_p2m_gfn(p2m, gfn);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
> if ( p2m_is_grant(ot) )
> {
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> domain_crash(d);
> return 0;
> }
> @@ -654,11 +694,11 @@ set_mmio_p2m_entry(struct domain *d, uns
> P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
> rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct,
> p2m->default_access);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> if ( 0 == rc )
> gdprintk(XENLOG_ERR,
> "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> - mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> + mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
> return rc;
> }
>
> @@ -668,13 +708,14 @@ clear_mmio_p2m_entry(struct domain *d, u
> int rc = 0;
> mfn_t mfn;
> p2m_type_t t;
> + p2m_access_t a;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - p2m_lock(p2m);
> - mfn = gfn_to_mfn_query(d, gfn, &t);
> + get_p2m_gfn(p2m, gfn);
> + mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
>
> /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
> if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
> @@ -687,8 +728,7 @@ clear_mmio_p2m_entry(struct domain *d, u
> audit_p2m(p2m, 1);
>
> out:
> - p2m_unlock(p2m);
> -
> + put_p2m_gfn(p2m, gfn);
> return rc;
> }
>
> @@ -698,13 +738,14 @@ set_shared_p2m_entry(struct domain *d, u
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int rc = 0;
> p2m_type_t ot;
> + p2m_access_t a;
> mfn_t omfn;
>
> if ( !paging_mode_translate(p2m->domain) )
> return 0;
>
> - p2m_lock(p2m);
> - omfn = gfn_to_mfn_query(p2m->domain, gfn, &ot);
> + get_p2m_gfn(p2m, gfn);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
> /* At the moment we only allow p2m change if gfn has already been made
> * sharable first */
> ASSERT(p2m_is_shared(ot));
> @@ -714,11 +755,11 @@ set_shared_p2m_entry(struct domain *d, u
>
> P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
> rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> if ( 0 == rc )
> gdprintk(XENLOG_ERR,
> "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> - mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> + mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
> return rc;
> }
>
> @@ -732,7 +773,7 @@ int p2m_mem_paging_nominate(struct domai
> mfn_t mfn;
> int ret;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -765,7 +806,7 @@ int p2m_mem_paging_nominate(struct domai
> ret = 0;
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -778,7 +819,7 @@ int p2m_mem_paging_evict(struct domain *
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int ret = -EINVAL;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> /* Get mfn */
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
> @@ -824,7 +865,7 @@ int p2m_mem_paging_evict(struct domain *
> put_page(page);
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -863,7 +904,7 @@ void p2m_mem_paging_populate(struct doma
> req.type = MEM_EVENT_TYPE_PAGING;
>
> /* Fix p2m mapping */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
> /* Allow only nominated or evicted pages to enter page-in path */
> if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
> @@ -875,7 +916,7 @@ void p2m_mem_paging_populate(struct doma
> set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
> audit_p2m(p2m, 1);
> }
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> /* Pause domain if request came from guest and gfn has paging type */
> if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
> @@ -908,7 +949,7 @@ int p2m_mem_paging_prep(struct domain *d
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int ret = -ENOMEM;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -931,7 +972,7 @@ int p2m_mem_paging_prep(struct domain *d
> ret = 0;
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -949,12 +990,12 @@ void p2m_mem_paging_resume(struct domain
> /* Fix p2m entry if the page was not dropped */
> if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
> {
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, rsp.gfn);
> mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
> set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
> set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, rsp.gfn);
> }
>
> /* Unpause domain */
> @@ -979,16 +1020,16 @@ void p2m_mem_access_check(unsigned long
> p2m_access_t p2ma;
>
> /* First, handle rx2rw conversion automatically */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
>
> if ( access_w && p2ma == p2m_access_rx2rw )
> {
> p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return;
> }
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> /* Otherwise, check if there is a memory event listener, and send the message along */
> res = mem_event_check_ring(d, &d->mem_access);
> @@ -1006,9 +1047,9 @@ void p2m_mem_access_check(unsigned long
> else
> {
> /* A listener is not required, so clear the access restrictions */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt,
> p2m_access_rwx);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> }
>
> return;
> @@ -1064,7 +1105,7 @@ int p2m_set_mem_access(struct domain *d,
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> unsigned long pfn;
> - p2m_access_t a;
> + p2m_access_t a, _a;
> p2m_type_t t;
> mfn_t mfn;
> int rc = 0;
> @@ -1095,17 +1136,20 @@ int p2m_set_mem_access(struct domain *d,
> return 0;
> }
>
> - p2m_lock(p2m);
> + /* Because we don't get an order, rather a number, we need to lock each
> + * entry individually */
> for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
> {
> - mfn = gfn_to_mfn_query(d, pfn, &t);
> + get_p2m_gfn(p2m, pfn);
> + mfn = p2m->get_entry(p2m, pfn, &t, &_a, p2m_query, NULL);
> if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
> {
> + put_p2m_gfn(p2m, pfn);
> rc = -ENOMEM;
> break;
> }
> + put_p2m_gfn(p2m, pfn);
> }
> - p2m_unlock(p2m);
> return rc;
> }
>
> @@ -1138,7 +1182,10 @@ int p2m_get_mem_access(struct domain *d,
> return 0;
> }
>
> + get_p2m_gfn(p2m, pfn);
> mfn = p2m->get_entry(p2m, pfn, &t, &a, p2m_query, NULL);
> + put_p2m_gfn(p2m, pfn);
> +
> if ( mfn_x(mfn) == INVALID_MFN )
> return -ESRCH;
>
> @@ -1175,7 +1222,7 @@ p2m_flush_table(struct p2m_domain *p2m)
> struct domain *d = p2m->domain;
> void *p;
>
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
>
> /* "Host" p2m tables can have shared entries &c that need a bit more
> * care when discarding them */
> @@ -1203,7 +1250,7 @@ p2m_flush_table(struct p2m_domain *p2m)
> d->arch.paging.free_page(d, pg);
> page_list_add(top, &p2m->pages);
>
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> void
> @@ -1245,7 +1292,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> p2m = nv->nv_p2m;
> if ( p2m )
> {
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
> {
> nv->nv_flushp2m = 0;
> @@ -1255,24 +1302,24 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> hvm_asid_flush_vcpu(v);
> p2m->cr3 = cr3;
> cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> nestedp2m_unlock(d);
> return p2m;
> }
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> /* All p2m's are or were in use. Take the least recent used one,
> * flush it and reuse. */
> p2m = p2m_getlru_nestedp2m(d, NULL);
> p2m_flush_table(p2m);
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> nv->nv_p2m = p2m;
> p2m->cr3 = cr3;
> nv->nv_flushp2m = 0;
> hvm_asid_flush_vcpu(v);
> cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> nestedp2m_unlock(d);
>
> return p2m;
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-ia64/mm.h
> --- a/xen/include/asm-ia64/mm.h
> +++ b/xen/include/asm-ia64/mm.h
> @@ -561,6 +561,11 @@ extern u64 translate_domain_pte(u64 ptev
> ((get_gpfn_from_mfn((madr) >> PAGE_SHIFT) << PAGE_SHIFT) | \
> ((madr) & ~PAGE_MASK))
>
> +/* Because x86-specific p2m fine-grained lock releases are called from common
> + * code, we put a dummy placeholder here */
> +#define drop_p2m_gfn(p, g, m) ((void)0)
> +#define drop_p2m_gfn_domain(p, g, m) ((void)0)
> +
> /* Internal use only: returns 0 in case of bad address. */
> extern unsigned long paddr_to_maddr(unsigned long paddr);
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-x86/p2m.h
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -220,7 +220,7 @@ struct p2m_domain {
> * tables on every host-p2m change. The setter of this flag
> * is responsible for performing the full flush before releasing the
> * host p2m's lock. */
> - int defer_nested_flush;
> + atomic_t defer_nested_flush;
>
> /* Pages used to construct the p2m */
> struct page_list_head pages;
> @@ -298,6 +298,15 @@ struct p2m_domain *p2m_get_p2m(struct vc
> #define p2m_get_pagetable(p2m) ((p2m)->phys_table)
>
>
> +/* No matter what value you get out of a query, the p2m has been locked for
> + * that range. No matter what you do, you need to drop those locks.
> + * You need to pass back the mfn obtained when locking, not the new one,
> + * as the refcount of the original mfn was bumped. */
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> + unsigned long mfn);
> +#define drop_p2m_gfn_domain(d, g, m) \
> + drop_p2m_gfn(p2m_get_hostp2m((d)), (g), (m))
> +
> /* Read a particular P2M table, mapping pages as we go. Most callers
> * should _not_ call this directly; use the other gfn_to_mfn_* functions
> * below unless you know you want to walk a p2m that isn't a domain's
> @@ -327,6 +336,28 @@ static inline mfn_t gfn_to_mfn_type(stru
> #define gfn_to_mfn_guest(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_guest)
> #define gfn_to_mfn_unshare(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_unshare)
>
> +/* This one applies to very specific situations in which you're querying
> + * a p2m entry and will be done "immediately" (such as a printk or computing a
> + * return value). Use this only if there are no expectations of the p2m entry
> + * holding steady. */
> +static inline mfn_t gfn_to_mfn_type_unlocked(struct domain *d,
> + unsigned long gfn, p2m_type_t *t,
> + p2m_query_t q)
> +{
> + mfn_t mfn = gfn_to_mfn_type(d, gfn, t, q);
> + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));
> + return mfn;
> +}
> +
> +#define gfn_to_mfn_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_alloc)
> +#define gfn_to_mfn_query_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_query)
> +#define gfn_to_mfn_guest_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_guest)
> +#define gfn_to_mfn_unshare_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_unshare)
> +
> /* Compatibility function exporting the old untyped interface */
> static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
> {
> @@ -338,6 +369,15 @@ static inline unsigned long gmfn_to_mfn(
> return INVALID_MFN;
> }
>
> +/* Same comments apply re unlocking */
> +static inline unsigned long gmfn_to_mfn_unlocked(struct domain *d,
> + unsigned long gpfn)
> +{
> + unsigned long mfn = gmfn_to_mfn(d, gpfn);
> + drop_p2m_gfn_domain(d, gpfn, mfn);
> + return mfn;
> +}
> +
> /* General conversion function from mfn to gfn */
> static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn)
> {
> @@ -521,7 +561,8 @@ static inline int p2m_gfn_check_limit(
> #define p2m_gfn_check_limit(d, g, o) 0
> #endif
>
> -/* Directly set a p2m entry: only for use by p2m code */
> +/* Directly set a p2m entry: only for use by p2m code. It expects locks to
> + * be held on entry */
> int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
> unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma);
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel