On Thu, Oct 27, 2011 at 1:33 PM, Andres Lagar-Cavilla
<andres@xxxxxxxxxxxxxxxx> wrote:
> xen/arch/x86/mm/hap/hap.c | 2 +-
> xen/arch/x86/mm/hap/nested_hap.c | 21 ++-
> xen/arch/x86/mm/p2m-ept.c | 26 +----
> xen/arch/x86/mm/p2m-pod.c | 42 +++++--
> xen/arch/x86/mm/p2m-pt.c | 20 +---
> xen/arch/x86/mm/p2m.c | 185 ++++++++++++++++++++++++--------------
> xen/include/asm-ia64/mm.h | 5 +
> xen/include/asm-x86/p2m.h | 45 +++++++++-
> 8 files changed, 217 insertions(+), 129 deletions(-)
>
>
> This patch only modifies code internal to the p2m, adding convenience
> macros, etc. It will yield a compiling code base but an incorrect
> hypervisor (external callers of queries into the p2m will not unlock).
> Next patch takes care of external callers, split done for the benefit
> of conciseness.
It's not obvious to me where in this patch to find a description of
what the new locking regime is. What does the _unlocked() suffix mean?
When do I have to call that variant rather than the plain one, and when
do I have to lock / unlock / drop things myself?

I think that should ideally be described both in the commit message (at
least a summary) and in a comment in a header somewhere. Perhaps it is
already in the patch somewhere, but a quick glance through didn't find
it...
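For instance, something along these lines in p2m.h would go a long way
(this is only my guess at the rules from reading the patch -- please
correct whatever I've got wrong):

    /*
     * Fine-grained p2m locking:
     *
     *  get_p2m_gfn(p2m, gfn) / put_p2m_gfn(p2m, gfn)
     *      Lock / unlock the range covering a single gfn.
     *  get_p2m(p2m, gfn, order) / put_p2m(p2m, gfn, order)
     *      Lock / unlock a 2^order range of gfns.
     *  get_p2m_global(p2m) / put_p2m_global(p2m)
     *      Take / release the whole p2m exclusively, for global
     *      operations (type changes, table setup and teardown, ...).
     *
     *  gfn_to_mfn_query() and the other gfn_to_mfn_* lookups return
     *  with the gfn locked and a reference held on the underlying
     *  page; the caller must release both with drop_p2m_gfn() (or
     *  drop_p2m_gfn_domain()), passing the mfn the lookup returned.
     *
     *  The *_unlocked() variants drop the lock and the reference
     *  before returning, so the result is only a snapshot of the
     *  entry.
     */
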
>
> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/hap.c
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -861,7 +861,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
> old_flags = l1e_get_flags(*p);
>
> if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
> - && !p2m_get_hostp2m(d)->defer_nested_flush ) {
> + && !atomic_read(&(p2m_get_hostp2m(d)->defer_nested_flush)) ) {
> /* We are replacing a valid entry so we need to flush nested p2ms,
> * unless the only change is an increase in access rights. */
> mfn_t omfn = _mfn(l1e_get_pfn(*p));
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/hap/nested_hap.c
> --- a/xen/arch/x86/mm/hap/nested_hap.c
> +++ b/xen/arch/x86/mm/hap/nested_hap.c
> @@ -105,8 +105,6 @@ nestedhap_fix_p2m(struct vcpu *v, struct
> ASSERT(p2m);
> ASSERT(p2m->set_entry);
>
> - p2m_lock(p2m);
> -
> /* If this p2m table has been flushed or recycled under our feet,
> * leave it alone. We'll pick up the right one as we try to
> * vmenter the guest. */
> @@ -122,11 +120,13 @@ nestedhap_fix_p2m(struct vcpu *v, struct
> gfn = (L2_gpa >> PAGE_SHIFT) & mask;
> mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
>
> + /* Not bumping refcount of pages underneath because we're getting
> + * rid of whatever was there */
> + get_p2m(p2m, gfn, page_order);
> rv = set_p2m_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
> + put_p2m(p2m, gfn, page_order);
> }
>
> - p2m_unlock(p2m);
> -
> if (rv == 0) {
> gdprintk(XENLOG_ERR,
> "failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
> @@ -146,19 +146,26 @@ nestedhap_walk_L0_p2m(struct p2m_domain
> mfn_t mfn;
> p2m_type_t p2mt;
> p2m_access_t p2ma;
> + int rc;
>
> /* walk L0 P2M table */
> mfn = gfn_to_mfn_type_p2m(p2m, L1_gpa >> PAGE_SHIFT, &p2mt, &p2ma,
> p2m_query, page_order);
>
> + rc = NESTEDHVM_PAGEFAULT_ERROR;
> if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
> - return NESTEDHVM_PAGEFAULT_ERROR;
> + goto out;
>
> + rc = NESTEDHVM_PAGEFAULT_ERROR;
> if ( !mfn_valid(mfn) )
> - return NESTEDHVM_PAGEFAULT_ERROR;
> + goto out;
>
> *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
> - return NESTEDHVM_PAGEFAULT_DONE;
> + rc = NESTEDHVM_PAGEFAULT_DONE;
> +
> +out:
> + drop_p2m_gfn(p2m, L1_gpa >> PAGE_SHIFT, mfn_x(mfn));
> + return rc;
> }
>
> /* This function uses L2_gpa to walk the P2M page table in L1. If the
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-ept.c
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -43,29 +43,16 @@
> #define is_epte_present(ept_entry) ((ept_entry)->epte & 0x7)
> #define is_epte_superpage(ept_entry) ((ept_entry)->sp)
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* Ept-specific check wrapper */
> static int ept_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
> ept_entry_t *entry, int order,
> p2m_query_t q)
> {
> - int r;
> -
> - /* This is called from the p2m lookups, which can happen with or
> - * without the lock hed. */
> - p2m_lock_recursive(p2m);
> -
> /* Check to make sure this is still PoD */
> if ( entry->sa_p2mt != p2m_populate_on_demand )
> - {
> - p2m_unlock(p2m);
> return 0;
> - }
>
> - r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> - p2m_unlock(p2m);
> -
> - return r;
> + return p2m_pod_demand_populate(p2m, gfn, order, q);
> }
>
> static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type,
> p2m_access_t access)
> @@ -265,9 +252,9 @@ static int ept_next_level(struct p2m_dom
>
> ept_entry = (*table) + index;
>
> - /* ept_next_level() is called (sometimes) without a lock. Read
> + /* ept_next_level() is called (never) without a lock. Read
> * the entry once, and act on the "cached" entry after that to
> - * avoid races. */
> + * avoid races. AAA */
> e = atomic_read_ept_entry(ept_entry);
>
> if ( !is_epte_present(&e) )
> @@ -733,7 +720,8 @@ void ept_change_entry_emt_with_range(str
> int order = 0;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> - p2m_lock(p2m);
> + /* This is a global operation, essentially */
> + get_p2m_global(p2m);
> for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
> {
> int level = 0;
> @@ -773,7 +761,7 @@ void ept_change_entry_emt_with_range(str
> ept_set_entry(p2m, gfn, mfn, order, e.sa_p2mt, e.access);
> }
> }
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> /*
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pod.c
> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -102,8 +102,6 @@ p2m_pod_cache_add(struct p2m_domain *p2m
> }
> #endif
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> /*
> * Pages from domain_alloc and returned by the balloon driver aren't
> * guaranteed to be zero; but by reclaiming zero pages, we implicitly
> @@ -536,7 +534,7 @@ p2m_pod_decrease_reservation(struct doma
> {
> p2m_type_t t;
>
> - gfn_to_mfn_query(d, gpfn + i, &t);
> + gfn_to_mfn_query_unlocked(d, gpfn + i, &t);
>
> if ( t == p2m_populate_on_demand )
> pod++;
The rest of the code makes it seem like gfn_to_mfn_query() will grab
the p2m lock for a range (and take a page reference), but the
_unlocked() version will not. Is that correct?
If so, shouldn't this call remain as it was?
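If I've understood the new scheme, the difference is just who releases
the per-gfn lock and page reference -- roughly (my reading, not
something the patch spells out):

    p2m_type_t t;
    mfn_t mfn;

    /* Locked: returns with the gfn locked and a ref held; the caller
     * must pair it with drop_p2m_gfn_domain() using the returned mfn. */
    mfn = gfn_to_mfn_query(d, gpfn + i, &t);
    /* ... the entry can't change underneath us here ... */
    drop_p2m_gfn_domain(d, gpfn + i, mfn_x(mfn));

    /* Unlocked: lock and ref are dropped before returning, so the
     * entry may have changed by the time we look at the result. */
    gfn_to_mfn_query_unlocked(d, gpfn + i, &t);

If the counts gathered here need to stay accurate for the rest of the
function, the locked variant seems like the safer choice -- but maybe
I'm missing a reason the snapshot is good enough.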
> @@ -602,6 +600,7 @@ p2m_pod_decrease_reservation(struct doma
> nonpod--;
> ram--;
> }
> + drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));
> }
And how does this fit with the _query() call above?
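For what it's worth, the pairing I'd expect from the rest of the series
(just a sketch using this patch's names) is:

    mfn = gfn_to_mfn_query(d, gpfn + i, &t);    /* takes per-gfn lock + ref */
    /* ... adjust the PoD / ram accounting ... */
    drop_p2m_gfn(p2m, gpfn + i, mfn_x(mfn));    /* releases both */

so it would help to spell out which lookup this drop_p2m_gfn() is
balancing.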
>
> /* If there are no more non-PoD entries, tell decrease_reservation() that
> @@ -661,12 +660,15 @@ p2m_pod_zero_check_superpage(struct p2m_
> for ( i=0; i<SUPERPAGE_PAGES; i++ )
> {
>
> - mfn = gfn_to_mfn_query(d, gfn + i, &type);
> -
> if ( i == 0 )
> {
> + /* Only lock the p2m entry the first time, that will lock
> + * server for the whole superpage */
> + mfn = gfn_to_mfn_query(d, gfn + i, &type);
> mfn0 = mfn;
> type0 = type;
> + } else {
> + mfn = gfn_to_mfn_query_unlocked(d, gfn + i, &type);
> }
>
> /* Conditions that must be met for superpage-superpage:
> @@ -773,6 +775,10 @@ out:
> p2m_pod_cache_add(p2m, mfn_to_page(mfn0), PAGE_ORDER_2M);
> p2m->pod.entry_count += SUPERPAGE_PAGES;
> }
> +
> + /* We got p2m locks once for the whole superpage, with the original
> + * mfn0. We drop it here. */
> + drop_p2m_gfn(p2m, gfn, mfn_x(mfn0));
> }
>
> /* On entry, PoD lock is held */
> @@ -894,6 +900,12 @@ p2m_pod_zero_check(struct p2m_domain *p2
> p2m->pod.entry_count++;
> }
> }
> +
> + /* Drop all p2m locks and references */
> + for ( i=0; i<count; i++ )
> + {
> + drop_p2m_gfn(p2m, gfns[i], mfn_x(mfns[i]));
> + }
>
> }
>
> @@ -928,7 +940,9 @@ p2m_pod_emergency_sweep_super(struct p2m
> p2m->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0;
> }
>
> -#define POD_SWEEP_STRIDE 16
> +/* Note that spinlock recursion counters have 4 bits, so 16 or higher
> + * will overflow a single 2M spinlock in a zero check. */
> +#define POD_SWEEP_STRIDE 15
> static void
> p2m_pod_emergency_sweep(struct p2m_domain *p2m)
> {
> @@ -946,7 +960,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
> /* FIXME: Figure out how to avoid superpages */
> for ( i=p2m->pod.reclaim_single; i > 0 ; i-- )
> {
> - gfn_to_mfn_query(p2m->domain, i, &t );
> + gfn_to_mfn_query_unlocked(p2m->domain, i, &t );
> if ( p2m_is_ram(t) )
> {
> gfns[j] = i;
> @@ -974,6 +988,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
>
> }
>
> +/* The gfn and order need to be locked in the p2m before you walk in here */
> int
> p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
> unsigned int order,
> @@ -985,8 +1000,6 @@ p2m_pod_demand_populate(struct p2m_domai
> mfn_t mfn;
> int i;
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> pod_lock(p2m);
> /* This check is done with the pod lock held. This will make sure that
> * even if d->is_dying changes under our feet, p2m_pod_empty_cache()
> @@ -1008,8 +1021,6 @@ p2m_pod_demand_populate(struct p2m_domai
> set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
> p2m_populate_on_demand, p2m->default_access);
> audit_p2m(p2m, 1);
> - /* This is because the ept/pt caller locks the p2m recursively */
> - p2m_unlock(p2m);
> return 0;
> }
>
> @@ -1132,7 +1143,9 @@ guest_physmap_mark_populate_on_demand(st
> if ( rc != 0 )
> return rc;
>
> - p2m_lock(p2m);
> + /* Pre-lock all the p2m entries. We don't take refs to the
> + * pages, because there shouldn't be any pages underneath. */
> + get_p2m(p2m, gfn, order);
> audit_p2m(p2m, 1);
>
> P2M_DEBUG("mark pod gfn=%#lx\n", gfn);
> @@ -1140,7 +1153,8 @@ guest_physmap_mark_populate_on_demand(st
> /* Make sure all gpfns are unused */
> for ( i = 0; i < (1UL << order); i++ )
> {
> - omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> + p2m_access_t a;
> + omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
> if ( p2m_is_ram(ot) )
> {
> printk("%s: gfn_to_mfn returned type %d!\n",
> @@ -1169,9 +1183,9 @@ guest_physmap_mark_populate_on_demand(st
> }
>
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
>
> out:
> + put_p2m(p2m, gfn, order);
> return rc;
> }
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m-pt.c
> --- a/xen/arch/x86/mm/p2m-pt.c
> +++ b/xen/arch/x86/mm/p2m-pt.c
> @@ -487,31 +487,16 @@ out:
> }
>
>
> -/* Non-ept "lock-and-check" wrapper */
> +/* PT-specific check wrapper */
> static int p2m_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn,
> l1_pgentry_t *p2m_entry, int order,
> p2m_query_t q)
> {
> - int r;
> -
> - /* This is called from the p2m lookups, which can happen with or
> - * without the lock hed. */
> - p2m_lock_recursive(p2m);
> - audit_p2m(p2m, 1);
> -
> /* Check to make sure this is still PoD */
> if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) !=
> p2m_populate_on_demand )
> - {
> - p2m_unlock(p2m);
> return 0;
> - }
>
> - r = p2m_pod_demand_populate(p2m, gfn, order, q);
> -
> - audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> -
> - return r;
> + return p2m_pod_demand_populate(p2m, gfn, order, q);
> }
>
> /* Read the current domain's p2m table (through the linear mapping). */
> @@ -894,6 +879,7 @@ static void p2m_change_type_global(struc
> if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
> return;
>
> + /* Checks for exclusive lock */
> ASSERT(p2m_locked_by_me(p2m));
>
> #if CONFIG_PAGING_LEVELS == 4
> diff -r 8a98179666de -r 471d4f2754d6 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -143,9 +143,9 @@ void p2m_change_entry_type_global(struct
> p2m_type_t ot, p2m_type_t nt)
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> p2m->change_entry_type_global(p2m, ot, nt);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> mfn_t gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
> @@ -162,12 +162,17 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
> return _mfn(gfn);
> }
>
> + /* We take the lock for this single gfn. The caller has to put this lock */
> + get_p2m_gfn(p2m, gfn);
> +
> mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
>
> #ifdef __x86_64__
> if ( q == p2m_unshare && p2m_is_shared(*t) )
> {
> ASSERT(!p2m_is_nestedp2m(p2m));
> + /* p2m locking is recursive, so we won't deadlock going
> + * into the sharing code */
> mem_sharing_unshare_page(p2m->domain, gfn, 0);
> mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
> }
> @@ -179,13 +184,28 @@ mfn_t gfn_to_mfn_type_p2m(struct p2m_dom
> /* Return invalid_mfn to avoid caller's access */
> mfn = _mfn(INVALID_MFN);
> if (q == p2m_guest)
> + {
> + put_p2m_gfn(p2m, gfn);
> domain_crash(p2m->domain);
> + }
> }
> #endif
>
> + /* Take an extra reference to the page. It won't disappear beneath us */
> + if ( mfn_valid(mfn) )
> + {
> + /* Use this because we don't necessarily know who owns the page */
> + if ( !page_get_owner_and_reference(mfn_to_page(mfn)) )
> + {
> + mfn = _mfn(INVALID_MFN);
> + }
> + }
> +
> + /* We leave holding the p2m lock for this gfn */
> return mfn;
> }
>
> +/* Appropriate locks held on entry */
> int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
> unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
> {
> @@ -194,8 +214,6 @@ int set_p2m_entry(struct p2m_domain *p2m
> unsigned int order;
> int rc = 1;
>
> - ASSERT(p2m_locked_by_me(p2m));
> -
> while ( todo )
> {
> if ( hap_enabled(d) )
> @@ -217,6 +235,18 @@ int set_p2m_entry(struct p2m_domain *p2m
> return rc;
> }
>
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> + unsigned long frame)
> +{
> + mfn_t mfn = _mfn(frame);
> + /* For non-translated domains, locks are never taken */
> + if ( !p2m || !paging_mode_translate(p2m->domain) )
> + return;
> + if ( mfn_valid(mfn) )
> + put_page(mfn_to_page(mfn));
> + put_p2m_gfn(p2m, gfn);
> +}
> +
> struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
> {
> struct page_info *pg;
> @@ -262,12 +292,12 @@ int p2m_alloc_table(struct p2m_domain *p
> unsigned long gfn = -1UL;
> struct domain *d = p2m->domain;
>
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
>
> if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
> {
> P2M_ERROR("p2m already allocated for this domain\n");
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -EINVAL;
> }
>
> @@ -283,7 +313,7 @@ int p2m_alloc_table(struct p2m_domain *p
>
> if ( p2m_top == NULL )
> {
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -ENOMEM;
> }
>
> @@ -295,7 +325,7 @@ int p2m_alloc_table(struct p2m_domain *p
> P2M_PRINTK("populating p2m table\n");
>
> /* Initialise physmap tables for slot zero. Other code assumes this. */
> - p2m->defer_nested_flush = 1;
> + atomic_set(&p2m->defer_nested_flush, 1);
> if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), 0,
> p2m_invalid, p2m->default_access) )
> goto error;
> @@ -323,10 +353,10 @@ int p2m_alloc_table(struct p2m_domain *p
> }
> spin_unlock(&p2m->domain->page_alloc_lock);
> }
> - p2m->defer_nested_flush = 0;
> + atomic_set(&p2m->defer_nested_flush, 0);
>
> P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return 0;
>
> error_unlock:
> @@ -334,7 +364,7 @@ error_unlock:
> error:
> P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
> PRI_mfn "\n", gfn, mfn_x(mfn));
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> return -ENOMEM;
> }
>
> @@ -354,26 +384,28 @@ void p2m_teardown(struct p2m_domain *p2m
> if (p2m == NULL)
> return;
>
> + get_p2m_global(p2m);
> +
> #ifdef __x86_64__
> for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
> {
> - mfn = gfn_to_mfn_type_p2m(p2m, gfn, &t, &a, p2m_query, NULL);
> + mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
> if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
> {
> ASSERT(!p2m_is_nestedp2m(p2m));
> + /* The p2m allows an exclusive global holder to recursively
> + * lock sub-ranges. For this. */
> BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
> }
>
> }
> #endif
>
> - p2m_lock(p2m);
> -
> p2m->phys_table = pagetable_null();
>
> while ( (pg = page_list_remove_head(&p2m->pages)) )
> d->arch.paging.free_page(d, pg);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> static void p2m_teardown_nestedp2m(struct domain *d)
> @@ -401,6 +433,7 @@ void p2m_final_teardown(struct domain *d
> }
>
>
> +/* Locks held on entry */
> static void
> p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn,
> unsigned int page_order)
> @@ -438,11 +471,11 @@ guest_physmap_remove_page(struct domain
> unsigned long mfn, unsigned int page_order)
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> - p2m_lock(p2m);
> + get_p2m(p2m, gfn, page_order);
> audit_p2m(p2m, 1);
> p2m_remove_page(p2m, gfn, mfn, page_order);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m(p2m, gfn, page_order);
> }
>
> int
> @@ -480,7 +513,7 @@ guest_physmap_add_entry(struct domain *d
> if ( rc != 0 )
> return rc;
>
> - p2m_lock(p2m);
> + get_p2m(p2m, gfn, page_order);
> audit_p2m(p2m, 0);
>
> P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
> @@ -488,12 +521,13 @@ guest_physmap_add_entry(struct domain *d
> /* First, remove m->p mappings for existing p->m mappings */
> for ( i = 0; i < (1UL << page_order); i++ )
> {
> - omfn = gfn_to_mfn_query(d, gfn + i, &ot);
> + p2m_access_t a;
> + omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
> if ( p2m_is_grant(ot) )
> {
> /* Really shouldn't be unmapping grant maps this way */
> + put_p2m(p2m, gfn, page_order);
> domain_crash(d);
> - p2m_unlock(p2m);
> return -EINVAL;
> }
> else if ( p2m_is_ram(ot) )
> @@ -523,11 +557,12 @@ guest_physmap_add_entry(struct domain *d
> && (ogfn != INVALID_M2P_ENTRY)
> && (ogfn != gfn + i) )
> {
> + p2m_access_t a;
> /* This machine frame is already mapped at another physical
> * address */
> P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
> mfn + i, ogfn, gfn + i);
> - omfn = gfn_to_mfn_query(d, ogfn, &ot);
> + omfn = p2m->get_entry(p2m, ogfn, &ot, &a, p2m_query, NULL);
> if ( p2m_is_ram(ot) )
> {
> ASSERT(mfn_valid(omfn));
> @@ -567,7 +602,7 @@ guest_physmap_add_entry(struct domain *d
> }
>
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m(p2m, gfn, page_order);
>
> return rc;
> }
> @@ -579,18 +614,19 @@ p2m_type_t p2m_change_type(struct domain
> p2m_type_t ot, p2m_type_t nt)
> {
> p2m_type_t pt;
> + p2m_access_t a;
> mfn_t mfn;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> - mfn = gfn_to_mfn_query(d, gfn, &pt);
> + mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
> if ( pt == ot )
> set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
>
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> return pt;
> }
> @@ -608,20 +644,23 @@ void p2m_change_type_range(struct domain
>
> BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
>
> - p2m_lock(p2m);
> - p2m->defer_nested_flush = 1;
> + atomic_set(&p2m->defer_nested_flush, 1);
>
> + /* We've been given a number instead of an order, so lock each
> + * gfn individually */
> for ( gfn = start; gfn < end; gfn++ )
> {
> - mfn = gfn_to_mfn_query(d, gfn, &pt);
> + p2m_access_t a;
> + get_p2m_gfn(p2m, gfn);
> + mfn = p2m->get_entry(p2m, gfn, &pt, &a, p2m_query, NULL);
> if ( pt == ot )
> set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
> + put_p2m_gfn(p2m, gfn);
> }
>
> - p2m->defer_nested_flush = 0;
> + atomic_set(&p2m->defer_nested_flush, 0);
> if ( nestedhvm_enabled(d) )
> p2m_flush_nestedp2m(d);
> - p2m_unlock(p2m);
> }
>
>
> @@ -631,17 +670,18 @@ set_mmio_p2m_entry(struct domain *d, uns
> {
> int rc = 0;
> p2m_type_t ot;
> + p2m_access_t a;
> mfn_t omfn;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - p2m_lock(p2m);
> - omfn = gfn_to_mfn_query(d, gfn, &ot);
> + get_p2m_gfn(p2m, gfn);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
> if ( p2m_is_grant(ot) )
> {
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> domain_crash(d);
> return 0;
> }
> @@ -654,11 +694,11 @@ set_mmio_p2m_entry(struct domain *d, uns
> P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
> rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct,
> p2m->default_access);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> if ( 0 == rc )
> gdprintk(XENLOG_ERR,
> "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> - mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> + mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
> return rc;
> }
>
> @@ -668,13 +708,14 @@ clear_mmio_p2m_entry(struct domain *d, u
> int rc = 0;
> mfn_t mfn;
> p2m_type_t t;
> + p2m_access_t a;
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>
> if ( !paging_mode_translate(d) )
> return 0;
>
> - p2m_lock(p2m);
> - mfn = gfn_to_mfn_query(d, gfn, &t);
> + get_p2m_gfn(p2m, gfn);
> + mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query, NULL);
>
> /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
> if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
> @@ -687,8 +728,7 @@ clear_mmio_p2m_entry(struct domain *d, u
> audit_p2m(p2m, 1);
>
> out:
> - p2m_unlock(p2m);
> -
> + put_p2m_gfn(p2m, gfn);
> return rc;
> }
>
> @@ -698,13 +738,14 @@ set_shared_p2m_entry(struct domain *d, u
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int rc = 0;
> p2m_type_t ot;
> + p2m_access_t a;
> mfn_t omfn;
>
> if ( !paging_mode_translate(p2m->domain) )
> return 0;
>
> - p2m_lock(p2m);
> - omfn = gfn_to_mfn_query(p2m->domain, gfn, &ot);
> + get_p2m_gfn(p2m, gfn);
> + omfn = p2m->get_entry(p2m, gfn, &ot, &a, p2m_query, NULL);
> /* At the moment we only allow p2m change if gfn has already been made
> * sharable first */
> ASSERT(p2m_is_shared(ot));
> @@ -714,11 +755,11 @@ set_shared_p2m_entry(struct domain *d, u
>
> P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
> rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> if ( 0 == rc )
> gdprintk(XENLOG_ERR,
> "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
> - mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
> + mfn_x(gfn_to_mfn_query_unlocked(d, gfn, &ot)));
> return rc;
> }
>
> @@ -732,7 +773,7 @@ int p2m_mem_paging_nominate(struct domai
> mfn_t mfn;
> int ret;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -765,7 +806,7 @@ int p2m_mem_paging_nominate(struct domai
> ret = 0;
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -778,7 +819,7 @@ int p2m_mem_paging_evict(struct domain *
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int ret = -EINVAL;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> /* Get mfn */
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
> @@ -824,7 +865,7 @@ int p2m_mem_paging_evict(struct domain *
> put_page(page);
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -863,7 +904,7 @@ void p2m_mem_paging_populate(struct doma
> req.type = MEM_EVENT_TYPE_PAGING;
>
> /* Fix p2m mapping */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
> /* Allow only nominated or evicted pages to enter page-in path */
> if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
> @@ -875,7 +916,7 @@ void p2m_mem_paging_populate(struct doma
> set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
> audit_p2m(p2m, 1);
> }
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> /* Pause domain if request came from guest and gfn has paging type */
> if ( p2m_is_paging(p2mt) && v->domain->domain_id == d->domain_id )
> @@ -908,7 +949,7 @@ int p2m_mem_paging_prep(struct domain *d
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> int ret = -ENOMEM;
>
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
>
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query, NULL);
>
> @@ -931,7 +972,7 @@ int p2m_mem_paging_prep(struct domain *d
> ret = 0;
>
> out:
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return ret;
> }
>
> @@ -949,12 +990,12 @@ void p2m_mem_paging_resume(struct domain
> /* Fix p2m entry if the page was not dropped */
> if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
> {
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, rsp.gfn);
> mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
> set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, a);
> set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
> audit_p2m(p2m, 1);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, rsp.gfn);
> }
>
> /* Unpause domain */
> @@ -979,16 +1020,16 @@ void p2m_mem_access_check(unsigned long
> p2m_access_t p2ma;
>
> /* First, handle rx2rw conversion automatically */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
>
> if ( access_w && p2ma == p2m_access_rx2rw )
> {
> p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> return;
> }
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
>
> /* Otherwise, check if there is a memory event listener, and send the message along */
> res = mem_event_check_ring(d, &d->mem_access);
> @@ -1006,9 +1047,9 @@ void p2m_mem_access_check(unsigned long
> else
> {
> /* A listener is not required, so clear the access restrictions */
> - p2m_lock(p2m);
> + get_p2m_gfn(p2m, gfn);
> p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt,
> p2m_access_rwx);
> - p2m_unlock(p2m);
> + put_p2m_gfn(p2m, gfn);
> }
>
> return;
> @@ -1064,7 +1105,7 @@ int p2m_set_mem_access(struct domain *d,
> {
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> unsigned long pfn;
> - p2m_access_t a;
> + p2m_access_t a, _a;
> p2m_type_t t;
> mfn_t mfn;
> int rc = 0;
> @@ -1095,17 +1136,20 @@ int p2m_set_mem_access(struct domain *d,
> return 0;
> }
>
> - p2m_lock(p2m);
> + /* Because we don't get an order, rather a number, we need to lock each
> + * entry individually */
> for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
> {
> - mfn = gfn_to_mfn_query(d, pfn, &t);
> + get_p2m_gfn(p2m, pfn);
> + mfn = p2m->get_entry(p2m, pfn, &t, &_a, p2m_query, NULL);
> if ( p2m->set_entry(p2m, pfn, mfn, PAGE_ORDER_4K, t, a) == 0 )
> {
> + put_p2m_gfn(p2m, pfn);
> rc = -ENOMEM;
> break;
> }
> + put_p2m_gfn(p2m, pfn);
> }
> - p2m_unlock(p2m);
> return rc;
> }
>
> @@ -1138,7 +1182,10 @@ int p2m_get_mem_access(struct domain *d,
> return 0;
> }
>
> + get_p2m_gfn(p2m, pfn);
> mfn = p2m->get_entry(p2m, pfn, &t, &a, p2m_query, NULL);
> + put_p2m_gfn(p2m, pfn);
> +
> if ( mfn_x(mfn) == INVALID_MFN )
> return -ESRCH;
>
> @@ -1175,7 +1222,7 @@ p2m_flush_table(struct p2m_domain *p2m)
> struct domain *d = p2m->domain;
> void *p;
>
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
>
> /* "Host" p2m tables can have shared entries &c that need a bit more
> * care when discarding them */
> @@ -1203,7 +1250,7 @@ p2m_flush_table(struct p2m_domain *p2m)
> d->arch.paging.free_page(d, pg);
> page_list_add(top, &p2m->pages);
>
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> void
> @@ -1245,7 +1292,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> p2m = nv->nv_p2m;
> if ( p2m )
> {
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
> {
> nv->nv_flushp2m = 0;
> @@ -1255,24 +1302,24 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> hvm_asid_flush_vcpu(v);
> p2m->cr3 = cr3;
> cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> nestedp2m_unlock(d);
> return p2m;
> }
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> }
>
> /* All p2m's are or were in use. Take the least recent used one,
> * flush it and reuse. */
> p2m = p2m_getlru_nestedp2m(d, NULL);
> p2m_flush_table(p2m);
> - p2m_lock(p2m);
> + get_p2m_global(p2m);
> nv->nv_p2m = p2m;
> p2m->cr3 = cr3;
> nv->nv_flushp2m = 0;
> hvm_asid_flush_vcpu(v);
> cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> - p2m_unlock(p2m);
> + put_p2m_global(p2m);
> nestedp2m_unlock(d);
>
> return p2m;
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-ia64/mm.h
> --- a/xen/include/asm-ia64/mm.h
> +++ b/xen/include/asm-ia64/mm.h
> @@ -561,6 +561,11 @@ extern u64 translate_domain_pte(u64 ptev
> ((get_gpfn_from_mfn((madr) >> PAGE_SHIFT) << PAGE_SHIFT) | \
> ((madr) & ~PAGE_MASK))
>
> +/* Because x86-specific p2m fine-grained lock releases are called from common
> + * code, we put a dummy placeholder here */
> +#define drop_p2m_gfn(p, g, m) ((void)0)
> +#define drop_p2m_gfn_domain(p, g, m) ((void)0)
> +
> /* Internal use only: returns 0 in case of bad address. */
> extern unsigned long paddr_to_maddr(unsigned long paddr);
>
> diff -r 8a98179666de -r 471d4f2754d6 xen/include/asm-x86/p2m.h
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -220,7 +220,7 @@ struct p2m_domain {
> * tables on every host-p2m change. The setter of this flag
> * is responsible for performing the full flush before releasing the
> * host p2m's lock. */
> - int defer_nested_flush;
> + atomic_t defer_nested_flush;
>
> /* Pages used to construct the p2m */
> struct page_list_head pages;
> @@ -298,6 +298,15 @@ struct p2m_domain *p2m_get_p2m(struct vc
> #define p2m_get_pagetable(p2m) ((p2m)->phys_table)
>
>
> +/* No matter what value you get out of a query, the p2m has been locked for
> + * that range. No matter what you do, you need to drop those locks.
> + * You need to pass back the mfn obtained when locking, not the new one,
> + * as the refcount of the original mfn was bumped. */
> +void drop_p2m_gfn(struct p2m_domain *p2m, unsigned long gfn,
> + unsigned long mfn);
> +#define drop_p2m_gfn_domain(d, g, m) \
> + drop_p2m_gfn(p2m_get_hostp2m((d)), (g), (m))
> +
> /* Read a particular P2M table, mapping pages as we go. Most callers
> * should _not_ call this directly; use the other gfn_to_mfn_* functions
> * below unless you know you want to walk a p2m that isn't a domain's
> @@ -327,6 +336,28 @@ static inline mfn_t gfn_to_mfn_type(stru
> #define gfn_to_mfn_guest(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_guest)
> #define gfn_to_mfn_unshare(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_unshare)
>
> +/* This one applies to very specific situations in which you're querying
> + * a p2m entry and will be done "immediately" (such as a printk or computing a
> + * return value). Use this only if there are no expectations of the p2m entry
> + * holding steady. */
> +static inline mfn_t gfn_to_mfn_type_unlocked(struct domain *d,
> + unsigned long gfn, p2m_type_t *t,
> + p2m_query_t q)
> +{
> + mfn_t mfn = gfn_to_mfn_type(d, gfn, t, q);
> + drop_p2m_gfn_domain(d, gfn, mfn_x(mfn));
> + return mfn;
> +}
> +
> +#define gfn_to_mfn_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_alloc)
> +#define gfn_to_mfn_query_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_query)
> +#define gfn_to_mfn_guest_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_guest)
> +#define gfn_to_mfn_unshare_unlocked(d, g, t) \
> + gfn_to_mfn_type_unlocked((d), (g), (t), p2m_unshare)
> +
> /* Compatibility function exporting the old untyped interface */
> static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
> {
> @@ -338,6 +369,15 @@ static inline unsigned long gmfn_to_mfn(
> return INVALID_MFN;
> }
>
> +/* Same comments apply re unlocking */
> +static inline unsigned long gmfn_to_mfn_unlocked(struct domain *d,
> + unsigned long gpfn)
> +{
> + unsigned long mfn = gmfn_to_mfn(d, gpfn);
> + drop_p2m_gfn_domain(d, gpfn, mfn);
> + return mfn;
> +}
> +
> /* General conversion function from mfn to gfn */
> static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn)
> {
> @@ -521,7 +561,8 @@ static inline int p2m_gfn_check_limit(
> #define p2m_gfn_check_limit(d, g, o) 0
> #endif
>
> -/* Directly set a p2m entry: only for use by p2m code */
> +/* Directly set a p2m entry: only for use by p2m code. It expects locks to
> + * be held on entry */
> int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
> unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma);
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel