[Xen-changelog] [xen-unstable] PoD memory 3/9: PoD core

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] PoD memory 3/9: PoD core
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 08 Jan 2009 06:57:20 -0800
Delivery-date: Thu, 08 Jan 2009 06:59:47 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
X-BeenThere: xen-devel@xxxxxxxxxxxxxxxxxxx
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: Xen developer discussion <xen-devel.lists.xensource.com>
List-Unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
        <mailto:xen-devel-request@xxxxxxxxxxxxxxxxxxx?subject=unsubscribe>
List-Post: <mailto:xen-devel@xxxxxxxxxxxxxxxxxxx>
List-Help: <mailto:xen-devel-request@xxxxxxxxxxxxxxxxxxx?subject=help>
List-Subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
        <mailto:xen-devel-request@xxxxxxxxxxxxxxxxxxx?subject=subscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Errors-To: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Return-Path: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
X-OriginalArrivalTime: 23 Dec 2008 13:47:03.0625 (UTC)
 FILETIME=[EFEBC390:01C96504]

# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1231152199 0
# Node ID f2ba08549466d595fd5901ad655ebe82c266753d
# Parent  629f028d22f9885cee83a7eac23ff0f78155bc38
PoD memory 3/9: PoD core

Core of populate-on-demand functionality:
* Introduce a populate-on-demand type
* Call p2m_pod_demand_populate() when gfn_to_mfn() encounters PoD entries
* Return PoD cache memory to the domain page list for freeing during domain destruction
* Have the p2m audit code check our PoD-entry reference counting
* Add PoD information to the 'q' debug key

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
---
 xen/arch/x86/domain.c          |    5 
 xen/arch/x86/mm/p2m.c          |  497 ++++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/mm/paging.c       |    3 
 xen/arch/x86/mm/shadow/multi.c |    8 
 xen/include/asm-x86/p2m.h      |   42 +++
 5 files changed, 519 insertions(+), 36 deletions(-)
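
The flow described above can be reduced to a small self-contained model.  This
is an illustration only, not hypervisor code: the types and helpers below are
simplified stand-ins, and the only things taken from the patch are the names of
the functions they mirror (guest_physmap_mark_populate_on_demand,
p2m_pod_demand_populate, gfn_to_mfn).

/* Toy model of the populate-on-demand flow added by this patch.
 * Simplified types and stand-in helpers; not Xen code. */
#include <stdio.h>
#include <stdlib.h>

#define NR_GFNS     16
#define INVALID_MFN (~0UL)

enum p2m_type { p2m_invalid, p2m_ram_rw, p2m_populate_on_demand };

struct toy_domain {
    enum p2m_type p2m_type[NR_GFNS];  /* per-gfn type, as in the p2m table  */
    unsigned long p2m_mfn[NR_GFNS];   /* per-gfn backing frame              */
    unsigned long pod_cache[NR_GFNS]; /* stand-in for pod.single/pod.super  */
    int pod_count;                    /* pages held in the PoD cache        */
    int pod_entry_count;              /* p2m entries still marked PoD       */
};

/* Mirrors guest_physmap_mark_populate_on_demand(): mark a range PoD
 * without assigning any memory to it yet. */
static void mark_pod(struct toy_domain *d, unsigned long gfn, int count)
{
    for (int i = 0; i < count; i++) {
        d->p2m_type[gfn + i] = p2m_populate_on_demand;
        d->p2m_mfn[gfn + i] = INVALID_MFN;
    }
    d->pod_entry_count += count;
}

/* Mirrors p2m_pod_demand_populate(): back a PoD entry with a page from
 * the PoD cache the first time the guest actually touches it. */
static void demand_populate(struct toy_domain *d, unsigned long gfn)
{
    if (d->p2m_type[gfn] != p2m_populate_on_demand)
        return;                      /* someone else populated it already */
    if (d->pod_count == 0) {
        fprintf(stderr, "Out of populate-on-demand memory!\n");
        exit(1);                     /* the real code calls domain_crash() */
    }
    d->p2m_mfn[gfn] = d->pod_cache[--d->pod_count];
    d->p2m_type[gfn] = p2m_ram_rw;
    d->pod_entry_count--;
}

/* Mirrors the gfn_to_mfn() paths in the patch: a non-query lookup of a
 * PoD entry populates it before reading the (now valid) entry. */
static unsigned long gfn_to_mfn(struct toy_domain *d, unsigned long gfn)
{
    if (d->p2m_type[gfn] == p2m_populate_on_demand)
        demand_populate(d, gfn);
    return d->p2m_type[gfn] == p2m_ram_rw ? d->p2m_mfn[gfn] : INVALID_MFN;
}

int main(void)
{
    struct toy_domain d = { .pod_count = 4,
                            .pod_cache = { 100, 101, 102, 103 } };

    mark_pod(&d, 0, 8);              /* 8 gfns backed by only 4 real pages */
    printf("entries=%d cache=%d\n", d.pod_entry_count, d.pod_count);
    printf("gfn 3 -> mfn %lu\n", gfn_to_mfn(&d, 3));  /* first touch       */
    printf("entries=%d cache=%d\n", d.pod_entry_count, d.pod_count);
    return 0;
}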

diff -r 629f028d22f9 -r f2ba08549466 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jan 05 10:42:39 2009 +0000
+++ b/xen/arch/x86/domain.c     Mon Jan 05 10:43:19 2009 +0000
@@ -149,6 +149,11 @@ void dump_pageframe_info(struct domain *
         }
     }
 
+    if ( is_hvm_domain(d) )
+    {
+        p2m_pod_dump_data(d);
+    }
+
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
         printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
diff -r 629f028d22f9 -r f2ba08549466 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Jan 05 10:42:39 2009 +0000
+++ b/xen/arch/x86/mm/p2m.c     Mon Jan 05 10:43:19 2009 +0000
@@ -118,9 +118,16 @@ static unsigned long p2m_type_to_flags(p
         return flags;
     case p2m_mmio_direct:
         return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
-    }
-}
-
+    case p2m_populate_on_demand:
+        return flags;
+    }
+}
+
+#if P2M_AUDIT
+static void audit_p2m(struct domain *d);
+#else
+# define audit_p2m(_d) do { (void)(_d); } while(0)
+#endif /* P2M_AUDIT */
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -162,7 +169,8 @@ p2m_next_level(struct domain *d, mfn_t *
                                       shift, max)) )
         return 0;
 
-    if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+    /* PoD: Not present doesn't imply empty. */
+    if ( !l1e_get_flags(*p2m_entry) )
     {
         struct page_info *pg = d->arch.p2m->alloc_page(d);
         if ( pg == NULL )
@@ -197,7 +205,7 @@ p2m_next_level(struct domain *d, mfn_t *
         }
     }
 
-    ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+    ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
 
     /* split single large page into 4KB page in P2M table */
     if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
@@ -242,6 +250,236 @@ p2m_next_level(struct domain *d, mfn_t *
     return 1;
 }
 
+/*
+ * Populate-on-demand functionality
+ */
+int
+p2m_pod_cache_add(struct domain *d,
+                  struct page_info *page,
+                  unsigned long order)
+{
+    int i;
+    struct page_info *p;
+    struct p2m_domain *p2md = d->arch.p2m;
+
+#ifndef NDEBUG
+    mfn_t mfn;
+
+    mfn = page_to_mfn(page);
+
+    /* Check to make sure this is a contiguous region */
+    if( mfn_x(mfn) & ((1 << order) - 1) )
+    {
+        printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
+               __func__, mfn_x(mfn), order, ((1UL << order) - 1));
+        return -1;
+    }
+    
+    for(i=0; i < 1 << order ; i++) {
+        struct domain * od;
+
+        p = mfn_to_page(_mfn(mfn_x(mfn) + i));
+        od = page_get_owner(p);
+        if(od != d)
+        {
+            printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
+                   __func__, mfn_x(mfn), d->domain_id,
+                   od?od->domain_id:-1);
+            return -1;
+        }
+    }
+#endif
+
+    spin_lock(&d->page_alloc_lock);
+
+    /* First, take all pages off the domain list */
+    for(i=0; i < 1 << order ; i++)
+    {
+        p = page + i;
+        list_del(&p->list);
+    }
+
+    /* Then add the first one to the appropriate populate-on-demand list */
+    switch(order)
+    {
+    case 9:
+        list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
+        p2md->pod.count += 1 << order;
+        break;
+    case 0:
+        list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
+        p2md->pod.count += 1 ;
+        break;
+    default:
+        BUG();
+    }
+
+    spin_unlock(&d->page_alloc_lock);
+
+    return 0;
+}
+
+void
+p2m_pod_empty_cache(struct domain *d)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    struct list_head *q, *p;
+
+    spin_lock(&d->page_alloc_lock);
+
+    list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
+    {
+        int i;
+        struct page_info *page;
+            
+        list_del(p);
+            
+        page = list_entry(p, struct page_info, list);
+
+        for ( i = 0 ; i < (1 << 9) ; i++ )
+        {
+            BUG_ON(page_get_owner(page + i) != d);
+            list_add_tail(&page[i].list, &d->page_list);
+        }
+
+        p2md->pod.count -= 1<<9;
+    }
+
+    list_for_each_safe(p, q, &p2md->pod.single)
+    {
+        struct page_info *page;
+            
+        list_del(p);
+            
+        page = list_entry(p, struct page_info, list);
+
+        BUG_ON(page_get_owner(page) != d);
+        list_add_tail(&page->list, &d->page_list);
+
+        p2md->pod.count -= 1;
+    }
+
+    BUG_ON(p2md->pod.count != 0);
+
+    spin_unlock(&d->page_alloc_lock);
+}
+
+void
+p2m_pod_dump_data(struct domain *d)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    
+    printk("    PoD entries=%d cachesize=%d\n",
+           p2md->pod.entry_count, p2md->pod.count);
+}
+
+static int
+p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
+                        mfn_t table_mfn,
+                        l1_pgentry_t *p2m_entry,
+                        unsigned int order,
+                        p2m_query_t q)
+{
+    struct page_info *p = NULL; /* Compiler warnings */
+    unsigned long gfn_aligned;
+    mfn_t mfn;
+    l1_pgentry_t entry_content = l1e_empty();
+    struct p2m_domain *p2md = d->arch.p2m;
+    int i;
+
+    /* We need to grab the p2m lock here and re-check the entry to make
+     * sure that someone else hasn't populated it for us, then hold it
+     * until we're done. */
+    p2m_lock(p2md);
+    audit_p2m(d);
+
+    /* Check to make sure this is still PoD */
+    if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
+    {
+        p2m_unlock(p2md);
+        return 0;
+    }
+
+    spin_lock(&d->page_alloc_lock);
+
+    if ( p2md->pod.count == 0 )
+        goto out_of_memory;
+
+    /* FIXME -- use single pages / splinter superpages if need be */
+    switch ( order )
+    {
+    case 9:
+        BUG_ON( list_empty(&p2md->pod.super) );
+        p = list_entry(p2md->pod.super.next, struct page_info, list); 
+        p2md->pod.count -= 1 << order; /* Lock: page_alloc */
+        break;
+    case 0:
+        BUG_ON( list_empty(&p2md->pod.single) );
+        p = list_entry(p2md->pod.single.next, struct page_info, list);
+        p2md->pod.count -= 1;
+        break;
+    default:
+        BUG();
+    }
+        
+    list_del(&p->list);
+
+    mfn = page_to_mfn(p);
+
+    BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
+
+    /* Put the pages back on the domain page_list */
+    for ( i = 0 ; i < (1 << order) ; i++ )
+    {
+        BUG_ON(page_get_owner(p + i) != d);
+        list_add_tail(&p[i].list, &d->page_list);
+    }
+
+    spin_unlock(&d->page_alloc_lock);
+
+    /* Fill in the entry in the p2m */
+    switch ( order )
+    {
+    case 9:
+    {
+        l2_pgentry_t l2e_content;
+        
+        l2e_content = l2e_from_pfn(mfn_x(mfn),
+                                   p2m_type_to_flags(p2m_ram_rw) | _PAGE_PSE);
+
+        entry_content.l1 = l2e_content.l2;
+    }
+    break;
+    case 0:
+        entry_content = l1e_from_pfn(mfn_x(mfn),
+                                     p2m_type_to_flags(p2m_ram_rw));
+        break;
+        
+    }
+
+    gfn_aligned = (gfn >> order) << order;
+
+    paging_write_p2m_entry(d, gfn_aligned, p2m_entry, table_mfn,
+                           entry_content, (order==9)?2:1);
+
+    for( i = 0 ; i < (1UL << order) ; i++ )
+        set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
+    
+    p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
+    BUG_ON(p2md->pod.entry_count < 0);
+    audit_p2m(d);
+    p2m_unlock(p2md);
+
+    return 0;
+out_of_memory:
+    spin_unlock(&d->page_alloc_lock);
+    audit_p2m(d);
+    p2m_unlock(p2md);
+    printk("%s: Out of populate-on-demand memory!\n", __func__);
+    domain_crash(d);
+    return -1;
+}
+
 // Returns 0 on error (out of memory)
 static int
 p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
@@ -303,6 +541,7 @@ p2m_set_entry(struct domain *d, unsigned
                                    L2_PAGETABLE_ENTRIES);
         ASSERT(p2m_entry);
         
+        /* FIXME: Deal with 4k replaced by 2meg pages */
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
@@ -311,7 +550,7 @@ p2m_set_entry(struct domain *d, unsigned
             goto out;
         }
         
-        if ( mfn_valid(mfn) )
+        if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
             l2e_content = l2e_from_pfn(mfn_x(mfn),
                                        p2m_type_to_flags(p2mt) | _PAGE_PSE);
         else
@@ -403,8 +642,21 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
 
     l2e = map_domain_page(mfn_x(mfn));
     l2e += l2_table_offset(addr);
+
+pod_retry_l2:
     if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
     {
+        /* PoD: Try to populate a 2-meg chunk */
+        if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
+        {
+            if ( q != p2m_query ) {
+                if( !p2m_pod_demand_populate(d, gfn, mfn,
+                                             (l1_pgentry_t *)l2e, 9, q) )
+                    goto pod_retry_l2;
+            } else
+                *t = p2m_populate_on_demand;
+        }
+    
         unmap_domain_page(l2e);
         return _mfn(INVALID_MFN);
     }
@@ -423,8 +675,20 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
 
     l1e = map_domain_page(mfn_x(mfn));
     l1e += l1_table_offset(addr);
+pod_retry_l1:
     if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
     {
+        /* PoD: Try to populate */
+        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
+        {
+            if ( q != p2m_query ) {
+                if( !p2m_pod_demand_populate(d, gfn, mfn,
+                                             (l1_pgentry_t *)l1e, 0, q) )
+                    goto pod_retry_l1;
+            } else
+                *t = p2m_populate_on_demand;
+        }
+    
         unmap_domain_page(l1e);
         return _mfn(INVALID_MFN);
     }
@@ -450,48 +714,114 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
 
     if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
     {
-        l1_pgentry_t l1e = l1e_empty();
+        l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
         l2_pgentry_t l2e = l2e_empty();
         int ret;
 
         ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
                / sizeof(l1_pgentry_t));
 
+        /*
+         * Read & process L2
+         */
+        p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
+                                       + l2_linear_offset(addr)];
+
+    pod_retry_l2:
         ret = __copy_from_user(&l2e,
-                               &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+                               p2m_entry,
                                sizeof(l2e));
+        if ( ret != 0
+             || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+        {
+            if( (l2e_get_flags(l2e) & _PAGE_PSE)
+                && ( p2m_flags_to_type(l2e_get_flags(l2e))
+                     == p2m_populate_on_demand ) )
+            {
+                /* The read has succeeded, so we know that the mapping
+                 * exists at this point.  */
+                if ( q != p2m_query )
+                {
+                    if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
+                                                 p2m_entry, 9, q) )
+                        goto pod_retry_l2;
+
+                    /* Allocate failed. */
+                    p2mt = p2m_invalid;
+                    printk("%s: Allocate failed!\n", __func__);
+                    goto out;
+                }
+                else
+                {
+                    p2mt = p2m_populate_on_demand;
+                    goto out;
+                }
+            }
+
+            goto pod_retry_l1;
+        }
         
-        if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
-             (l2e_get_flags(l2e) & _PAGE_PSE) ) 
+        if (l2e_get_flags(l2e) & _PAGE_PSE)
         {
             p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
             ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
             if ( p2m_is_valid(p2mt) )
                 mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
             else
                 p2mt = p2m_mmio_dm;
-        }
-        else
-        {
-        
-            /* Need to __copy_from_user because the p2m is sparse and this
-             * part might not exist */
-            ret = __copy_from_user(&l1e,
-                                   &phys_to_machine_mapping[gfn],
-                                   sizeof(l1e));
+
+            goto out;
+        }
+
+        /*
+         * Read and process L1
+         */
+
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+    pod_retry_l1:
+        p2m_entry = &phys_to_machine_mapping[gfn];
+
+        ret = __copy_from_user(&l1e,
+                               p2m_entry,
+                               sizeof(l1e));
             
-            if ( ret == 0 ) {
-                p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
-                ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
-                if ( p2m_is_valid(p2mt) )
-                    mfn = _mfn(l1e_get_pfn(l1e));
-                else 
-                    /* XXX see above */
-                    p2mt = p2m_mmio_dm;
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
+            if ( p2m_flags_to_type(l1e_get_flags(l1e))
+                 == p2m_populate_on_demand )
+            {
+                /* The read has succeeded, so we know that the mapping
+                 * exists at this point.  */
+                if ( q != p2m_query )
+                {
+                    if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
+                                                 (l1_pgentry_t *)p2m_entry, 0,
+                                                 q) )
+                        goto pod_retry_l1;
+
+                    /* Allocate failed. */
+                    p2mt = p2m_invalid;
+                    goto out;
+                }
+                else
+                {
+                    p2mt = p2m_populate_on_demand;
+                    goto out;
+                }
             }
-        }
-    }
-
+
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else 
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
+out:
     *t = p2mt;
     return mfn;
 }
@@ -510,6 +840,8 @@ int p2m_init(struct domain *d)
     memset(p2m, 0, sizeof(*p2m));
     p2m_lock_init(p2m);
     INIT_LIST_HEAD(&p2m->pages);
+    INIT_LIST_HEAD(&p2m->pod.super);
+    INIT_LIST_HEAD(&p2m->pod.single);
 
     p2m->set_entry = p2m_set_entry;
     p2m->get_entry = p2m_gfn_to_mfn;
@@ -680,6 +1012,7 @@ static void audit_p2m(struct domain *d)
     struct page_info *page;
     struct domain *od;
     unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
+    int entry_count = 0;
     mfn_t p2mfn;
     unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
     int test_linear;
@@ -805,6 +1138,10 @@ static void audit_p2m(struct domain *d)
                 {
                     if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                     {
+                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
+                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
+                                  == p2m_populate_on_demand ) )
+                            entry_count+=(1<<9);
                         gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                         continue;
                     }
@@ -835,13 +1172,20 @@ static void audit_p2m(struct domain *d)
                     for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                     {
                         if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                        {
+                            if ( p2m_flags_to_type(l1e_get_flags(l1e[i1]))
+                                 == p2m_populate_on_demand )
+                            entry_count++;
                             continue;
+                        }
                         mfn = l1e_get_pfn(l1e[i1]);
                         ASSERT(mfn_valid(_mfn(mfn)));
                         m2pfn = get_gpfn_from_mfn(mfn);
                         if ( m2pfn != gfn )
                         {
                             pmbad++;
+                            printk("mismatch: gfn %#lx -> mfn %#lx"
+                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                             P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                        " -> gfn %#lx\n", gfn, mfn, m2pfn);
                             BUG();
@@ -864,6 +1208,15 @@ static void audit_p2m(struct domain *d)
 
     }
 
+    if ( entry_count != d->arch.p2m->pod.entry_count )
+    {
+        printk("%s: refcounted entry count %d, audit count %d!\n",
+               __func__,
+               d->arch.p2m->pod.entry_count,
+               entry_count);
+        BUG();
+    }
+        
     //P2M_PRINTK("p2m audit complete\n");
     //if ( orphans_i | orphans_d | mpbad | pmbad )
     //    P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
@@ -872,8 +1225,6 @@ static void audit_p2m(struct domain *d)
         P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
                    pmbad, mpbad);
 }
-#else
-#define audit_p2m(_d) do { (void)(_d); } while(0)
 #endif /* P2M_AUDIT */
 
 
@@ -911,6 +1262,77 @@ guest_physmap_remove_page(struct domain 
 }
 
 int
+guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+                                      unsigned int order)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    unsigned long i;
+    p2m_type_t ot;
+    mfn_t omfn;
+    int pod_count = 0;
+    int rc = 0;
+
+    BUG_ON(!paging_mode_translate(d));
+
+#if CONFIG_PAGING_LEVELS == 3
+    /*
+     * 32bit PAE nested paging does not support over 4GB guest due to 
+     * hardware translation limit. This limitation is checked by comparing
+     * gfn with 0xfffffUL.
+     */
+    if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
+    {
+        if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
+            dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
+                    " 4GB: specify 'hap=0' domain config option.\n",
+                    d->domain_id);
+        return -EINVAL;
+    }
+#endif
+
+    p2m_lock(p2md);
+    audit_p2m(d);
+
+    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    /* Make sure all gpfns are unused */
+    for ( i = 0; i < (1UL << order); i++ )
+    {
+        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
+        if ( p2m_is_ram(ot) )
+        {
+            printk("%s: gfn_to_mfn returned type %d!\n",
+                   __func__, ot);
+            rc = -EBUSY;
+            goto out;
+        }
+        else if ( ot == p2m_populate_on_demand )
+        {
+            /* Count how many PoD entries we'll be replacing if successful */
+            pod_count++;
+        }
+    }
+
+    /* Now, actually do the two-way mapping */
+    if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
+                        p2m_populate_on_demand) )
+        rc = -EINVAL;
+    else
+    {
+        p2md->pod.entry_count += 1 << order; /* Lock: p2m */
+        p2md->pod.entry_count -= pod_count;
+        BUG_ON(p2md->pod.entry_count < 0);
+    }
+
+    audit_p2m(d);
+    p2m_unlock(p2md);
+
+out:
+    return rc;
+
+}
+
+int
 guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                         unsigned long mfn, unsigned int page_order, 
                         p2m_type_t t)
@@ -918,6 +1340,7 @@ guest_physmap_add_entry(struct domain *d
     unsigned long i, ogfn;
     p2m_type_t ot;
     mfn_t omfn;
+    int pod_count = 0;
     int rc = 0;
 
     if ( !paging_mode_translate(d) )
@@ -965,6 +1388,11 @@ guest_physmap_add_entry(struct domain *d
         {
             ASSERT(mfn_valid(omfn));
             set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+        }
+        else if ( ot == p2m_populate_on_demand )
+        {
+            /* Count how many PoD entries we'll be replacing if successful */
+            pod_count++;
         }
     }
 
@@ -1012,6 +1440,11 @@ guest_physmap_add_entry(struct domain *d
         if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, 
                             p2m_invalid) )
             rc = -EINVAL;
+        else
+        {
+            d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
+            BUG_ON(d->arch.p2m->pod.entry_count < 0);
+        }
     }
 
     audit_p2m(d);
diff -r 629f028d22f9 -r f2ba08549466 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Mon Jan 05 10:42:39 2009 +0000
+++ b/xen/arch/x86/mm/paging.c  Mon Jan 05 10:43:19 2009 +0000
@@ -585,6 +585,9 @@ void paging_teardown(struct domain *d)
 
     /* clean up log dirty resources. */
     paging_log_dirty_teardown(d);
+
+    /* Move populate-on-demand cache back to domain_list for destruction */
+    p2m_pod_empty_cache(d);
 }
 
 /* Call once all of the references to the domain have gone away */
diff -r 629f028d22f9 -r f2ba08549466 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Jan 05 10:42:39 2009 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Jan 05 10:43:19 2009 +0000
@@ -2173,7 +2173,7 @@ static int validate_gl4e(struct vcpu *v,
         mfn_t gl3mfn = gfn_to_mfn_query(d, gl3gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
-        else
+        else if ( p2mt != p2m_populate_on_demand )
             result |= SHADOW_SET_ERROR;
 
 #if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
@@ -2230,7 +2230,7 @@ static int validate_gl3e(struct vcpu *v,
         mfn_t gl2mfn = gfn_to_mfn_query(v->domain, gl2gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
-        else
+        else if ( p2mt != p2m_populate_on_demand )
             result |= SHADOW_SET_ERROR;
 
 #if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
@@ -2278,8 +2278,8 @@ static int validate_gl2e(struct vcpu *v,
         {
             mfn_t gl1mfn = gfn_to_mfn_query(v->domain, gl1gfn, &p2mt);
             if ( p2m_is_ram(p2mt) )
-                sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
-            else
+                sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); 
+            else if ( p2mt != p2m_populate_on_demand )
                 result |= SHADOW_SET_ERROR;
         }
     }
diff -r 629f028d22f9 -r f2ba08549466 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Jan 05 10:42:39 2009 +0000
+++ b/xen/include/asm-x86/p2m.h Mon Jan 05 10:43:19 2009 +0000
@@ -64,6 +64,7 @@ typedef enum {
     p2m_ram_ro = 3,             /* Read-only; writes are silently dropped */
     p2m_mmio_dm = 4,            /* Reads and write go to the device model */
     p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+    p2m_populate_on_demand = 6, /* Place-holder for empty memory */
 } p2m_type_t;
 
 typedef enum {
@@ -88,11 +89,19 @@ typedef enum {
 #define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
                       | p2m_to_mask(p2m_ram_ro))
 
+#define P2M_MAGIC_TYPES (p2m_to_mask(p2m_populate_on_demand))
+
 /* Useful predicates */
 #define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
 #define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
 #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_magic(_t) (p2m_to_mask(_t) & P2M_MAGIC_TYPES)
 #define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Populate-on-demand */
+#define POPULATE_ON_DEMAND_MFN  (1<<9)
+#define POD_PAGE_ORDER 9
+
 
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
@@ -122,6 +131,28 @@ struct p2m_domain {
 
     /* Highest guest frame that's ever been mapped in the p2m */
     unsigned long max_mapped_pfn;
+
+    /* Populate-on-demand variables
+     * NB on locking.  {super,single,count} are
+     * covered by d->page_alloc_lock, since they're almost always used in
+     * conjunction with that functionality.  {entry_count} is covered by
+     * the domain p2m lock, since it's almost always used in conjunction
+     * with changing the p2m tables.
+     *
+     * At this point, both locks are held in two places.  In both,
+     * the order is [p2m,page_alloc]:
+     * + p2m_pod_decrease_reservation() calls p2m_pod_cache_add(),
+     *   which grabs page_alloc
+     * + p2m_pod_demand_populate() grabs both; the p2m lock to avoid
+     *   double-demand-populating of pages, the page_alloc lock to
+     *   protect moving stuff from the PoD cache to the domain page list.
+     */
+    struct {
+        struct list_head super,        /* List of superpages                */
+                         single;       /* Non-super lists                   */
+        int              count,        /* # of pages in cache lists         */
+                         entry_count;  /* # of pages in p2m marked pod      */
+    } pod;
 };
 
 /* Extract the type from the PTE flags that store it */
@@ -220,10 +251,21 @@ void p2m_teardown(struct domain *d);
 void p2m_teardown(struct domain *d);
 void p2m_final_teardown(struct domain *d);
 
+/* Dump PoD information about the domain */
+void p2m_pod_dump_data(struct domain *d);
+
+/* Move all pages from the populate-on-demand cache to the domain page_list
+ * (usually in preparation for domain destruction) */
+void p2m_pod_empty_cache(struct domain *d);
+
 /* Add a page to a domain's p2m table */
 int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                             unsigned long mfn, unsigned int page_order, 
                             p2m_type_t t);
+
+/* Set a p2m range as populate-on-demand */
+int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+                                          unsigned int order);
 
 /* Untyped version for RAM only, for compatibility 
  *
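
The locking rule spelled out in the struct p2m_domain comment above (take the
p2m lock before the page_alloc lock whenever both are needed, as
p2m_pod_demand_populate() does) can be shown in a small stand-alone sketch.
This is only an illustration: pthread mutexes stand in for Xen's spinlocks,
and none of it is hypervisor code.

/* Sketch of the [p2m, page_alloc] lock ordering described above.
 * The p2m lock serialises populates and other p2m updates; the
 * page_alloc lock guards the PoD cache and the domain page list. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t p2m_lock        = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_alloc_lock = PTHREAD_MUTEX_INITIALIZER;

static int pod_cache_count = 512;  /* pages in the PoD cache (page_alloc) */
static int pod_entry_count = 512;  /* PoD entries in the p2m (p2m lock)   */

static void demand_populate_one(void)
{
    pthread_mutex_lock(&p2m_lock);        /* first: p2m lock              */

    pthread_mutex_lock(&page_alloc_lock); /* second: page_alloc lock      */
    pod_cache_count--;                    /* take a page from the cache   */
    pthread_mutex_unlock(&page_alloc_lock);

    pod_entry_count--;                    /* one PoD entry is now backed  */

    pthread_mutex_unlock(&p2m_lock);
}

int main(void)
{
    demand_populate_one();
    printf("cache=%d entries=%d\n", pod_cache_count, pod_entry_count);
    return 0;
}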

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
