WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] implement HVMOP_pagetable_dying

To: Keir Fraser <Keir.Fraser@xxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] implement HVMOP_pagetable_dying
From: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Date: Mon, 21 Jun 2010 18:50:24 +0100
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Stefano Stabellini <Stefano.Stabellini@xxxxxxxxxxxxx>
Delivery-date: Mon, 21 Jun 2010 10:52:48 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <C8456226.181DA%keir.fraser@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <C8456226.181DA%keir.fraser@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Alpine 2.00 (DEB 1167 2008-08-23)
On Mon, 21 Jun 2010, Keir Fraser wrote:
> On 21/06/2010 17:15, "Stefano Stabellini" <Stefano.Stabellini@xxxxxxxxxxxxx>
> wrote:
> 
> > Hi all,
> > this patch implements HVMOP_pagetable_dying: a hypercall for
> > guests to notify Xen that a pagetable is about to be destroyed so that
> > Xen can use it as a hint to unshadow the pagetable soon and unhook the
> > top-level user-mode shadow entries right away.
> 
> This patch doesn't apply to xen-unstable tip.
> 
 
here we go:


diff -r 31708477f0a9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Jun 21 18:49:28 2010 +0100
@@ -3153,6 +3153,30 @@
         break;
     }
 
+    case HVMOP_pagetable_dying:
+    {
+        struct xen_hvm_pagetable_dying a;
+        struct domain *d;
+
+        if ( copy_from_guest(&a, arg, 1) )
+            return -EFAULT;
+
+        rc = rcu_lock_target_domain_by_id(a.domid, &d);
+        if ( rc != 0 )
+            return rc;
+
+        rc = -EINVAL;
+        if ( !is_hvm_domain(d) || !paging_mode_shadow(d) )
+            goto param_fail5;
+
+        rc = 0;
+        pagetable_dying(d, a.gpa);
+
+    param_fail5:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
diff -r 31708477f0a9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/mm/paging.c  Mon Jun 21 18:49:28 2010 +0100
@@ -766,6 +766,18 @@
         return shadow_enable(d, mode | PG_SH_enable);
 }
 
+/* Called from the guest to indicate that a process is being torn down
+ * and therefore its pagetables will soon be discarded */
+void pagetable_dying(struct domain *d, paddr_t gpa)
+{
+    struct vcpu *v;
+
+    ASSERT(paging_mode_shadow(d));
+
+    v = d->vcpu[0];
+    v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
+}
+
 /* Print paging-assistance info to the console */
 void paging_dump_domain_info(struct domain *d)
 {
diff -r 31708477f0a9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Jun 21 18:49:28 2010 +0100
@@ -60,6 +60,7 @@
     d->arch.paging.shadow.oos_active = 0;
     d->arch.paging.shadow.oos_off = (domcr_flags & DOMCRF_oos_off) ?  1 : 0;
 #endif
+    d->arch.paging.shadow.pagetable_dying_op = 0;
 }
 
 /* Setup the shadow-specfic parts of a vcpu struct. Note: The most important
@@ -1314,22 +1315,23 @@
 }
 
 /* Dispatcher function: call the per-mode function that will unhook the
- * non-Xen mappings in this top-level shadow mfn */
-static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
+ * non-Xen mappings in this top-level shadow mfn.  With user_only == 1,
+ * unhooks only the user-mode mappings. */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only)
 {
     struct page_info *sp = mfn_to_page(smfn);
     switch ( sp->u.sh.type )
     {
     case SH_type_l2_32_shadow:
-        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
+        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v, smfn, user_only);
         break;
     case SH_type_l2_pae_shadow:
     case SH_type_l2h_pae_shadow:
-        SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v,smfn);
+        SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, 3)(v, smfn, user_only);
         break;
 #if CONFIG_PAGING_LEVELS >= 4
     case SH_type_l4_64_shadow:
-        SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v,smfn);
+        SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, 4)(v, smfn, user_only);
         break;
 #endif
     default:
@@ -1399,7 +1401,7 @@
             {
                 TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
                 shadow_unhook_mappings(v, 
-                               pagetable_get_mfn(v2->arch.shadow_table[i]));
+                               pagetable_get_mfn(v2->arch.shadow_table[i]), 0);
 
                 /* See if that freed up enough space */
                 if ( space_is_available(d, order, count) )
@@ -1454,7 +1456,7 @@
         for ( i = 0 ; i < 4 ; i++ )
             if ( !pagetable_is_null(v->arch.shadow_table[i]) )
                 shadow_unhook_mappings(v, 
-                               pagetable_get_mfn(v->arch.shadow_table[i]));
+                               pagetable_get_mfn(v->arch.shadow_table[i]), 0);
 
     /* Make sure everyone sees the unshadowings */
     flush_tlb_mask(&d->domain_dirty_cpumask);
diff -r 31708477f0a9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Jun 21 18:49:28 2010 +0100
@@ -2179,37 +2179,43 @@
  * These are called from common code when we are running out of shadow
  * memory, and unpinning all the top-level shadows hasn't worked. 
  *
+ * With user_only == 1, we leave guest kernel-mode mappings in place too,
+ * unhooking only the user-mode mappings
+ *
  * This implementation is pretty crude and slow, but we hope that it won't 
  * be called very often. */
 
 #if GUEST_PAGING_LEVELS == 2
 
-void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
+void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only)
 {    
     shadow_l2e_t *sl2e;
     SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
-        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+        if ( !user_only || (sl2e->l2 & _PAGE_USER) )
+            (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
     });
 }
 
 #elif GUEST_PAGING_LEVELS == 3
 
-void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn)
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl2mfn, int user_only)
 /* Walk a PAE l2 shadow, unhooking entries from all the subshadows */
 {
     shadow_l2e_t *sl2e;
     SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, v->domain, {
-        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+        if ( !user_only || (sl2e->l2 & _PAGE_USER) )
+            (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
     });
 }
 
 #elif GUEST_PAGING_LEVELS == 4
 
-void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
+void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn, int user_only)
 {
     shadow_l4e_t *sl4e;
     SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, v->domain, {
-        (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+        if ( !user_only || (sl4e->l4 & _PAGE_USER) )
+            (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
     });
 }
 
@@ -2693,8 +2699,18 @@
 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
 {
 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
-    if ( v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn)
-         && sh_mfn_is_a_page_table(gmfn) )
+    /* If the domain has never made a "dying" op, use the two-writes
+     * heuristic; otherwise, unshadow as soon as we write a zero for a dying
+     * process.
+     *
+     * Don't bother trying to unshadow if it's not a PT, or if it's > l1.
+     */
+    if ( ( v->arch.paging.shadow.pagetable_dying
+           || ( !v->domain->arch.paging.shadow.pagetable_dying_op
+                && v->arch.paging.shadow.last_emulated_mfn_for_unshadow == mfn_x(gmfn) ) )
+         && sh_mfn_is_a_page_table(gmfn)
+         && !(mfn_to_page(gmfn)->shadow_flags
+              & (SHF_L2_32|SHF_L2_PAE|SHF_L2H_PAE|SHF_L4_64)) )
     {
         perfc_incr(shadow_early_unshadow);
         sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
@@ -3384,6 +3400,40 @@
      * caught by user-mode page-table check above.
      */
  emulate_readonly:
+
+    /* Unshadow if we are writing to a toplevel pagetable that is
+     * flagged as a dying process, and that is not currently used. */
+    if ( sh_mfn_is_a_page_table(gmfn)
+         && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) )
+    {
+        int used = 0;
+        struct vcpu *tmp;
+        for_each_vcpu(d, tmp)
+        {
+#if GUEST_PAGING_LEVELS == 3
+            int i;
+            for ( i = 0; i < 4; i++ )
+            {
+                mfn_t smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+                if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) )
+                {
+                    used |= (mfn_to_page(smfn)->v.sh.back == mfn_x(gmfn));
+
+                    if ( used )
+                        break;
+                }
+            }
+#else /* 32 or 64 */
+            used = (mfn_x(pagetable_get_mfn(tmp->arch.guest_table)) == mfn_x(gmfn));
+#endif
+            if ( used )
+                break;
+        }
+
+        if ( !used )
+            sh_remove_shadows(v, gmfn, 1 /* fast */, 0 /* can fail */);
+    }
+
     /*
      * We don't need to hold the lock for the whole emulation; we will
      * take it again when we write to the pagetables.
@@ -4033,6 +4083,11 @@
         smfn = sh_make_shadow(v, gmfn, root_type);
     }
     ASSERT(mfn_valid(smfn));
+
+    /* Remember if we've been told that this process is being torn down */
+    v->arch.paging.shadow.pagetable_dying
+        = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
+
     
     /* Pin the shadow and put it (back) on the list of pinned shadows */
     if ( sh_pin(v, smfn) == 0 )
@@ -4603,6 +4658,110 @@
 #endif /* 64bit guest */ 
 
 /**************************************************************************/
+/* Function for the guest to inform us that a process is being torn
+ * down.  We remember that as a hint to unshadow its pagetables soon,
+ * and in the meantime we unhook its top-level user-mode entries. */
+
+#if GUEST_PAGING_LEVELS == 3
+static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
+{
+    int i = 0;
+    int flush = 0;
+    int fast_path = 0;
+    paddr_t gcr3 = 0;
+    mfn_t smfn, gmfn;
+    p2m_type_t p2mt;
+    unsigned long gl3pa;
+    guest_l3e_t *gl3e = NULL;
+    paddr_t gl2a = 0;
+
+    shadow_lock(v->domain);
+
+    gcr3 = (v->arch.hvm_vcpu.guest_cr[3]);
+    /* fast path: the pagetable belongs to the current context */
+    if ( gcr3 == gpa )
+        fast_path = 1;
+
+    gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt);
+    if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) )
+    {
+        printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %lx\n", gpa);
+        goto out;
+    }
+    if ( !fast_path )
+    {
+        gl3pa = (unsigned long) sh_map_domain_page(gmfn);
+        gl3e = (guest_l3e_t *) (gl3pa + (gpa & ~PAGE_MASK));
+    }
+    for ( i = 0; i < 4; i++ )
+    {
+        if ( fast_path )
+            smfn = _mfn(pagetable_get_pfn(v->arch.shadow_table[i]));
+        else
+        {
+            /* retrieving the l2s */
+            gl2a = guest_l3e_get_paddr(gl3e[i]);
+            gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >> PAGE_SHIFT), &p2mt);
+            smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow);
+        }
+
+        if ( mfn_valid(smfn) )
+        {
+            gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+            mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
+            shadow_unhook_mappings(v, smfn, 1/* user pages only */);
+            flush = 1;
+        }
+    }
+    if ( flush )
+        flush_tlb_mask(&v->domain->domain_dirty_cpumask);
+
+    /* Remember that we've seen the guest use this interface, so we
+     * can rely on it using it in future, instead of guessing at
+     * when processes are being torn down. */
+    v->domain->arch.paging.shadow.pagetable_dying_op = 1;
+
+    v->arch.paging.shadow.pagetable_dying = 1;
+
+out:
+    if ( !fast_path )
+        unmap_domain_page(gl3pa);
+    shadow_unlock(v->domain);
+}
+#else
+static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
+{
+    mfn_t smfn, gmfn;
+    p2m_type_t p2mt;
+
+    shadow_lock(v->domain);
+
+    gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt);
+#if GUEST_PAGING_LEVELS == 2
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow);
+#else
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l4_64_shadow);
+#endif
+    if ( mfn_valid(smfn) )
+    {
+        mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
+        shadow_unhook_mappings(v, smfn, 1/* user pages only */);
+        /* Now flush the TLB: we removed toplevel mappings. */
+        flush_tlb_mask(&v->domain->domain_dirty_cpumask);
+    }
+
+    /* Remember that we've seen the guest use this interface, so we
+     * can rely on it using it in future, instead of guessing at
+     * when processes are being torn down. */
+    v->domain->arch.paging.shadow.pagetable_dying_op = 1;
+
+    v->arch.paging.shadow.pagetable_dying = 1;
+
+    shadow_unlock(v->domain);
+}
+#endif
+
+/**************************************************************************/
 /* Handling HVM guest writes to pagetables  */
 
 /* Translate a VA to an MFN, injecting a page-fault if we fail */
@@ -5247,6 +5406,7 @@
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
     .shadow.guess_wrmap            = sh_guess_wrmap,
 #endif
+    .shadow.pagetable_dying        = sh_pagetable_dying,
     .shadow.shadow_levels          = SHADOW_PAGING_LEVELS,
 };
 
diff -r 31708477f0a9 xen/arch/x86/mm/shadow/multi.h
--- a/xen/arch/x86/mm/shadow/multi.h    Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.h    Mon Jun 21 18:49:28 2010 +0100
@@ -52,13 +52,13 @@
 
 extern void 
 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, GUEST_LEVELS)
-    (struct vcpu *v, mfn_t sl2mfn);
+    (struct vcpu *v, mfn_t sl2mfn, int user_only);
 extern void 
 SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings, GUEST_LEVELS)
-    (struct vcpu *v, mfn_t sl3mfn);
+    (struct vcpu *v, mfn_t sl3mfn, int user_only);
 extern void 
 SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings, GUEST_LEVELS)
-    (struct vcpu *v, mfn_t sl4mfn);
+    (struct vcpu *v, mfn_t sl4mfn, int user_only);
 
 extern int
 SHADOW_INTERNAL_NAME(sh_rm_write_access_from_l1, GUEST_LEVELS)
diff -r 31708477f0a9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/arch/x86/mm/shadow/private.h  Mon Jun 21 18:49:28 2010 +0100
@@ -321,6 +321,8 @@
 
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
 
+#define SHF_pagetable_dying (1u<<31)
+
 static inline int sh_page_has_multiple_shadows(struct page_info *pg)
 {
     u32 shadows;
@@ -406,6 +408,10 @@
 int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p,
                                intpte_t *old, intpte_t new, mfn_t gmfn);
 
+/* Unhook the non-Xen mappings in this top-level shadow mfn.
+ * With user_only == 1, unhooks only the user-mode mappings. */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn, int user_only);
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
 /* Allow a shadowed page to go out of sync */
 int sh_unsync(struct vcpu *v, mfn_t gmfn);
diff -r 31708477f0a9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/include/asm-x86/domain.h      Mon Jun 21 18:49:28 2010 +0100
@@ -121,6 +121,8 @@
     /* OOS */
     int oos_active;
     int oos_off;
+
+    int pagetable_dying_op;
 };
 
 struct shadow_vcpu {
@@ -149,6 +151,8 @@
         mfn_t smfn[SHADOW_OOS_FIXUPS];
         unsigned long off[SHADOW_OOS_FIXUPS];
     } oos_fixup[SHADOW_OOS_PAGES];
+
+    int pagetable_dying;
 };
 
 /************************************************/
diff -r 31708477f0a9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/include/asm-x86/paging.h      Mon Jun 21 18:49:28 2010 +0100
@@ -95,6 +95,7 @@
     void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
     int           (*guess_wrmap           )(struct vcpu *v, 
                                             unsigned long vaddr, mfn_t gmfn);
+    void          (*pagetable_dying       )(struct vcpu *v, paddr_t gpa);
     /* For outsiders to tell what mode we're in */
     unsigned int shadow_levels;
 };
@@ -342,6 +343,10 @@
         safe_write_pte(p, new);
 }
 
+/* Called from the guest to indicate that a process is being
+ * torn down and its pagetables will soon be discarded */
+void pagetable_dying(struct domain *d, paddr_t gpa);
+
 /* Print paging-assistance info to the console */
 void paging_dump_domain_info(struct domain *d);
 void paging_dump_vcpu_info(struct vcpu *v);
diff -r 31708477f0a9 xen/include/public/hvm/hvm_op.h
--- a/xen/include/public/hvm/hvm_op.h   Mon Jun 21 09:59:10 2010 +0100
+++ b/xen/include/public/hvm/hvm_op.h   Mon Jun 21 18:49:28 2010 +0100
@@ -127,6 +127,16 @@
 typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
 DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
 
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying        9
+struct xen_hvm_pagetable_dying {
+    /* Domain with a pagetable about to be destroyed. */
+    domid_t  domid;
+    /* guest physical address of the toplevel pagetable dying */
+    uint64_aligned_t gpa;
+};
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
 
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>