WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [XEN] Remove batched writable pagetable logic.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [XEN] Remove batched writable pagetable logic.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Sat, 12 Aug 2006 13:00:30 +0000
Delivery-date: Sat, 12 Aug 2006 06:03:36 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 9727328c008edbc7be03c8223a605e828eed52e7
# Parent  1d817bfc5ed90ac6b637e6b52b1c07c22f9fcf50
[XEN] Remove batched writable pagetable logic.

Benchmarks show it provides little or no benefit (except
on synthetic benchmarks). Also it is complicated and
likely to hinder efforts to reduce locking granularity.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/ia64/xen/mm.c       |   21 -
 xen/arch/x86/domain.c        |    7 
 xen/arch/x86/mm.c            |  483 +------------------------------------------
 xen/arch/x86/traps.c         |   38 ---
 xen/include/asm-ia64/mm.h    |    2 
 xen/include/asm-powerpc/mm.h |    6 
 xen/include/asm-x86/domain.h |    3 
 xen/include/asm-x86/mm.h     |   54 ----
 xen/include/asm-x86/perfc.h  |   12 -
 xen/include/xen/mm.h         |    4 
 10 files changed, 42 insertions(+), 588 deletions(-)

diff -r 1d817bfc5ed9 -r 9727328c008e xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/arch/ia64/xen/mm.c    Fri Aug 11 16:07:22 2006 +0100
@@ -1580,11 +1580,6 @@ void pgtable_quicklist_free(void *pgtabl
        free_xenheap_page(pgtable_entry);
 }
 
-void cleanup_writable_pagetable(struct domain *d)
-{
-  return;
-}
-
 void put_page_type(struct page_info *page)
 {
     u32 nx, x, y = page->u.inuse.type_info;
@@ -1692,22 +1687,6 @@ int get_page_type(struct page_info *page
             {
                 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
                 {
-                    if ( current->domain == page_get_owner(page) )
-                    {
-                        /*
-                         * This ensures functions like set_gdt() see up-to-date
-                         * type info without needing to clean up writable p.t.
-                         * state on the fast path.
-                         */
-                        LOCK_BIGLOCK(current->domain);
-                        cleanup_writable_pagetable(current->domain);
-                        y = page->u.inuse.type_info;
-                        UNLOCK_BIGLOCK(current->domain);
-                        /* Can we make progress now? */
-                        if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
-                             ((y & PGT_count_mask) == 0) )
-                            goto again;
-                    }
                     if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
                          ((type & PGT_type_mask) != PGT_l1_page_table) )
                         MEM_LOG("Bad type (saw %08x != exp %08x) "
diff -r 1d817bfc5ed9 -r 9727328c008e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/arch/x86/domain.c     Fri Aug 11 16:07:22 2006 +0100
@@ -154,7 +154,7 @@ int arch_domain_create(struct domain *d)
 int arch_domain_create(struct domain *d)
 {
     l1_pgentry_t gdt_l1e;
-    int vcpuid, pdpt_order, rc;
+    int vcpuid, pdpt_order;
 #ifdef __x86_64__
     int i;
 #endif
@@ -213,9 +213,6 @@ int arch_domain_create(struct domain *d)
             goto fail_nomem;
 
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
-            goto fail_nomem;
-
-        if ( (rc = ptwr_init(d)) != 0 )
             goto fail_nomem;
 
         memset(d->shared_info, 0, PAGE_SIZE);
@@ -927,8 +924,6 @@ void domain_relinquish_resources(struct 
 
     BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
 
-    ptwr_destroy(d);
-
     /* Drop the in-use references to page-table bases. */
     for_each_vcpu ( d, v )
     {
diff -r 1d817bfc5ed9 -r 9727328c008e xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/arch/x86/mm.c Fri Aug 11 16:07:22 2006 +0100
@@ -1669,10 +1669,8 @@ int get_page_type(struct page_info *page
                          *     enter a recursive loop via get_page_from_l1e()
                          *     during pagetable revalidation.
                          */
-                        LOCK_BIGLOCK(current->domain);
-                        cleanup_writable_pagetable(current->domain);
+                        sync_pagetable_state(current->domain);
                         y = page->u.inuse.type_info;
-                        UNLOCK_BIGLOCK(current->domain);
                         /* Can we make progress now? */
                         if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
                              ((y & PGT_count_mask) == 0) )
@@ -1750,8 +1748,6 @@ int new_guest_cr3(unsigned long mfn)
     int okay;
     unsigned long old_base_mfn;
 
-    ASSERT(writable_pagetable_in_sync(d));
-
     if ( shadow_mode_refcounts(d) )
     {
         okay = get_page_from_pagenr(mfn, d);
@@ -1940,7 +1936,7 @@ int do_mmuext_op(
 
     LOCK_BIGLOCK(d);
 
-    cleanup_writable_pagetable(d);
+    sync_pagetable_state(d);
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
     {
@@ -2193,7 +2189,7 @@ int do_mmu_update(
 
     LOCK_BIGLOCK(d);
 
-    cleanup_writable_pagetable(d);
+    sync_pagetable_state(d);
 
     if ( unlikely(shadow_mode_enabled(d)) )
         check_pagetable(v, "pre-mmu"); /* debug */
@@ -2704,7 +2700,7 @@ int do_update_va_mapping(unsigned long v
 
     LOCK_BIGLOCK(d);
 
-    cleanup_writable_pagetable(d);
+    sync_pagetable_state(d);
 
     if ( unlikely(shadow_mode_enabled(d)) )
         check_pagetable(v, "pre-va"); /* debug */
@@ -3102,131 +3098,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
  * Writable Pagetables
  */
 
-#ifdef VVERBOSE
-int ptwr_debug = 0x0;
-#define PTWR_PRINTK(_f, _a...) \
- do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
-#define PTWR_PRINT_WHICH (which ? 'I' : 'A')
-#else
-#define PTWR_PRINTK(_f, _a...) ((void)0)
-#endif
-
-
-#ifdef PERF_ARRAYS
-
-/**************** writeable pagetables profiling functions *****************/
-
-#define ptwr_eip_buckets        256
-
-int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES};
-
-#define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int))
-
-struct {
-    unsigned long eip;
-    domid_t       id;
-    u32           val[ptwr_eip_stat_thresholdN];
-} typedef ptwr_eip_stat_t;
-
-ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets];
-
-static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id )
-{
-    return (((unsigned long) id) ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>24)) % 
-        ptwr_eip_buckets;
-}
-
-static void ptwr_eip_stat_inc(u32 *n)
-{
-    unsigned int i, j;
-
-    if ( ++(*n) != 0 )
-        return;
-
-    *n = ~0;
-
-    /* Re-scale all buckets. */
-    for ( i = 0; i < ptwr_eip_buckets; i++ )
-        for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
-            ptwr_eip_stats[i].val[j] >>= 1;
-}
-
-static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified)
-{
-    unsigned int i, j, b;
-
-    i = b = ptwr_eip_stat_hash(eip, id);
-
-    do
-    {
-        if ( !ptwr_eip_stats[i].eip )
-        {
-            /* doesn't exist */
-            ptwr_eip_stats[i].eip = eip;
-            ptwr_eip_stats[i].id = id;
-            memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val));
-        }
-
-        if ( ptwr_eip_stats[i].eip == eip && ptwr_eip_stats[i].id == id)
-        {
-            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
-                if ( modified <= ptwr_eip_stat_threshold[j] )
-                    break;
-            BUG_ON(j >= ptwr_eip_stat_thresholdN);
-            ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]);
-            return;
-        }
-
-        i = (i+1) % ptwr_eip_buckets;
-    }
-    while ( i != b );
-   
-    printk("ptwr_eip_stat: too many EIPs in use!\n");
-    
-    ptwr_eip_stat_print();
-    ptwr_eip_stat_reset();
-}
-
-void ptwr_eip_stat_reset(void)
-{
-    memset(ptwr_eip_stats, 0, sizeof(ptwr_eip_stats));
-}
-
-void ptwr_eip_stat_print(void)
-{
-    struct domain *e;
-    domid_t d;
-    unsigned int i, j;
-
-    for_each_domain( e )
-    {
-        d = e->domain_id;
-
-        for ( i = 0; i < ptwr_eip_buckets; i++ )
-        {
-            if ( !ptwr_eip_stats[i].eip || ptwr_eip_stats[i].id != d )
-                continue;
-
-            printk("D %5d  eip %p ",
-                   ptwr_eip_stats[i].id, (void *)ptwr_eip_stats[i].eip);
-
-            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
-                printk("<=%u %4u \t",
-                       ptwr_eip_stat_threshold[j],
-                       ptwr_eip_stats[i].val[j]);
-            printk("\n");
-        }
-    }
-}
-
-#else /* PERF_ARRAYS */
-
-#define ptwr_eip_stat_update(eip, id, modified) ((void)0)
-
-#endif
-
-/*******************************************************************/
-
 /* Re-validate a given p.t. page, given its prior snapshot */
 int revalidate_l1(
     struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
@@ -3277,112 +3148,6 @@ int revalidate_l1(
     return modified;
 }
 
-
-/* Flush the given writable p.t. page and write-protect it again. */
-void ptwr_flush(struct domain *d, const int which)
-{
-    unsigned long l1va;
-    l1_pgentry_t  *pl1e, pte, *ptep;
-    l2_pgentry_t  *pl2e;
-    unsigned int   modified;
-
-#ifdef CONFIG_X86_64
-    struct vcpu *v = current;
-    int user_mode = !(v->arch.flags & TF_kernel_mode);
-#endif
-
-    ASSERT(!shadow_mode_enabled(d));
-
-    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        __write_ptbase(pagetable_get_pfn(
-            d->arch.ptwr[which].vcpu->arch.guest_table));
-    else
-        TOGGLE_MODE();
-
-    l1va = d->arch.ptwr[which].l1va;
-    ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
-
-    /*
-     * STEP 1. Write-protect the p.t. page so no more updates can occur.
-     */
-
-    if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
-    {
-        MEM_LOG("ptwr: Could not read pte at %p", ptep);
-        /*
-         * Really a bug. We could read this PTE during the initial fault,
-         * and pagetables can't have changed meantime.
-         */
-        BUG();
-    }
-    PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
-                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
-    l1e_remove_flags(pte, _PAGE_RW);
-
-    /* Write-protect the p.t. page in the guest page table. */
-    if ( unlikely(__put_user(pte, ptep)) )
-    {
-        MEM_LOG("ptwr: Could not update pte at %p", ptep);
-        /*
-         * Really a bug. We could write this PTE during the initial fault,
-         * and pagetables can't have changed meantime.
-         */
-        BUG();
-    }
-
-    /* Ensure that there are no stale writable mappings in any TLB. */
-    /* NB. INVLPG is a serialising instruction: flushes pending updates. */
-    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
-    PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
-                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
-
-    /*
-     * STEP 2. Validate any modified PTEs.
-     */
-
-    if ( likely(d == current->domain) )
-    {
-        pl1e = map_domain_page(l1e_get_pfn(pte));
-        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
-        unmap_domain_page(pl1e);
-        perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
-        ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
-        d->arch.ptwr[which].prev_nr_updates = modified;
-    }
-    else
-    {
-        /*
-         * Must make a temporary global mapping, since we are running in the
-         * wrong address space, so no access to our own mapcache.
-         */
-        pl1e = map_domain_page_global(l1e_get_pfn(pte));
-        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
-        unmap_domain_page_global(pl1e);
-    }
-
-    /*
-     * STEP 3. Reattach the L1 p.t. page into the current address space.
-     */
-
-    if ( which == PTWR_PT_ACTIVE )
-    {
-        pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
-        l2e_add_flags(*pl2e, _PAGE_PRESENT); 
-    }
-
-    /*
-     * STEP 4. Final tidy-up.
-     */
-
-    d->arch.ptwr[which].l1va = 0;
-
-    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
-        write_ptbase(current);
-    else 
-        TOGGLE_MODE();
-}
-
 static int ptwr_emulated_update(
     unsigned long addr,
     paddr_t old,
@@ -3390,7 +3155,7 @@ static int ptwr_emulated_update(
     unsigned int bytes,
     unsigned int do_cmpxchg)
 {
-    unsigned long pfn, l1va;
+    unsigned long pfn;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
     struct domain *d = current->domain;
@@ -3428,24 +3193,6 @@ static int ptwr_emulated_update(
         old  |= full;
     }
 
-#if 0 /* XXX KAF: I don't think this can happen. */
-    /*
-     * We must not emulate an update to a PTE that is temporarily marked
-     * writable by the batched ptwr logic, else we can corrupt page refcnts! 
-     */
-    if ( ((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
-         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-    if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
-         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
-        ptwr_flush(d, PTWR_PT_INACTIVE);
-#else
-    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
-           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
-    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
-           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
-#endif
-
     /* Read the PTE that maps the page being updated. */
     if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
@@ -3545,239 +3292,53 @@ int ptwr_do_page_fault(struct domain *d,
 int ptwr_do_page_fault(struct domain *d, unsigned long addr, 
                        struct cpu_user_regs *regs)
 {
-    unsigned long    pfn;
+    unsigned long     pfn;
     struct page_info *page;
-    l1_pgentry_t    *pl1e, pte;
-    l2_pgentry_t    *pl2e, l2e;
-    int              which, flags;
-    unsigned long    l2_idx;
+    l1_pgentry_t      pte;
+    l2_pgentry_t     *pl2e, l2e;
     struct x86_emulate_ctxt emul_ctxt;
 
-    ASSERT(!shadow_mode_enabled(d));
+    LOCK_BIGLOCK(d);
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
-     * NB. The L2 entry cannot be detached due to existing ptwr work: the
-     * caller already checked that.
      */
     pl2e = &__linear_l2_table[l2_linear_offset(addr)];
     if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
         !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
          __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                           sizeof(pte)) )
-    {
-        return 0;
-    }
+        goto bail;
 
     pfn  = l1e_get_pfn(pte);
     page = mfn_to_page(pfn);
 
-#ifdef CONFIG_X86_64
-#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER)
-#else
-#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT)
-#endif
-
-    /*
-     * Check the required flags for a valid wrpt mapping. If the page is
-     * already writable then we can return straight to the guest (SMP race).
-     * We decide whether or not to propagate the fault by testing for write
-     * permissions in page directories by writing back to the linear mapping.
-     */
-    if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
-        return __put_user(
-            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
-            0 : EXCRET_not_a_fault;
-
     /* We are looking only for read-only mappings of p.t. pages. */
-    if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
+    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
          ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
          ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
          (page_get_owner(page) != d) )
-    {
-        return 0;
-    }
-
-#if 0 /* Leave this in as useful for debugging */ 
-    goto emulate; 
-#endif
-
-    PTWR_PRINTK("ptwr_page_fault on l1 pt at va %lx, pfn %lx, eip %lx\n",
-                addr, pfn, (unsigned long)regs->eip);
-    
-    /* Get the L2 index at which this L1 p.t. is always mapped. */
-    l2_idx = page->u.inuse.type_info & PGT_va_mask;
-    if ( unlikely(l2_idx >= PGT_va_unknown) )
-        goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
-    l2_idx >>= PGT_va_shift;
-
-    if ( unlikely(l2_idx == l2_linear_offset(addr)) )
-        goto emulate; /* Urk! Pagetable maps itself! */
-
-    /*
-     * Is the L1 p.t. mapped into the current address space? If so we call it
-     * an ACTIVE p.t., otherwise it is INACTIVE.
-     */
-    pl2e = &__linear_l2_table[l2_idx];
-    which = PTWR_PT_INACTIVE;
-
-    if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
-    {
-        /*
-         * Check the PRESENT bit to set ACTIVE mode.
-         * If the PRESENT bit is clear, we may be conflicting with the current 
-         * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
-         * The ptwr_flush call below will restore the PRESENT bit.
-         */
-        if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
-             (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
-              (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
-            which = PTWR_PT_ACTIVE;
-    }
-
-    /*
-     * Multi-processor guest? Then ensure that the page table is hooked into
-     * at most one L2, and also ensure that there is only one mapping of the
-     * page table itself (or there can be conflicting writable mappings from
-     * other VCPUs).
-     */
-    if ( d->vcpu[0]->next_in_list != NULL )
-    {
-        if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
-             ((page->u.inuse.type_info & PGT_count_mask) != 
-              (!!(page->u.inuse.type_info & PGT_pinned) +
-               (which == PTWR_PT_ACTIVE))) ||
-             /* PTEs are mapped read-only in only one place? */
-             ((page->count_info & PGC_count_mask) !=
-              (!!(page->count_info & PGC_allocated) +       /* alloc count */
-               (page->u.inuse.type_info & PGT_count_mask) + /* type count  */
-               1)) )                                        /* map count   */
-        {
-            /* Could be conflicting writable mappings from other VCPUs. */
-            cleanup_writable_pagetable(d);
-            goto emulate;
-        }
-    }
-
-    /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
-     * time. If there is already one, we must flush it out.
-     */
-    if ( d->arch.ptwr[which].l1va )
-        ptwr_flush(d, which);
-
-    /*
-     * If last batch made no updates then we are probably stuck. Emulate this 
-     * update to ensure we make progress.
-     */
-    if ( d->arch.ptwr[which].prev_nr_updates == 0 )
-    {
-        /* Ensure that we don't get stuck in an emulation-only rut. */
-        d->arch.ptwr[which].prev_nr_updates = 1;
-        goto emulate;
-    }
-
-    PTWR_PRINTK("[%c] batched ptwr_page_fault at va %lx, pt for %08lx, "
-                "pfn %lx\n", PTWR_PRINT_WHICH, addr,
-                l2_idx << L2_PAGETABLE_SHIFT, pfn);
-
-    /* For safety, disconnect the L1 p.t. page from current space. */
-    if ( which == PTWR_PT_ACTIVE )
-    {
-        l2e_remove_flags(l2e, _PAGE_PRESENT);
-        if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
-        {
-            MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
-            domain_crash(d);
-            return 0;
-        }
-        flush_tlb_mask(d->domain_dirty_cpumask);
-    }
-    
-    /* Temporarily map the L1 page, and make a copy of it. */
-    pl1e = map_domain_page(pfn);
-    memcpy(d->arch.ptwr[which].page, pl1e, PAGE_SIZE);
-    unmap_domain_page(pl1e);
-
-    /* Finally, make the p.t. page writable by the guest OS. */
-    l1e_add_flags(pte, _PAGE_RW);
-    if ( unlikely(__put_user(pte.l1,
-                             &linear_pg_table[l1_linear_offset(addr)].l1)) )
-    {
-        MEM_LOG("ptwr: Could not update pte at %p",
-                &linear_pg_table[l1_linear_offset(addr)]);
-        domain_crash(d);
-        return 0;
-    }
-    
-    /*
-     * Now record the writable pagetable state *after* any accesses that can
-     * cause a recursive page fault (i.e., those via the *_user() accessors).
-     * Otherwise we can enter ptwr_flush() with half-done ptwr state.
-     */
-    d->arch.ptwr[which].l1va   = addr | 1;
-    d->arch.ptwr[which].l2_idx = l2_idx;
-    d->arch.ptwr[which].vcpu   = current;
-#ifdef PERF_ARRAYS
-    d->arch.ptwr[which].eip    = regs->eip;
-#endif
-
-    return EXCRET_fault_fixed;
-
- emulate:
+        goto bail;
+
     emul_ctxt.regs = guest_cpu_user_regs();
     emul_ctxt.cr2  = addr;
     emul_ctxt.mode = X86EMUL_MODE_HOST;
     if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
-        return 0;
+        goto bail;
+
+    UNLOCK_BIGLOCK(d);
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
-}
-
-int ptwr_init(struct domain *d)
-{
-    void *x = alloc_xenheap_page();
-    void *y = alloc_xenheap_page();
-
-    if ( (x == NULL) || (y == NULL) )
-    {
-        free_xenheap_page(x);
-        free_xenheap_page(y);
-        return -ENOMEM;
-    }
-
-    d->arch.ptwr[PTWR_PT_ACTIVE].page   = x;
-    d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
-
+
+ bail:
+    UNLOCK_BIGLOCK(d);
     return 0;
 }
 
-void ptwr_destroy(struct domain *d)
-{
-    LOCK_BIGLOCK(d);
-    cleanup_writable_pagetable(d);
-    UNLOCK_BIGLOCK(d);
-    free_xenheap_page(d->arch.ptwr[PTWR_PT_ACTIVE].page);
-    free_xenheap_page(d->arch.ptwr[PTWR_PT_INACTIVE].page);
-}
-
-void cleanup_writable_pagetable(struct domain *d)
-{
-    if ( unlikely(!VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-        return;
-
-    if ( unlikely(shadow_mode_enabled(d)) )
-    {
-        shadow_sync_all(d);
-    }
-    else
-    {
-        if ( d->arch.ptwr[PTWR_PT_ACTIVE].l1va )
-            ptwr_flush(d, PTWR_PT_ACTIVE);
-        if ( d->arch.ptwr[PTWR_PT_INACTIVE].l1va )
-            ptwr_flush(d, PTWR_PT_INACTIVE);
-    }
+void sync_pagetable_state(struct domain *d)
+{
+    shadow_sync_all(d);
 }
 
 int map_pages_to_xen(
diff -r 1d817bfc5ed9 -r 9727328c008e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/arch/x86/traps.c      Fri Aug 11 16:07:22 2006 +0100
@@ -713,7 +713,7 @@ static int handle_gdt_ldt_mapping_fault(
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
         LOCK_BIGLOCK(d);
-        cleanup_writable_pagetable(d);
+        sync_pagetable_state(d);
         ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
         UNLOCK_BIGLOCK(d);
 
@@ -849,7 +849,7 @@ static int spurious_page_fault(
     int            is_spurious;
 
     LOCK_BIGLOCK(d);
-    cleanup_writable_pagetable(d);
+    sync_pagetable_state(d);
     is_spurious = __spurious_page_fault(addr, regs);
     UNLOCK_BIGLOCK(d);
 
@@ -878,33 +878,11 @@ static int fixup_page_fault(unsigned lon
     if ( unlikely(shadow_mode_enabled(d)) )
         return shadow_fault(addr, regs);
 
-    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
-    {
-        LOCK_BIGLOCK(d);
-        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-             unlikely(l2_linear_offset(addr) ==
-                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-        {
-            ptwr_flush(d, PTWR_PT_ACTIVE);
-            UNLOCK_BIGLOCK(d);
-            return EXCRET_fault_fixed;
-        }
-
-        /*
-         * Note it is *not* safe to check PGERR_page_present here. It can be
-         * clear, due to unhooked page table, when we would otherwise expect
-         * it to be set. We have an aversion to trusting that flag in Xen, and
-         * guests ought to be leery too.
-         */
-        if ( guest_kernel_mode(v, regs) &&
-             (regs->error_code & PGERR_write_access) &&
-             ptwr_do_page_fault(d, addr, regs) )
-        {
-            UNLOCK_BIGLOCK(d);
-            return EXCRET_fault_fixed;
-        }
-        UNLOCK_BIGLOCK(d);
-    }
+    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) &&
+         guest_kernel_mode(v, regs) &&
+         ((regs->error_code & (PGERR_write_access|PGERR_page_present)) ==
+          (PGERR_write_access|PGERR_page_present)) )
+        return ptwr_do_page_fault(d, addr, regs) ? EXCRET_fault_fixed : 0;
 
     return 0;
 }
@@ -1324,7 +1302,7 @@ static int emulate_privileged_op(struct 
 
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
-            cleanup_writable_pagetable(v->domain);
+            sync_pagetable_state(v->domain);
             (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
             UNLOCK_BIGLOCK(v->domain);
             break;
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/asm-ia64/mm.h Fri Aug 11 16:07:22 2006 +0100
@@ -500,4 +500,6 @@ int steal_page(
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+#define sync_pagetable_state(d) ((void)0)
+
 #endif /* __ASM_IA64_MM_H__ */
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h      Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/asm-powerpc/mm.h      Fri Aug 11 16:07:22 2006 +0100
@@ -34,8 +34,6 @@
 
 extern unsigned long xenheap_phys_end;
 #define IS_XEN_HEAP_FRAME(_pfn) (page_to_mfn(_pfn) < xenheap_phys_end)
-
-#define cleanup_writable_pagetable(_d)
 
 /*
  * Per-page-frame information.
@@ -226,4 +224,6 @@ extern int steal_page(struct domain *d, 
 extern int steal_page(struct domain *d, struct page_info *page,
                         unsigned int memflags);
 
-#endif
+#define sync_pagetable_state(d) ((void)0)
+
+#endif
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/asm-x86/domain.h      Fri Aug 11 16:07:22 2006 +0100
@@ -69,9 +69,6 @@ struct arch_domain
     /* map_domain_page() mapping cache. */
     struct mapcache mapcache;
 #endif
-
-    /* Writable pagetables. */
-    struct ptwr_info ptwr[2];
 
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/asm-x86/mm.h  Fri Aug 11 16:07:22 2006 +0100
@@ -198,7 +198,8 @@ static inline int get_page(struct page_i
              unlikely(d != _domain) )                /* Wrong owner? */
         {
             if ( !_shadow_mode_refcounts(domain) )
-                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
+                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
+                        PRtype_info "\n",
                         page_to_mfn(page), domain, unpickle_domptr(d),
                         x, page->u.inuse.type_info);
             return 0;
@@ -307,48 +308,11 @@ void memguard_unguard_range(void *p, uns
 
 void memguard_guard_stack(void *p);
 
-/* Writable Pagetables */
-struct ptwr_info {
-    /* Linear address where the guest is updating the p.t. page. */
-    unsigned long l1va;
-    /* Copy of the p.t. page, taken before guest is given write access. */
-    l1_pgentry_t *page;
-    /* Index in L2 page table where this L1 p.t. is always hooked. */
-    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
-    /* Info about last ptwr update batch. */
-    unsigned int prev_nr_updates;
-    /* VCPU which created writable mapping. */
-    struct vcpu *vcpu;
-    /* EIP of the original write fault (stats collection only). */
-    unsigned long eip;
-};
-
-#define PTWR_PT_ACTIVE 0
-#define PTWR_PT_INACTIVE 1
-
-#define PTWR_CLEANUP_ACTIVE 1
-#define PTWR_CLEANUP_INACTIVE 2
-
-int  ptwr_init(struct domain *);
-void ptwr_destroy(struct domain *);
-void ptwr_flush(struct domain *, const int);
 int  ptwr_do_page_fault(struct domain *, unsigned long,
                         struct cpu_user_regs *);
 int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
 
-void cleanup_writable_pagetable(struct domain *d);
-#define sync_pagetable_state(d)                                 \
-    do {                                                        \
-        LOCK_BIGLOCK(d);                                        \
-        /* Avoid racing with ptwr_destroy(). */                 \
-        if ( !test_bit(_DOMF_dying, &(d)->domain_flags) )       \
-            cleanup_writable_pagetable(d);                      \
-        UNLOCK_BIGLOCK(d);                                      \
-    } while ( 0 )
-
-#define writable_pagetable_in_sync(d)           \
-    (!((d)->arch.ptwr[PTWR_PT_ACTIVE].l1va |    \
-       (d)->arch.ptwr[PTWR_PT_INACTIVE].l1va))
+void sync_pagetable_state(struct domain *d);
 
 int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
 
@@ -370,18 +334,6 @@ void audit_domains(void);
 
 #endif
 
-#ifdef PERF_ARRAYS
-
-void ptwr_eip_stat_reset(void);
-void ptwr_eip_stat_print(void);
-
-#else
-
-#define ptwr_eip_stat_reset() ((void)0)
-#define ptwr_eip_stat_print() ((void)0)
-
-#endif
-
 int new_guest_cr3(unsigned long pfn);
 
 void propagate_page_fault(unsigned long addr, u16 error_code);
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/asm-x86/perfc.h
--- a/xen/include/asm-x86/perfc.h       Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/asm-x86/perfc.h       Fri Aug 11 16:07:22 2006 +0100
@@ -2,21 +2,15 @@
 #define __ASM_PERFC_H__
 #include <asm/mm.h>
 
-static inline void arch_perfc_printall (void)
+static inline void arch_perfc_printall(void)
 {
-#ifdef PERF_ARRAYS
-    ptwr_eip_stat_print();
-#endif
 }
 
-static inline void arch_perfc_reset (void)
+static inline void arch_perfc_reset(void)
 {
-#ifdef PERF_ARRAYS
-    ptwr_eip_stat_reset();
-#endif
 }
 
-static inline void arch_perfc_gather (void)
+static inline void arch_perfc_gather(void)
 {
 }
 
diff -r 1d817bfc5ed9 -r 9727328c008e xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Fri Aug 11 14:22:54 2006 +0100
+++ b/xen/include/xen/mm.h      Fri Aug 11 16:07:22 2006 +0100
@@ -95,10 +95,6 @@ unsigned long avail_scrub_pages(void);
 
 #include <asm/mm.h>
 
-#ifndef sync_pagetable_state
-#define sync_pagetable_state(d) ((void)0)
-#endif
-
 int guest_remove_page(struct domain *d, unsigned long gmfn);
 
 #endif /* __XEN_MM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] [XEN] Remove batched writable pagetable logic., Xen patchbot-unstable <=