[Xen-devel] [PATCH] Segment dirty log for performance

To: xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] Segment dirty log for performance
From: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
Date: Wed, 24 Oct 2007 17:00:26 -0400
Cc: Dave Lively <dlively@xxxxxxxxxxxxxxx>
Delivery-date: Wed, 24 Oct 2007 14:19:42 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.5 (X11/20070719)
Represent the dirty log as an array of bitmaps, one per tracked PFN range.
Also, when bookkeeping the valid HVM PFN ranges, break the PFNs into two
ranges -- RAM and VGA.  This allows the dirty page bitmaps to conform to
these ranges and to skip the (sometimes large) empty PFN range between them.

Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxx>
Signed-off-by: Dave Lively <dlively@xxxxxxxxxxxxxxx>
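
For illustration only (not part of the patch): a minimal, standalone sketch of
the range-coalescing idea behind paging_pfn_range_append() below. The type and
function names here (struct range, range_append) are invented for the example;
only the behaviour and the constants (four ranges, CLOSE_ENOUGH of 0x20) are
meant to mirror the patch. Run as ordinary userspace C, it collapses scattered
gfns into a few start/len pairs, merging 0x9f and 0xc0 into one range because
the gap between them is within CLOSE_ENOUGH:

#include <stdio.h>
#include <string.h>

#define NR_RANGES    4
#define CLOSE_ENOUGH 0x20

struct range { unsigned long start, len; };

/* Mirrors the control flow of paging_pfn_range_append(): slot the gfn into
 * the first range it is below, inside, or close enough to extend. */
static void range_append(struct range *r, unsigned long gfn)
{
    for (int i = 0; i < NR_RANGES; i++) {
        int last = (i == NR_RANGES - 1);

        if (r[i].len == 0) {                             /* unused slot */
            r[i] = (struct range){ gfn, 1 };
            return;
        }
        if (gfn + CLOSE_ENOUGH < r[i].start && !last) {  /* well before range */
            memmove(&r[i + 1], &r[i], (NR_RANGES - i - 1) * sizeof(*r));
            r[i] = (struct range){ gfn, 1 };
            return;
        }
        if (gfn < r[i].start) {                          /* extend downwards */
            r[i].len += r[i].start - gfn;
            r[i].start = gfn;
            return;
        }
        if (gfn < r[i].start + r[i].len)                 /* already covered */
            return;
        if (gfn <= r[i].start + r[i].len + CLOSE_ENOUGH || last) {
            r[i].len = gfn - r[i].start + 1;             /* extend upwards */
            return;
        }
    }
}

int main(void)
{
    struct range r[NR_RANGES] = { { 0, 0 } };
    unsigned long gfns[] = { 0x10, 0x11, 0x9f, 0xc0, 0xf0000 };

    for (unsigned int i = 0; i < sizeof(gfns) / sizeof(gfns[0]); i++)
        range_append(r, gfns[i]);

    for (int i = 0; i < NR_RANGES; i++)
        printf("range %d: start=0x%lx len=0x%lx\n", i, r[i].start, r[i].len);
    return 0;
}

This prints range 0 as 0x10/0x2, range 1 as 0x9f/0x22 (0x9f..0xc0 merged),
range 2 as 0xf0000/0x1, and leaves range 3 unused.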

diff -r 9bdb3e7a99c9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/p2m.c     Tue Oct 23 12:58:25 2007 -0400
@@ -241,9 +241,12 @@ set_p2m_entry(struct domain *d, unsigned
                                0, L1_PAGETABLE_ENTRIES);
     ASSERT(p2m_entry);
 
-    /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
-        d->arch.p2m.max_mapped_pfn = gfn;
+    if ( mfn_valid(mfn) ) {
+        paging_pfn_range_append(d, gfn);
+        /* Track the highest gfn for which we have ever had a valid mapping */
+        if ( gfn > d->arch.p2m.max_mapped_pfn )
+            d->arch.p2m.max_mapped_pfn = gfn;
+    }
 
     if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
         entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/paging.c  Tue Oct 23 12:59:41 2007 -0400
@@ -96,36 +96,98 @@
         spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
     } while (0)
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn)
+{
+    /* Maintain a very small number of pfn ranges; i.e. 4.
+     * Don't bother with an optimal representation (by consolidating ranges, etc.)
+     * because in practice it isn't required. */
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        int last_range;
+        
+        /* unused range? */
+        if ( unlikely(pr->len == 0) ) {
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+        
+        last_range = (pr == pr0 + PFN_RANGE_NR - 1);
+        
+#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range  */
+        
+        /* gfn precedes existing range by a substantial amount? */
+        if (unlikely(gfn + CLOSE_ENOUGH < pr->start && !last_range)) /* yes */
+        {
+            /* insert a new range */
+            memmove(pr+1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
+            *pr = (struct pfn_range){ gfn, 1 };
+            return;
+        }
+        
+        /* gfn precedes existing range? */
+        if (unlikely(gfn < pr->start)) /* yes */
+        {
+            /* extend start of range */
+            pr->len += pr->start - gfn;
+            pr->start = gfn;
+            return;
+        }
+        
+        /* gfn within existing range? */
+        if ( unlikely(pr->start <= gfn && gfn < pr->start + pr->len) ) /* yes */
+            return;
+        
+        /* gfn abuts or closely follows existing range? or this is last range? */
+        if ( likely(gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range) )
+        {
+            /* extend end of range */
+            pr->len = gfn - pr->start + 1;
+            return;
+        }
+    }
+    BUG();
+}
+
 /* allocate bitmap resources for log dirty */
 int paging_alloc_log_dirty_bitmap(struct domain *d)
 {
-    if ( d->arch.paging.log_dirty.bitmap != NULL )
-        return 0;
-
-    d->arch.paging.log_dirty.bitmap_size =
-        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
-    d->arch.paging.log_dirty.bitmap =
-        xmalloc_array(unsigned long,
-                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
-    if ( d->arch.paging.log_dirty.bitmap == NULL )
-    {
-        d->arch.paging.log_dirty.bitmap_size = 0;
-        return -ENOMEM;
-    }
-    memset(d->arch.paging.log_dirty.bitmap, 0,
-           d->arch.paging.log_dirty.bitmap_size/8);
-
-    return 0;
+     struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+  
+     for (pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++)
+     {
+         ASSERT(pr->dirty_bitmap == NULL);
+         pr->dirty_bitmap_size = (pr->len + (BITS_PER_LONG - 1)) & 
+             ~(BITS_PER_LONG - 1);
+         printk("%s: allocating %dKB for domain %d dirty log range %ld\n",
+                 __FUNCTION__, (pr->dirty_bitmap_size + 8191)/8192,
+                 d->domain_id, pr - pr0);
+         pr->dirty_bitmap =
+             xmalloc_array(uint8_t, pr->dirty_bitmap_size/8);
+         if ( pr->dirty_bitmap == NULL )
+         {
+             pr->dirty_bitmap_size = 0;
+             return -ENOMEM;
+         }
+         
+         memset(pr->dirty_bitmap, 0x0, pr->dirty_bitmap_size/8);
+     }
+     return 0;
 }
 
 /* free bitmap resources */
 void paging_free_log_dirty_bitmap(struct domain *d)
 {
-    d->arch.paging.log_dirty.bitmap_size = 0;
-    if ( d->arch.paging.log_dirty.bitmap )
-    {
-        xfree(d->arch.paging.log_dirty.bitmap);
-        d->arch.paging.log_dirty.bitmap = NULL;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+    {
+        pr->dirty_bitmap_size = 0;
+        if ( pr->dirty_bitmap )
+        {
+            xfree(pr->dirty_bitmap);
+            pr->dirty_bitmap = NULL;
+        }
     }
 }
 
@@ -174,7 +236,7 @@ int paging_log_dirty_disable(struct doma
     /* Safe because the domain is paused. */
     ret = d->arch.paging.log_dirty.disable_log_dirty(d);
     log_dirty_lock(d);
-    if ( !paging_mode_log_dirty(d) )
+    if ( !ret )
         paging_free_log_dirty_bitmap(d);
     log_dirty_unlock(d);
     domain_unpause(d);
@@ -187,6 +249,7 @@ void paging_mark_dirty(struct domain *d,
 {
     unsigned long pfn;
     mfn_t gmfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     gmfn = _mfn(guest_mfn);
 
@@ -211,30 +274,40 @@ void paging_mark_dirty(struct domain *d,
         return;
     }
 
-    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
-    {
-        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        {
-            PAGING_DEBUG(LOGDIRTY,
-                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
-                         mfn_x(gmfn), pfn, d->domain_id);
-            d->arch.paging.log_dirty.dirty_count++;
-        }
-    }
-    else
-    {
-        PAGING_PRINTK("mark_dirty OOR! "
-                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
-                      "owner=%d c=%08x t=%" PRtype_info "\n",
-                      mfn_x(gmfn),
-                      pfn,
-                      d->arch.paging.log_dirty.bitmap_size,
-                      d->domain_id,
-                      (page_get_owner(mfn_to_page(gmfn))
-                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
-                       : -1),
-                      mfn_to_page(gmfn)->count_info,
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) ) 
+        {
+            if ( !__test_and_set_bit(pfn - pr->start, pr->dirty_bitmap) )
+            {
+                PAGING_DEBUG(LOGDIRTY,
+                             "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                              mfn_x(gmfn), pfn, d->domain_id);
+                d->arch.paging.log_dirty.dirty_count++;
+            }
+            log_dirty_unlock(d);
+            return;
+        }
+    }
+
+    PAGING_PRINTK("mark_dirty OOR! "
+                  "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                  "owner=%d c=%08x t=%" PRtype_info "\n",
+                  mfn_x(gmfn),
+                  pfn,
+                  d->arch.p2m.max_mapped_pfn,
+                  d->domain_id,
+                  (page_get_owner(mfn_to_page(gmfn))
+                   ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                   : -1),
+                  mfn_to_page(gmfn)->count_info,
                       mfn_to_page(gmfn)->u.inuse.type_info);
+    for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+    {
+        PAGING_PRINTK("   pfn_range[%ld] start:0x%"
+                      PRI_mfn " len:0x%" PRI_mfn "\n",
+                      pr - pr0, pr->start, pr->len);
     }
 
     log_dirty_unlock(d);
@@ -245,6 +318,8 @@ int paging_log_dirty_op(struct domain *d
 int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
 {
     int i, rv = 0, clean = 0, peek = 1;
+    int bits;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
 
     domain_pause(d);
     log_dirty_lock(d);
@@ -270,37 +345,70 @@ int paging_log_dirty_op(struct domain *d
         /* caller may have wanted just to clean the state or access stats. */
         peek = 0;
 
-    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
-    {
-        rv = -EINVAL; /* perhaps should be ENOMEM? */
-        goto out;
-    }
-
-    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
-        sc->pages = d->arch.paging.log_dirty.bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
-    for ( i = 0; i < sc->pages; i += CHUNK )
-    {
-        int bytes = ((((sc->pages - i) > CHUNK)
-                      ? CHUNK
-                      : (sc->pages - i)) + 7) / 8;
-
-        if ( likely(peek) )
-        {
+    pr = pr0;
+#define BITS_PER_BYTE 8
+#define CHUNK (BITS_PER_BYTE*1024) /* Transfer and clear in 8kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += bits )
+    {
+        /* In gap between ranges? */
+        if ( i < pr->start )      /* yes */
+        {
+            static uint8_t zeroes[CHUNK];
+            uint8_t *pzeroes = zeroes;
+            /* copy zeroes to guest */
+            bits = pr->start - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
             if ( copy_to_guest_offset(
-                sc->dirty_bitmap, i/8,
-                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pzeroes,
+                     bits/BITS_PER_BYTE) )
             {
-                rv = -EFAULT;
+                rv = -EFAULT;
                 goto out;
             }
         }
 
-        if ( clean )
-            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+        /* Within a range? */
+        else if ( i < pr->start + pr->len ) /* yes */
+        {
+            bits = pr->start + pr->len - i;
+            if ( bits > sc->pages - i )
+                bits = sc->pages - i;
+            if ( bits > CHUNK * BITS_PER_BYTE )
+                bits = CHUNK * BITS_PER_BYTE;
+            bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
+            if ( copy_to_guest_offset(
+                     sc->dirty_bitmap, 
+                     i/BITS_PER_BYTE,
+                     pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                     bits/BITS_PER_BYTE) )
+            {
+                rv = -EFAULT;
+                goto out;
+            }
+            if ( clean )
+                memset(pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+                       0, bits/BITS_PER_BYTE);
+        }
+        /* Last range? */
+        else if (pr - pr0 == PFN_RANGE_NR-1) /* yes */
+        {
+            sc->pages = pr->start + pr->len;
+            break;
+        }
+        else
+        { /* Use next range */
+            pr++;
+            bits = 0;
+        }
     }
 #undef CHUNK
+#undef BITS_PER_BYTE
 
     log_dirty_unlock(d);
 
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/shadow/private.h  Tue Oct 23 12:58:25 2007 -0400
@@ -491,17 +491,22 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
 /* Is this guest page dirty?  Call only in log-dirty mode. */
 {
     unsigned long pfn;
+    struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+    
     ASSERT(shadow_mode_log_dirty(d));
-    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-    if ( likely(VALID_M2P(pfn))
-         && likely(pfn < d->arch.paging.log_dirty.bitmap_size) 
-         && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
-        return 1;
-
-    return 0;
+    if ( unlikely(!VALID_M2P(pfn)) )
+         return 0;
+         
+    for (pr = pr0; likely(pr - pr0 != PFN_RANGE_NR && pr->len > 0); pr++)
+    {
+        ASSERT(pr->dirty_bitmap != NULL);
+        if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+            return test_bit(pfn - pr->start, pr->dirty_bitmap);
+    }
+    return 0; /* shouldn't get here */
 }
 
 
diff -r 9bdb3e7a99c9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/domain.h      Tue Oct 23 12:58:25 2007 -0400
@@ -158,13 +158,18 @@ struct log_dirty_domain {
     int            locker; /* processor that holds the lock */
     const char    *locker_function; /* func that took it */
 
-    /* log-dirty bitmap to record dirty pages */
-    unsigned long *bitmap;
-    unsigned int   bitmap_size;  /* in pages, bit per page */
-
     /* log-dirty mode stats */
     unsigned int   fault_count;
     unsigned int   dirty_count;
+
+    /* segmented log-dirty bitmap to record dirty pages */
+#define PFN_RANGE_NR 4
+    struct pfn_range {
+        unsigned long start;
+        unsigned long len;
+        uint8_t *dirty_bitmap;
+        unsigned int dirty_bitmap_size;  /* in pages, bit per page */
+    } pfn_range[PFN_RANGE_NR];
 
     /* functions which are paging mode specific */
     int            (*enable_log_dirty   )(struct domain *d);
diff -r 9bdb3e7a99c9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/paging.h      Tue Oct 23 12:58:25 2007 -0400
@@ -258,6 +258,8 @@ static inline int paging_cmpxchg_guest_e
         return (!cmpxchg_user(p, *old, new));
 }
 
+void paging_pfn_range_append(struct domain *d, unsigned long gfn);
+
 /* Helper function that writes a pte in such a way that a concurrent read 
  * never sees a half-written entry that has _PAGE_PRESENT set */
 static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
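
Also purely for illustration (standalone, with invented names): the per-range
lookup that paging_mark_dirty() and sh_mfn_is_dirty() perform -- find the range
containing the pfn and operate on bit (pfn - start) of that range's bitmap,
with a pfn outside every range being the "OOR" case. This sketch uses plain
byte-indexed bit operations instead of Xen's test_bit/__test_and_set_bit:

#include <stdio.h>

struct range_bitmap {
    unsigned long start, len;   /* pfn range covered by this bitmap */
    unsigned char *bits;        /* one bit per pfn in [start, start + len) */
};

/* Set pfn's dirty bit and return its previous value, or -1 if pfn is not
 * covered by any range. */
static int range_test_and_set(struct range_bitmap *r, int nr, unsigned long pfn)
{
    for (int i = 0; i < nr && r[i].len != 0; i++) {
        if (r[i].start <= pfn && pfn < r[i].start + r[i].len) {
            unsigned long off = pfn - r[i].start;
            int old = (r[i].bits[off / 8] >> (off % 8)) & 1;
            r[i].bits[off / 8] |= (unsigned char)(1u << (off % 8));
            return old;
        }
    }
    return -1;
}

int main(void)
{
    unsigned char bits[5] = { 0 };               /* enough for 0x22 pfns */
    struct range_bitmap r[1] = { { 0x9f, 0x22, bits } };

    printf("first mark of 0xc0:  %d\n", range_test_and_set(r, 1, 0xc0)); /* 0 */
    printf("second mark of 0xc0: %d\n", range_test_and_set(r, 1, 0xc0)); /* 1 */
    printf("mark of 0x10 (OOR):  %d\n", range_test_and_set(r, 1, 0x10)); /* -1 */
    return 0;
}

The second mark of 0xc0 reports the bit as already set, and 0x10 falls outside
the only range, mimicking the out-of-range path above.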

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel