Represent the dirty log as an array of bitmaps.
Also, when bookkeeping the valid HVM PFN ranges, break the PFNs
into two ranges -- RAM and VGA. This allows the dirty
page bitmaps to conform to these ranges and to skip the
(sometimes large) empty PFN range between them.
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxx>
Signed-off-by: Dave Lively <dlively@xxxxxxxxxxxxxxx>
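
For reference, the snippet below is a minimal standalone sketch of the
segmented-bitmap idea, not part of the patch: every name and value in it
(NR_RANGES, struct range, mark_dirty, the pfn layout) is made up for
illustration. A small fixed array of {start, len} ranges, each backed by its
own bitmap, means the gap between the RAM and VGA PFN ranges needs no
backing memory.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_RANGES 4

struct range {
    unsigned long start;   /* first pfn covered by this bitmap        */
    unsigned long len;     /* number of pfns covered                  */
    uint8_t *bitmap;       /* one bit per pfn in [start, start + len) */
};

/* Set the dirty bit for pfn if it falls inside a range; return 0 on a miss. */
static int mark_dirty(struct range *r, unsigned long pfn)
{
    for (int i = 0; i < NR_RANGES && r[i].len; i++)
        if (pfn >= r[i].start && pfn < r[i].start + r[i].len) {
            unsigned long off = pfn - r[i].start;
            r[i].bitmap[off / 8] |= 1u << (off % 8);
            return 1;
        }
    return 0;
}

int main(void)
{
    /* Hypothetical layout: low RAM at pfn 0, "VGA" framebuffer much higher.
     * (calloc failure checks omitted for brevity.) */
    struct range r[NR_RANGES] = {
        { 0x00000, 0x10000, calloc(0x10000 / 8, 1) },  /* 256MB of RAM     */
        { 0xf0000, 0x01000, calloc(0x01000 / 8, 1) },  /* 16MB of videoram */
    };
    printf("pfn 0x01234: %d\n", mark_dirty(r, 0x01234)); /* 1, hits RAM    */
    printf("pfn 0x80000: %d\n", mark_dirty(r, 0x80000)); /* 0, in the gap  */
    return 0;
}

With a single flat bitmap the gap between pfn 0x10000 and 0xf0000 would cost
roughly 0xe0000/8 bytes per domain even though no guest page ever maps there;
with ranges it costs nothing.
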
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/p2m.c Tue Oct 23 12:58:25 2007 -0400
@@ -241,9 +241,12 @@ set_p2m_entry(struct domain *d, unsigned
0, L1_PAGETABLE_ENTRIES);
ASSERT(p2m_entry);
- /* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
- d->arch.p2m.max_mapped_pfn = gfn;
+ if ( mfn_valid(mfn) ) {
+ paging_pfn_range_append(d, gfn);
+ /* Track the highest gfn for which we have ever had a valid mapping */
+ if (gfn > d->arch.p2m.max_mapped_pfn )
+ d->arch.p2m.max_mapped_pfn = gfn;
+ }
if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/paging.c Tue Oct 23 12:59:41 2007 -0400
@@ -96,36 +96,98 @@
spin_unlock(&(_d)->arch.paging.log_dirty.lock); \
} while (0)
+void paging_pfn_range_append(struct domain *d, unsigned long gfn)
+{
+ /* Maintain a very small number of pfn ranges; ie 4
+     * Don't bother with an optimal representation (by consolidating ranges, etc.)
+ * because in practice it isn't required. */
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+ for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+ {
+ int last_range;
+
+ /* unused range? */
+ if ( unlikely(pr->len == 0) ) {
+ *pr = (struct pfn_range){ gfn, 1 };
+ return;
+ }
+
+ last_range = (pr == pr0 + PFN_RANGE_NR - 1);
+
+#define CLOSE_ENOUGH 0x20 /* keep 0x00..0x9f and 0xc0... in same range */
+
+ /* gfn precedes existing range by a substantial amount? */
+ if (unlikely(gfn + CLOSE_ENOUGH < pr->start && !last_range)) /* yes */
+ {
+ /* insert a new range */
+ memmove(pr+1, pr, (PFN_RANGE_NR - (pr - pr0) - 1) * sizeof(*pr));
+ *pr = (struct pfn_range){ gfn, 1 };
+ return;
+ }
+
+ /* gfn precedes existing range? */
+ if (unlikely(gfn < pr->start)) /* yes */
+ {
+ /* extend start of range */
+ pr->len += pr->start - gfn;
+ pr->start = gfn;
+ return;
+ }
+
+ /* gfn within existing range? */
+        if ( unlikely(pr->start <= gfn && gfn < pr->start + pr->len) ) /* yes */
+ return;
+
+        /* gfn abuts or closely follows existing range? or this is last range? */
+ if ( likely(gfn <= pr->start + pr->len + CLOSE_ENOUGH || last_range) )
+ {
+ /* extend end of range */
+ pr->len = gfn - pr->start + 1;
+ return;
+ }
+ }
+ BUG();
+}
+
/* allocate bitmap resources for log dirty */
int paging_alloc_log_dirty_bitmap(struct domain *d)
{
- if ( d->arch.paging.log_dirty.bitmap != NULL )
- return 0;
-
- d->arch.paging.log_dirty.bitmap_size =
- (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
- d->arch.paging.log_dirty.bitmap =
- xmalloc_array(unsigned long,
- d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
- if ( d->arch.paging.log_dirty.bitmap == NULL )
- {
- d->arch.paging.log_dirty.bitmap_size = 0;
- return -ENOMEM;
- }
- memset(d->arch.paging.log_dirty.bitmap, 0,
- d->arch.paging.log_dirty.bitmap_size/8);
-
- return 0;
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
+ for (pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++)
+ {
+ ASSERT(pr->dirty_bitmap == NULL);
+ pr->dirty_bitmap_size = (pr->len + (BITS_PER_LONG - 1)) &
+ ~(BITS_PER_LONG - 1);
+ printk("%s: allocating %dKB for domain %d dirty log range %ld\n",
+               __FUNCTION__, (pr->dirty_bitmap_size + 8191)/8192,
+ d->domain_id, pr - pr0);
+ pr->dirty_bitmap =
+ xmalloc_array(uint8_t, pr->dirty_bitmap_size/8);
+ if ( pr->dirty_bitmap == NULL )
+ {
+ pr->dirty_bitmap_size = 0;
+ return -ENOMEM;
+ }
+
+ memset(pr->dirty_bitmap, 0x0, pr->dirty_bitmap_size/8);
+ }
+ return 0;
}
/* free bitmap resources */
void paging_free_log_dirty_bitmap(struct domain *d)
{
- d->arch.paging.log_dirty.bitmap_size = 0;
- if ( d->arch.paging.log_dirty.bitmap )
- {
- xfree(d->arch.paging.log_dirty.bitmap);
- d->arch.paging.log_dirty.bitmap = NULL;
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+ for (pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++)
+ {
+ pr->dirty_bitmap_size = 0;
+ if ( pr->dirty_bitmap )
+ {
+ xfree(pr->dirty_bitmap);
+ pr->dirty_bitmap = NULL;
+ }
}
}
@@ -174,7 +236,7 @@ int paging_log_dirty_disable(struct doma
/* Safe because the domain is paused. */
ret = d->arch.paging.log_dirty.disable_log_dirty(d);
log_dirty_lock(d);
- if ( !paging_mode_log_dirty(d) )
+ if ( !ret )
paging_free_log_dirty_bitmap(d);
log_dirty_unlock(d);
domain_unpause(d);
@@ -187,6 +249,7 @@ void paging_mark_dirty(struct domain *d,
{
unsigned long pfn;
mfn_t gmfn;
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
gmfn = _mfn(guest_mfn);
@@ -211,30 +274,40 @@ void paging_mark_dirty(struct domain *d,
return;
}
- if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
- {
- if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
- {
- PAGING_DEBUG(LOGDIRTY,
- "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
- mfn_x(gmfn), pfn, d->domain_id);
- d->arch.paging.log_dirty.dirty_count++;
- }
- }
- else
- {
- PAGING_PRINTK("mark_dirty OOR! "
- "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
- "owner=%d c=%08x t=%" PRtype_info "\n",
- mfn_x(gmfn),
- pfn,
- d->arch.paging.log_dirty.bitmap_size,
- d->domain_id,
- (page_get_owner(mfn_to_page(gmfn))
- ? page_get_owner(mfn_to_page(gmfn))->domain_id
- : -1),
- mfn_to_page(gmfn)->count_info,
+ for ( pr = pr0; pr - pr0 != PFN_RANGE_NR && pr->len > 0; pr++ )
+ {
+ ASSERT(pr->dirty_bitmap != NULL);
+ if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+ {
+ if ( !__test_and_set_bit(pfn - pr->start, pr->dirty_bitmap) )
+ {
+ PAGING_DEBUG(LOGDIRTY,
+ "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+ mfn_x(gmfn), pfn, d->domain_id);
+ d->arch.paging.log_dirty.dirty_count++;
+ }
+ log_dirty_unlock(d);
+ return;
+ }
+ }
+
+ PAGING_PRINTK("mark_dirty OOR! "
+ "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+ "owner=%d c=%08x t=%" PRtype_info "\n",
+ mfn_x(gmfn),
+ pfn,
+ d->arch.paging.log_dirty.bitmap_size,
+ d->domain_id,
+ (page_get_owner(mfn_to_page(gmfn))
+ ? page_get_owner(mfn_to_page(gmfn))->domain_id
+ : -1),
+ mfn_to_page(gmfn)->count_info,
mfn_to_page(gmfn)->u.inuse.type_info);
+ for ( pr = pr0; pr - pr0 != PFN_RANGE_NR; pr++ )
+ {
+ PAGING_PRINTK(" pfn_range[%ld] start:0x%"
+ PRI_mfn " len:0x%" PRI_mfn "\n",
+ pr - pr0, pr->start, pr->len);
}
log_dirty_unlock(d);
@@ -245,6 +318,8 @@ int paging_log_dirty_op(struct domain *d
int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
{
int i, rv = 0, clean = 0, peek = 1;
+ int bits;
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
domain_pause(d);
log_dirty_lock(d);
@@ -270,37 +345,70 @@ int paging_log_dirty_op(struct domain *d
/* caller may have wanted just to clean the state or access stats. */
peek = 0;
- if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
- {
- rv = -EINVAL; /* perhaps should be ENOMEM? */
- goto out;
- }
-
- if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
- sc->pages = d->arch.paging.log_dirty.bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
- for ( i = 0; i < sc->pages; i += CHUNK )
- {
- int bytes = ((((sc->pages - i) > CHUNK)
- ? CHUNK
- : (sc->pages - i)) + 7) / 8;
-
- if ( likely(peek) )
- {
+ pr = pr0;
+#define BITS_PER_BYTE 8
+#define CHUNK (BITS_PER_BYTE*1024) /* Transfer and clear in 8kB chunks for L1 cache. */
+ for ( i = 0; i < sc->pages; i += bits )
+ {
+ /* In gap between ranges? */
+ if ( i < pr->start ) /* yes */
+ {
+ static uint8_t zeroes[CHUNK];
+ uint8_t *pzeroes = zeroes;
+ /* copy zeroes to guest */
+ bits = pr->start - i;
+ if ( bits > sc->pages - i )
+ bits = sc->pages - i;
+ if ( bits > CHUNK * BITS_PER_BYTE )
+ bits = CHUNK * BITS_PER_BYTE;
+ bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
if ( copy_to_guest_offset(
- sc->dirty_bitmap, i/8,
- (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+ sc->dirty_bitmap,
+ i/BITS_PER_BYTE,
+ pzeroes,
+ bits/BITS_PER_BYTE) )
{
- rv = -EFAULT;
+ rv = -EINVAL;
goto out;
}
}
- if ( clean )
-            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+ /* Within a range? */
+ else if ( i < pr->start + pr->len ) /* yes */
+ {
+ bits = pr->start + pr->len - i;
+ if ( bits > sc->pages - i )
+ bits = sc->pages - i;
+ if ( bits > CHUNK * BITS_PER_BYTE )
+ bits = CHUNK * BITS_PER_BYTE;
+ bits = (bits + BITS_PER_BYTE - 1) & ~(BITS_PER_BYTE - 1);
+ if ( copy_to_guest_offset(
+ sc->dirty_bitmap,
+ i/BITS_PER_BYTE,
+ pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+ bits/BITS_PER_BYTE) )
+ {
+ rv = -EINVAL;
+ goto out;
+ }
+ if ( clean )
+ memset(pr->dirty_bitmap + ((i - pr->start)/BITS_PER_BYTE),
+ 0, bits/BITS_PER_BYTE);
+ }
+ /* Last range? */
+ else if (pr - pr0 == PFN_RANGE_NR-1) /* yes */
+ {
+ sc->pages = pr->start + pr->len;
+ break;
+ }
+ else
+ { /* Use next range */
+ pr++;
+ bits = 0;
+ }
}
#undef CHUNK
+#undef BITS_PER_BYTE
log_dirty_unlock(d);
diff -r 9bdb3e7a99c9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/arch/x86/mm/shadow/private.h Tue Oct 23 12:58:25 2007 -0400
@@ -491,17 +491,22 @@ sh_mfn_is_dirty(struct domain *d, mfn_t
/* Is this guest page dirty? Call only in log-dirty mode. */
{
unsigned long pfn;
+ struct pfn_range *pr, *pr0 = d->arch.paging.log_dirty.pfn_range;
+
ASSERT(shadow_mode_log_dirty(d));
- ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
/* We /really/ mean PFN here, even for non-translated guests. */
pfn = get_gpfn_from_mfn(mfn_x(gmfn));
- if ( likely(VALID_M2P(pfn))
- && likely(pfn < d->arch.paging.log_dirty.bitmap_size)
- && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
- return 1;
-
- return 0;
+ if ( unlikely(!VALID_M2P(pfn)) )
+ return 0;
+
+ for (pr = pr0; likely(pr - pr0 != PFN_RANGE_NR && pr->len > 0); pr++)
+ {
+ ASSERT(pr->dirty_bitmap != NULL);
+ if ( likely(pr->start <= pfn && pfn < pr->start + pr->len) )
+ return test_bit(pfn - pr->start, pr->dirty_bitmap);
+ }
+ return 0; /* shouldn't get here */
}
diff -r 9bdb3e7a99c9 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/domain.h Tue Oct 23 12:58:25 2007 -0400
@@ -158,13 +158,18 @@ struct log_dirty_domain {
int locker; /* processor that holds the lock */
const char *locker_function; /* func that took it */
- /* log-dirty bitmap to record dirty pages */
- unsigned long *bitmap;
- unsigned int bitmap_size; /* in pages, bit per page */
-
/* log-dirty mode stats */
unsigned int fault_count;
unsigned int dirty_count;
+
+ /* segmented log-dirty bitmap to record dirty pages */
+#define PFN_RANGE_NR 4
+ struct pfn_range {
+ unsigned long start;
+ unsigned long len;
+ uint8_t *dirty_bitmap;
+ unsigned int dirty_bitmap_size; /* in pages, bit per page */
+ } pfn_range[PFN_RANGE_NR];
/* functions which are paging mode specific */
int (*enable_log_dirty )(struct domain *d);
diff -r 9bdb3e7a99c9 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h Tue Oct 23 10:26:00 2007 -0400
+++ b/xen/include/asm-x86/paging.h Tue Oct 23 12:58:25 2007 -0400
@@ -258,6 +258,8 @@ static inline int paging_cmpxchg_guest_e
return (!cmpxchg_user(p, *old, new));
}
+void paging_pfn_range_append(struct domain *d, unsigned long gfn);
+
/* Helper function that writes a pte in such a way that a concurrent read
* never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
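
A closing note on the read-out path: the standalone sketch below is
illustrative only -- copy_log, struct range and all the values are invented
for the example, copy_to_guest_offset is replaced by a plain memcpy into a
local buffer, range starts are assumed byte-aligned, and the patch's chunked,
cache-friendly loop is collapsed into a single pass. It shows the behaviour
the paging_log_dirty_op changes aim for: PFNs falling in the gap between
ranges are reported back to the tools as clean (zero) bits.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct range {
    unsigned long start, len;   /* pfn range covered by this bitmap */
    const uint8_t *bitmap;      /* one bit per pfn in the range     */
};

/* Report 'pages' worth of dirty bits into dst, zero-filling inter-range gaps. */
static void copy_log(uint8_t *dst, unsigned long pages,
                     const struct range *r, int nr)
{
    memset(dst, 0, (pages + 7) / 8);            /* gaps read back as clean */
    for (int i = 0; i < nr && r[i].len; i++) {
        unsigned long first = r[i].start, n = r[i].len;
        if (first >= pages)
            break;
        if (n > pages - first)
            n = pages - first;
        memcpy(dst + first / 8, r[i].bitmap, (n + 7) / 8);
    }
}

int main(void)
{
    uint8_t ram[2] = { 0x01, 0x00 };   /* pfn 0x00 dirty          */
    uint8_t vga[2] = { 0x80, 0x00 };   /* pfn 0x47 (0x40+7) dirty */
    struct range r[] = { { 0x00, 16, ram }, { 0x40, 16, vga } };
    uint8_t out[16];

    copy_log(out, 0x50, r, 2);
    printf("byte 0=%02x byte 8=%02x\n", out[0], out[8]);
    return 0;
}

This prints "byte 0=01 byte 8=80": the dirty bits from both ranges land at
their absolute pfn positions while the gap in between stays zero.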