# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 86e5d8458c08d057bacd7c578bfa84a219b3d461
# Parent e585c2dade143d171fb589e5a7a33b6c1fa137a9
[IA64] live migration
Shadow mode and live migration.
Virtualize the Dirty bit.
Signed-off-by: Tristan Gingold <tristan.gingold@xxxxxxxx>
---
tools/libxc/ia64/xc_ia64_linux_restore.c | 2
tools/libxc/ia64/xc_ia64_linux_save.c | 314 ++++++++++++++++++++++-----
xen/arch/ia64/asm-offsets.c | 5
xen/arch/ia64/xen/dom0_ops.c | 14 +
xen/arch/ia64/xen/domain.c | 163 ++++++++++++--
xen/arch/ia64/xen/faults.c | 91 +++++++
xen/arch/ia64/xen/ivt.S | 43 +++
xen/arch/ia64/xen/mm.c | 20 +
xen/arch/ia64/xen/privop.c | 3
xen/arch/ia64/xen/vhpt.c | 2
xen/include/asm-ia64/domain.h | 13 +
xen/include/asm-ia64/linux-xen/asm/pgtable.h | 5
xen/include/asm-ia64/shadow.h | 18 +
xen/include/asm-ia64/tlbflush.h | 4
14 files changed, 623 insertions(+), 74 deletions(-)
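
The save logic below follows the standard pre-copy scheme: send all pages once,
then repeatedly resend only the pages the still-running guest has dirtied, and
suspend the guest for a final pass once the dirty set stops shrinking. A
minimal sketch of that structure in C; every function here is a hypothetical
stand-in for the real libxc / DOM0_SHADOW_CONTROL interfaces, not the literal
patch code:

    #include <string.h>

    #define MAX_PFN 1024                 /* toy guest size for the sketch */

    extern void suspend_guest(void);
    extern void send_page(unsigned long pfn);
    /* CLEAN: copy the hypervisor's dirty bitmap to 'to_send' and reset it. */
    extern void clean_dirty_bitmap(unsigned char to_send[MAX_PFN]);

    static void precopy_save(int max_iters, int max_factor)
    {
        unsigned char to_send[MAX_PFN];
        unsigned long total_sent = 0;
        int last_iter = 0, iter;

        memset(to_send, 1, sizeof(to_send)); /* first pass sends everything */

        for (iter = 1; ; iter++) {
            unsigned long n, sent = 0;

            for (n = 0; n < MAX_PFN; n++)
                if (to_send[n]) {
                    send_page(n);
                    sent++;
                }
            total_sent += sent;

            if (last_iter)
                break;

            /* Stop iterating when the dirty set is small, the iteration cap
               is hit, or we already sent max_factor times guest memory. */
            if (iter >= max_iters || sent < 50 ||
                total_sent > (unsigned long)MAX_PFN * max_factor) {
                suspend_guest();          /* final pass: guest is stopped */
                last_iter = 1;
            }

            clean_dirty_bitmap(to_send);  /* next pass resends dirtied pages */
        }
    }
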
diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Jul 26 09:02:43 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Wed Jul 26 09:36:36 2006 -0600
@@ -163,7 +163,7 @@ xc_linux_restore(int xc_handle, int io_f
pfn = page_array[mfn];
- DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn);
+ //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn);
if (read_page(xc_handle, io_fd, dom, page_array[mfn]) < 0)
goto out;
diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c Wed Jul 26 09:02:43 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c Wed Jul 26 09:36:36 2006 -0600
@@ -15,8 +15,72 @@
#include "xg_private.h"
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if we want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS (4 - 1) /* limit us to 4 times round loop */
+#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+ ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+ return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+ BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit ( int nr, volatile void * addr)
+{
+ BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
/* total number of pages used by the current guest */
static unsigned long max_pfn;
+
+static int xc_ia64_shadow_control(int xc_handle,
+ uint32_t domid,
+ unsigned int sop,
+ unsigned long *dirty_bitmap,
+ unsigned long pages,
+ xc_shadow_control_stats_t *stats)
+{
+ if (dirty_bitmap != NULL && pages > 0) {
+ int i;
+ unsigned char *bmap = (unsigned char *)dirty_bitmap;
+ unsigned long bmap_bytes =
+ ((pages + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1)) / 8;
+ unsigned int bmap_pages = (bmap_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /* Touch the page so that it is in the TC.
+ FIXME: use a more reliable method. */
+ for (i = 0 ; i < bmap_pages ; i++)
+ bmap[i * PAGE_SIZE] = 0;
+ /* Because bmap is not page aligned (allocated by malloc), make sure the
+ last page is touched. */
+ bmap[bmap_bytes - 1] = 0;
+ }
+
+ return xc_shadow_control(xc_handle, domid, sop,
+ dirty_bitmap, pages, stats);
+}
static inline ssize_t
write_exact(int fd, void *buf, size_t count)
@@ -77,10 +141,10 @@ xc_linux_save(int xc_handle, int io_fd,
xc_dominfo_t info;
int rc = 1;
- unsigned long N;
//int live = (flags & XCFLAGS_LIVE);
int debug = (flags & XCFLAGS_DEBUG);
+ int live = (flags & XCFLAGS_LIVE);
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
@@ -93,10 +157,38 @@ xc_linux_save(int xc_handle, int io_fd,
/* Live mapping of shared info structure */
shared_info_t *live_shinfo = NULL;
+ /* Iteration number. */
+ int iter;
+
+ /* Number of pages sent in the last iteration (live only). */
+ unsigned int sent_last_iter;
+
+ /* Number of pages sent (live only). */
+ unsigned int total_sent;
+
+ /* Size of the shadow bitmap (live only). */
+ unsigned int bitmap_size = 0;
+
+ /* True if last iteration. */
+ int last_iter;
+
+ /* Bitmap of pages to be sent. */
+ unsigned long *to_send = NULL;
+ /* Bitmap of pages not to be sent (because dirtied). */
+ unsigned long *to_skip = NULL;
+
char *mem;
if (debug)
fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
+
+ /* If no explicit control parameters given, use defaults */
+ if (!max_iters)
+ max_iters = DEF_MAX_ITERS;
+ if (!max_factor)
+ max_factor = DEF_MAX_FACTOR;
+
+ //initialize_mbit_rate();
if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
ERR("Could not get domain info");
@@ -124,24 +216,9 @@ xc_linux_save(int xc_handle, int io_fd,
max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
-
- /* This is a non-live suspend. Issue the call back to get the
- domain suspended */
-
- if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
- ERR("Domain appears not to have suspended");
- goto out;
- }
-
page_array = malloc(max_pfn * sizeof(unsigned long));
if (page_array == NULL) {
ERR("Could not allocate memory");
- goto out;
- }
-
- if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
- 0, max_pfn) != max_pfn) {
- ERR("Could not get the page frame list");
goto out;
}
@@ -156,10 +233,13 @@ xc_linux_save(int xc_handle, int io_fd,
if the format changes.
The version is hard-coded, don't forget to change the restore code
too! */
- N = 1;
- if (!write_exact(io_fd, &N, sizeof(unsigned long))) {
- ERR("write: version");
- goto out;
+ {
+ unsigned long version = 1;
+
+ if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+ ERR("write: version");
+ goto out;
+ }
}
op.cmd = DOM0_DOMAIN_SETUP;
@@ -175,39 +255,165 @@ xc_linux_save(int xc_handle, int io_fd,
goto out;
}
- /* Start writing out the saved-domain record. */
- for (N = 0; N < max_pfn; N++) {
- if (page_array[N] == INVALID_MFN)
- continue;
- if (debug)
- fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
- page_array[N], N, max_pfn);
-
- if (!write_exact(io_fd, &N, sizeof(N))) {
- ERR("write: max_pfn");
- goto out;
- }
-
- mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
- PROT_READ|PROT_WRITE, page_array[N]);
- if (mem == NULL) {
- ERR("cannot map page");
- goto out;
- }
- if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
- ERR("Error when writing to state file (5)");
- goto out;
- }
- munmap(mem, PAGE_SIZE);
+ /* Domain is still running at this point */
+ if (live) {
+
+ if (xc_ia64_shadow_control(xc_handle, dom,
+ DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+ NULL, 0, NULL ) < 0) {
+ ERR("Couldn't enable shadow mode");
+ goto out;
+ }
+
+ last_iter = 0;
+
+ bitmap_size = ((max_pfn + BITS_PER_LONG-1) & ~(BITS_PER_LONG-1)) / 8;
+ to_send = malloc(bitmap_size);
+ to_skip = malloc(bitmap_size);
+
+ if (!to_send || !to_skip) {
+ ERR("Couldn't allocate bitmap array");
+ goto out;
+ }
+
+ /* Initially all the pages must be sent. */
+ memset(to_send, 0xff, bitmap_size);
+
+ if (mlock(to_send, bitmap_size)) {
+ ERR("Unable to mlock to_send");
+ goto out;
+ }
+ if (mlock(to_skip, bitmap_size)) {
+ ERR("Unable to mlock to_skip");
+ goto out;
+ }
+
+ } else {
+
+ /* This is a non-live suspend. Issue the call back to get the
+ domain suspended */
+
+ last_iter = 1;
+
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+ ERR("Domain appears not to have suspended");
+ goto out;
+ }
+
+ }
+
+ sent_last_iter = max_pfn;
+ total_sent = 0;
+
+ for (iter = 1; ; iter++) {
+ unsigned int sent_this_iter, skip_this_iter;
+ unsigned long N;
+
+ sent_this_iter = 0;
+ skip_this_iter = 0;
+
+ /* Get the pfn list, as it may change. */
+ if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
+ 0, max_pfn) != max_pfn) {
+ ERR("Could not get the page frame list");
+ goto out;
+ }
+
+ /* Dirtied pages won't be saved.
+ slightly wasteful to peek the whole array every time,
+ but this is fast enough for the moment. */
+ if (!last_iter) {
+ if (xc_ia64_shadow_control(xc_handle, dom,
+ DOM0_SHADOW_CONTROL_OP_PEEK,
+ to_skip, max_pfn, NULL) != max_pfn) {
+ ERR("Error peeking shadow bitmap");
+ goto out;
+ }
+ }
+
+ /* Start writing out the saved-domain record. */
+ for (N = 0; N < max_pfn; N++) {
+ if (page_array[N] == INVALID_MFN)
+ continue;
+ if (!last_iter) {
+ if (test_bit(N, to_skip) && test_bit(N, to_send))
+ skip_this_iter++;
+ if (test_bit(N, to_skip) || !test_bit(N, to_send))
+ continue;
+ }
+
+ if (debug)
+ fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
+ page_array[N], N, max_pfn);
+
+ mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE, page_array[N]);
+ if (mem == NULL) {
+ /* The page may have moved.
+ It will be re-marked dirty.
+ FIXME: to be tracked. */
+ fprintf(stderr, "cannot map page %lx: %s\n",
+ page_array[N], strerror (errno));
+ continue;
+ }
+
+ if (!write_exact(io_fd, &N, sizeof(N))) {
+ ERR("write: max_pfn");
+ goto out;
+ }
+
+ if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
+ ERR("Error when writing to state file (5)");
+ goto out;
+ }
+ munmap(mem, PAGE_SIZE);
+ sent_this_iter++;
+ total_sent++;
+ }
+
+ if (last_iter)
+ break;
+
+ DPRINTF(" %d: sent %d, skipped %d\n",
+ iter, sent_this_iter, skip_this_iter );
+
+ if (live) {
+ if ( /* ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || */
+ (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) ||
+ (total_sent > max_pfn*max_factor)) {
+ DPRINTF("Start last iteration\n");
+ last_iter = 1;
+
+ if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+ ERR("Domain appears not to have suspended");
+ goto out;
+ }
+ }
+
+ /* Pages to be sent are pages which were dirty. */
+ if (xc_ia64_shadow_control(xc_handle, dom,
+ DOM0_SHADOW_CONTROL_OP_CLEAN,
+ to_send, max_pfn, NULL ) != max_pfn) {
+ ERR("Error flushing shadow PT");
+ goto out;
+ }
+
+ sent_last_iter = sent_this_iter;
+
+ //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+ }
+
}
fprintf (stderr, "All memory is saved\n");
/* terminate */
- N = INVALID_MFN;
- if (!write_exact(io_fd, &N, sizeof(N))) {
- ERR("Error when writing to state file (6)");
- goto out;
+ {
+ unsigned long pfn = INVALID_MFN;
+ if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+ ERR("Error when writing to state file (6)");
+ goto out;
+ }
}
/* Send through a list of all the PFNs that were not in map at the close */
@@ -274,8 +480,16 @@ xc_linux_save(int xc_handle, int io_fd,
out:
- free (page_array);
-
+ if (live) {
+ if (xc_ia64_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
+ NULL, 0, NULL ) < 0) {
+ DPRINTF("Warning - couldn't disable shadow mode");
+ }
+ }
+
+ free(page_array);
+ free(to_send);
+ free(to_skip);
if (live_shinfo)
munmap(live_shinfo, PAGE_SIZE);
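
The test_bit/set_bit/clear_bit helpers added at the top of this file are plain
bit operations on an array of longs (non-atomic, unlike the Xen-internal
versions). A self-contained usage example; the macros are copied from the hunk
above, the main() is illustrative only:

    #include <stdio.h>
    #include <stdlib.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * 8)
    #define BITMAP_ENTRY(_nr,_bmap) \
        ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
    #define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)

    static inline int test_bit(int nr, volatile void *addr)
    { return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; }

    static inline void set_bit(int nr, volatile void *addr)
    { BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); }

    static inline void clear_bit(int nr, volatile void *addr)
    { BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); }

    int main(void)
    {
        unsigned long pages = 1000;
        /* Round up to a whole number of longs, as the save code does. */
        size_t bytes = ((pages + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1)) / 8;
        unsigned long *bmap = calloc(1, bytes);

        set_bit(0, bmap);
        set_bit(999, bmap);
        clear_bit(0, bmap);
        printf("bit 0=%d, bit 999=%d\n", test_bit(0, bmap), test_bit(999, bmap));
        free(bmap);
        return 0;
    }
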
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/asm-offsets.c Wed Jul 26 09:36:36 2006 -0600
@@ -65,6 +65,11 @@ void foo(void)
DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
BLANK();
+
+ DEFINE(IA64_DOMAIN_SHADOW_BITMAP_OFFSET, offsetof (struct domain, arch.shadow_bitmap));
+
+ BLANK();
+
DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));
DEFINE(IA64_CPUINFO_KSOFTIRQD_OFFSET, offsetof (struct cpuinfo_ia64, ksoftirqd));
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c Wed Jul 26 09:36:36 2006 -0600
@@ -265,6 +265,20 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
}
break;
+ case DOM0_SHADOW_CONTROL:
+ {
+ struct domain *d;
+ ret = -ESRCH;
+ d = find_domain_by_id(op->u.shadow_control.domain);
+ if ( d != NULL )
+ {
+ ret = shadow_mode_control(d, &op->u.shadow_control);
+ put_domain(d);
+ copy_to_guest(u_dom0_op, op, 1);
+ }
+ }
+ break;
+
default:
printf("arch_do_dom0_op: unrecognized dom0 op: %d!!!\n",op->cmd);
ret = -ENOSYS;
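
From the tools side, the new DOM0_SHADOW_CONTROL hypercall is driven in a
fixed order during a live save. A sketch of that sequence; shadow_op() is a
hypothetical wrapper around xc_shadow_control(), last_iter is set by the
convergence test shown earlier, and only the OP_* constants (pulled in via the
Xen public headers) are the real interface:

    extern int shadow_op(unsigned int op, unsigned long *bitmap);
    extern unsigned long *to_send, *to_skip;
    extern int last_iter;

    static void drive_log_dirty(void)
    {
        shadow_op(DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, NULL);
        while (!last_iter) {
            shadow_op(DOM0_SHADOW_CONTROL_OP_PEEK, to_skip);  /* read only  */
            /* ...send pages set in to_send but not re-dirtied in to_skip... */
            shadow_op(DOM0_SHADOW_CONTROL_OP_CLEAN, to_send); /* read+reset */
        }
        shadow_op(DOM0_SHADOW_CONTROL_OP_OFF, NULL);  /* on the out: path */
    }
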
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/domain.c Wed Jul 26 09:36:36 2006 -0600
@@ -25,26 +25,15 @@
#include <xen/mm.h>
#include <xen/iocap.h>
#include <asm/asm-xsi-offsets.h>
-#include <asm/ptrace.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/hw_irq.h>
-#include <asm/setup.h>
-//#include <asm/mpspec.h>
-#include <xen/irq.h>
#include <xen/event.h>
-//#include <xen/shadow.h>
#include <xen/console.h>
#include <xen/compile.h>
-
#include <xen/elf.h>
-//#include <asm/page.h>
#include <asm/pgalloc.h>
-
#include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */
-
#include <asm/vcpu.h> /* for function declarations */
#include <public/arch-ia64.h>
#include <xen/domain.h>
@@ -52,13 +41,12 @@
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/vmx_phy_mode.h>
-#include <asm/pal.h>
#include <asm/vhpt.h>
-#include <public/hvm/ioreq.h>
#include <public/arch-ia64.h>
#include <asm/tlbflush.h>
#include <asm/regionreg.h>
#include <asm/dom_fw.h>
+#include <asm/shadow.h>
#include <asm/privop_stat.h>
#ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -388,8 +376,11 @@ void arch_domain_destroy(struct domain *
BUG_ON(d->arch.mm.pgd != NULL);
if (d->shared_info != NULL)
free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));
-
- domain_flush_destroy (d);
+ if (d->arch.shadow_bitmap != NULL)
+ xfree(d->arch.shadow_bitmap);
+
+ /* Clear vTLB for the next domain. */
+ domain_flush_tlb_vhpt(d);
deallocate_rid_range(d);
}
@@ -594,6 +585,148 @@ domain_set_shared_info_va (unsigned long
return 0;
}
+/* Transfer and clear the shadow bitmap in 1kB chunks for L1 cache. */
+#define SHADOW_COPY_CHUNK (1024 / sizeof (unsigned long))
+
+int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
+{
+ unsigned int op = sc->op;
+ int rc = 0;
+ int i;
+ //struct vcpu *v;
+
+ if (unlikely(d == current->domain)) {
+ DPRINTK("Don't try to do a shadow op on yourself!\n");
+ return -EINVAL;
+ }
+
+ domain_pause(d);
+
+ switch (op)
+ {
+ case DOM0_SHADOW_CONTROL_OP_OFF:
+ if (shadow_mode_enabled (d)) {
+ u64 *bm = d->arch.shadow_bitmap;
+
+ /* Flush vhpt and tlb to restore dirty bit usage. */
+ domain_flush_tlb_vhpt(d);
+
+ /* Free bitmap. */
+ d->arch.shadow_bitmap_size = 0;
+ d->arch.shadow_bitmap = NULL;
+ xfree(bm);
+ }
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
+ rc = -EINVAL;
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
+ if (shadow_mode_enabled(d)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ atomic64_set(&d->arch.shadow_fault_count, 0);
+ atomic64_set(&d->arch.shadow_dirty_count, 0);
+
+ d->arch.shadow_bitmap_size = (d->max_pages + BITS_PER_LONG-1) &
+ ~(BITS_PER_LONG-1);
+ d->arch.shadow_bitmap = xmalloc_array(unsigned long,
+ d->arch.shadow_bitmap_size / BITS_PER_LONG);
+ if (d->arch.shadow_bitmap == NULL) {
+ d->arch.shadow_bitmap_size = 0;
+ rc = -ENOMEM;
+ }
+ else {
+ memset(d->arch.shadow_bitmap, 0,
+ d->arch.shadow_bitmap_size / 8);
+
+ /* Flush vhpt and tlb to enable dirty bit
+ virtualization. */
+ domain_flush_tlb_vhpt(d);
+ }
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_FLUSH:
+ atomic64_set(&d->arch.shadow_fault_count, 0);
+ atomic64_set(&d->arch.shadow_dirty_count, 0);
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_CLEAN:
+ {
+ int nbr_longs;
+
+ sc->stats.fault_count = atomic64_read(&d->arch.shadow_fault_count);
+ sc->stats.dirty_count = atomic64_read(&d->arch.shadow_dirty_count);
+
+ atomic64_set(&d->arch.shadow_fault_count, 0);
+ atomic64_set(&d->arch.shadow_dirty_count, 0);
+
+ if (guest_handle_is_null(sc->dirty_bitmap) ||
+ (d->arch.shadow_bitmap == NULL)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ if (sc->pages > d->arch.shadow_bitmap_size)
+ sc->pages = d->arch.shadow_bitmap_size;
+
+ nbr_longs = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+
+ for (i = 0; i < nbr_longs; i += SHADOW_COPY_CHUNK) {
+ int size = (nbr_longs - i) > SHADOW_COPY_CHUNK ?
+ SHADOW_COPY_CHUNK : nbr_longs - i;
+
+ if (copy_to_guest_offset(sc->dirty_bitmap, i,
+ d->arch.shadow_bitmap + i,
+ size)) {
+ rc = -EFAULT;
+ break;
+ }
+
+ memset(d->arch.shadow_bitmap + i,
+ 0, size * sizeof(unsigned long));
+ }
+
+ break;
+ }
+
+ case DOM0_SHADOW_CONTROL_OP_PEEK:
+ {
+ unsigned long size;
+
+ sc->stats.fault_count = atomic64_read(&d->arch.shadow_fault_count);
+ sc->stats.dirty_count = atomic64_read(&d->arch.shadow_dirty_count);
+
+ if (guest_handle_is_null(sc->dirty_bitmap) ||
+ (d->arch.shadow_bitmap == NULL)) {
+ rc = -EINVAL;
+ break;
+ }
+
+ if (sc->pages > d->arch.shadow_bitmap_size)
+ sc->pages = d->arch.shadow_bitmap_size;
+
+ size = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ if (copy_to_guest(sc->dirty_bitmap,
+ d->arch.shadow_bitmap, size)) {
+ rc = -EFAULT;
+ break;
+ }
+ break;
+ }
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ domain_unpause(d);
+
+ return rc;
+}
// remove following line if not privifying in memory
//#define HAVE_PRIVIFY_MEMORY
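
The CLEAN case above drains the bitmap in SHADOW_COPY_CHUNK-sized pieces so
that the longs just copied are still hot in the L1 cache when they are
cleared. The same pattern in isolation, with memcpy() standing in for
copy_to_guest_offset():

    #include <string.h>

    #define SHADOW_COPY_CHUNK (1024 / sizeof (unsigned long))

    static void copy_and_clear(unsigned long *dst, unsigned long *src,
                               unsigned long nbr_longs)
    {
        unsigned long i;

        for (i = 0; i < nbr_longs; i += SHADOW_COPY_CHUNK) {
            unsigned long size = (nbr_longs - i) > SHADOW_COPY_CHUNK
                                 ? SHADOW_COPY_CHUNK : (nbr_longs - i);

            /* Copy a chunk out, then clear it while it is still cached. */
            memcpy(dst + i, src + i, size * sizeof(unsigned long));
            memset(src + i, 0, size * sizeof(unsigned long));
        }
    }
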
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/faults.c Wed Jul 26 09:36:36 2006 -0600
@@ -1,4 +1,3 @@
-
/*
* Miscellaneous process/domain related routines
*
@@ -29,6 +28,7 @@
#include <asm/bundle.h>
#include <asm/privop_stat.h>
#include <asm/asm-xsi-offsets.h>
+#include <asm/shadow.h>
extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
/* FIXME: where should these declarations be? */
@@ -648,3 +648,92 @@ ia64_handle_reflection (unsigned long if
reflect_interruption(isr,regs,vector);
}
+void
+ia64_shadow_fault(unsigned long ifa, unsigned long itir,
+ unsigned long isr, struct pt_regs *regs)
+{
+ struct vcpu *v = current;
+ struct domain *d = current->domain;
+ unsigned long gpfn;
+ unsigned long pte = 0;
+ struct vhpt_lf_entry *vlfe;
+
+ /* There are 2 jobs to do:
+ - marking the page as dirty (the metaphysical address must be
+ extracted to do that).
+ - reflecting or not the fault (the virtual Dirty bit must be
+ extracted to decide).
+ Unfortunately this information is not immediately available!
+ */
+
+ /* Extract the metaphysical address.
+ Try to get it from VHPT and M2P as we need the flags. */
+ vlfe = (struct vhpt_lf_entry *)ia64_thash(ifa);
+ pte = vlfe->page_flags;
+ if (vlfe->ti_tag == ia64_ttag(ifa)) {
+ /* The VHPT entry is valid. */
+ gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+ BUG_ON(gpfn == INVALID_M2P_ENTRY);
+ }
+ else {
+ unsigned long itir, iha;
+ IA64FAULT fault;
+
+ /* The VHPT entry is not valid. */
+ vlfe = NULL;
+
+ /* FIXME: gives a chance to tpa, as the TC was valid. */
+
+ fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha);
+
+ /* Try again! */
+ if (fault != IA64_NO_FAULT) {
+ /* This will trigger a dtlb miss. */
+ ia64_ptcl(ifa, PAGE_SHIFT << 2);
+ return;
+ }
+ gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+ if (pte & _PAGE_D)
+ pte |= _PAGE_VIRT_D;
+ }
+
+ /* Set the dirty bit in the bitmap. */
+ shadow_mark_page_dirty (d, gpfn);
+
+ /* Update the local TC/VHPT and decide whether or not the fault should
+ be reflected.
+ SMP note: we almost ignore the other processors. The shadow_bitmap
+ has been atomically updated. If the dirty fault happens on another
+ processor, it will do its job.
+ */
+
+ if (pte != 0) {
+ /* We will know how to handle the fault. */
+
+ if (pte & _PAGE_VIRT_D) {
+ /* Rewrite VHPT entry.
+ There is no race here because only the
+ cpu VHPT owner can write page_flags. */
+ if (vlfe)
+ vlfe->page_flags = pte | _PAGE_D;
+
+ /* Purge the TC locally.
+ It will be reloaded from the VHPT iff the
+ VHPT entry is still valid. */
+ ia64_ptcl(ifa, PAGE_SHIFT << 2);
+
+ atomic64_inc(&d->arch.shadow_fault_count);
+ }
+ else {
+ /* Reflect.
+ In this case there is no need to purge. */
+ ia64_handle_reflection(ifa, regs, isr, 0, 8);
+ }
+ }
+ else {
+ /* We don't know whether or not the fault must be
+ reflected. The VHPT entry is not valid. */
+ /* FIXME: in metaphysical mode, we could do an ITC now. */
+ ia64_ptcl(ifa, PAGE_SHIFT << 2);
+ }
+}
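
Condensed, the handler above makes a three-way decision once the PTE has been
recovered. A sketch of that logic; the helpers are hypothetical stand-ins for
the VHPT/TC primitives the real code uses, and _PAGE_VIRT_D is the new bit
defined in the pgtable.h hunk later in this patch:

    #define _PAGE_VIRT_D (1UL << 53)  /* virtualized dirty bit (this patch) */

    extern void mark_page_dirty(unsigned long gpfn);
    extern void purge_tc(unsigned long ifa);     /* stand-in for ia64_ptcl  */
    extern void reflect_to_guest(unsigned long ifa);

    static void shadow_fault_decision(unsigned long ifa, unsigned long pte,
                                      unsigned long gpfn)
    {
        mark_page_dirty(gpfn);          /* job 1: always record the write   */

        if (pte == 0)
            purge_tc(ifa);              /* PTE unknown: retry via dtlb miss */
        else if (pte & _PAGE_VIRT_D)
            purge_tc(ifa);              /* fault was ours: re-arm with the  */
                                        /* real D bit set and retry         */
        else
            reflect_to_guest(ifa);      /* genuine guest dirty-bit fault    */
    }
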
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/ivt.S Wed Jul 26 09:36:36 2006 -0600
@@ -746,7 +746,48 @@ ENTRY(dirty_bit)
ENTRY(dirty_bit)
DBG_FAULT(8)
#ifdef XEN
- FAULT_OR_REFLECT(8)
+ mov r20=cr.ipsr
+ mov r31=pr;;
+ extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;
+ mov r19=8 /* prepare to save predicates */
+ cmp.eq p6,p0=r0,r20 /* cpl == 0?*/
+(p6) br.sptk.few dispatch_to_fault_handler
+ /* If shadow mode is not enabled, reflect the fault. */
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET
+ ;;
+ ld8 r22=[r22]
+ ;;
+ add r22=IA64_VCPU_DOMAIN_OFFSET,r22
+ ;;
+ /* Read domain. */
+ ld8 r22=[r22]
+ ;;
+ add r22=IA64_DOMAIN_SHADOW_BITMAP_OFFSET,r22
+ ;;
+ ld8 r22=[r22]
+ ;;
+ cmp.eq p6,p0=r0,r22 /* !shadow_bitmap ?*/
+(p6) br.dptk.many dispatch_reflection
+
+ SAVE_MIN_WITH_COVER
+ alloc r14=ar.pfs,0,0,4,0
+ mov out0=cr.ifa
+ mov out1=cr.itir
+ mov out2=cr.isr
+ adds out3=16,sp
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_shadow_fault
#else
/*
* What we do here is to simply turn on the dirty bit in the PTE. We need to
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/mm.c Wed Jul 26 09:36:36 2006 -0600
@@ -170,6 +170,7 @@
#include <asm/pgalloc.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
+#include <asm/shadow.h>
#include <linux/efi.h>
#ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -470,7 +471,7 @@ u64 translate_domain_pte(u64 pteval, u64
pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
pteval2 |= (pteval & _PAGE_ED);
pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
- pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
+ pteval2 |= (pteval & ~_PAGE_PPN_MASK);
/*
* Don't let non-dom0 domains map uncached addresses. This can
* happen when domU tries to touch i/o port space. Also prevents
@@ -481,6 +482,18 @@ u64 translate_domain_pte(u64 pteval, u64
*/
if (d != dom0 && (pteval2 & _PAGE_MA_MASK) != _PAGE_MA_NAT)
pteval2 &= ~_PAGE_MA_MASK;
+
+ /* If shadow mode is enabled, virtualize dirty bit. */
+ if (shadow_mode_enabled(d) && (pteval2 & _PAGE_D)) {
+ u64 mp_page = mpaddr >> PAGE_SHIFT;
+ pteval2 |= _PAGE_VIRT_D;
+
+ /* If the page is not already dirty, don't set the dirty bit.
+ This is a small optimization! */
+ if (mp_page < d->arch.shadow_bitmap_size * 8
+ && !test_bit(mp_page, d->arch.shadow_bitmap))
+ pteval2 = (pteval2 & ~_PAGE_D);
+ }
return pteval2;
}
@@ -1418,10 +1431,13 @@ guest_physmap_remove_page(struct domain
//XXX sledgehammer.
// flush finer range.
-void
+static void
domain_page_flush(struct domain* d, unsigned long mpaddr,
unsigned long old_mfn, unsigned long new_mfn)
{
+ if (shadow_mode_enabled(d))
+ shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
+
domain_flush_vtlb_all();
}
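
translate_domain_pte() above is where the Dirty bit is actually virtualized:
when the guest PTE has D set, the flag is mirrored into _PAGE_VIRT_D and the
hardware D bit is cleared for pages not yet marked in the shadow bitmap, so
the guest's first store takes the dirty_bit fault handled in faults.c. The
core transformation as a standalone sketch (bit positions as in the pgtable.h
hunk below; page_dirty() is a hypothetical shadow-bitmap lookup):

    #define _PAGE_D      (1UL << 6)   /* hardware dirty bit (ia64)          */
    #define _PAGE_VIRT_D (1UL << 53)  /* virtual dirty bit, from this patch */

    extern int page_dirty(unsigned long gpfn);  /* shadow-bitmap lookup */

    static unsigned long virtualize_dirty_bit(unsigned long pte,
                                              unsigned long gpfn)
    {
        if (pte & _PAGE_D) {
            pte |= _PAGE_VIRT_D;       /* remember the guest's dirty bit    */
            if (!page_dirty(gpfn))
                pte &= ~_PAGE_D;       /* force a fault on the first store  */
        }
        return pte;
    }
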
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/privop.c Wed Jul 26 09:36:36 2006 -0600
@@ -686,7 +686,8 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
(void)vcpu_increment_iip(vcpu);
}
if (fault == IA64_ILLOP_FAULT)
- printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
+ printf("priv_emulate: priv_handle_op fails, "
+ "isr=0x%lx iip=%lx\n",isr, regs->cr_iip);
return fault;
}
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/vhpt.c Wed Jul 26 09:36:36 2006 -0600
@@ -236,7 +236,7 @@ static void flush_tlb_vhpt_all (struct d
local_flush_tlb_all ();
}
-void domain_flush_destroy (struct domain *d)
+void domain_flush_tlb_vhpt(struct domain *d)
{
/* Very heavy... */
on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/domain.h Wed Jul 26 09:36:36 2006 -0600
@@ -48,6 +48,9 @@ extern unsigned long domain_set_shared_i
If sync_only is true, only synchronize I&D caches,
if false, flush and invalidate caches. */
extern void domain_cache_flush (struct domain *d, int sync_only);
+
+/* Control the shadow mode. */
+extern int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc);
/* Cleanly crash the current domain with a message. */
extern void panic_domain(struct pt_regs *, const char *, ...)
@@ -117,6 +120,16 @@ struct arch_domain {
/* Address of fpswa_interface_t (placed in domain memory) */
void *fpswa_inf;
+ /* Bitmap of shadow dirty bits.
+ Set iff shadow mode is enabled. */
+ u64 *shadow_bitmap;
+ /* Length (in bits!) of shadow bitmap. */
+ unsigned long shadow_bitmap_size;
+ /* Number of bits set in bitmap. */
+ atomic64_t shadow_dirty_count;
+ /* Number of faults. */
+ atomic64_t shadow_fault_count;
+
struct last_vcpu last_vcpu[NR_CPUS];
};
#define INT_ENABLE_OFFSET(v) \
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Jul 26 09:36:36 2006 -0600
@@ -62,7 +62,12 @@
#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */
#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */
+#ifdef XEN
+#define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */
+#define _PAGE_PROTNONE 0
+#else
#define _PAGE_PROTNONE (__IA64_UL(1) << 63)
+#endif
/* Valid only for a PTE with the present bit cleared: */
#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/shadow.h Wed Jul 26 09:36:36 2006 -0600
@@ -45,6 +45,24 @@ void guest_physmap_remove_page(struct do
void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
#endif
+static inline int
+shadow_mode_enabled(struct domain *d)
+{
+ return d->arch.shadow_bitmap != NULL;
+}
+
+static inline int
+shadow_mark_page_dirty(struct domain *d, unsigned long gpfn)
+{
+ if (gpfn < d->arch.shadow_bitmap_size * 8
+ && !test_and_set_bit(gpfn, d->arch.shadow_bitmap)) {
+ /* The page was not dirty. */
+ atomic64_inc(&d->arch.shadow_dirty_count);
+ return 1;
+ } else
+ return 0;
+}
+
#endif // _XEN_SHADOW_H
/*
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/tlbflush.h Wed Jul 26 09:36:36 2006 -0600
@@ -22,8 +22,8 @@ void domain_flush_vtlb_all (void);
/* Global range-flush of vTLB. */
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
-/* Final vTLB flush on every dirty cpus. */
-void domain_flush_destroy (struct domain *d);
+/* Flush vhpt and mTLB on all dirty CPUs. */
+void domain_flush_tlb_vhpt(struct domain *d);
/* Flush v-tlb on cpus set in mask for current domain. */
void flush_tlb_mask(cpumask_t mask);