WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] [IA64] live migration

# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 86e5d8458c08d057bacd7c578bfa84a219b3d461
# Parent  e585c2dade143d171fb589e5a7a33b6c1fa137a9
[IA64] live migration

Shadow mode and live migration.

Virtualize Dirty bit.

Signed-off-by: Tristan Gingold <tristan.gingold@xxxxxxxx>
---
 tools/libxc/ia64/xc_ia64_linux_restore.c     |    2 
 tools/libxc/ia64/xc_ia64_linux_save.c        |  314 ++++++++++++++++++++++-----
 xen/arch/ia64/asm-offsets.c                  |    5 
 xen/arch/ia64/xen/dom0_ops.c                 |   14 +
 xen/arch/ia64/xen/domain.c                   |  163 ++++++++++++--
 xen/arch/ia64/xen/faults.c                   |   91 +++++++
 xen/arch/ia64/xen/ivt.S                      |   43 +++
 xen/arch/ia64/xen/mm.c                       |   20 +
 xen/arch/ia64/xen/privop.c                   |    3 
 xen/arch/ia64/xen/vhpt.c                     |    2 
 xen/include/asm-ia64/domain.h                |   13 +
 xen/include/asm-ia64/linux-xen/asm/pgtable.h |    5 
 xen/include/asm-ia64/shadow.h                |   18 +
 xen/include/asm-ia64/tlbflush.h              |    4 
 14 files changed, 623 insertions(+), 74 deletions(-)

diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c  Wed Jul 26 09:02:43 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c  Wed Jul 26 09:36:36 2006 -0600
@@ -163,7 +163,7 @@ xc_linux_restore(int xc_handle, int io_f
 
        pfn = page_array[mfn];
 
-        DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn);
+        //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, 
pfn);
 
        if (read_page(xc_handle, io_fd, dom, page_array[mfn]) < 0)
                goto out;
diff -r e585c2dade14 -r 86e5d8458c08 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c     Wed Jul 26 09:02:43 2006 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c     Wed Jul 26 09:36:36 2006 -0600
@@ -15,8 +15,72 @@
 
 #include "xg_private.h"
 
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS    (4 - 1)       /* limit us to 4 times round loop  */
+#define DEF_MAX_FACTOR   3             /* never send more than 3x nr_pfns */
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit ( int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
+
+static int xc_ia64_shadow_control(int xc_handle,
+                                  uint32_t domid,
+                                  unsigned int sop,
+                                  unsigned long *dirty_bitmap,
+                                  unsigned long pages,
+                                  xc_shadow_control_stats_t *stats)
+{
+    if (dirty_bitmap != NULL && pages > 0) {
+        int i;
+        unsigned char *bmap = (unsigned char *)dirty_bitmap;
+        unsigned long bmap_bytes =
+            ((pages + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1)) / 8;
+        unsigned int bmap_pages = (bmap_bytes + PAGE_SIZE - 1) / PAGE_SIZE; 
+
+        /* Touch the page so that it is in the TC.
+           FIXME: use a more reliable method.  */
+        for (i = 0 ; i < bmap_pages ; i++)
+            bmap[i * PAGE_SIZE] = 0;
+        /* Because bmap is not page aligned (allocated by malloc), be sure the
+           last page is touched.  */
+        bmap[bmap_bytes - 1] = 0;
+    }
+
+    return xc_shadow_control(xc_handle, domid, sop,
+                             dirty_bitmap, pages, stats);
+}
 
 static inline ssize_t
 write_exact(int fd, void *buf, size_t count)
@@ -77,10 +141,10 @@ xc_linux_save(int xc_handle, int io_fd, 
     xc_dominfo_t info;
 
     int rc = 1;
-    unsigned long N;
 
     //int live  = (flags & XCFLAGS_LIVE);
     int debug = (flags & XCFLAGS_DEBUG);
+    int live  = (flags & XCFLAGS_LIVE);
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
@@ -93,10 +157,38 @@ xc_linux_save(int xc_handle, int io_fd, 
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
 
+    /* Iteration number.  */
+    int iter;
+
+    /* Number of pages sent in the last iteration (live only).  */
+    unsigned int sent_last_iter;
+
+    /* Number of pages sent (live only).  */
+    unsigned int total_sent;
+
+    /* Size of the shadow bitmap (live only).  */
+    unsigned int bitmap_size = 0;
+
+    /* True if last iteration.  */
+    int last_iter;
+
+    /* Bitmap of pages to be sent.  */
+    unsigned long *to_send = NULL;
+    /* Bitmap of pages not to be sent (because dirtied).  */
+    unsigned long *to_skip = NULL;
+
     char *mem;
 
     if (debug)
         fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
+
+    /* If no explicit control parameters given, use defaults */
+    if (!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if (!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+    //initialize_mbit_rate();
 
     if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
         ERR("Could not get domain info");
@@ -124,24 +216,9 @@ xc_linux_save(int xc_handle, int io_fd, 
 
     max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
 
-
-    /* This is a non-live suspend. Issue the call back to get the
-       domain suspended */
-
-    if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
-        ERR("Domain appears not to have suspended");
-        goto out;
-    }
-
     page_array = malloc(max_pfn * sizeof(unsigned long));
     if (page_array == NULL) {
         ERR("Could not allocate memory");
-        goto out;
-    }
-
-    if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
-                             0, max_pfn) != max_pfn) {
-        ERR("Could not get the page frame list");
         goto out;
     }
 
@@ -156,10 +233,13 @@ xc_linux_save(int xc_handle, int io_fd, 
        if the format change.
        The version is hard-coded, don't forget to change the restore code
        too!  */
-    N = 1;
-    if (!write_exact(io_fd, &N, sizeof(unsigned long))) {
-        ERR("write: version");
-        goto out;
+    {
+        unsigned long version = 1;
+
+        if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+            ERR("write: version");
+            goto out;
+        }
     }
 
     op.cmd = DOM0_DOMAIN_SETUP;
@@ -175,39 +255,165 @@ xc_linux_save(int xc_handle, int io_fd, 
         goto out;
     }
 
-    /* Start writing out the saved-domain record. */
-    for (N = 0; N < max_pfn; N++) {
-        if (page_array[N] == INVALID_MFN)
-            continue;
-        if (debug)
-            fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
-                     page_array[N], N, max_pfn);
-
-        if (!write_exact(io_fd, &N, sizeof(N))) {
-            ERR("write: max_pfn");
-            goto out;
-        }
-
-        mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                   PROT_READ|PROT_WRITE, page_array[N]);
-        if (mem == NULL) {
-            ERR("cannot map page");
-            goto out;
-        }
-        if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
-            ERR("Error when writing to state file (5)");
-            goto out;
-        }
-        munmap(mem, PAGE_SIZE);
+    /* Domain is still running at this point */
+    if (live) {
+
+        if (xc_ia64_shadow_control(xc_handle, dom,
+                                   DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+                                   NULL, 0, NULL ) < 0) {
+            ERR("Couldn't enable shadow mode");
+            goto out;
+        }
+
+        last_iter = 0;
+
+        bitmap_size = ((max_pfn + BITS_PER_LONG-1) & ~(BITS_PER_LONG-1)) / 8;
+        to_send = malloc(bitmap_size);
+        to_skip = malloc(bitmap_size);
+
+        if (!to_send || !to_skip) {
+            ERR("Couldn't allocate bitmap array");
+            goto out;
+        }
+
+        /* Initially all the pages must be sent.  */
+        memset(to_send, 0xff, bitmap_size);
+
+        if (mlock(to_send, bitmap_size)) {
+            ERR("Unable to mlock to_send");
+            goto out;
+        }
+        if (mlock(to_skip, bitmap_size)) {
+            ERR("Unable to mlock to_skip");
+            goto out;
+        }
+        
+    } else {
+
+        /* This is a non-live suspend. Issue the call back to get the
+           domain suspended */
+
+        last_iter = 1;
+
+        if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+            ERR("Domain appears not to have suspended");
+            goto out;
+        }
+
+    }
+
+    sent_last_iter = max_pfn;
+    total_sent = 0;
+
+    for (iter = 1; ; iter++) {
+        unsigned int sent_this_iter, skip_this_iter;
+        unsigned long N;
+
+        sent_this_iter = 0;
+        skip_this_iter = 0;
+
+        /* Get the pfn list, as it may change.  */
+        if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
+                                 0, max_pfn) != max_pfn) {
+            ERR("Could not get the page frame list");
+            goto out;
+        }
+
+        /* Dirtied pages won't be saved.
+           Slightly wasteful to peek the whole array every time,
+           but this is fast enough for the moment. */
+        if (!last_iter) {
+            if (xc_ia64_shadow_control(xc_handle, dom,
+                                       DOM0_SHADOW_CONTROL_OP_PEEK,
+                                       to_skip, max_pfn, NULL) != max_pfn) {
+                ERR("Error peeking shadow bitmap");
+                goto out;
+            }
+        }
+
+        /* Start writing out the saved-domain record. */
+        for (N = 0; N < max_pfn; N++) {
+            if (page_array[N] == INVALID_MFN)
+                continue;
+            if (!last_iter) {
+                if (test_bit(N, to_skip) && test_bit(N, to_send))
+                    skip_this_iter++;
+                if (test_bit(N, to_skip) || !test_bit(N, to_send))
+                    continue;
+            }
+
+            if (debug)
+                fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
+                        page_array[N], N, max_pfn);
+
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, page_array[N]);
+            if (mem == NULL) {
+                /* The page may have moved.
+                   It will be re-marked dirty.
+                   FIXME: to be tracked.  */
+                fprintf(stderr, "cannot map page %lx: %s\n",
+                        page_array[N], strerror (errno));
+                continue;
+            }
+
+            if (!write_exact(io_fd, &N, sizeof(N))) {
+                ERR("write: max_pfn");
+                goto out;
+            }
+
+            if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
+                ERR("Error when writing to state file (5)");
+                goto out;
+            }
+            munmap(mem, PAGE_SIZE);
+            sent_this_iter++;
+            total_sent++;
+        }
+
+        if (last_iter)
+            break;
+
+        DPRINTF(" %d: sent %d, skipped %d\n",
+                iter, sent_this_iter, skip_this_iter );
+
+        if (live) {
+            if ( /* ((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) || */
+                (iter >= max_iters) || (sent_this_iter+skip_this_iter < 50) ||
+                (total_sent > max_pfn*max_factor)) {
+                DPRINTF("Start last iteration\n");
+                last_iter = 1;
+
+                if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+                    ERR("Domain appears not to have suspended");
+                    goto out;
+                }
+            }
+
+            /* Pages to be sent are pages which were dirty.  */
+            if (xc_ia64_shadow_control(xc_handle, dom,
+                                       DOM0_SHADOW_CONTROL_OP_CLEAN,
+                                       to_send, max_pfn, NULL ) != max_pfn) {
+                ERR("Error flushing shadow PT");
+                goto out;
+            }
+
+            sent_last_iter = sent_this_iter;
+
+            //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+        }
+
     }
 
     fprintf (stderr, "All memory is saved\n");
 
     /* terminate */
-    N = INVALID_MFN;
-    if (!write_exact(io_fd, &N, sizeof(N))) {
-        ERR("Error when writing to state file (6)");
-        goto out;
+    {
+        unsigned long pfn = INVALID_MFN;
+        if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+            ERR("Error when writing to state file (6)");
+            goto out;
+        }
     }
 
     /* Send through a list of all the PFNs that were not in map at the close */
@@ -274,8 +480,16 @@ xc_linux_save(int xc_handle, int io_fd, 
 
  out:
 
-    free (page_array);
-
+    if (live) {
+        if (xc_ia64_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
+                                   NULL, 0, NULL ) < 0) {
+            DPRINTF("Warning - couldn't disable shadow mode");
+        }
+    }
+
+    free(page_array);
+    free(to_send);
+    free(to_skip);
     if (live_shinfo)
         munmap(live_shinfo, PAGE_SIZE);
 
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/asm-offsets.c       Wed Jul 26 09:36:36 2006 -0600
@@ -65,6 +65,11 @@ void foo(void)
        DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 
        BLANK();
+
+       DEFINE(IA64_DOMAIN_SHADOW_BITMAP_OFFSET, offsetof (struct domain, 
arch.shadow_bitmap));
+
+       BLANK();
+
        DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, 
itm_next));
        DEFINE(IA64_CPUINFO_KSOFTIRQD_OFFSET, offsetof (struct cpuinfo_ia64, 
ksoftirqd));
 
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c      Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c      Wed Jul 26 09:36:36 2006 -0600
@@ -265,6 +265,20 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
     }
     break;
 
+    case DOM0_SHADOW_CONTROL:
+    {
+        struct domain *d; 
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.shadow_control.domain);
+        if ( d != NULL )
+        {
+            ret = shadow_mode_control(d, &op->u.shadow_control);
+            put_domain(d);
+            copy_to_guest(u_dom0_op, op, 1);
+        } 
+    }
+    break;
+
     default:
         printf("arch_do_dom0_op: unrecognized dom0 op: %d!!!\n",op->cmd);
         ret = -ENOSYS;
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/domain.c        Wed Jul 26 09:36:36 2006 -0600
@@ -25,26 +25,15 @@
 #include <xen/mm.h>
 #include <xen/iocap.h>
 #include <asm/asm-xsi-offsets.h>
-#include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/hw_irq.h>
-#include <asm/setup.h>
-//#include <asm/mpspec.h>
-#include <xen/irq.h>
 #include <xen/event.h>
-//#include <xen/shadow.h>
 #include <xen/console.h>
 #include <xen/compile.h>
-
 #include <xen/elf.h>
-//#include <asm/page.h>
 #include <asm/pgalloc.h>
-
 #include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */
-
 #include <asm/vcpu.h>   /* for function declarations */
 #include <public/arch-ia64.h>
 #include <xen/domain.h>
@@ -52,13 +41,12 @@
 #include <asm/vmx_vcpu.h>
 #include <asm/vmx_vpd.h>
 #include <asm/vmx_phy_mode.h>
-#include <asm/pal.h>
 #include <asm/vhpt.h>
-#include <public/hvm/ioreq.h>
 #include <public/arch-ia64.h>
 #include <asm/tlbflush.h>
 #include <asm/regionreg.h>
 #include <asm/dom_fw.h>
+#include <asm/shadow.h>
 #include <asm/privop_stat.h>
 
 #ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -388,8 +376,11 @@ void arch_domain_destroy(struct domain *
        BUG_ON(d->arch.mm.pgd != NULL);
        if (d->shared_info != NULL)
            free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));
-
-       domain_flush_destroy (d);
+       if (d->arch.shadow_bitmap != NULL)
+               xfree(d->arch.shadow_bitmap);
+
+       /* Clear vTLB for the next domain.  */
+       domain_flush_tlb_vhpt(d);
 
        deallocate_rid_range(d);
 }
@@ -594,6 +585,148 @@ domain_set_shared_info_va (unsigned long
        return 0;
 }
 
+/* Transfer and clear the shadow bitmap in 1kB chunks for L1 cache. */
+#define SHADOW_COPY_CHUNK (1024 / sizeof (unsigned long))
+
+int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
+{
+       unsigned int op = sc->op;
+       int          rc = 0;
+       int i;
+       //struct vcpu *v;
+
+       if (unlikely(d == current->domain)) {
+               DPRINTK("Don't try to do a shadow op on yourself!\n");
+               return -EINVAL;
+       }   
+
+       domain_pause(d);
+
+       switch (op)
+       {
+       case DOM0_SHADOW_CONTROL_OP_OFF:
+               if (shadow_mode_enabled (d)) {
+                       u64 *bm = d->arch.shadow_bitmap;
+
+                       /* Flush vhpt and tlb to restore dirty bit usage.  */
+                       domain_flush_tlb_vhpt(d);
+
+                       /* Free bitmap.  */
+                       d->arch.shadow_bitmap_size = 0;
+                       d->arch.shadow_bitmap = NULL;
+                       xfree(bm);
+               }
+               break;
+
+       case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
+       case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
+               rc = -EINVAL;
+               break;
+
+       case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
+               if (shadow_mode_enabled(d)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               atomic64_set(&d->arch.shadow_fault_count, 0);
+               atomic64_set(&d->arch.shadow_dirty_count, 0);
+
+               d->arch.shadow_bitmap_size = (d->max_pages + BITS_PER_LONG-1) &
+                                            ~(BITS_PER_LONG-1);
+               d->arch.shadow_bitmap = xmalloc_array(unsigned long,
+                                  d->arch.shadow_bitmap_size / BITS_PER_LONG);
+               if (d->arch.shadow_bitmap == NULL) {
+                       d->arch.shadow_bitmap_size = 0;
+                       rc = -ENOMEM;
+               }
+               else {
+                       memset(d->arch.shadow_bitmap, 0, 
+                              d->arch.shadow_bitmap_size / 8);
+                       
+                       /* Flush vhpt and tlb to enable dirty bit
+                          virtualization.  */
+                       domain_flush_tlb_vhpt(d);
+               }
+               break;
+
+       case DOM0_SHADOW_CONTROL_OP_FLUSH:
+               atomic64_set(&d->arch.shadow_fault_count, 0);
+               atomic64_set(&d->arch.shadow_dirty_count, 0);
+               break;
+   
+       case DOM0_SHADOW_CONTROL_OP_CLEAN:
+         {
+               int nbr_longs;
+
+               sc->stats.fault_count = 
atomic64_read(&d->arch.shadow_fault_count);
+               sc->stats.dirty_count = 
atomic64_read(&d->arch.shadow_dirty_count);
+
+               atomic64_set(&d->arch.shadow_fault_count, 0);
+               atomic64_set(&d->arch.shadow_dirty_count, 0);
+ 
+               if (guest_handle_is_null(sc->dirty_bitmap) ||
+                   (d->arch.shadow_bitmap == NULL)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               if (sc->pages > d->arch.shadow_bitmap_size)
+                       sc->pages = d->arch.shadow_bitmap_size; 
+
+               nbr_longs = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+
+               for (i = 0; i < nbr_longs; i += SHADOW_COPY_CHUNK) {
+                       int size = (nbr_longs - i) > SHADOW_COPY_CHUNK ?
+                                  SHADOW_COPY_CHUNK : nbr_longs - i;
+     
+                       if (copy_to_guest_offset(sc->dirty_bitmap, i,
+                                                d->arch.shadow_bitmap + i,
+                                                size)) {
+                               rc = -EFAULT;
+                               break;
+                       }
+
+                       memset(d->arch.shadow_bitmap + i,
+                              0, size * sizeof(unsigned long));
+               }
+               
+               break;
+         }
+
+       case DOM0_SHADOW_CONTROL_OP_PEEK:
+       {
+               unsigned long size;
+
+               sc->stats.fault_count = 
atomic64_read(&d->arch.shadow_fault_count);
+               sc->stats.dirty_count = 
atomic64_read(&d->arch.shadow_dirty_count);
+
+               if (guest_handle_is_null(sc->dirty_bitmap) ||
+                   (d->arch.shadow_bitmap == NULL)) {
+                       rc = -EINVAL;
+                       break;
+               }
+ 
+               if (sc->pages > d->arch.shadow_bitmap_size)
+                       sc->pages = d->arch.shadow_bitmap_size; 
+
+               size = (sc->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+               if (copy_to_guest(sc->dirty_bitmap, 
+                                 d->arch.shadow_bitmap, size)) {
+                       rc = -EFAULT;
+                       break;
+               }
+               break;
+       }
+       default:
+               rc = -EINVAL;
+               break;
+       }
+       
+       domain_unpause(d);
+       
+       return rc;
+}
 
 // remove following line if not privifying in memory
 //#define HAVE_PRIVIFY_MEMORY
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c        Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/faults.c        Wed Jul 26 09:36:36 2006 -0600
@@ -1,4 +1,3 @@
-
 /*
  * Miscellaneous process/domain related routines
  * 
@@ -29,6 +28,7 @@
 #include <asm/bundle.h>
 #include <asm/privop_stat.h>
 #include <asm/asm-xsi-offsets.h>
+#include <asm/shadow.h>
 
 extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 /* FIXME: where these declarations shold be there ? */
@@ -648,3 +648,92 @@ ia64_handle_reflection (unsigned long if
        reflect_interruption(isr,regs,vector);
 }
 
+void
+ia64_shadow_fault(unsigned long ifa, unsigned long itir,
+                  unsigned long isr, struct pt_regs *regs)
+{
+       struct vcpu *v = current;
+       struct domain *d = current->domain;
+       unsigned long gpfn;
+       unsigned long pte = 0;
+       struct vhpt_lf_entry *vlfe;
+
+       /* There are 2 jobs to do:
+          -  marking the page as dirty (the metaphysical address must be
+             extracted to do that).
+          -  reflecting or not the fault (the virtual Dirty bit must be
+             extracted to decide).
+          Unfortunately this information is not immediately available!
+       */
+
+       /* Extract the metaphysical address.
+          Try to get it from VHPT and M2P as we need the flags.  */
+       vlfe = (struct vhpt_lf_entry *)ia64_thash(ifa);
+       pte = vlfe->page_flags;
+       if (vlfe->ti_tag == ia64_ttag(ifa)) {
+               /* The VHPT entry is valid.  */
+               gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+               BUG_ON(gpfn == INVALID_M2P_ENTRY);
+       }
+       else {
+               unsigned long itir, iha;
+               IA64FAULT fault;
+
+               /* The VHPT entry is not valid.  */
+               vlfe = NULL;
+
+               /* FIXME: gives a chance to tpa, as the TC was valid.  */
+
+               fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha);
+
+               /* Try again!  */
+               if (fault != IA64_NO_FAULT) {
+                       /* This will trigger a dtlb miss.  */
+                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
+                       return;
+               }
+               gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+               if (pte & _PAGE_D)
+                       pte |= _PAGE_VIRT_D;
+       }
+
+       /* Set the dirty bit in the bitmap.  */
+       shadow_mark_page_dirty (d, gpfn);
+
+       /* Update the local TC/VHPT and decide whether or not the fault should
+          be reflected.
+          SMP note: we almost ignore the other processors.  The shadow_bitmap
+          has been atomically updated.  If the dirty fault happen on another
+          processor, it will do its job.
+       */
+
+       if (pte != 0) {
+               /* We will know how to handle the fault.  */
+
+               if (pte & _PAGE_VIRT_D) {
+                       /* Rewrite VHPT entry.
+                          There is no race here because only the
+                          cpu VHPT owner can write page_flags.  */
+                       if (vlfe)
+                               vlfe->page_flags = pte | _PAGE_D;
+                       
+                       /* Purge the TC locally.
+                          It will be reloaded from the VHPT iff the
+                          VHPT entry is still valid.  */
+                       ia64_ptcl(ifa, PAGE_SHIFT << 2);
+
+                       atomic64_inc(&d->arch.shadow_fault_count);
+               }
+               else {
+                       /* Reflect.
+                          In this case there is no need to purge.  */
+                       ia64_handle_reflection(ifa, regs, isr, 0, 8);
+               }
+       }
+       else {
+               /* We don't know whether or not the fault must be
+                  reflected.  The VHPT entry is not valid.  */
+               /* FIXME: in metaphysical mode, we could do an ITC now.  */
+               ia64_ptcl(ifa, PAGE_SHIFT << 2);
+       }
+}
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S   Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/ivt.S   Wed Jul 26 09:36:36 2006 -0600
@@ -746,7 +746,48 @@ ENTRY(dirty_bit)
 ENTRY(dirty_bit)
        DBG_FAULT(8)
 #ifdef XEN
-       FAULT_OR_REFLECT(8)
+       mov r20=cr.ipsr
+       mov r31=pr;;
+       extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;
+       mov r19=8       /* prepare to save predicates */
+       cmp.eq p6,p0=r0,r20     /* cpl == 0?*/
+(p6)   br.sptk.few dispatch_to_fault_handler
+       /* If shadow mode is not enabled, reflect the fault.  */
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET
+       ;;
+       ld8 r22=[r22]
+       ;;
+       add r22=IA64_VCPU_DOMAIN_OFFSET,r22
+       ;;
+       /* Read domain.  */
+       ld8 r22=[r22]
+       ;;
+       add r22=IA64_DOMAIN_SHADOW_BITMAP_OFFSET,r22
+       ;;
+       ld8 r22=[r22]
+       ;;
+       cmp.eq p6,p0=r0,r22     /* !shadow_bitmap ?*/
+(p6)   br.dptk.many dispatch_reflection
+
+       SAVE_MIN_WITH_COVER
+       alloc r14=ar.pfs,0,0,4,0
+       mov out0=cr.ifa
+       mov out1=cr.itir
+       mov out2=cr.isr
+       adds out3=16,sp
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption 
collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=ia64_shadow_fault
 #else
        /*
         * What we do here is to simply turn on the dirty bit in the PTE.  We 
need to
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/mm.c    Wed Jul 26 09:36:36 2006 -0600
@@ -170,6 +170,7 @@
 #include <asm/pgalloc.h>
 #include <asm/vhpt.h>
 #include <asm/vcpu.h>
+#include <asm/shadow.h>
 #include <linux/efi.h>
 
 #ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -470,7 +471,7 @@ u64 translate_domain_pte(u64 pteval, u64
        pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
        pteval2 |= (pteval & _PAGE_ED);
        pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
-       pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
+       pteval2 |= (pteval & ~_PAGE_PPN_MASK);
        /*
         * Don't let non-dom0 domains map uncached addresses.  This can
         * happen when domU tries to touch i/o port space.  Also prevents
@@ -481,6 +482,18 @@ u64 translate_domain_pte(u64 pteval, u64
         */
        if (d != dom0 && (pteval2 & _PAGE_MA_MASK) != _PAGE_MA_NAT)
                pteval2 &= ~_PAGE_MA_MASK;
+
+    /* If shadow mode is enabled, virtualize dirty bit.  */
+    if (shadow_mode_enabled(d) && (pteval2 & _PAGE_D)) {
+        u64 mp_page = mpaddr >> PAGE_SHIFT;
+        pteval2 |= _PAGE_VIRT_D;
+
+        /* If the page is not already dirty, don't set the dirty bit.
+           This is a small optimization!  */
+        if (mp_page < d->arch.shadow_bitmap_size * 8
+            && !test_bit(mp_page, d->arch.shadow_bitmap))
+            pteval2 = (pteval2 & ~_PAGE_D);
+    }
 
        return pteval2;
 }
@@ -1418,10 +1431,13 @@ guest_physmap_remove_page(struct domain 
 
 //XXX sledgehammer.
 //    flush finer range.
-void
+static void
 domain_page_flush(struct domain* d, unsigned long mpaddr,
                   unsigned long old_mfn, unsigned long new_mfn)
 {
+    if (shadow_mode_enabled(d))
+        shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
+
     domain_flush_vtlb_all();
 }
 
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c        Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/privop.c        Wed Jul 26 09:36:36 2006 -0600
@@ -686,7 +686,8 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
                (void)vcpu_increment_iip(vcpu);
        }
        if (fault == IA64_ILLOP_FAULT)
-               printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
+               printf("priv_emulate: priv_handle_op fails, "
+                      "isr=0x%lx iip=%lx\n",isr, regs->cr_iip);
        return fault;
 }
 
diff -r e585c2dade14 -r 86e5d8458c08 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/arch/ia64/xen/vhpt.c  Wed Jul 26 09:36:36 2006 -0600
@@ -236,7 +236,7 @@ static void flush_tlb_vhpt_all (struct d
        local_flush_tlb_all ();
 }
 
-void domain_flush_destroy (struct domain *d)
+void domain_flush_tlb_vhpt(struct domain *d)
 {
        /* Very heavy...  */
        on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/domain.h     Wed Jul 26 09:36:36 2006 -0600
@@ -48,6 +48,9 @@ extern unsigned long domain_set_shared_i
    If sync_only is true, only synchronize I&D caches,
    if false, flush and invalidate caches.  */
 extern void domain_cache_flush (struct domain *d, int sync_only);
+
+/* Control the shadow mode.  */
+extern int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc);
 
 /* Cleanly crash the current domain with a message.  */
 extern void panic_domain(struct pt_regs *, const char *, ...)
@@ -117,6 +120,16 @@ struct arch_domain {
     /* Address of fpswa_interface_t (placed in domain memory)  */
     void *fpswa_inf;
 
+    /* Bitmap of shadow dirty bits.
+       Set iff shadow mode is enabled.  */
+    u64 *shadow_bitmap;
+    /* Length (in bits!) of shadow bitmap.  */
+    unsigned long shadow_bitmap_size;
+    /* Number of bits set in bitmap.  */
+    atomic64_t shadow_dirty_count;
+    /* Number of faults.  */
+    atomic64_t shadow_fault_count;
+
     struct last_vcpu last_vcpu[NR_CPUS];
 };
 #define INT_ENABLE_OFFSET(v)             \
diff -r e585c2dade14 -r 86e5d8458c08 
xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Wed Jul 26 09:02:43 
2006 -0600
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Wed Jul 26 09:36:36 
2006 -0600
@@ -62,7 +62,12 @@
 #define _PAGE_D                        (1 << _PAGE_D_BIT)      /* page dirty 
bit */
 #define _PAGE_PPN_MASK         (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & 
~0xfffUL)
 #define _PAGE_ED               (__IA64_UL(1) << 52)    /* exception deferral */
+#ifdef XEN
+#define _PAGE_VIRT_D           (__IA64_UL(1) << 53)    /* Virtual dirty bit */
+#define _PAGE_PROTNONE         0
+#else
 #define _PAGE_PROTNONE         (__IA64_UL(1) << 63)
+#endif
 
 /* Valid only for a PTE with the present bit cleared: */
 #define _PAGE_FILE             (1 << 1)                /* see swap & file pte 
remarks below */
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h     Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/shadow.h     Wed Jul 26 09:36:36 2006 -0600
@@ -45,6 +45,24 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned 
long mfn);
 #endif
 
+static inline int
+shadow_mode_enabled(struct domain *d)
+{
+    return d->arch.shadow_bitmap != NULL;
+}
+
+static inline int
+shadow_mark_page_dirty(struct domain *d, unsigned long gpfn)
+{
+    if (gpfn < d->arch.shadow_bitmap_size * 8
+        && !test_and_set_bit(gpfn, d->arch.shadow_bitmap)) {
+        /* The page was not dirty.  */
+        atomic64_inc(&d->arch.shadow_dirty_count);
+        return 1;
+    } else
+        return 0;
+}
+
 #endif // _XEN_SHADOW_H
 
 /*
diff -r e585c2dade14 -r 86e5d8458c08 xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h   Wed Jul 26 09:02:43 2006 -0600
+++ b/xen/include/asm-ia64/tlbflush.h   Wed Jul 26 09:36:36 2006 -0600
@@ -22,8 +22,8 @@ void domain_flush_vtlb_all (void);
 /* Global range-flush of vTLB.  */
 void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
 
-/* Final vTLB flush on every dirty cpus.  */
-void domain_flush_destroy (struct domain *d);
+/* Flush vhpt and mTLB on every dirty cpus.  */
+void domain_flush_tlb_vhpt(struct domain *d);
 
 /* Flush v-tlb on cpus set in mask for current domain.  */
 void flush_tlb_mask(cpumask_t mask);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] [IA64] live migration, Xen patchbot-unstable <=