# HG changeset patch
# User gingold@virtu10
# Node ID 99f2a44bc49552c80ca503cf9020d8ceb99be949
# Parent  84c521570d8a32cd764a76b03b40d88a29215db4
Shadow mode and live migration.

Virtualize Dirty bit.

Signed-off-by: Tristan Gingold

diff -r 84c521570d8a -r 99f2a44bc495 tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c	Tue Jul 18 09:15:29 2006 +0200
@@ -163,7 +163,7 @@ xc_linux_restore(int xc_handle, int io_f
 
         pfn = page_array[mfn];
 
-        DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn);
+        //DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn);
 
         if (read_page(xc_handle, io_fd, dom, page_array[mfn]) < 0)
             goto out;
diff -r 84c521570d8a -r 99f2a44bc495 tools/libxc/ia64/xc_ia64_linux_save.c
--- a/tools/libxc/ia64/xc_ia64_linux_save.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c	Tue Jul 18 09:15:29 2006 +0200
@@ -15,6 +15,45 @@
 
 #include "xg_private.h"
 
+/*
+** Default values for important tuning parameters.  Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS    (4 - 1)  /* limit us to 4 times round loop */
+#define DEF_MAX_FACTOR   3        /* never send more than 3x nr_pfns */
+
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int test_bit (int nr, volatile void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void clear_bit (int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr));
+}
+
+static inline void set_bit (int nr, volatile void * addr)
+{
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
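The open-coded bitmap helpers above are deliberately minimal: one machine word covers BITS_PER_LONG pfns, and test/set/clear operate directly on the array. A standalone sketch (not part of the patch; max_pfn and the macros are duplicated here so the example compiles on its own) of the first-iteration setup the save loop performs:

    /* Standalone sketch: exercising the same open-coded bitmap helpers
     * the save loop relies on. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static unsigned long max_pfn = 1024;           /* example domain size */

    #define BITS_PER_LONG (sizeof(unsigned long) * 8)
    #define BITMAP_SIZE ((max_pfn + BITS_PER_LONG - 1) / 8)
    #define BITMAP_ENTRY(_nr,_bmap) \
       ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
    #define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)

    static inline int test_bit(int nr, volatile void *addr)
    {
        return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
    }

    static inline void set_bit(int nr, volatile void *addr)
    {
        BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
    }

    int main(void)
    {
        unsigned long *to_send = malloc(BITMAP_SIZE);

        if (!to_send)
            return 1;
        /* First iteration: every page is a send candidate, as in the patch. */
        memset(to_send, 0xff, BITMAP_SIZE);
        printf("pfn 42 pending? %d\n", test_bit(42, to_send));   /* 1 */
        set_bit(7, to_send);                                     /* idempotent */
        free(to_send);
        return 0;
    }

Note that BITMAP_SIZE rounds up to whole words, so the buffer always covers at least max_pfn bits.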
@@ -77,10 +116,11 @@ xc_linux_save(int xc_handle, int io_fd,
     xc_dominfo_t info;
 
     int rc = 1;
-    unsigned long N;
+    int i;
 
     //int live = (flags & XCFLAGS_LIVE);
     int debug = (flags & XCFLAGS_DEBUG);
+    int live = (flags & XCFLAGS_LIVE);
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
@@ -93,10 +133,32 @@ xc_linux_save(int xc_handle, int io_fd,
 
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
 
+    /* Iteration number. */
+    int iter;
+
+    unsigned int sent_last_iter;
+    unsigned int total_sent;
+
+    /* True if last iteration. */
+    int last_iter;
+
+    /* Bitmap of pages to be sent. */
+    unsigned long *to_send = NULL;
+    /* Bitmap of pages not to be sent (because dirtied). */
+    unsigned long *to_skip = NULL;
+
     char *mem;
 
     if (debug)
         fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
+
+    /* If no explicit control parameters given, use defaults. */
+    if (!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if (!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+    //initialize_mbit_rate();
 
     if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
         ERR("Could not get domain info");
@@ -124,24 +186,9 @@ xc_linux_save(int xc_handle, int io_fd,
 
     max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
 
-
-    /* This is a non-live suspend.  Issue the call back to get the
-       domain suspended */
-
-    if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
-        ERR("Domain appears not to have suspended");
-        goto out;
-    }
-
     page_array = malloc(max_pfn * sizeof(unsigned long));
     if (page_array == NULL) {
         ERR("Could not allocate memory");
-        goto out;
-    }
-
-    if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
-                             0, max_pfn) != max_pfn) {
-        ERR("Could not get the page frame list");
         goto out;
     }
@@ -156,10 +203,13 @@ xc_linux_save(int xc_handle, int io_fd,
        if the format change.
       The version is hard-coded, don't forget to change the restore code
       too! */
-    N = 1;
-    if (!write_exact(io_fd, &N, sizeof(unsigned long))) {
-        ERR("write: version");
-        goto out;
+    {
+        unsigned long version = 1;
+
+        if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+            ERR("write: version");
+            goto out;
+        }
     }
 
     op.cmd = DOM0_DOMAIN_SETUP;
@@ -175,39 +225,172 @@ xc_linux_save(int xc_handle, int io_fd,
         goto out;
     }
 
-    /* Start writing out the saved-domain record. */
-    for (N = 0; N < max_pfn; N++) {
-        if (page_array[N] == INVALID_MFN)
-            continue;
-        if (debug)
-            fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
-                     page_array[N], N, max_pfn);
-
-        if (!write_exact(io_fd, &N, sizeof(N))) {
-            ERR("write: max_pfn");
-            goto out;
-        }
-
-        mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                   PROT_READ|PROT_WRITE, page_array[N]);
-        if (mem == NULL) {
-            ERR("cannot map page");
-            goto out;
-        }
-        if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
-            ERR("Error when writing to state file (5)");
-            goto out;
-        }
-        munmap(mem, PAGE_SIZE);
-    }
+    /* Domain is still running at this point. */
+    if (live) {
+
+        if (xc_shadow_control(xc_handle, dom,
+                              DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+                              NULL, 0, NULL) < 0) {
+            ERR("Couldn't enable shadow mode");
+            goto out;
+        }
+
+        last_iter = 0;
+
+        to_send = malloc(BITMAP_SIZE);
+        to_skip = malloc(BITMAP_SIZE);
+
+        if (!to_send || !to_skip) {
+            ERR("Couldn't allocate bitmap array");
+            goto out;
+        }
+
+        /* Initially all the pages must be sent. */
+        memset(to_send, 0xff, BITMAP_SIZE);
+
+        if (mlock(to_send, BITMAP_SIZE)) {
+            ERR("Unable to mlock to_send");
+            return 1;
+        }
+        if (mlock(to_skip, BITMAP_SIZE)) {
+            ERR("Unable to mlock to_skip");
+            return 1;
+        }
+
+    } else {
+
+        /* This is a non-live suspend.  Issue the call back to get the
+           domain suspended. */
+
+        last_iter = 1;
+
+        if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+            ERR("Domain appears not to have suspended");
+            goto out;
+        }
+
+    }
+
+    sent_last_iter = max_pfn;
+    total_sent = 0;
+
+    for (iter = 1; ; iter++) {
+        unsigned int sent_this_iter, skip_this_iter;
+        unsigned long N;
+
+        sent_this_iter = 0;
+        skip_this_iter = 0;
+
+        /* Get the pfn list, as it may change. */
+        if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
+                                 0, max_pfn) != max_pfn) {
+            ERR("Could not get the page frame list");
+            goto out;
+        }
+
+        /* Dirtied pages won't be saved.
+           It is slightly wasteful to peek the whole array every time,
+           but this is fast enough for the moment. */
+        if (!last_iter) {
+            /* FIXME!! */
+            for (i = 0; i < BITMAP_SIZE; i += PAGE_SIZE)
+                to_send[i] = 0;
+
+            if (xc_shadow_control(xc_handle, dom,
+                                  DOM0_SHADOW_CONTROL_OP_PEEK,
+                                  to_skip, max_pfn, NULL) != max_pfn) {
+                ERR("Error peeking shadow bitmap");
+                goto out;
+            }
+        }
+
+        /* Start writing out the saved-domain record. */
+        for (N = 0; N < max_pfn; N++) {
+            if (page_array[N] == INVALID_MFN)
+                continue;
+            if (!last_iter) {
+                if (test_bit (N, to_skip) && test_bit (N, to_send))
+                    skip_this_iter++;
+                if (test_bit (N, to_skip) || !test_bit (N, to_send))
+                    continue;
+            }
+
+            if (debug)
+                fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
+                         page_array[N], N, max_pfn);
+
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, page_array[N]);
+            if (mem == NULL) {
+                /* The page may have moved.
+                   It will be re-marked dirty.
+                   FIXME: to be tracked. */
+                fprintf (stderr, "cannot map page %lx: %s\n",
+                         page_array[N], strerror (errno));
+                continue;
+            }
+
+            if (!write_exact(io_fd, &N, sizeof(N))) {
+                ERR("write: max_pfn");
+                goto out;
+            }
+
+            if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
+                ERR("Error when writing to state file (5)");
+                goto out;
+            }
+            munmap(mem, PAGE_SIZE);
+            sent_this_iter++;
+            total_sent++;
+        }
+
+        if (last_iter)
+            break;
+
+        DPRINTF(" %d: sent %d, skipped %d\n",
+                iter, sent_this_iter, skip_this_iter );
+
+        if (live) {
+            if (
+                //((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
+                (iter >= max_iters) ||
+                (sent_this_iter+skip_this_iter < 50) ||
+                (total_sent > max_pfn*max_factor) ) {
+                DPRINTF("Start last iteration\n");
+                last_iter = 1;
+
+                if (suspend_and_state(suspend, xc_handle, io_fd, dom,
+                                      &info)) {
+                    ERR("Domain appears not to have suspended");
+                    goto out;
+                }
+            }
+
+            /* Pages to be sent are pages which were dirty. */
+            /* Touch the pages so that they are in the TC.
+               FIXME: improve this!!! */
+            for (i = 0; i < BITMAP_SIZE; i += PAGE_SIZE)
+                to_send[i] = 0;
+
+            if (xc_shadow_control(xc_handle, dom,
+                                  DOM0_SHADOW_CONTROL_OP_CLEAN,
+                                  to_send, max_pfn, NULL) != max_pfn) {
+                ERR("Error flushing shadow PT");
+                goto out;
+            }
+
+            sent_last_iter = sent_this_iter;
+
+            //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
+        }
+    }
 
     fprintf (stderr, "All memory is saved\n");
 
     /* terminate */
-    N = INVALID_MFN;
-    if (!write_exact(io_fd, &N, sizeof(N))) {
-        ERR("Error when writing to state file (6)");
-        goto out;
+    {
+        unsigned long pfn = INVALID_MFN;
+        if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+            ERR("Error when writing to state file (6)");
+            goto out;
+        }
     }
 
     /* Send through a list of all the PFNs that were not in map at the close */
@@ -274,8 +457,16 @@ xc_linux_save(int xc_handle, int io_fd,
 
  out:
 
+    if (live) {
+        if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
+                              NULL, 0, NULL) < 0) {
+            DPRINTF("Warning - couldn't disable shadow mode");
+        }
+    }
+
     free (page_array);
-
+    free (to_send);
+    free (to_skip);
 
     if (live_shinfo)
         munmap(live_shinfo, PAGE_SIZE);
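Taken together, xc_linux_save() now implements the classic pre-copy protocol: enable log-dirty mode, send every page once, then keep resending pages dirtied in the meantime until one of three stop conditions holds, and only then suspend the domain for a short final round. A standalone sketch (not part of the patch) of just that stopping policy, with the same thresholds as the loop above:

    /* Standalone sketch: the pre-copy stopping policy, modelled with
     * plain integers. The 50-page threshold and the max_iters/max_factor
     * limits mirror the conditions in the loop above. */
    #include <stdio.h>

    static int must_suspend(int iter, int max_iters,
                            unsigned sent, unsigned skipped,
                            unsigned long total_sent,
                            unsigned long max_pfn, int max_factor)
    {
        return (iter >= max_iters) ||               /* bounded rounds */
               (sent + skipped < 50) ||             /* dirty set nearly drained */
               (total_sent > max_pfn * max_factor); /* bounded total traffic */
    }

    int main(void)
    {
        /* Example: the second round resent only 40 pages and skipped 5,
           so the domain would be suspended for the final round. */
        printf("%d\n", must_suspend(2, 3, 40, 5, 1200, 1024, 3));
        return 0;
    }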
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/asm-offsets.c	Tue Jul 18 09:15:29 2006 +0200
@@ -65,6 +65,11 @@ void foo(void)
     DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 
     BLANK();
+
+    DEFINE(IA64_DOMAIN_SHADOW_BITMAP_OFFSET, offsetof (struct domain, arch.shadow_bitmap));
+
+    BLANK();
+
     DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next));
     DEFINE(IA64_CPUINFO_KSOFTIRQD_OFFSET, offsetof (struct cpuinfo_ia64, ksoftirqd));
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/dom0_ops.c	Tue Jul 18 09:15:29 2006 +0200
@@ -265,6 +265,20 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_
     }
     break;
 
+    case DOM0_SHADOW_CONTROL:
+    {
+        struct domain *d;
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.shadow_control.domain);
+        if ( d != NULL )
+        {
+            ret = shadow_mode_control(d, &op->u.shadow_control);
+            put_domain(d);
+            copy_to_guest(u_dom0_op, op, 1);
+        }
+    }
+    break;
+
     default:
         printf("arch_do_dom0_op: unrecognized dom0 op: %d!!!\n",op->cmd);
         ret = -ENOSYS;
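On the hypervisor side this new case simply routes the tool request to shadow_mode_control(). From the tools side the operations are driven exactly as xc_ia64_linux_save.c does above; a sketch of one complete log-dirty cycle, assuming the libxc context this file already compiles in (only the xc_shadow_control() calls and constants the patch itself uses appear here):

    /* Sketch: one log-dirty cycle as a Dom0 tool would drive it.
     * Error handling is abbreviated; the wrapper's return conventions
     * follow the checks made in xc_ia64_linux_save.c above. */
    int logdirty_round(int xc_handle, uint32_t dom,
                       unsigned long *bitmap, unsigned long max_pfn)
    {
        /* Enable dirty logging once, before the first round. */
        if (xc_shadow_control(xc_handle, dom,
                              DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
                              NULL, 0, NULL) < 0)
            return -1;

        /* Fetch and atomically clear the dirty bitmap for this round. */
        if (xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_CLEAN,
                              bitmap, max_pfn, NULL) != max_pfn)
            return -1;

        /* ... send the pages flagged in bitmap ... */

        /* Turn logging off when migration completes or fails. */
        return xc_shadow_control(xc_handle, dom, DOM0_SHADOW_CONTROL_OP_OFF,
                                 NULL, 0, NULL);
    }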
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/domain.c	Tue Jul 18 09:15:29 2006 +0200
@@ -25,26 +25,15 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
-#include
-#include
-#include
-//#include
-#include
 #include
-//#include
 #include
 #include
-
 #include
-//#include
 #include
-
 #include	/* for IA64_THREAD_INFO_SIZE */
-
 #include	/* for function declarations */
 #include
 #include
@@ -52,13 +41,12 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
 #include
@@ -399,8 +387,11 @@ void arch_domain_destroy(struct domain *
     BUG_ON(d->arch.mm.pgd != NULL);
     if (d->shared_info != NULL)
         free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));
-
-    domain_flush_destroy (d);
+    if (d->arch.shadow_bitmap != NULL)
+        xfree (d->arch.shadow_bitmap);
+
+    /* Clear vTLB for the next domain. */
+    domain_flush_tlb_vhpt (d);
 
     deallocate_rid_range(d);
 }
@@ -605,6 +596,147 @@ domain_set_shared_info_va (unsigned long
     return 0;
 }
 
+int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
+{
+    unsigned int op = sc->op;
+    int rc = 0;
+    int i;
+    //struct vcpu *v;
+
+    if ( unlikely(d == current->domain) ) {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    domain_pause(d);
+
+    switch ( op )
+    {
+    case DOM0_SHADOW_CONTROL_OP_OFF:
+        if (shadow_mode_enabled (d)) {
+            u64 *bm = d->arch.shadow_bitmap;
+
+            /* Flush vhpt and tlb to restore dirty bit usage. */
+            domain_flush_tlb_vhpt (d);
+
+            /* Free bitmap. */
+            d->arch.shadow_bitmap_size = 0;
+            d->arch.shadow_bitmap = NULL;
+            xfree (bm);
+        }
+        break;
+
+    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
+    case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
+        rc = -EINVAL;
+        break;
+
+    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
+        if (shadow_mode_enabled (d)) {
+            rc = -EINVAL;
+            break;
+        }
+
+        atomic64_set (&d->arch.shadow_fault_count, 0);
+        atomic64_set (&d->arch.shadow_dirty_count, 0);
+
+        d->arch.shadow_bitmap_size = (d->max_pages + 63) & ~63;
+        d->arch.shadow_bitmap =
+            xmalloc_array(unsigned long,
+                          d->arch.shadow_bitmap_size / (8 * sizeof(u64)));
+        if ( d->arch.shadow_bitmap == NULL ) {
+            d->arch.shadow_bitmap_size = 0;
+            rc = -ENOMEM;
+        }
+        else {
+            memset(d->arch.shadow_bitmap, 0,
+                   d->arch.shadow_bitmap_size / 8);
+
+            /* Flush vhpt and tlb to enable dirty bit
+               virtualization. */
+            domain_flush_tlb_vhpt (d);
+        }
+        break;
+
+    case DOM0_SHADOW_CONTROL_OP_FLUSH:
+        atomic64_set (&d->arch.shadow_fault_count, 0);
+        atomic64_set (&d->arch.shadow_dirty_count, 0);
+        break;
+
+    case DOM0_SHADOW_CONTROL_OP_CLEAN:
+        sc->stats.fault_count = atomic64_read (&d->arch.shadow_fault_count);
+        sc->stats.dirty_count = atomic64_read (&d->arch.shadow_dirty_count);
+
+        atomic64_set (&d->arch.shadow_fault_count, 0);
+        atomic64_set (&d->arch.shadow_dirty_count, 0);
+
+        if ( guest_handle_is_null(sc->dirty_bitmap) ||
+             (d->arch.shadow_bitmap == NULL) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        if ( sc->pages > d->arch.shadow_bitmap_size )
+            sc->pages = d->arch.shadow_bitmap_size;
+
+#define chunk (8*1024)	/* Transfer and clear in 1kB chunks for L1 cache. */
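The CLEAN operation below hands the dirty bitmap to the caller and resets it in 8192-page chunks, i.e. 1kB of bitmap at a time, so each chunk stays in the L1 cache between the copy and the clear. A standalone model (not from the patch) of that loop, with memcpy() standing in for copy_to_guest_offset():

    /* Standalone model of the CLEAN op's copy-and-clear loop. The
     * chunking arithmetic matches shadow_mode_control() below. */
    #include <string.h>

    #define CHUNK (8*1024)   /* pages per step: 1kB of bitmap */

    static void clean_bitmap(unsigned long *dst, unsigned long *src,
                             unsigned long pages)
    {
        unsigned long i;

        for (i = 0; i < pages; i += CHUNK) {
            int bytes = ((((pages - i) > CHUNK) ? CHUNK : (pages - i)) + 7) / 8;
            unsigned long off = i / (8 * sizeof(unsigned long));

            memcpy(dst + off, src + off, bytes);  /* hand chunk to the caller */
            memset(src + off, 0, bytes);          /* ...and reset it for the
                                                     next round of logging */
        }
    }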
+        for ( i = 0; i < sc->pages; i += chunk )
+        {
+            int bytes = ((((sc->pages - i) > chunk) ?
+                          chunk : (sc->pages - i)) + 7) / 8;
+
+            if ( copy_to_guest_offset(
+                     sc->dirty_bitmap,
+                     i/(8*sizeof(unsigned long)),
+                     d->arch.shadow_bitmap + (i/(8*sizeof(unsigned long))),
+                     (bytes+sizeof(unsigned long)-1) / sizeof(unsigned long)) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+
+            memset(d->arch.shadow_bitmap + (i/(8*sizeof(unsigned long))),
+                   0, bytes);
+        }
+
+        break;
+
+    case DOM0_SHADOW_CONTROL_OP_PEEK:
+        sc->stats.fault_count = atomic64_read (&d->arch.shadow_fault_count);
+        sc->stats.dirty_count = atomic64_read (&d->arch.shadow_dirty_count);
+
+        if ( guest_handle_is_null(sc->dirty_bitmap) ||
+             (d->arch.shadow_bitmap == NULL) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        if ( sc->pages > d->arch.shadow_bitmap_size )
+            sc->pages = d->arch.shadow_bitmap_size;
+
+        if ( copy_to_guest(sc->dirty_bitmap,
+                           d->arch.shadow_bitmap,
+                           (((sc->pages+7)/8)+sizeof(unsigned long)-1) /
+                           sizeof(unsigned long)) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+
+        break;
+
+    default:
+        rc = -EINVAL;
+        break;
+    }
+
+    domain_unpause(d);
+
+    return rc;
+}
 
 // remove following line if not privifying in memory
 //#define HAVE_PRIVIFY_MEMORY
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/faults.c	Tue Jul 18 09:15:29 2006 +0200
@@ -1,4 +1,3 @@
-
 /*
  * Miscellaneous process/domain related routines
  *
@@ -29,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
 /* FIXME: where these declarations shold be there ? */
@@ -644,3 +644,92 @@ ia64_handle_reflection (unsigned long if
     reflect_interruption(isr,regs,vector);
 }
 
+void
+ia64_shadow_fault (unsigned long ifa, unsigned long itir,
+                   unsigned long isr, struct pt_regs *regs)
+{
+    struct vcpu *v = current;
+    struct domain *d = current->domain;
+    unsigned long gpfn;
+    unsigned long pte = 0;
+    struct vhpt_lf_entry *vlfe;
+
+    /* There are 2 jobs to do:
+       - marking the page as dirty (the metaphysical address must be
+         extracted to do that),
+       - deciding whether or not to reflect the fault (the virtual Dirty
+         bit must be extracted to decide).
+       Unfortunately, this information is not immediately available! */
+
+    /* Extract the metaphysical address.
+       Try to get it from the VHPT and the M2P, as we need the flags. */
+    vlfe = (struct vhpt_lf_entry *)ia64_thash(ifa);
+    pte = vlfe->page_flags;
+    if (vlfe->ti_tag == ia64_ttag (ifa)) {
+        /* The VHPT entry is valid. */
+        gpfn = get_gpfn_from_mfn((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+        BUG_ON (gpfn == INVALID_M2P_ENTRY);
+    }
+    else {
+        unsigned long itir, iha;
+        IA64FAULT fault;
+
+        /* The VHPT entry is not valid. */
+        vlfe = NULL;
+
+        /* FIXME: give a chance to tpa, as the TC was valid. */
+
+        fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha);
+
+        /* Try again! */
+        if (fault != IA64_NO_FAULT) {
+            /* This will trigger a dtlb miss. */
+            ia64_ptcl (ifa, PAGE_SHIFT << 2);
+            return;
+        }
+        gpfn = ((pte & _PAGE_PPN_MASK) >> PAGE_SHIFT);
+        if (pte & _PAGE_D)
+            pte |= _PAGE_VIRT_D;
+    }
+
+    /* Set the dirty bit in the bitmap. */
+    shadow_mark_page_dirty (d, gpfn);
+
+    /* Update the local TC/VHPT and decide whether or not the fault
+       should be reflected.
+       SMP note: we almost ignore the other processors.  The
+       shadow_bitmap has been atomically updated.  If the dirty fault
+       happens on another processor, it will do its job. */
+
+    if (pte != 0) {
+        /* We know how to handle the fault. */
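The handler's decision boils down to the recovered PTE: if _PAGE_VIRT_D is set, the guest already considers the page dirty, so Xen silently sets the real Dirty bit and purges the stale TC entry; if not, the fault is reflected so the guest OS can run its own dirty-bit handler; and with no usable PTE at all, the handler just purges and lets the access retry. A compact model of that decision (not from the patch; the bit values are illustrative stand-ins):

    /* Sketch: the reflect-or-fix decision in ia64_shadow_fault(),
     * reduced to the two PTE bits involved. */
    #include <stdio.h>

    #define PAGE_D       (1UL << 6)    /* hardware dirty bit (example value) */
    #define PAGE_VIRT_D  (1UL << 53)   /* guest-visible virtual dirty bit */

    enum action { FIX_AND_PURGE, REFLECT_TO_GUEST, PURGE_AND_RETRY };

    static enum action on_dirty_fault(unsigned long pte)
    {
        if (pte == 0)
            return PURGE_AND_RETRY;    /* no translation info: retry access */
        if (pte & PAGE_VIRT_D)
            return FIX_AND_PURGE;      /* guest already saw the page as dirty */
        return REFLECT_TO_GUEST;       /* let the guest set its own Dirty bit */
    }

    int main(void)
    {
        printf("%d\n", on_dirty_fault(PAGE_VIRT_D | PAGE_D)); /* FIX_AND_PURGE */
        return 0;
    }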
+
+        if (pte & _PAGE_VIRT_D) {
+            /* Rewrite the VHPT entry.
+               There is no race here because only the
+               cpu VHPT owner can write page_flags. */
+            if (vlfe)
+                vlfe->page_flags = pte | _PAGE_D;
+
+            /* Purge the TC locally.
+               It will be reloaded from the VHPT iff the
+               VHPT entry is still valid. */
+            ia64_ptcl (ifa, PAGE_SHIFT << 2);
+
+            atomic64_inc (&d->arch.shadow_fault_count);
+        }
+        else {
+            /* Reflect.
+               In this case there is no need to purge. */
+            ia64_handle_reflection (ifa, regs, isr, 0, 8);
+        }
+    }
+    else {
+        /* We don't know whether or not the fault must be
+           reflected.  The VHPT entry is not valid. */
+        /* FIXME: in metaphysical mode, we could do an ITC now. */
+        ia64_ptcl (ifa, PAGE_SHIFT << 2);
+    }
+}
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/ivt.S
--- a/xen/arch/ia64/xen/ivt.S	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/ivt.S	Tue Jul 18 09:15:29 2006 +0200
@@ -746,7 +746,48 @@ ENTRY(dirty_bit)
 ENTRY(dirty_bit)
 	DBG_FAULT(8)
 #ifdef XEN
-	FAULT_OR_REFLECT(8)
+	mov r20=cr.ipsr
+	mov r31=pr;;
+	extr.u r20=r20,IA64_PSR_CPL0_BIT,2;;
+	mov r19=8	/* prepare to save predicates */
+	cmp.eq p6,p0=r0,r20	/* cpl == 0? */
+(p6)	br.sptk.few dispatch_to_fault_handler
+	/* If shadow mode is not enabled, reflect the fault. */
+	movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET
+	;;
+	ld8 r22=[r22]
+	;;
+	add r22=IA64_VCPU_DOMAIN_OFFSET,r22
+	;;
+	/* Read domain. */
+	ld8 r22=[r22]
+	;;
+	add r22=IA64_DOMAIN_SHADOW_BITMAP_OFFSET,r22
+	;;
+	ld8 r22=[r22]
+	;;
+	cmp.eq p6,p0=r0,r22	/* !shadow_bitmap? */
+(p6)	br.dptk.many dispatch_reflection
+
+	SAVE_MIN_WITH_COVER
+	alloc r14=ar.pfs,0,0,4,0
+	mov out0=cr.ifa
+	mov out1=cr.itir
+	mov out2=cr.isr
+	adds out3=16,sp
+
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i	// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i	// restore psr.i
+	adds r3=8,r2	// set up second base pointer
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_shadow_fault
 #else
 	/*
 	 * What we do here is to simply turn on the dirty bit in the PTE. We need to
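The assembly stub keeps the hot path cheap: only three outcomes are possible before any C code runs. A pseudo-C rendering of that dispatch (not from the patch; the struct definitions are stand-ins, and the three targets mirror dispatch_to_fault_handler, dispatch_reflection and ia64_shadow_fault):

    /* Sketch: what the new dirty_bit IVT stub decides before entering C. */
    struct domain_s { unsigned long *shadow_bitmap; };
    struct vcpu_s   { struct domain_s *domain; };

    enum target { FAULT_HANDLER, REFLECT, SHADOW_FAULT };

    static enum target dirty_bit_dispatch(unsigned cpl, struct vcpu_s *cur)
    {
        if (cpl == 0)
            return FAULT_HANDLER;          /* fault raised inside Xen itself */
        if (cur->domain->shadow_bitmap == 0)
            return REFLECT;                /* shadow mode off: old path */
        return SHADOW_FAULT;               /* log-dirty: call the C handler */
    }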
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/mm.c	Tue Jul 18 09:15:29 2006 +0200
@@ -170,6 +170,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #ifndef CONFIG_XEN_IA64_DOM0_VP
@@ -470,7 +471,7 @@ u64 translate_domain_pte(u64 pteval, u64
     pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
     pteval2 |= (pteval & _PAGE_ED);
     pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
-    pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
+    pteval2 |= (pteval & ~_PAGE_PPN_MASK);
     /*
      * Don't let non-dom0 domains map uncached addresses.  This can
      * happen when domU tries to touch i/o port space.  Also prevents
@@ -481,6 +482,18 @@ u64 translate_domain_pte(u64 pteval, u64
      */
     if (d != dom0 && (pteval2 & _PAGE_MA_MASK) != _PAGE_MA_NAT)
         pteval2 &= ~_PAGE_MA_MASK;
+
+    /* If shadow mode is enabled, virtualize the dirty bit. */
+    if (shadow_mode_enabled (d) && (pteval2 & _PAGE_D)) {
+        u64 mp_page = mpaddr >> PAGE_SHIFT;
+        pteval2 |= _PAGE_VIRT_D;
+
+        /* If the page is not already dirty, don't set the dirty bit.
+           This is a small optimization! */
+        if (mp_page < d->arch.shadow_bitmap_size * 8
+            && !test_bit (mp_page, d->arch.shadow_bitmap))
+            pteval2 = (pteval2 & ~_PAGE_D);
+    }
 
     return pteval2;
 }
@@ -1418,10 +1431,13 @@ guest_physmap_remove_page(struct domain
 
 //XXX sledgehammer.
 //    flush finer range.
-void
+static void
 domain_page_flush(struct domain* d, unsigned long mpaddr,
                   unsigned long old_mfn, unsigned long new_mfn)
 {
+    if (shadow_mode_enabled (d))
+        shadow_mark_page_dirty (d, mpaddr >> PAGE_SHIFT);
+
     domain_flush_vtlb_all();
 }
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/privop.c	Tue Jul 18 09:15:29 2006 +0200
@@ -687,7 +687,7 @@ priv_emulate(VCPU *vcpu, REGS *regs, UIN
         (void)vcpu_increment_iip(vcpu);
     }
     if (fault == IA64_ILLOP_FAULT)
-        printf("priv_emulate: priv_handle_op fails, isr=0x%lx\n",isr);
+        printf("priv_emulate: priv_handle_op fails, isr=0x%lx iip=%lx\n",isr, regs->cr_iip);
     return fault;
 }
diff -r 84c521570d8a -r 99f2a44bc495 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/arch/ia64/xen/vhpt.c	Tue Jul 18 09:15:29 2006 +0200
@@ -236,7 +236,7 @@ static void flush_tlb_vhpt_all (struct d
     local_flush_tlb_all ();
 }
 
-void domain_flush_destroy (struct domain *d)
+void domain_flush_tlb_vhpt (struct domain *d)
 {
     /* Very heavy... */
     on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
diff -r 84c521570d8a -r 99f2a44bc495 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/include/asm-ia64/domain.h	Tue Jul 18 09:15:29 2006 +0200
@@ -48,6 +48,9 @@ extern unsigned long domain_set_shared_i
    If sync_only is true, only synchronize I&D caches,
    if false, flush and invalidate caches. */
 extern void domain_cache_flush (struct domain *d, int sync_only);
+
+/* Control the shadow mode. */
+extern int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc);
 
 /* Cleanly crash the current domain with a message. */
 extern void panic_domain(struct pt_regs *, const char *, ...)
@@ -117,6 +120,16 @@ struct arch_domain {
     /* Address of fpswa_interface_t (placed in domain memory) */
     void *fpswa_inf;
 
+    /* Bitmap of shadow dirty bits.
+       Set iff shadow mode is enabled. */
+    u64 *shadow_bitmap;
+    /* Length (in bytes) of the shadow bitmap. */
+    unsigned long shadow_bitmap_size;
+    /* Number of bits set in the bitmap. */
+    atomic64_t shadow_dirty_count;
+    /* Number of faults. */
+    atomic64_t shadow_fault_count;
+
     struct last_vcpu last_vcpu[NR_CPUS];
 };
 
 #define INT_ENABLE_OFFSET(v) 		  \
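The translate_domain_pte() change above is what arms the logging: a page whose pfn is not yet set in the shadow bitmap gets its hardware Dirty bit stripped, with the guest's view preserved in _PAGE_VIRT_D, so the first store after a CLEAN round takes a dirty-bit fault into ia64_shadow_fault(). A sketch of that transformation in isolation (not from the patch; bit positions are illustrative):

    /* Sketch: the PTE rewrite that arms dirty logging. If the page is
     * not yet marked in the shadow bitmap, the real dirty bit is cleared
     * so the next store faults; PAGE_VIRT_D preserves what the guest
     * believes. */
    #define PAGE_D       (1UL << 6)   /* example bit positions */
    #define PAGE_VIRT_D  (1UL << 53)

    static unsigned long arm_dirty_logging(unsigned long pte,
                                           int already_dirty_in_bitmap)
    {
        if (pte & PAGE_D) {
            pte |= PAGE_VIRT_D;            /* remember the guest's dirty bit */
            if (!already_dirty_in_bitmap)
                pte &= ~PAGE_D;            /* force a fault on the next store */
        }
        return pte;
    }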
diff -r 84c521570d8a -r 99f2a44bc495 xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Tue Jul 18 09:15:29 2006 +0200
@@ -62,7 +62,12 @@
 #define _PAGE_D			(1 << _PAGE_D_BIT)	/* page dirty bit */
 #define _PAGE_PPN_MASK		(((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
 #define _PAGE_ED		(__IA64_UL(1) << 52)	/* exception deferral */
+#ifdef XEN
+#define _PAGE_VIRT_D		(__IA64_UL(1) << 53)	/* Virtual dirty bit */
+#define _PAGE_PROTNONE		0
+#else
 #define _PAGE_PROTNONE		(__IA64_UL(1) << 63)
+#endif
 
 /* Valid only for a PTE with the present bit cleared: */
 #define _PAGE_FILE		(1 << 1)	/* see swap & file pte remarks below */
diff -r 84c521570d8a -r 99f2a44bc495 xen/include/asm-ia64/shadow.h
--- a/xen/include/asm-ia64/shadow.h	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/include/asm-ia64/shadow.h	Tue Jul 18 09:15:29 2006 +0200
@@ -45,6 +45,24 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
 #endif
 
+static inline int shadow_mode_enabled (struct domain *d)
+{
+    return d->arch.shadow_bitmap != NULL;
+}
+
+static inline int
+shadow_mark_page_dirty (struct domain *d, unsigned long gpfn)
+{
+    if (gpfn < d->arch.shadow_bitmap_size * 8
+        && !test_and_set_bit (gpfn, d->arch.shadow_bitmap)) {
+        /* The page was not dirty. */
+        atomic64_inc (&d->arch.shadow_dirty_count);
+        return 1;
+    }
+    else
+        return 0;
+}
+
 #endif // _XEN_SHADOW_H
 
 /*
diff -r 84c521570d8a -r 99f2a44bc495 xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h	Tue Jul 18 08:27:56 2006 +0200
+++ b/xen/include/asm-ia64/tlbflush.h	Tue Jul 18 09:15:29 2006 +0200
@@ -22,8 +22,8 @@ void domain_flush_vtlb_all (void);
 /* Global range-flush of vTLB. */
 void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
 
-/* Final vTLB flush on every dirty cpus. */
-void domain_flush_destroy (struct domain *d);
+/* Flush vhpt and mTLB on all dirty cpus. */
+void domain_flush_tlb_vhpt (struct domain *d);
 
 /* Flush v-tlb on cpus set in mask for current domain. */
 void flush_tlb_mask(cpumask_t mask);
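shadow_mark_page_dirty() above is the single point where dirtiness is recorded, and the atomic test_and_set_bit is what lets the fault handler run without a lock on SMP: the dirty counter is only bumped by whichever CPU flips the bit first. A standalone model (not from the patch) with GCC's __atomic builtins standing in for Xen's test_and_set_bit:

    /* Standalone model of shadow_mark_page_dirty(): an atomic
     * test-and-set keeps bitmap and counter consistent without a lock. */
    #include <stdio.h>

    #define BPL (8 * sizeof(unsigned long))

    static unsigned long bitmap[16];       /* covers 16*BPL example pfns */
    static unsigned long dirty_count;

    static int mark_page_dirty(unsigned long gpfn)
    {
        unsigned long mask = 1UL << (gpfn % BPL);
        unsigned long old = __atomic_fetch_or(&bitmap[gpfn / BPL], mask,
                                              __ATOMIC_SEQ_CST);
        if (!(old & mask)) {
            __atomic_add_fetch(&dirty_count, 1, __ATOMIC_SEQ_CST);
            return 1;                      /* page was clean until now */
        }
        return 0;                          /* already dirty: fast path */
    }

    int main(void)
    {
        mark_page_dirty(42);
        mark_page_dirty(42);               /* second call is a no-op */
        printf("dirty pages: %lu\n", dirty_count);   /* 1 */
        return 0;
    }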