diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/hvm/hvm.c	Wed May 30 10:09:48 2007 -0500
@@ -559,7 +559,7 @@ static int __hvm_copy(void *buf, paddr_t
         if ( dir )
         {
             memcpy(p, buf, count); /* dir == TRUE: *to* guest */
-            mark_dirty(current->domain, mfn);
+            paging_mark_dirty(current->domain, mfn);
         }
         else
             memcpy(buf, p, count); /* dir == FALSE: *from guest */
diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/hvm/io.c	Wed May 30 10:09:48 2007 -0500
@@ -865,7 +865,7 @@ void hvm_io_assist(void)
     if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
     {
         gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        mark_dirty(d, gmfn);
+        paging_mark_dirty(d, gmfn);
     }
 
  out:
diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c	Wed May 30 10:09:48 2007 -0500
@@ -1013,8 +1013,7 @@ static int svm_do_nested_pgfault(paddr_t
         return 1;
     }
 
-    /* We should not reach here. Otherwise, P2M table is not correct.*/
-    return 0;
+    return p2m_fix_table(current->domain, gpa);
 }
 
 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/mm.c	Wed May 30 10:09:48 2007 -0500
@@ -1552,7 +1552,7 @@ int alloc_page_type(struct page_info *pa
 
     /* A page table is dirtied when its type count becomes non-zero. */
     if ( likely(owner != NULL) )
-        mark_dirty(owner, page_to_mfn(page));
+        paging_mark_dirty(owner, page_to_mfn(page));
 
     switch ( type & PGT_type_mask )
     {
@@ -1598,7 +1598,7 @@ void free_page_type(struct page_info *pa
     if ( unlikely(paging_mode_enabled(owner)) )
     {
         /* A page table is dirtied when its type count becomes zero. */
-        mark_dirty(owner, page_to_mfn(page));
+        paging_mark_dirty(owner, page_to_mfn(page));
 
         if ( shadow_mode_refcounts(owner) )
             return;
@@ -2053,7 +2053,7 @@ int do_mmuext_op(
             }
 
             /* A page is dirtied when its pin status is set. */
-            mark_dirty(d, mfn);
+            paging_mark_dirty(d, mfn);
 
             /* We can race domain destruction (domain_relinquish_resources). */
             if ( unlikely(this_cpu(percpu_mm_info).foreign != NULL) )
@@ -2085,7 +2085,7 @@ int do_mmuext_op(
                 put_page_and_type(page);
                 put_page(page);
                 /* A page is dirtied when its pin status is cleared. */
-                mark_dirty(d, mfn);
+                paging_mark_dirty(d, mfn);
             }
             else
             {
@@ -2420,7 +2420,7 @@ int do_mmu_update(
             set_gpfn_from_mfn(mfn, gpfn);
             okay = 1;
 
-            mark_dirty(FOREIGNDOM, mfn);
+            paging_mark_dirty(FOREIGNDOM, mfn);
 
             put_page(mfn_to_page(mfn));
             break;
@@ -2959,7 +2959,7 @@ long do_update_descriptor(u64 pa, u64 de
         break;
     }
 
-    mark_dirty(dom, mfn);
+    paging_mark_dirty(dom, mfn);
 
     /* All is good so make the update. */
     gdt_pent = map_domain_page(mfn);
diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/mm/hap/hap.c	Wed May 30 10:09:48 2007 -0500
@@ -385,6 +385,211 @@ void hap_destroy_monitor_table(struct vc
 }
 
 /************************************************/
+/* HAP LOG DIRTY SUPPORT */
+/************************************************/
+void hap_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    unsigned long pfn;
+    int do_locking;
+
+    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
+        return;
+
+    /* Although this is an externally visible function, we do not know
+     * whether the lock will be held when it is called (since it
+     * can be called from __hvm_copy during emulation).
+     * If the lock isn't held, take it for the duration of the call. */
+    do_locking = !hap_locked_by_me(d);
+    if ( do_locking )
+    {
+        hap_lock(d);
+        /* Check the mode again with the lock held */
+        if ( unlikely(!paging_mode_log_dirty(d)) )
+        {
+            hap_unlock(d);
+            return;
+        }
+    }
+
+    ASSERT(d->arch.paging.hap.dirty_bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here...
+     */
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return;
+
+    if ( likely(pfn < d->arch.paging.hap.dirty_bitmap_size) )
+    {
+        if ( !__test_and_set_bit(pfn, d->arch.paging.hap.dirty_bitmap) )
+        {
+            d->arch.paging.hap.dirty_count++;
+        }
+    }
+    else
+    {
+        HAP_PRINTK("hap_mark_dirty OOR! "
+                   "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                   "owner=%d c=%08x t=%" PRtype_info "\n",
+                   mfn_x(gmfn),
+                   pfn,
+                   d->arch.paging.hap.dirty_bitmap_size,
+                   d->domain_id,
+                   (page_get_owner(mfn_to_page(gmfn))
+                    ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                    : -1),
+                   mfn_to_page(gmfn)->count_info,
+                   mfn_to_page(gmfn)->u.inuse.type_info);
+    }
+
+    if ( do_locking ) hap_unlock(d);
+}
+
+int hap_alloc_log_dirty_bitmap(struct domain *d)
+{
+    ASSERT(d->arch.paging.hap.dirty_bitmap == NULL);
+
+    d->arch.paging.hap.dirty_bitmap_size =
+        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
+    d->arch.paging.hap.dirty_bitmap =
+        xmalloc_array(unsigned long,
+                      d->arch.paging.hap.dirty_bitmap_size / BITS_PER_LONG);
+    if ( d->arch.paging.hap.dirty_bitmap == NULL )
+    {
+        d->arch.paging.hap.dirty_bitmap_size = 0;
+        return -ENOMEM;
+    }
+
+    memset(d->arch.paging.hap.dirty_bitmap, 0,
+           d->arch.paging.hap.dirty_bitmap_size/8);
+
+    return 0;
+}
+
+void hap_free_log_dirty_bitmap(struct domain *d)
+{
+    d->arch.paging.hap.dirty_bitmap_size = 0;
+    if ( d->arch.paging.hap.dirty_bitmap )
+    {
+        xfree(d->arch.paging.hap.dirty_bitmap);
+        d->arch.paging.hap.dirty_bitmap = NULL;
+    }
+}
+
+int hap_log_dirty_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    hap_lock(d);
+
+    ret = hap_alloc_log_dirty_bitmap(d);
+    if ( ret != 0 )
+    {
+        hap_free_log_dirty_bitmap(d);
+        goto out;
+    }
+
+    /* turn on PG_log_dirty bit in paging mode */
+    d->arch.paging.mode |= PG_log_dirty;
+
+    /* mark physical memory as not writable */
+    p2m_set_l1e_flags(d, __PAGE_HYPERVISOR_NOT_WRITABLE|_PAGE_USER);
+    flush_tlb_all_pge();
+
+ out:
+    hap_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+int hap_log_dirty_disable(struct domain *d)
+{
+    domain_pause(d);
+    hap_lock(d);
+    if ( paging_mode_log_dirty(d) )
+        hap_free_log_dirty_bitmap(d);
+
+    /* turn off PG_log_dirty bit in paging mode */
+    d->arch.paging.mode &= ~PG_log_dirty;
+
+    /* recover P2M table to normal mode */
+    p2m_set_l1e_flags(d, __PAGE_HYPERVISOR|_PAGE_USER);
+
+    hap_unlock(d);
+    domain_unpause(d);
+
+    return 1;
+}
+
+int hap_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
+{
+    int i, ret = 0, clean = 0, peek = 1;
+
+    domain_pause(d);
+    hap_lock(d);
+
+    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+    sc->stats.fault_count = d->arch.paging.hap.fault_count;
+    sc->stats.dirty_count = d->arch.paging.hap.dirty_count;
+
+    if ( clean )
+    {
+        d->arch.paging.hap.fault_count = 0;
+        d->arch.paging.hap.dirty_count = 0;
+    }
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) )
+        peek = 0; /* caller just wants to clean the state or access stats */
+
+    if ( (peek || clean) && (d->arch.paging.hap.dirty_bitmap == NULL) ) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if ( sc->pages > d->arch.paging.hap.dirty_bitmap_size )
+        sc->pages = d->arch.paging.hap.dirty_bitmap_size;
+
+#define CHUNK (8*1024) /* Transfer and clean in 1KB chunks for L1 cache */
+    for ( i = 0; i < sc->pages; i += CHUNK ) {
+        int bytes = ((((sc->pages - i) > CHUNK)
CHUNK + : (sc->pages - i)) + 7) / 8; + + if ( likely(peek) ) { + if ( copy_to_guest_offset( + sc->dirty_bitmap, i/8, + (uint8_t *)d->arch.paging.hap.dirty_bitmap + (i/8), bytes) ) + { + ret = -EFAULT; + goto out; + } + } + + if ( clean ) + memset((uint8_t *)d->arch.paging.hap.dirty_bitmap + (i/8), 0, bytes); + } +#undef CHUNK + + /* mark physical memory as not writable */ + if ( clean ) { + p2m_set_l1e_flags(d, __PAGE_HYPERVISOR_NOT_WRITABLE|_PAGE_USER); + flush_tlb_all_pge(); + } + + + out: + hap_unlock(d); + domain_unpause(d); + return ret; +} +/************************************************/ /* HAP DOMAIN LEVEL FUNCTIONS */ /************************************************/ void hap_domain_init(struct domain *d) @@ -504,6 +709,19 @@ int hap_domctl(struct domain *d, xen_dom } switch ( sc->op ) { + case XEN_DOMCTL_SHADOW_OP_OFF: + if ( paging_mode_log_dirty(d) ) + if ( (rc = hap_log_dirty_disable(d)) != 0 ) + return rc; + return 0; + + case XEN_DOMCTL_SHADOW_OP_CLEAN: + case XEN_DOMCTL_SHADOW_OP_PEEK: + return hap_log_dirty_op(d, sc); + + case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: + return hap_log_dirty_enable(d); + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: hap_lock(d); rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); @@ -669,7 +887,6 @@ hap_write_p2m_entry(struct vcpu *v, unsi hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, l1_pgentry_t new, unsigned int level) { - hap_lock(v->domain); safe_write_pte(p, new); #if CONFIG_PAGING_LEVELS == 3 /* install P2M in monitor table for PAE Xen */ @@ -680,7 +897,6 @@ hap_write_p2m_entry(struct vcpu *v, unsi } #endif - hap_unlock(v->domain); } /* Entry points into this mode of the hap code. */ diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Tue May 29 06:02:39 2007 -0500 +++ b/xen/arch/x86/mm/p2m.c Wed May 30 10:09:48 2007 -0500 @@ -169,7 +169,7 @@ p2m_next_level(struct domain *d, mfn_t * // Returns 0 on error (out of memory) static int -set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); @@ -213,7 +213,7 @@ set_p2m_entry(struct domain *d, unsigned d->arch.p2m.max_mapped_pfn = gfn; if ( mfn_valid(mfn) ) - entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); + entry_content = l1e_from_pfn(mfn_x(mfn), l1e_flags); else entry_content = l1e_empty(); @@ -278,7 +278,7 @@ int p2m_alloc_table(struct domain *d, p2m_unlock(d); return -ENOMEM; } -list_add_tail(&p2m_top->list, &d->arch.p2m.pages); + list_add_tail(&p2m_top->list, &d->arch.p2m.pages); p2m_top->count_info = 1; p2m_top->u.inuse.type_info = @@ -297,8 +297,8 @@ list_add_tail(&p2m_top->list, &d->arch.p /* Initialise physmap tables for slot zero. Other code assumes this. 
     gfn = 0;
-mfn = _mfn(INVALID_MFN);
-    if ( !set_p2m_entry(d, gfn, mfn) )
+    mfn = _mfn(INVALID_MFN);
+    if ( !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
         goto error;
 
     for ( entry = d->page_list.next;
@@ -316,7 +316,7 @@ mfn = _mfn(INVALID_MFN);
             (gfn != 0x55555555L)
 #endif
             && gfn != INVALID_M2P_ENTRY
-            && !set_p2m_entry(d, gfn, mfn) )
+            && !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
             goto error;
     }
 
@@ -626,7 +626,7 @@ p2m_remove_page(struct domain *d, unsign
 
     ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
     //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -659,7 +659,7 @@ guest_physmap_add_page(struct domain *d,
     omfn = gfn_to_mfn(d, gfn);
     if ( mfn_valid(omfn) )
    {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -685,13 +685,87 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn));
+    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
     set_gpfn_from_mfn(mfn, gfn);
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
+/* This function goes through the P2M table and modifies the flags of each
+ * l1e. The physical base address in the l1e is left intact. It can be used
+ * for special purposes, such as marking physical memory as Not-Writable for
+ * tracking dirty pages during live migration.
+ */
+int p2m_set_l1e_flags(struct domain *d, u32 l1e_flags)
+{
+    mfn_t mfn;
+    struct list_head *entry;
+    struct page_info *page;
+    unsigned long gfn;
+
+    p2m_lock(d);
+
+    if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
+    {
+        P2M_ERROR("p2m table has not been allocated for this domain yet!\n");
+        p2m_unlock(d);
+        return -EINVAL;
+    }
+
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        page = list_entry(entry, struct page_info, list);
+        mfn = page_to_mfn(page);
+        gfn = get_gpfn_from_mfn(mfn_x(mfn));
+        if (
+#ifdef __x86_64__
+            (gfn != 0x5555555555555555L)
+#else
+            (gfn != 0x55555555L)
+#endif
+            && gfn != INVALID_M2P_ENTRY
+            && !set_p2m_entry(d, gfn, mfn, l1e_flags) )
+            goto error;
+    }
+
+    p2m_unlock(d);
+    return 0;
+
+ error:
+    P2M_PRINTK("failed to change l1e flags of p2m table, gfn=%05lx, mfn=%"
+               PRI_mfn "\n", gfn, mfn_x(mfn));
+    p2m_unlock(d);
+    return -ENOMEM;
+}
+
+/* This function handles P2M page faults by fixing up the l1e flags with the
+ * correct values. It also calls paging_mark_dirty() to record the dirty
+ * pages.
+ */
+int p2m_fix_table(struct domain *d, paddr_t gpa)
+{
+    unsigned long gfn;
+    mfn_t mfn;
+
+    p2m_lock(d);
+
+    gfn = gpa >> PAGE_SHIFT;
+
+    mfn = gfn_to_mfn(d, gfn);
+    if ( mfn_valid(mfn) )
+    {
+        set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER);
+    }
+
+    paging_mark_dirty(d, mfn_x(mfn));
+
+    p2m_unlock(d);
+
+    return 1; /* successful */
+}
 
 /*
  * Local variables:
diff -r d07ecb861009 -r 41cfce9eeb10 xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Tue May 29 06:02:39 2007 -0500
+++ b/xen/arch/x86/mm/paging.c	Wed May 30 10:09:48 2007 -0500
@@ -98,6 +98,18 @@ int paging_enable(struct domain *d, u32
         return hap_enable(d, mode | PG_HAP_enable);
     else
         return shadow_enable(d, mode | PG_SH_enable);
+}
+
+/* Mark a dirty page for log dirty bitmap during live migration */
+void paging_mark_dirty(struct domain *d, unsigned long gmfn)
+{
+    if ( likely(!paging_mode_log_dirty(d)) )
+        return;
+
+    if ( opt_hap_enabled && is_hvm_domain(d) )
+        hap_mark_dirty(d, _mfn(gmfn));
+    else
+        sh_mark_dirty(d, _mfn(gmfn));
 }
 
 /* Print paging-assistance info to the console */
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/domain.h	Wed May 30 10:09:48 2007 -0500
@@ -129,6 +129,14 @@ struct hap_domain {
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
+
+    /* hap log-dirty bitmap */
+    unsigned long    *dirty_bitmap;
+    unsigned int      dirty_bitmap_size;  /* in pages, bit per page */
+
+    /* hap log-dirty mode statistics */
+    unsigned int      fault_count;
+    unsigned int      dirty_count;
 };
 
 /************************************************/
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/grant_table.h
--- a/xen/include/asm-x86/grant_table.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/grant_table.h	Wed May 30 10:09:48 2007 -0500
@@ -31,7 +31,7 @@ int destroy_grant_host_mapping(
 #define gnttab_shared_gmfn(d, t, i)                     \
     (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
 
-#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
+#define gnttab_mark_dirty(d, f) paging_mark_dirty((d), (f))
 
 static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
 {
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/hap.h
--- a/xen/include/asm-x86/hap.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/hap.h	Wed May 30 10:09:48 2007 -0500
@@ -104,6 +104,7 @@ int hap_enable(struct domain *d, u32 m
 int hap_enable(struct domain *d, u32 mode);
 void hap_final_teardown(struct domain *d);
 void hap_teardown(struct domain *d);
+void hap_mark_dirty(struct domain *d, mfn_t gmfn);
 void hap_vcpu_init(struct vcpu *v);
 
 extern struct paging_mode hap_paging_real_mode;
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/p2m.h	Wed May 30 10:09:48 2007 -0500
@@ -129,6 +129,11 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
+/* Configure l1e flags of P2M table */
+int p2m_set_l1e_flags(struct domain *d, u32 flags);
+
+/* Fix P2M table when page faults are related to P2M table entry */
+int p2m_fix_table(struct domain *d, paddr_t gpa);
 
 #endif /* _XEN_P2M_H */
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/page.h	Wed May 30 10:09:48 2007 -0500
@@ -334,6 +334,8 @@ void setup_idle_pagetable(void);
     (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define __PAGE_HYPERVISOR_NOCACHE \
     (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOT_WRITABLE \
+    (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
 
 #ifndef __ASSEMBLY__
 
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/paging.h	Wed May 30 10:09:48 2007 -0500
@@ -164,6 +164,8 @@ void paging_final_teardown(struct domain
  * creation. */
 int paging_enable(struct domain *d, u32 mode);
 
+/* Mark dirty pages during live migration */
+void paging_mark_dirty(struct domain *d, unsigned long gmfn);
 
 /* Page fault handler
  * Called from pagefault handler in Xen, and from the HVM trap handlers
diff -r d07ecb861009 -r 41cfce9eeb10 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h	Tue May 29 06:02:39 2007 -0500
+++ b/xen/include/asm-x86/shadow.h	Wed May 30 10:09:48 2007 -0500
@@ -78,13 +78,6 @@ void shadow_final_teardown(struct domain
 /* Mark a page as dirty in the log-dirty bitmap: called when Xen
  * makes changes to guest memory on its behalf. */
 void sh_mark_dirty(struct domain *d, mfn_t gmfn);
-/* Cleaner version so we don't pepper shadow_mode tests all over the place */
-static inline void mark_dirty(struct domain *d, unsigned long gmfn)
-{
-    if ( unlikely(shadow_mode_log_dirty(d)) )
-        /* See the comment about locking in sh_mark_dirty */
-        sh_mark_dirty(d, _mfn(gmfn));
-}
 
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
  * Called to initialize paging structures if the paging mode
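
A couple of the calculations in the hap.c hunk are easy to misread when reviewing, so here is a standalone sanity check. It mirrors the bitmap sizing in hap_alloc_log_dirty_bitmap() (one bit per guest pfn, rounded up to a whole number of longs) and the per-pass byte count in the CHUNK loop of hap_log_dirty_op(). The demo_* names and the example pfn count are illustrative only, not part of the patch.

/* Standalone arithmetic check; demo_* names are illustrative only. */
#include <stdio.h>

#define BITS_PER_LONG (sizeof(unsigned long) * 8)
#define CHUNK (8 * 1024)   /* as in the patch: 8192 bits == 1KB per pass */

/* Mirrors hap_alloc_log_dirty_bitmap(): bitmap size in bits (i.e. pfns),
 * rounded up to a multiple of BITS_PER_LONG. */
static unsigned long demo_bitmap_size(unsigned long max_gpfn)
{
    return (max_gpfn + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
}

/* Mirrors the copy loop in hap_log_dirty_op(): number of bytes moved on the
 * pass that starts at bit 'i' of a 'pages'-bit bitmap. */
static unsigned long demo_chunk_bytes(unsigned long pages, unsigned long i)
{
    return (((pages - i) > CHUNK ? CHUNK : (pages - i)) + 7) / 8;
}

int main(void)
{
    unsigned long pages = demo_bitmap_size(100000);   /* ~390MB guest */

    printf("bitmap covers %lu pfns (%lu longs)\n",
           pages, (unsigned long)(pages / BITS_PER_LONG));
    for ( unsigned long i = 0; i < pages; i += CHUNK )
        printf("pass at bit %lu copies %lu bytes\n",
               i, demo_chunk_bytes(pages, i));
    return 0;
}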
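
Taken together, the new paths implement the usual log-dirty protocol for HAP guests: XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY write-protects the P2M via p2m_set_l1e_flags(), subsequent guest writes fault into p2m_fix_table(), which restores write access and records the page with paging_mark_dirty(), and the toolstack harvests and resets the bitmap with the CLEAN/PEEK ops before finally issuing OP_OFF. The sketch below only illustrates that call order from the control-domain side; shadow_op() and the logdirty_op enum are hypothetical stand-ins (not libxc or Xen APIs) for a domctl wrapper and for the XEN_DOMCTL_SHADOW_OP_* constants dispatched in hap_domctl() above.

/* Illustrative only: hypothetical wrapper names, not part of the patch. */
#include <string.h>

/* Stand-ins for XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY / _CLEAN / _OFF;
 * the real constants live in Xen's public domctl header. */
enum logdirty_op { LD_ENABLE_LOGDIRTY, LD_CLEAN, LD_OFF };

/* Hypothetical domctl wrapper: a real tool would marshal these arguments
 * into struct xen_domctl_shadow_op (sc->op, sc->pages, sc->dirty_bitmap). */
static int shadow_op(unsigned int domid, enum logdirty_op op,
                     unsigned long *bitmap, unsigned long pages)
{
    (void)domid; (void)op; (void)bitmap; (void)pages;
    /* ...issue the shadow-op domctl here... */
    return 0;
}

int main(void)
{
    unsigned long bitmap[1024] = { 0 };   /* space for 1024*BITS_PER_LONG pfns */
    unsigned int domid = 1;               /* example domain id */

    /* hap_log_dirty_enable(): allocate the bitmap, write-protect the P2M. */
    if ( shadow_op(domid, LD_ENABLE_LOGDIRTY, NULL, 0) )
        return 1;

    for ( int round = 0; round < 4; round++ )
    {
        /* hap_log_dirty_op() with CLEAN: copy out the dirty bits, zero them,
         * and re-protect guest memory; writes after this point fault into
         * p2m_fix_table(), which marks the faulting page dirty again. */
        if ( shadow_op(domid, LD_CLEAN, bitmap, sizeof(bitmap) * 8) )
            return 1;
        /* ...transmit the pages whose bits are set... */
        memset(bitmap, 0, sizeof(bitmap));
    }

    /* hap_log_dirty_disable(): free the bitmap, restore a writable P2M. */
    return shadow_op(domid, LD_OFF, NULL, 0);
}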