# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Node ID b90fff753ca191c329c350184ffeae3990670b77
# Parent  3cee9325a6c60a057f0ed4a95b050e012b64ae09

Add TLB insert tracking so that the vTLB can be flushed for a
finer-grained virtual address range when a page is unmapped from a
domain.  This functionality is enabled with a compile-time option,
xen_ia64_tlb_track=y.

PATCHNAME: tlb_track

Signed-off-by: Isaku Yamahata

diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/Rules.mk	Mon Jul 24 23:18:41 2006 +0900
@@ -39,6 +39,9 @@ ifeq ($(xen_ia64_dom0_virtual_physical),
 ifeq ($(xen_ia64_dom0_virtual_physical),y)
 CFLAGS	+= -DCONFIG_XEN_IA64_DOM0_VP
 endif
+ifeq ($(xen_ia64_tlb_track),y)
+CFLAGS	+= -DCONFIG_XEN_IA64_TLB_TRACK
+endif
 ifeq ($(no_warns),y)
 CFLAGS	+= -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/Makefile
--- a/xen/arch/ia64/xen/Makefile	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/Makefile	Mon Jul 24 23:18:41 2006 +0900
@@ -27,3 +27,4 @@ obj-y += privop_stat.o
 obj-y += privop_stat.o
 
 obj-$(crash_debug) += gdbstub.o
+obj-$(xen_ia64_tlb_track) += tlb_track.o
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/domain.c	Mon Jul 24 23:18:41 2006 +0900
@@ -60,6 +60,9 @@
 #include
 #include
 #include
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+#include <asm/tlb_track.h>
+#endif
 
 #ifndef CONFIG_XEN_IA64_DOM0_VP
 #define CONFIG_DOMAIN0_CONTIGUOUS
@@ -351,6 +354,10 @@ int arch_domain_create(struct domain *d)
 	if (is_idle_domain(d))
 	    return 0;
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+	if (tlb_track_create(d) < 0)
+	    goto fail_nomem;
+#endif
 	d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
 	if (d->shared_info == NULL)
 	    goto fail_nomem;
@@ -389,6 +396,9 @@ void arch_domain_destroy(struct domain *
 	if (d->shared_info != NULL)
 	    free_xenheap_pages(d->shared_info, get_order_from_shift(XSI_SHIFT));
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+	tlb_track_destroy(d);
+#endif
 	domain_flush_destroy (d);
 
 	deallocate_rid_range(d);
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/faults.c
--- a/xen/arch/ia64/xen/faults.c	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/faults.c	Mon Jul 24 23:18:41 2006 +0900
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <asm/p2m_entry.h>
 #include
 #include
 
@@ -202,8 +203,15 @@ void ia64_do_page_fault (unsigned long a
 	fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
 	if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
 		struct p2m_entry entry;
-		pteval = translate_domain_pte(pteval, address, itir, &logps, &entry);
-		vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps);
+		unsigned long m_pteval;
+		m_pteval = translate_domain_pte(pteval, address, itir, &logps, &entry);
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
+		vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4,
+		                 address, m_pteval, pteval, logps);
+#else
+		vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4,
+		                 address, m_pteval, pteval, logps, &entry);
+#endif
 		if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) ||
 		    p2m_entry_retry(&entry)) {
 			/* dtlb has been purged in-between.  This dtlb was
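A note on the IorD argument used in the hunk above: previously mp_pte == -1UL
suppressed caching the translation in the one-entry TR; after this patch bit 2
of IorD carries that meaning, and callers always pass the real guest pte as
mp_pte so the tracker can see it.  A hedged sketch of the bit layout as used
here (these macro names are illustrative only and do not exist in the patch):

/* Illustrative names only -- not defined anywhere in this patch. */
#define ITC_BIT_I      0x1   /* insert into the instruction vTLB            */
#define ITC_BIT_D      0x2   /* insert into the data vTLB                   */
#define ITC_BIT_NO_TR  0x4   /* do not cache the entry in the one-entry TR  */

/* ia64_do_page_fault() above therefore passes:
 *   data access:        ITC_BIT_D | ITC_BIT_NO_TR  == (2 | 4)
 *   instruction access: ITC_BIT_I | ITC_BIT_NO_TR  == (1 | 4)
 */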
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/mm.c	Mon Jul 24 23:18:41 2006 +0900
@@ -170,13 +170,14 @@
 #include
 #include
 #include
+#include <asm/tlb_track.h>
 #include
 
 #ifndef CONFIG_XEN_IA64_DOM0_VP
 #define CONFIG_DOMAIN0_CONTIGUOUS
 #else
-static void domain_page_flush(struct domain* d, unsigned long mpaddr,
-                              unsigned long old_mfn, unsigned long new_mfn);
+static void domain_page_flush(struct domain* d,
+                              volatile pte_t* ptep, pte_t old_pte);
 #endif
 
 static struct domain *dom_xen, *dom_io;
@@ -718,6 +719,19 @@ void *domain_mpa_to_imva(struct domain *
 }
 #endif
 
+static unsigned long
+assign_flags_to_pteflags(unsigned long flags)
+{
+    unsigned long pteflags =
+        (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    if (flags & ASSIGN_tlb_track) {
+        pteflags |= _PAGE_TLB_TRACKING;
+    }
+#endif
+    return pteflags;
+}
+
 /* Allocate a new page for domain and map it to the specified metaphysical
    address.  */
 static struct page_info *
@@ -811,7 +825,7 @@ assign_new_domain0_page(struct domain *d
 }
 
 /* map a physical address to the specified metaphysical addr */
-// flags: currently only ASSIGN_readonly
+// flags: ASSIGN_xxx
 // This is called by assign_domain_mmio_page().
 // So accessing to pte is racy.
 void
@@ -823,13 +837,13 @@ __assign_domain_page(struct domain *d,
     pte_t old_pte;
     pte_t new_pte;
     pte_t ret_pte;
-    unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
+    unsigned long pteflags = assign_flags_to_pteflags(flags);
 
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
     old_pte = __pte(0);
     new_pte = pfn_pte(physaddr >> PAGE_SHIFT,
-                      __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags));
+                      __pgprot(__DIRTY_BITS | _PAGE_PL_2 | pteflags));
     ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte);
     if (pte_val(ret_pte) == pte_val(old_pte))
         smp_mb();
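For orientation, this is how the new flag is meant to compose at a call site;
a minimal sketch using only names introduced by this patch (the grant-mapping
path later in this file does exactly this):

/* Sketch: request tracking for a writable mapping.  ASSIGN_tlb_track is
 * only defined when CONFIG_XEN_IA64_TLB_TRACK is enabled, hence the #ifdef. */
unsigned long flags = ASSIGN_writable;
#ifdef CONFIG_XEN_IA64_TLB_TRACK
flags |= ASSIGN_tlb_track;   /* assign_flags_to_pteflags() turns this
                                into _PAGE_TLB_TRACKING in the pte */
#endif
assign_domain_page_replace(d, gpaddr, mfn, flags);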
@@ -945,7 +959,7 @@ assign_domain_mach_page(struct domain *d
 // caller must call set_gpfn_from_mfn() before call if necessary.
 // because set_gpfn_from_mfn() result must be visible before pte xchg
 // caller must use memory barrier. NOTE: xchg has acquire semantics.
-// flags: currently only ASSIGN_readonly
+// flags: ASSIGN_xxx
 static void
 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
                            unsigned long mfn, unsigned long flags)
@@ -954,11 +968,11 @@ assign_domain_page_replace(struct domain
     volatile pte_t* pte;
     pte_t old_pte;
     pte_t npte;
-    unsigned long arflags = (flags & ASSIGN_readonly)?
-                            _PAGE_AR_R: _PAGE_AR_RWX;
+    unsigned long pteflags = assign_flags_to_pteflags(flags);
 
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
     // update pte
-    npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags));
+    npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | pteflags));
     old_pte = ptep_xchg(mm, mpaddr, pte, npte);
     if (pte_mem(old_pte)) {
         unsigned long old_mfn = pte_pfn(old_pte);
@@ -978,7 +992,7 @@ assign_domain_page_replace(struct domain
             set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
         }
 
-        domain_page_flush(d, mpaddr, old_mfn, mfn);
+        domain_page_flush(d, pte, old_pte);
 
         try_to_clear_PGC_allocate(d, old_page);
         put_page(old_page);
@@ -997,29 +1011,29 @@ assign_domain_page_cmpxchg_rel(struct do
     struct mm_struct *mm = &d->arch.mm;
     volatile pte_t* pte;
     unsigned long old_mfn;
-    unsigned long old_arflags;
+    unsigned long old_pteflags;
     pte_t old_pte;
     unsigned long new_mfn;
-    unsigned long new_arflags;
+    unsigned long new_pteflags;
     pte_t new_pte;
     pte_t ret_pte;
 
     pte = lookup_alloc_domain_pte(d, mpaddr);
 
 again:
-    old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
+    old_pteflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
     old_mfn = page_to_mfn(old_page);
-    old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
+    old_pte = pfn_pte(old_mfn, __pgprot(old_pteflags));
     if (!pte_present(old_pte)) {
-        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n",
-                __func__, pte_val(old_pte), old_arflags, old_mfn);
+        DPRINTK("%s: old_pte 0x%lx old_pteflags 0x%lx old_mfn 0x%lx\n",
+                __func__, pte_val(old_pte), old_pteflags, old_mfn);
         return -EINVAL;
     }
 
-    new_arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
+    new_pteflags = assign_flags_to_pteflags(flags);
     new_mfn = page_to_mfn(new_page);
     new_pte = pfn_pte(new_mfn,
-                      __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_arflags));
+                      __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_pteflags));
 
     // update pte
     ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
@@ -1028,10 +1042,10 @@ assign_domain_page_cmpxchg_rel(struct do
             goto again;
         }
 
-        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
+        DPRINTK("%s: old_pte 0x%lx old_pteflags 0x%lx old_mfn 0x%lx "
                 "ret_pte 0x%lx ret_mfn 0x%lx\n",
                 __func__,
-                pte_val(old_pte), old_arflags, old_mfn,
+                pte_val(old_pte), old_pteflags, old_mfn,
                 pte_val(ret_pte), pte_pfn(ret_pte));
         return -EINVAL;
     }
@@ -1043,7 +1057,7 @@ assign_domain_page_cmpxchg_rel(struct do
 
     set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
 
-    domain_page_flush(d, mpaddr, old_mfn, new_mfn);
+    domain_page_flush(d, pte, old_pte);
 
     put_page(old_page);
     return 0;
@@ -1111,7 +1125,7 @@ zap_domain_page_one(struct domain *d, un
         set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
     }
 
-    domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
+    domain_page_flush(d, pte, old_pte);
 
     if (page_get_owner(page) != NULL) {
         try_to_clear_PGC_allocate(d, page);
@@ -1199,8 +1213,12 @@ create_grant_host_mapping(unsigned long
     BUG_ON(ret == 0);
     BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
            get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
-    assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
-                                              ASSIGN_readonly: ASSIGN_writable);
+    assign_domain_page_replace(d, gpaddr, mfn,
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+                               ASSIGN_tlb_track |
+#endif
+                               ((flags & GNTMAP_readonly) ?
+                                ASSIGN_readonly: ASSIGN_writable));
     return GNTST_okay;
 }
@@ -1254,7 +1272,7 @@ destroy_grant_host_mapping(unsigned long
     }
     BUG_ON(pte_pfn(old_pte) != mfn);
 
-    domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
+    domain_page_flush(d, pte, old_pte);
 
     page = mfn_to_page(mfn);
     BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
@@ -1418,11 +1436,38 @@ guest_physmap_remove_page(struct domain
 
 //XXX sledgehammer.
 //    flush finer range.
-void
-domain_page_flush(struct domain* d, unsigned long mpaddr,
-                  unsigned long old_mfn, unsigned long new_mfn)
-{
+static void
+domain_page_flush(struct domain* d, volatile pte_t* ptep, pte_t old_pte)
+{
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
     domain_flush_vtlb_all();
+#else
+    struct tlb_track_entry* entry;
+    switch (tlb_track_search_and_remove(d->arch.tlb_track,
+                                        ptep, old_pte, &entry)) {
+    case TLB_TRACK_NOT_TRACKED:
+        //DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
+        domain_flush_vtlb_all();
+        break;
+    case TLB_TRACK_NOT_FOUND:
+        // do nothing
+        //DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
+        break;
+    case TLB_TRACK_FOUND:
+        //DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
+        domain_flush_vtlb_track_entry(d, entry);
+        tlb_track_free_entry(d->arch.tlb_track, entry);
+        break;
+    case TLB_TRACK_MANY:
+        DPRINTK("%s TLB_TRACK_MANY\n", __func__);
+        domain_flush_vtlb_all();
+        break;
+    case TLB_TRACK_AGAIN:
+        DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
+        BUG();
+        break;
+    }
+#endif
 }
 
 int
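To summarize the dispatch just added to domain_page_flush(), the mapping from
tracker result to flush action is:

/* tlb_track_search_and_remove() result -> action in domain_page_flush():
 *
 *   TLB_TRACK_NOT_TRACKED  pte was never marked _PAGE_TLB_TRACKING;
 *                          fall back to domain_flush_vtlb_all()
 *   TLB_TRACK_NOT_FOUND    tracked, but no TLB insert ever happened;
 *                          nothing to purge
 *   TLB_TRACK_FOUND        exactly one tracked insert; purge only
 *                          entry->vaddr under entry->rid
 *   TLB_TRACK_MANY         inserted at several addresses; full flush
 *   TLB_TRACK_AGAIN        only ever produced on the insert side, so
 *                          the unmap path BUG()s on it
 */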
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/vcpu.c	Mon Jul 24 23:18:41 2006 +0900
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <asm/tlb_track.h>
 
 /* FIXME: where these declarations should be there ? */
 extern void getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs);
@@ -2003,7 +2004,11 @@ IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 s
  VCPU translation cache access routines
 **************************************************************************/
 
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
 void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
+#else
+void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry)
+#endif
 {
 	unsigned long psr;
 	unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
@@ -2017,6 +2022,9 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
 
 #ifdef CONFIG_XEN_IA64_DOM0_VP
 	BUG_ON(logps > PAGE_SHIFT);
+#endif
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+	vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
 #endif
 	psr = ia64_clear_ic();
 	ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
@@ -2035,7 +2043,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
 	//   PAGE_SIZE mapping in the vhpt for now, else purging is complicated
 	else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
 #endif
-	if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
+	if (IorD & 0x4) // don't place in 1-entry TLB
 		return;
 	if (IorD & 0x1) {
 		vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
@@ -2060,7 +2068,11 @@ again:
 	pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
 	if (!pteval) return IA64_ILLOP_FAULT;
 	if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
 	vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
+#else
+	vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps,&entry);
+#endif
 	if (swap_rr0) set_metaphysical_rr0();
 	if (p2m_entry_retry(&entry)) {
 		vcpu_flush_tlb_vhpt_range(ifa, logps);
@@ -2083,7 +2095,11 @@ again:
 	pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
 	if (!pteval) return IA64_ILLOP_FAULT;
 	if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
 	vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
+#else
+	vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps,&entry);
+#endif
 	if (swap_rr0) set_metaphysical_rr0();
 	if (p2m_entry_retry(&entry)) {
 		vcpu_flush_tlb_vhpt_range(ifa, logps);
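The extra p2m_entry argument forces an #ifdef at every call site above.
Purely as a hedged cleanup suggestion, and not part of this patch, a single
wrapper macro would keep the call sites uniform:

/* Hypothetical convenience macro -- not defined in this patch.  The
 * non-tracking branch simply drops the entry argument. */
#ifdef CONFIG_XEN_IA64_TLB_TRACK
#define VCPU_ITC_NO_SRLZ(vcpu, IorD, vaddr, pte, mp_pte, logps, entry) \
    vcpu_itc_no_srlz((vcpu), (IorD), (vaddr), (pte), (mp_pte), (logps), (entry))
#else
#define VCPU_ITC_NO_SRLZ(vcpu, IorD, vaddr, pte, mp_pte, logps, entry) \
    vcpu_itc_no_srlz((vcpu), (IorD), (vaddr), (pte), (mp_pte), (logps))
#endif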
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/arch/ia64/xen/vhpt.c	Mon Jul 24 23:18:41 2006 +0900
@@ -227,6 +227,48 @@ void domain_flush_vtlb_range (struct dom
 	ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
 }
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+#include <asm/tlb_track.h>
+void
+domain_flush_vtlb_track_entry(struct domain* d,
+                              const struct tlb_track_entry* entry)
+{
+    unsigned long old_rid;
+    struct vcpu* v;
+    int cpu;
+
+    //tlb_track_entry_printf(entry);
+    vcpu_get_rr(current, 0, &old_rid);
+    vcpu_set_rr(current, 0, entry->rid);
+
+    for_each_vcpu(d, v) {
+        if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+            continue;
+        if (!vcpu_isset(v->vcpu_id, entry->vcpu_dirty_mask))
+            continue;
+
+        /* Purge TC entries.
+           FIXME: clear only if match. */
+        vcpu_purge_tr_entry(&PSCBX(v, dtlb));
+        vcpu_purge_tr_entry(&PSCBX(v, itlb));
+    }
+    smp_mb();
+
+    for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
+        //printk("%s:%d cpu %d\n", __func__, __LINE__, cpu);
+        /* Invalidate VHPT entries. */
+        cpu_flush_vhpt_range(cpu, entry->vaddr, PAGE_SIZE);
+    }
+    // ptc.ga has release semantics.
+
+    /* ptc.ga */
+    ia64_global_tlb_purge(entry->vaddr, entry->vaddr + PAGE_SIZE,
+                          PAGE_SHIFT);
+
+    vcpu_set_rr(current, 0, old_rid);
+}
+#endif
+
 static void flush_tlb_vhpt_all (struct domain *d)
 {
 	/* First VHPT. */
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/include/asm-ia64/domain.h	Mon Jul 24 23:18:41 2006 +0900
@@ -12,28 +12,10 @@
 #include
 #include
 
-struct p2m_entry {
-    volatile pte_t* pte;
-    pte_t used;
-};
-
-static inline void
-p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used)
-{
-    entry->pte = pte;
-    entry->used = used;
-}
-
-static inline int
-p2m_entry_retry(struct p2m_entry* entry)
-{
-    //XXX see lookup_domain_pte().
-    //    NULL is set for invalid gpaddr for the time being.
-    if (entry->pte == NULL)
-        return 0;
-
-    return (pte_val(*entry->pte) != pte_val(entry->used));
-}
+struct p2m_entry;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track;
+#endif
 
 extern void domain_relinquish_resources(struct domain *);
 
@@ -118,6 +100,10 @@ struct arch_domain {
     void *fpswa_inf;
 
     struct last_vcpu last_vcpu[NR_CPUS];
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    struct tlb_track* tlb_track;
+#endif
 };
 #define INT_ENABLE_OFFSET(v) 		  \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/asm-ia64/tlbflush.h
--- a/xen/include/asm-ia64/tlbflush.h	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/include/asm-ia64/tlbflush.h	Mon Jul 24 23:18:41 2006 +0900
@@ -22,6 +22,13 @@ void domain_flush_vtlb_all (void);
 /* Global range-flush of vTLB.  */
 void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
 
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track_entry;
+/* Global entry-flush of vTLB */
+void domain_flush_vtlb_track_entry(struct domain* d,
+                                   const struct tlb_track_entry* entry);
+#endif
+
 /* Final vTLB flush on every dirty cpus.  */
 void domain_flush_destroy (struct domain *d);
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/include/asm-ia64/vcpu.h	Mon Jul 24 23:18:41 2006 +0900
@@ -158,7 +158,12 @@ extern void vcpu_set_next_timer(VCPU *vc
 extern void vcpu_set_next_timer(VCPU *vcpu);
 extern BOOLEAN vcpu_timer_expired(VCPU *vcpu);
 extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu);
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
 extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64);
+#else
+struct p2m_entry;
+extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64, struct p2m_entry*);
+#endif
 extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
 extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h	Mon Jul 24 23:18:39 2006 +0900
+++ b/xen/include/public/arch-ia64.h	Mon Jul 24 23:18:41 2006 +0900
@@ -357,8 +357,14 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
 // address space.
 // flags for page assignement to pseudo physical address space
 #define _ASSIGN_readonly 0
+#define _ASSIGN_tlb_track 1
+
 #define ASSIGN_readonly (1UL << _ASSIGN_readonly)
 #define ASSIGN_writable (0UL << _ASSIGN_readonly) // dummy flag
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+# define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track)
+#endif
 
 /* This structure has the same layout of struct ia64_boot_param, defined in
    <asm/system.h>. It is redefined here to ease use. */
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/arch/ia64/xen/tlb_track.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/ia64/xen/tlb_track.c	Mon Jul 24 23:18:41 2006 +0900
@@ -0,0 +1,558 @@
+/******************************************************************************
+ * tlb_track.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/tlb_track.h>
+#include <asm/p2m_entry.h>
+#include             // for IA64_RR_SHIFT
+#include <asm/vcpu.h>       // for PSCB()
+
+#define CONFIG_TLB_TRACK_DEBUG
+#ifdef CONFIG_TLB_TRACK_DEBUG
+# define tlb_track_printd(fmt, ...)     \
+    printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+# define tlb_track_printd(fmt, ...)     do { } while (0)
+#endif
+
+#define CONFIG_TLB_TRACK_STAT_KEY_HANDLER
+#ifdef CONFIG_TLB_TRACK_STAT_KEY_HANDLER
+#include <xen/keyhandler.h>
+#include <xen/sched.h>      // for dom0
+
+static void
+dump_tlb_track_stat(unsigned char key)
+{
+    tlb_track_stat_printf(&dom0->arch.tlb_track->stat);
+}
+#endif
+
+static int
+tlb_track_allocate_entries(struct tlb_track* tlb_track)
+{
+    struct page_info* entry_page;
+    struct tlb_track_entry* track_entries;
+    unsigned int allocated;
+    unsigned long i;
+
+    BUG_ON(tlb_track->num_free > 0);
+    if (tlb_track->num_entries >= tlb_track->limit) {
+        DPRINTK("%s: num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+    entry_page = alloc_domheap_page(NULL);
+    if (entry_page == NULL) {
+        DPRINTK("%s: domheap page failed. num_entries %d limit %d\n",
+                __func__, tlb_track->num_entries, tlb_track->limit);
+        return -ENOMEM;
+    }
+
+    list_add(&entry_page->list, &tlb_track->page_list);
+    track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
+    allocated = PAGE_SIZE / sizeof(track_entries[0]);
+    tlb_track->num_entries += allocated;
+    tlb_track->num_free += allocated;
+    for (i = 0; i < allocated; i++) {
+        list_add(&track_entries[i].list, &tlb_track->free_list);
+        //tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
+    }
+    tlb_track_printd("allocated %d num_entries %d num_free %d\n",
+                     allocated, tlb_track->num_entries, tlb_track->num_free);
+    return 0;
+}
+
+
+int
+tlb_track_create(struct domain* d)
+{
+    struct tlb_track* tlb_track = NULL;
+    struct page_info* hash_page = NULL;
+    unsigned int hash_size;
+    unsigned int hash_shift;
+    unsigned int i;
+
+    tlb_track = xmalloc(struct tlb_track);
+    if (tlb_track == NULL) {
+        goto out;
+    }
+    hash_page = alloc_domheap_page(NULL);
+    if (hash_page == NULL) {
+        goto out;
+    }
+
+    spin_lock_init(&tlb_track->free_list_lock);
+    INIT_LIST_HEAD(&tlb_track->free_list);
+    tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
+    tlb_track->num_entries = 0;
+    tlb_track->num_free = 0;
+    INIT_LIST_HEAD(&tlb_track->page_list);
+    if (tlb_track_allocate_entries(tlb_track) < 0) {
+        goto out;
+    }
+
+    spin_lock_init(&tlb_track->hash_lock);
+    //XXX hash size optimization
+    hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
+    for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
+        /* nothing */;
+    tlb_track->hash_size = (1 << hash_shift);
+    tlb_track->hash_shift = hash_shift;
+    tlb_track->hash_mask = (1 << hash_shift) - 1;
+    tlb_track->hash = page_to_virt(hash_page);
+    for (i = 0; i < tlb_track->hash_size; i++) {
+        INIT_LIST_HEAD(&tlb_track->hash[i]);
+    }
+
+    memset(&tlb_track->stat, 0, sizeof(tlb_track->stat));
+
+    smp_mb(); // make initialization visible before use.
+    d->arch.tlb_track = tlb_track;
+    printk("%s:%d hash 0x%p hash_size %d\n",
+           __func__, __LINE__, tlb_track->hash, tlb_track->hash_size);
+
+#ifdef CONFIG_TLB_TRACK_STAT_KEY_HANDLER
+    register_keyhandler(
+        's', dump_tlb_track_stat, "dump dom0 tlb track stats");
+#endif
+    return 0;
+
+out:
+    if (hash_page != NULL) {
+        free_domheap_page(hash_page);
+    }
+    if (tlb_track != NULL) {
+        xfree(tlb_track);
+    }
+    return -ENOMEM;
+}
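A worked example of the hash sizing loop above, assuming 16KB pages and a
16-byte struct list_head (both values are assumptions; they are the usual
Xen/ia64 configuration):

/* hash_size = PAGE_SIZE / sizeof(struct list_head)
 *           = 16384 / 16 = 1024
 * The loop advances hash_shift while (1 << (hash_shift + 1)) < 1024,
 * so it stops at hash_shift = 9:
 *   hash_size = 1 << 9 = 512 buckets,  hash_mask = 511
 * The strict '<' rejects the exact fit, so only half the page is used;
 * presumably the reason for the "XXX hash size optimization" comment. */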
+
+void
+tlb_track_destroy(struct domain* d)
+{
+    struct tlb_track* tlb_track = d->arch.tlb_track;
+    struct page_info* page;
+    struct page_info* next;
+
+    spin_lock(&tlb_track->free_list_lock);
+    BUG_ON(tlb_track->num_free != tlb_track->num_entries);
+
+    list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
+        list_del(&page->list);
+        free_domheap_page(page);
+    }
+
+    free_domheap_page(virt_to_page(tlb_track->hash));
+    xfree(tlb_track);
+    //d->arch.tlb_track = NULL;
+}
+
+static struct tlb_track_entry*
+tlb_track_get_entry(struct tlb_track* tlb_track)
+{
+    struct tlb_track_entry* entry = NULL;
+    spin_lock(&tlb_track->free_list_lock);
+    if (tlb_track->num_free == 0) {
+        (void)tlb_track_allocate_entries(tlb_track);
+    }
+    if (tlb_track->num_free > 0) {
+        BUG_ON(list_empty(&tlb_track->free_list));
+        entry = list_entry(tlb_track->free_list.next,
+                           struct tlb_track_entry, list);
+        tlb_track->num_free--;
+        list_del(&entry->list);
+    }
+    spin_unlock(&tlb_track->free_list_lock);
+    return entry;
+}
+
+void
+tlb_track_free_entry(struct tlb_track* tlb_track,
+                     struct tlb_track_entry* entry)
+{
+    spin_lock(&tlb_track->free_list_lock);
+    list_add(&entry->list, &tlb_track->free_list);
+    tlb_track->num_free++;
+    spin_unlock(&tlb_track->free_list_lock);
+}
+
+
+#include <linux/hash.h>
+// XXX hash function.
+static struct list_head*
+tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
+{
+    unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
+    BUG_ON(hash >= tlb_track->hash_size);
+    BUG_ON((hash & tlb_track->hash_mask) != hash);
+    return &tlb_track->hash[hash];
+}
+
+static int
+tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
+{
+    if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
+        (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
+        (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
+        // Another thread zapped the p2m entry.
+        return 1;
+    }
+    return 0;
+}
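In other words, tlb_track_pte_zapped() masks out only the three tracking
bits before comparing; a concrete illustration:

/* Cases, given tlb_track_pte_zapped(old_pte, ret_pte):
 *
 *   old: pfn=X, p=1, _PAGE_TLB_INSERTED
 *   ret: pfn=X, p=1, _PAGE_TLB_INSERTED | _PAGE_TLB_INSERTED_MANY
 *     -> not zapped: a concurrent insert merely promoted the tracking state
 *
 *   old: pfn=X, p=1, ...      ret: pfn=Y, or p=0
 *     -> zapped: the p2m entry itself changed underneath us
 */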
+
+static TLB_TRACK_RET_T
+tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
+                          volatile pte_t* ptep, pte_t old_pte,
+                          unsigned long vaddr, unsigned long rid)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+    struct tlb_track_entry* new_entry = NULL;
+    unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
+    pte_t new_pte;
+    pte_t ret_pte;
+
+    struct vcpu* v = current;
+    TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;
+
+    tlb_track->stat.iod++;
+    if (!pte_tlb_tracking(old_pte)) {
+        tlb_track->stat.iod_not_tracked++;
+        return TLB_TRACK_NOT_TRACKED;
+    }
+    if (pte_tlb_inserted_many(old_pte)) {
+        tlb_track->stat.iod_tracked_many++;
+        return TLB_TRACK_MANY;
+    }
+
+    // vaddr must be normalized so that it is in rr0 and page aligned.
+    BUG_ON((vaddr >> IA64_RR_SHIFT) != 0);
+    BUG_ON((vaddr & ~PAGE_MASK) != 0);
+#if 0
+    tlb_track_printd("\n"
+                     "\tmfn 0x%016lx\n"
+                     "\told_pte 0x%016lx ptep 0x%p\n"
+                     "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+                     "\ttlb_track 0x%p head 0x%p\n",
+                     mfn,
+                     pte_val(old_pte), ptep, pte_val(*ptep),
+                     vaddr, rid,
+                     tlb_track, head);
+#endif
+
+ again:
+    // The zapping side may zap the p2m entry and then remove the tlb track
+    // entry non-atomically, so we may see a stale tlb track entry here;
+    // p2m_entry_retry() handles that case.  Alternatively, another thread
+    // may zap the p2m entry, remove the tlb track entry and insert a new
+    // tlb track entry.
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep) {
+            continue;
+        }
+
+        if (pte_pfn(entry->pte_val) == mfn) {
+            //tlb_track_entry_printf(entry);
+            if (entry->vaddr == vaddr && entry->rid == rid) {
+                //tlb_track_printd("TLB_TRACK_FOUND\n");
+                ret = TLB_TRACK_FOUND;
+                tlb_track->stat.iod_found++;
+#ifdef CONFIG_TLB_TRACK_CNT
+                entry->cnt++;
+                if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
+                    // Heuristics:
+                    // A page used to transfer data over a device channel
+                    // is unmapped after a small number of accesses (one
+                    // or two tlb inserts) soon after the real device I/O
+                    // completes, i.e. within a short period.  This page,
+                    // however, has been accessed many times, so we guess
+                    // it is used as an I/O ring and tracking this entry
+                    // is probably useless.
+                    //tlb_track_entry_printf(entry);
+                    //tlb_track_printd("cnt = %ld\n", entry->cnt);
+                    tlb_track->stat.iod_force_many++;
+                    goto force_many;
+                }
+#endif
+                goto found;
+            } else {
+#ifdef CONFIG_TLB_TRACK_CNT
+            force_many:
+#endif
+                if (!pte_tlb_inserted(old_pte)) {
+                    printk("%s:%d racy update\n", __func__, __LINE__);
+                    old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
+                }
+                new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
+                ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+                if (pte_val(ret_pte) != pte_val(old_pte)) {
+                    //tlb_track_printd("TLB_TRACK_AGAIN\n");
+                    ret = TLB_TRACK_AGAIN;
+                    tlb_track->stat.iod_again++;
+                } else {
+                    //tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n", entry);
+                    ret = TLB_TRACK_MANY;
+                    list_del(&entry->list);
+                    //tlb_track_entry_printf(entry);
+                    tlb_track->stat.iod_tracked_many_del++;
+                }
+                goto out;
+            }
+        }
+
+        // Another thread changed the p2m entry, removed the old tlb track
+        // entry and inserted a new one after we read old_pte, but before
+        // we took the spinlock.
+        //tlb_track_printd("TLB_TRACK_AGAIN\n");
+        ret = TLB_TRACK_AGAIN;
+        tlb_track->stat.iod_again++;
+        goto out;
+    }
+
+    entry = NULL; // prevent freeing entry.
+    if (pte_tlb_inserted(old_pte)) {
+        // Another thread removed the tlb track entry after we read
+        // old_pte, but before we took the spinlock.
+        ret = TLB_TRACK_AGAIN;
+        tlb_track->stat.iod_again++;
+        goto out;
+    }
+    if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
+        spin_unlock(&tlb_track->hash_lock);
+        new_entry = tlb_track_get_entry(tlb_track);
+        if (new_entry == NULL) {
+            tlb_track_printd("get_entry failed\n");
+            // An entry can't be allocated;
+            // fall back to full flush mode.
+            bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
+            tlb_track->stat.iod_new_failed++;
+        }
+        //tlb_track_printd("new_entry 0x%p\n", new_entry);
+        tlb_track->stat.iod_new_entry++;
+        goto again;
+    }
+
+    BUG_ON(pte_tlb_inserted_many(old_pte));
+    new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
+    ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+    if (pte_val(old_pte) != pte_val(ret_pte)) {
+        if (tlb_track_pte_zapped(old_pte, ret_pte)) {
+            //tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
+            ret = TLB_TRACK_AGAIN;
+            tlb_track->stat.iod_again++;
+            goto out;
+        }
+
+        // Another thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY
+        if (pte_tlb_inserted_many(ret_pte)) {
+            // Another thread already set _PAGE_TLB_INSERTED_MANY and
+            // removed the entry.
+            //tlb_track_printd("inserted TLB_TRACK_MANY\n");
+            BUG_ON(!pte_tlb_inserted(ret_pte));
+            ret = TLB_TRACK_MANY;
+            tlb_track->stat.iod_new_many++;
+            goto out;
+        }
+        BUG_ON(pte_tlb_inserted(ret_pte));
+        BUG();
+    }
+    if (new_entry) {
+        //tlb_track_printd("inserting new_entry 0x%p\n", new_entry);
+        entry = new_entry;
+        new_entry = NULL;
+
+        entry->ptep = ptep;
+        entry->pte_val = old_pte;
+        entry->vaddr = vaddr;
+        entry->rid = rid;
+        cpus_clear(entry->pcpu_dirty_mask);
+        vcpus_clear(entry->vcpu_dirty_mask);
+        list_add(&entry->list, head);
+
+#ifdef CONFIG_TLB_TRACK_CNT
+        entry->cnt = 0;
+#endif
+        tlb_track->stat.iod_insert++;
+        //tlb_track_entry_printf(entry);
+    } else {
+        goto out;
+    }
+
+ found:
+    BUG_ON(v->processor >= NR_CPUS);
+    cpu_set(v->processor, entry->pcpu_dirty_mask);
+    BUG_ON(v->vcpu_id >= MAX_VIRT_CPUS);
+    vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
+    tlb_track->stat.iod_dirtied++;
+
+ out:
+    spin_unlock(&tlb_track->hash_lock);
+    if (ret == TLB_TRACK_MANY && entry != NULL) {
+        tlb_track_free_entry(tlb_track, entry);
+    }
+    if (new_entry != NULL) {
+        tlb_track_free_entry(tlb_track, new_entry);
+    }
+    return ret;
+}
+
+void
+vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                               struct p2m_entry* entry)
+{
+    unsigned long vrn = vaddr >> IA64_RR_SHIFT;
+    unsigned long rid = PSCB(vcpu, rrs[vrn]);
+    TLB_TRACK_RET_T ret;
+
+    vaddr = (vaddr << 3) >> 3; // mask the rid bits
+    vaddr &= PAGE_MASK;
+    ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
+                                    &vcpu->domain->arch.mm,
+                                    entry->ptep, entry->used,
+                                    vaddr, rid);
+    if (ret == TLB_TRACK_AGAIN) {
+        p2m_entry_set_retry(entry);
+    }
+}
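A worked example of the address normalization above, assuming IA64_RR_SHIFT
is 61 (the top three address bits select the region on ia64):

/* vaddr = 0xe000000000123000  ->  vrn = vaddr >> 61 = 7
 * rid   = PSCB(vcpu, rrs[7])      (region ID of the faulting region)
 * (vaddr << 3) >> 3 clears bits 63..61: vaddr = 0x0000000000123000,
 * the same offset re-expressed in region 0; PAGE_MASK then page-aligns
 * it.  This satisfies the BUG_ON()s in tlb_track_insert_or_dirty(),
 * which require an rr0, page-aligned vaddr. */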
+
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+                            volatile pte_t* ptep, pte_t old_pte,
+                            struct tlb_track_entry** entryp)
+{
+    unsigned long mfn = pte_pfn(old_pte);
+    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+    struct tlb_track_entry* entry;
+
+    tlb_track->stat.sar++;
+    if (!pte_tlb_tracking(old_pte)) {
+        tlb_track->stat.sar_not_tracked++;
+        return TLB_TRACK_NOT_TRACKED;
+    }
+    if (!pte_tlb_inserted(old_pte)) {
+        BUG_ON(pte_tlb_inserted_many(old_pte));
+        tlb_track->stat.sar_not_found++;
+        return TLB_TRACK_NOT_FOUND;
+    }
+    if (pte_tlb_inserted_many(old_pte)) {
+        BUG_ON(!pte_tlb_inserted(old_pte));
+        tlb_track->stat.sar_many++;
+        return TLB_TRACK_MANY;
+    }
+
+    spin_lock(&tlb_track->hash_lock);
+    list_for_each_entry(entry, head, list) {
+        if (entry->ptep != ptep) {
+            continue;
+        }
+        if (pte_pfn(entry->pte_val) == mfn) {
+            list_del(&entry->list);
+            tlb_track->stat.sar_found++;
+            spin_unlock(&tlb_track->hash_lock);
+            *entryp = entry;
+            //tlb_track_entry_printf(entry);
+#ifdef CONFIG_TLB_TRACK_CNT
+            //tlb_track_printd("cnt = %ld\n", entry->cnt);
+#endif
+            return TLB_TRACK_FOUND;
+        }
+        BUG();
+    }
+    BUG();
+    spin_unlock(&tlb_track->hash_lock);
+    return TLB_TRACK_NOT_TRACKED;
+}
+
+void
+tlb_track_stat_printf(const struct tlb_track_stat* stat)
+{
+    printk("iod %ld\n"
+           "iod_again %ld\n"
+           "iod_not_tracked %ld\n"
+           "iod_force_many %ld\n"
+           "iod_tracked_many %ld\n"
+           "iod_tracked_many_del %ld\n"
+           "iod_found %ld\n"
+           "iod_new_entry %ld\n"
+           "iod_new_failed %ld\n"
+           "iod_new_many %ld\n"
+           "iod_insert %ld\n"
+           "iod_dirtied %ld\n"
+           "sar %ld\n"
+           "sar_not_tracked %ld\n"
+           "sar_not_found %ld\n"
+           "sar_found %ld\n"
+           "sar_many %ld\n",
+           stat->iod,
+           stat->iod_again,
+           stat->iod_not_tracked,
+           stat->iod_force_many,
+           stat->iod_tracked_many,
+           stat->iod_tracked_many_del,
+           stat->iod_found,
+           stat->iod_new_entry,
+           stat->iod_new_failed,
+           stat->iod_new_many,
+           stat->iod_insert,
+           stat->iod_dirtied,
+           stat->sar,
+           stat->sar_not_tracked,
+           stat->sar_not_found,
+           stat->sar_found,
+           stat->sar_many);
+}
+
+// for debug
+void
+__tlb_track_entry_printf(const char* func, int line,
+                         const struct tlb_track_entry* entry)
+{
+    char pcpumask_buf[NR_CPUS + 1];
+    char vcpumask_buf[MAX_VIRT_CPUS + 1];
+    cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
+                      entry->pcpu_dirty_mask);
+    vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
+                       entry->vcpu_dirty_mask);
+    printk("%s:%d\n"
+           "\tmfn 0x%016lx\n"
+           "\told_pte 0x%016lx ptep 0x%p\n"
+           "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+           "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
+           "\tentry 0x%p\n",
+           func, line,
+           pte_pfn(entry->pte_val),
+           pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
+           entry->vaddr, entry->rid,
+           pcpumask_buf, vcpumask_buf,
+           entry);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/asm-ia64/p2m_entry.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/p2m_entry.h	Mon Jul 24 23:18:41 2006 +0900
@@ -0,0 +1,76 @@
+/******************************************************************************
+ * p2m_entry.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __ASM_P2M_ENTRY_H__
+#define __ASM_P2M_ENTRY_H__
+
+#include
+
+struct p2m_entry {
+#define P2M_PTE_ALWAYS_RETRY	((volatile pte_t*) -1)
+    volatile pte_t* ptep;
+    pte_t used;
+};
+
+static inline void
+p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used)
+{
+    entry->ptep = ptep;
+    entry->used = used;
+}
+
+static inline void
+p2m_entry_set_retry(struct p2m_entry* entry)
+{
+    entry->ptep = P2M_PTE_ALWAYS_RETRY;
+}
+
+static inline int
+p2m_entry_retry(struct p2m_entry* entry)
+{
+    //XXX see lookup_domain_pte().
+    //    NULL is set for invalid gpaddr for the time being.
+    if (entry->ptep == NULL)
+        return 0;
+
+    if (entry->ptep == P2M_PTE_ALWAYS_RETRY)
+        return 1;
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+    return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) !=
+            (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK));
+#else
+    return (pte_val(*entry->ptep) != pte_val(entry->used));
+#endif
+}
+
+#endif // __ASM_P2M_ENTRY_H__
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
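The retry idiom these helpers support appears in the vcpu.c hunks above;
restated as a minimal sketch (condensed from vcpu_itc_d() in this patch, so
surrounding declarations are elided):

struct p2m_entry entry;
again:
    pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
    vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, logps, &entry);
    if (p2m_entry_retry(&entry)) {
        /* Either the p2m entry changed under us (tracking bits are
         * ignored in the comparison), or the tracker hit a race and
         * set P2M_PTE_ALWAYS_RETRY: undo the insert and retranslate. */
        vcpu_flush_tlb_vhpt_range(ifa, logps);
        goto again;
    }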
diff -r 3cee9325a6c6 -r b90fff753ca1 xen/include/asm-ia64/tlb_track.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-ia64/tlb_track.h	Mon Jul 24 23:18:41 2006 +0900
@@ -0,0 +1,201 @@
+/******************************************************************************
+ * tlb_track.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __TLB_TRACK_H__
+#define __TLB_TRACK_H__
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+
+#include
+#include
+
+#define _PAGE_TLB_TRACKING_BIT          53
+#define _PAGE_TLB_INSERTED_BIT          54
+#define _PAGE_TLB_INSERTED_MANY_BIT     55
+
+#define _PAGE_TLB_TRACKING      (1UL << _PAGE_TLB_TRACKING_BIT)
+#define _PAGE_TLB_INSERTED      (1UL << _PAGE_TLB_INSERTED_BIT)
+#define _PAGE_TLB_INSERTED_MANY (1UL << _PAGE_TLB_INSERTED_MANY_BIT)
+#define _PAGE_TLB_TRACK_MASK \
+    (_PAGE_TLB_TRACKING | _PAGE_TLB_INSERTED | _PAGE_TLB_INSERTED_MANY)
+
+#define pte_tlb_tracking(pte)                   \
+    ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0)
+#define pte_tlb_inserted(pte)                   \
+    ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0)
+#define pte_tlb_inserted_many(pte)              \
+    ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0)
+
+
+// vcpu mask
+// stolen from cpumask.h
+typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t;
+
+#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst))
+static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp)
+{
+    set_bit(vcpu, dstp->bits);
+}
+#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS)
+static inline void __vcpus_clear(vcpumask_t *dstp, int nbits)
+{
+    bitmap_zero(dstp->bits, nbits);
+}
+/* No static inline type checking - see Subtlety (1) above. */
+#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits)
+
+#define vcpumask_scnprintf(buf, len, src) \
+    __vcpumask_scnprintf((buf), (len), &(src), MAX_VIRT_CPUS)
+static inline int __vcpumask_scnprintf(char *buf, int len,
+                                       const vcpumask_t *srcp, int nbits)
+{
+    return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+
+// TODO: compact this structure.
+struct tlb_track_entry {
+    struct list_head   list;
+
+    volatile pte_t*    ptep;    // corresponding p2m entry
+
+    //XXX should we use TR_ENTRY?
+    pte_t              pte_val; // mfn and other flags
+                                // pte_val.p = 1:
+                                //   a tlb entry is inserted.
+                                // pte_val.p = 0:
+                                //   a tlb entry was once inserted, so this
+                                //   track entry was created; but a tlb
+                                //   purge has since been issued, so this
+                                //   virtual address need not be purged
+                                //   again.
+    unsigned long      vaddr;   // virtual address
+    unsigned long      rid;     // rid
+
+    cpumask_t          pcpu_dirty_mask;
+    vcpumask_t         vcpu_dirty_mask;
+    // tlbflush_timestamp;
+
+#define CONFIG_TLB_TRACK_CNT
+#ifdef CONFIG_TLB_TRACK_CNT
+#define TLB_TRACK_CNT_FORCE_MANY 256 //XXX how many?
+    unsigned long cnt;
+#endif
+};
+
+struct tlb_track_stat {
+    // insert or dirty
+    unsigned long iod;
+    unsigned long iod_again;
+    unsigned long iod_not_tracked;
+    unsigned long iod_force_many;
+    unsigned long iod_tracked_many;
+    unsigned long iod_tracked_many_del;
+    unsigned long iod_found;
+    unsigned long iod_new_entry;
+    unsigned long iod_new_failed;
+    unsigned long iod_new_many;
+    unsigned long iod_insert;
+    unsigned long iod_dirtied;
+
+    // search and remove
+    unsigned long sar;
+    unsigned long sar_not_tracked;
+    unsigned long sar_not_found;
+    unsigned long sar_found;
+    unsigned long sar_many;
+};
+void tlb_track_stat_printf(const struct tlb_track_stat* stat);
+
+struct tlb_track {
+
+// see __gnttab_map_grant_ref()
+// A domain can map granted pages up to MAPTRACK_MAX_ENTRIES pages.
+#define TLB_TRACK_LIMIT_ENTRIES                                 \
+    (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track)))
+
+    spinlock_t                  free_list_lock;
+    struct list_head            free_list;
+    unsigned int                limit;
+    unsigned int                num_entries;
+    unsigned int                num_free;
+    struct list_head            page_list;
+
+    // XXX hash table size
+    spinlock_t                  hash_lock;
+    unsigned int                hash_size;
+    unsigned int                hash_shift;
+    unsigned int                hash_mask;
+    struct list_head*           hash;
+
+    struct tlb_track_stat       stat;
+};
+
+int tlb_track_create(struct domain* d);
+void tlb_track_destroy(struct domain* d);
+
+void tlb_track_free_entry(struct tlb_track* tlb_track,
+                          struct tlb_track_entry* entry);
+
+struct p2m_entry;
+void
+vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+                               struct p2m_entry* entry);
+
+enum TLB_TRACK_RET {
+    TLB_TRACK_NOT_TRACKED,
+    TLB_TRACK_NOT_FOUND,
+    TLB_TRACK_FOUND,
+    TLB_TRACK_MANY,
+    TLB_TRACK_AGAIN,
+};
+typedef enum TLB_TRACK_RET TLB_TRACK_RET_T;
+
+// On TLB_TRACK_FOUND, *entryp is set to the removed entry; the caller
+// owns it and must return it with tlb_track_free_entry() after flushing.
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+                            volatile pte_t* ptep, pte_t old_pte,
+                            struct tlb_track_entry** entryp);
+
+void
+__tlb_track_entry_printf(const char* func, int line,
+                         const struct tlb_track_entry* entry);
+#define tlb_track_entry_printf(entry) \
+    __tlb_track_entry_printf(__func__, __LINE__, (entry))
+#else
+
+// TODO: define nop stubs for when CONFIG_XEN_IA64_TLB_TRACK is disabled.
+
+#endif // CONFIG_XEN_IA64_TLB_TRACK
+
+#endif // __TLB_TRACK_H__
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
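Taken together, the life cycle of a tracked mapping looks like this (a
summary sketch; all names are from this patch):

/*
 * 1. map:    create_grant_host_mapping() installs the p2m entry with
 *            ASSIGN_tlb_track -> the pte gains _PAGE_TLB_TRACKING.
 *
 * 2. insert: on the guest's TLB miss, vcpu_itc_no_srlz() calls
 *            vcpu_tlb_track_insert_or_dirty(); the pte gains
 *            _PAGE_TLB_INSERTED and a tlb_track_entry records
 *            (ptep, pte_val, vaddr, rid) plus the dirty pcpu/vcpu masks.
 *            A second insert at a different vaddr/rid promotes the pte
 *            to _PAGE_TLB_INSERTED_MANY and drops the entry.
 *
 * 3. unmap:  domain_page_flush() calls tlb_track_search_and_remove();
 *            on TLB_TRACK_FOUND it purges only entry->vaddr under
 *            entry->rid via domain_flush_vtlb_track_entry() instead of
 *            flushing the whole vTLB, then frees the entry.
 */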