# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Node ID f9b91b850f7b71f4a18807955104502d686791b1
# Parent  b90fff753ca191c329c350184ffeae3990670b77
Implement the per-vcpu VHPT option: allocate one VHPT per vcpu.
Adds a compile-time option, xen_ia64_pervcpu_vhpt=y, to enable it,
and a Xen boot-time option, pervcpu_vhpt=0, to disable it again.
This patch depends on the tlb tracking patch.
PATCHNAME: pervcpu_vhpt

Signed-off-by: Isaku Yamahata
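Usage sketch (illustrative only; the make invocation assumes the usual
xen-unstable build flow, and the grub.conf line is just an example):

    # compile-time: build per-vcpu VHPT support in
    # (adds -DCONFIG_XEN_IA64_PERVCPU_VHPT, see the Rules.mk hunk below)
    make xen XEN_TARGET_ARCH=ia64 xen_ia64_pervcpu_vhpt=y

    # boot-time: when compiled in, per-vcpu VHPT defaults to on
    # (opt_pervcpu_vhpt = 1); append pervcpu_vhpt=0 to the Xen command
    # line to fall back to the per-physical-CPU VHPT, e.g. in grub.conf:
    #     kernel /boot/xen.gz ... pervcpu_vhpt=0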
diff -r b90fff753ca1 -r f9b91b850f7b xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/arch/ia64/Rules.mk	Mon Jul 24 23:19:19 2006 +0900
@@ -42,6 +42,9 @@ ifeq ($(xen_ia64_tlb_track),y)
 ifeq ($(xen_ia64_tlb_track),y)
 CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK
 endif
+ifeq ($(xen_ia64_pervcpu_vhpt),y)
+CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT
+endif
 ifeq ($(no_warns),y)
 CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
diff -r b90fff753ca1 -r f9b91b850f7b xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/arch/ia64/xen/domain.c	Mon Jul 24 23:19:19 2006 +0900
@@ -117,8 +117,12 @@ static void flush_vtlb_for_context_switc
     if (VMX_DOMAIN(vcpu)) {
         // currently vTLB for vt-i domian is per vcpu.
         // so any flushing isn't needed.
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    } else if (HAS_PERVCPU_VHPT(vcpu->domain)) {
+        // nothing to do
+#endif
     } else {
-        vhpt_flush();
+        local_vhpt_flush();
     }
     local_flush_tlb_all();
 }
@@ -133,9 +137,13 @@ void schedule_tail(struct vcpu *prev)
         vmx_do_launch(current);
     } else {
         ia64_set_iva(&ia64_ivt);
-        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-                     VHPT_ENABLED);
+        // Disable the VHPT: ia64_new_rr7() might otherwise cause a
+        // VHPT fault because it flushes dtr[IA64_TR_VHPT].
+        // (VHPT_SIZE_LOG2 << 2) is just to avoid a
+        // Reserved Register/Field fault.
+        ia64_set_pta(VHPT_SIZE_LOG2 << 2);
         load_region_regs(current);
+        ia64_set_pta(vcpu_pta(current));
         vcpu_load_kernel_regs(current);
         __ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
           shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
@@ -186,9 +194,13 @@ if (!i--) { i = 1000000; printk("+"); }
     nd = current->domain;
     if (!is_idle_domain(nd)) {
-        ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-                     VHPT_ENABLED);
+        // Disable the VHPT: ia64_new_rr7() might otherwise cause a
+        // VHPT fault because it changes dtr[IA64_TR_VHPT].
+        // (VHPT_SIZE_LOG2 << 2) is just to avoid a
+        // Reserved Register/Field fault.
+        ia64_set_pta(VHPT_SIZE_LOG2 << 2);
         load_region_regs(current);
+        ia64_set_pta(vcpu_pta(current));
         vcpu_load_kernel_regs(current);
         vcpu_set_next_timer(current);
         if (vcpu_timer_expired(current))
@@ -305,6 +317,20 @@ struct vcpu *alloc_vcpu_struct(struct do
         v->arch.ending_rid = d->arch.ending_rid;
         v->arch.breakimm = d->arch.breakimm;
         v->arch.last_processor = INVALID_PROCESSOR;
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+        DPRINTK("%s:%d allocating d 0x%p %d v 0x%p %d has_pervcpu_vhpt %d\n",
+                __func__, __LINE__,
+                d, d->domain_id, v, vcpu_id, HAS_PERVCPU_VHPT(d));
+        if (HAS_PERVCPU_VHPT(d)) {
+            if (pervcpu_vhpt_alloc(v) < 0) {
+                free_xenheap_pages(v->arch.privregs,
+                                   get_order(sizeof(mapped_regs_t)));
+                free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
+                return NULL;
+            }
+        }
+#endif
     }

     return v;
@@ -315,6 +341,10 @@ void free_vcpu_struct(struct vcpu *v)
     if (VMX_DOMAIN(v))
         vmx_relinquish_vcpu_resources(v);
     else {
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+        if (HAS_PERVCPU_VHPT(v->domain))
+            pervcpu_vhpt_free(v);
+#endif
         if (v->arch.privregs != NULL)
             free_xenheap_pages(v->arch.privregs,
                                get_order_from_shift(XMAPPEDREGS_SHIFT));
@@ -340,6 +370,11 @@ static void init_switch_stack(struct vcp
     memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
 }

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+static int opt_pervcpu_vhpt = 1;
+integer_param("pervcpu_vhpt", opt_pervcpu_vhpt);
+#endif
+
 int arch_domain_create(struct domain *d)
 {
     int i;
@@ -354,6 +389,13 @@ int arch_domain_create(struct domain *d)
     if (is_idle_domain(d))
         return 0;

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
+#if 1
+    DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
+            __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
+#endif
+#endif
 #ifdef CONFIG_XEN_IA64_TLB_TRACK
     if (tlb_track_create(d) < 0)
         goto fail_nomem;
diff -r b90fff753ca1 -r f9b91b850f7b xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/arch/ia64/xen/regionreg.c	Mon Jul 24 23:19:19 2006 +0900
@@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne
     } else if (rreg == 7) {
         ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
                      v->arch.privregs, v->domain->arch.shared_info_va,
-                     __get_cpu_var(vhpt_paddr));
+                     vcpu_vhpt_maddr(v));
     } else {
         set_rr(rr,newrrv.rrval);
     }
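The vhpt.c changes that follow all repeat one dispatch pattern: flush the
per-vcpu VHPT when HAS_PERVCPU_VHPT(d) says the domain has one, otherwise
fall back to the per-physical-CPU table. A self-contained toy model of
that pattern (struct domain here is pared down to a single flag; the real
macro definitions are in the domain.h hunk at the end of this patch).
Note each real call site still needs the #ifdef guard, because
vcpu_vhpt_flush() and friends are only compiled when the option is on:

    /* Toy model of the HAS_PERVCPU_VHPT() dispatch used at every flush
     * site in vhpt.c.  Build with -DCONFIG_XEN_IA64_PERVCPU_VHPT to
     * exercise the per-vcpu branch; without it the macro is (0) and the
     * branch folds away, mirroring the vhpt.h/domain.h definitions. */
    #include <stdio.h>

    struct domain { int has_pervcpu_vhpt; };

    #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
    #define HAS_PERVCPU_VHPT(d) ((d)->has_pervcpu_vhpt)
    #else
    #define HAS_PERVCPU_VHPT(d) (0)
    #endif

    static void vcpu_vhpt_flush(struct domain *d)
    {
        (void)d;
        printf("flush this domain's per-vcpu VHPT\n");
    }

    static void local_vhpt_flush(void)
    {
        printf("flush this pCPU's VHPT\n");
    }

    static void flush_site(struct domain *d)
    {
        if (HAS_PERVCPU_VHPT(d))
            vcpu_vhpt_flush(d);     /* per-vcpu table */
        else
            local_vhpt_flush();     /* per-physical-CPU table */
    }

    int main(void)
    {
        struct domain d = { .has_pervcpu_vhpt = 1 };
        flush_site(&d);
        return 0;
    }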
diff -r b90fff753ca1 -r f9b91b850f7b xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/arch/ia64/xen/vhpt.c	Mon Jul 24 23:19:19 2006 +0900
@@ -23,18 +23,30 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd
 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
 DEFINE_PER_CPU (unsigned long, vhpt_pend);

-void vhpt_flush(void)
-{
-    struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+static void __vhpt_flush(unsigned long vhpt_maddr)
+{
+    struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)__va(vhpt_maddr);
     int i;

     for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
         v->ti_tag = INVALID_TI_TAG;
 }

-static void vhpt_erase(void)
-{
-    struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+void local_vhpt_flush(void)
+{
+    __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+static void vcpu_vhpt_flush(struct vcpu *v)
+{
+    __vhpt_flush(vcpu_vhpt_maddr(v));
+}
+#endif
+
+static void vhpt_erase(unsigned long vhpt_maddr)
+{
+    struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)__va(vhpt_maddr);
     int i;

     for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
@@ -44,17 +56,6 @@ static void vhpt_erase(void)
         v->ti_tag = INVALID_TI_TAG;
     }
     // initialize cache too???
-}
-
-
-static void vhpt_map(unsigned long pte)
-{
-    unsigned long psr;
-
-    psr = ia64_clear_ic();
-    ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2);
-    ia64_set_psr(psr);
-    ia64_srlz_i();
 }

 void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
@@ -101,7 +102,7 @@ void vhpt_multiple_insert(unsigned long

 void vhpt_init(void)
 {
-    unsigned long paddr, pte;
+    unsigned long paddr;
     struct page_info *page;
 #if !VHPT_ENABLED
     return;
@@ -121,13 +122,53 @@ void vhpt_init(void)
     __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
     printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
            paddr, __get_cpu_var(vhpt_pend));
-    pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
-    vhpt_map(pte);
-    ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-                 VHPT_ENABLED);
-    vhpt_erase();
-}
-
+    vhpt_erase(paddr);
+    // we don't enable the VHPT here.
+    // context_switch() or schedule_tail() does it.
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+int
+pervcpu_vhpt_alloc(struct vcpu *v)
+{
+    unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+
+    v->arch.vhpt_entries =
+        (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
+    v->arch.vhpt_page =
+        alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
+    if (!v->arch.vhpt_page)
+        return -ENOMEM;
+
+    v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
+    DPRINTK("%s:%d allocating v 0x%p maddr 0x%lx\n",
+            __func__, __LINE__, v, v->arch.vhpt_maddr);
+    if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
+        panic("pervcpu_vhpt_alloc: bad VHPT alignment!\n");
+
+    v->arch.pta.val = 0; // zero clear
+    v->arch.pta.ve = 1; // enable vhpt
+    v->arch.pta.size = VHPT_SIZE_LOG2;
+    v->arch.pta.vf = 1; // long format
+    //v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15;
+    v->arch.pta.base = VHPT_ADDR >> 15;
+
+    vhpt_erase(v->arch.vhpt_maddr);
+    return 0;
+}
+
+void
+pervcpu_vhpt_free(struct vcpu *v)
+{
+    unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+    DPRINTK("%s:%d freeing d 0x%p %d v 0x%p %d\n",
+            __func__, __LINE__,
+            v->domain, v->domain->domain_id,
+            v, v->vcpu_id);
+
+    free_domheap_pages(v->arch.vhpt_page, vhpt_size_log2 - PAGE_SHIFT);
+}
+#endif

 void vcpu_flush_vtlb_all(struct vcpu *v)
 {
@@ -136,7 +177,15 @@ void vcpu_flush_vtlb_all(struct vcpu *v)
     vcpu_purge_tr_entry(&PSCBX(v,itlb));

     /* Then VHPT.  */
-    vhpt_flush ();
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(v->domain)) {
+        vcpu_vhpt_flush(v);
+    } else {
+        local_vhpt_flush();
+    }
+#else
+    local_vhpt_flush();
+#endif

     /* Then mTLB.  */
     local_flush_tlb_all ();
@@ -169,9 +218,10 @@ void domain_flush_vtlb_all (void)
     }
 }

-static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
-{
-    void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+static void __flush_vhpt_range(unsigned long vhpt_maddr,
+                               u64 vadr, u64 addr_range)
+{
+    void *vhpt_base = __va(vhpt_maddr);
     while ((long)addr_range > 0) {
         /* Get the VHPT entry.
          */
@@ -184,9 +234,30 @@ static void cpu_flush_vhpt_range (int cp
     }
 }

+static void cpu_vhpt_flush_range(int cpu, u64 vadr, u64 addr_range)
+{
+    __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+static void vcpu_vhpt_flush_range(struct vcpu *v, u64 vadr, u64 addr_range)
+{
+    __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
+}
+#endif
+
 void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
 {
-    cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(current->domain)) {
+        vcpu_vhpt_flush_range(current, vadr, 1UL << log_range);
+    } else {
+        cpu_vhpt_flush_range(current->processor,
+                             vadr, 1UL << log_range);
+    }
+#else
+    cpu_vhpt_flush_range(current->processor, vadr, 1UL << log_range);
+#endif
     ia64_ptcl(vadr, log_range << 2);
     ia64_srlz_i();
 }
@@ -218,8 +289,17 @@ void domain_flush_vtlb_range (struct dom
         if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
             continue;

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+        if (HAS_PERVCPU_VHPT(d)) {
+            vcpu_vhpt_flush_range(v, vadr, addr_range);
+        } else {
+            /* Invalidate VHPT entries.  */
+            cpu_vhpt_flush_range(v->processor, vadr, addr_range);
+        }
+#else
         /* Invalidate VHPT entries.  */
-        cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+        cpu_vhpt_flush_range(v->processor, vadr, addr_range);
+#endif
     }

     // ptc.ga has release semantics.
@@ -254,11 +334,30 @@ domain_flush_vltb_track_entry(struct dom
     }
     smp_mb();

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(d)) {
+        for_each_vcpu(d, v) {
+            if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+                continue;
+            if (!vcpu_isset(v->vcpu_id, entry->vcpu_dirty_mask))
+                continue;
+
+            /* Invalidate VHPT entries.  */
+            vcpu_vhpt_flush_range(v, entry->vaddr, PAGE_SIZE);
+        }
+    } else {
+        for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
+            /* Invalidate VHPT entries.  */
+            cpu_vhpt_flush_range(cpu, entry->vaddr, PAGE_SIZE);
+        }
+    }
+#else
     for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
         //printk("%s:%d cpu %d\n", __func__, __LINE__, cpu);
         /* Invalidate VHPT entries.  */
-        cpu_flush_vhpt_range(cpu, entry->vaddr, PAGE_SIZE);
-    }
+        cpu_vhpt_flush_range(cpu, entry->vaddr, PAGE_SIZE);
+    }
+#endif

     // ptc.ga has release semantics.
     /* ptc.ga */
@@ -272,7 +371,7 @@ static void flush_tlb_vhpt_all (struct d
 static void flush_tlb_vhpt_all (struct domain *d)
 {
     /* First VHPT.  */
-    vhpt_flush ();
+    local_vhpt_flush ();

     /* Then mTLB.  */
     local_flush_tlb_all ();
@@ -281,7 +380,14 @@ void domain_flush_destroy (struct domain
 void domain_flush_destroy (struct domain *d)
 {
     /* Very heavy...
      */
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(d))
+        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+    else
+        on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+#else
     on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+#endif
     cpus_clear (d->domain_dirty_cpumask);
 }
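A quick check of the sizing arithmetic in pervcpu_vhpt_alloc() above, as a
standalone sketch. The PAGE_SHIFT and VHPT_SIZE_LOG2 values are assumptions
for illustration (the real constants live in the ia64 headers), and the
entry field names other than ti_tag are illustrative; what matters is the
architected 32-byte long-format entry size:

    /* Standalone sketch of the per-vcpu VHPT sizing.  Assumes an LP64
     * target, 16KB pages (PAGE_SHIFT 14) and a 16MB VHPT
     * (VHPT_SIZE_LOG2 24) -- both hypothetical stand-ins. */
    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SHIFT     14   /* assumption: 16KB pages */
    #define VHPT_SIZE_LOG2 24   /* assumption: 16MB per-vcpu VHPT */

    struct vhpt_lf_entry {      /* long-format entry: 4 x 8 bytes */
        unsigned long itir, ti_tag, page_flags, CChain;
    };

    int main(void)
    {
        unsigned long size    = 1UL << VHPT_SIZE_LOG2;
        unsigned long entries = size / sizeof(struct vhpt_lf_entry);
        unsigned int  order   = VHPT_SIZE_LOG2 - PAGE_SHIFT;

        assert(sizeof(struct vhpt_lf_entry) == 32);
        /* 16MB / 32B = 524288 entries, from one order-10 (1024-page)
         * domheap allocation, which is what alloc_domheap_pages() with
         * order (VHPT_SIZE_LOG2 - PAGE_SHIFT) hands back */
        printf("entries=%lu, alloc order=%u\n", entries, order);
        return 0;
    }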
diff -r b90fff753ca1 -r f9b91b850f7b xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/include/asm-ia64/domain.h	Mon Jul 24 23:19:19 2006 +0900
@@ -63,6 +63,9 @@ struct arch_domain {
     unsigned long flags;
     struct {
         unsigned int is_vti : 1;
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+        unsigned int has_pervcpu_vhpt : 1;
+#endif
     };
 };

@@ -108,6 +111,13 @@ struct arch_domain {
 #define INT_ENABLE_OFFSET(v)              \
     (sizeof(vcpu_info_t) * (v)->vcpu_id + \
      offsetof(vcpu_info_t, evtchn_upcall_mask))
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#define HAS_PERVCPU_VHPT(d)     ((d)->arch.has_pervcpu_vhpt)
+#else
+#define HAS_PERVCPU_VHPT(d)     (0)
+#endif
+
 struct arch_vcpu {
     /* Save the state of vcpu.
@@ -158,6 +168,13 @@ struct arch_vcpu {
     fpswa_ret_t fpswa_ret;     /* save return values of FPSWA emulation */
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    PTA                 pta;
+    unsigned long       vhpt_maddr;
+    struct page_info*   vhpt_page;
+    unsigned long       vhpt_entries;
+#endif
+
 #define INVALID_PROCESSOR   INT_MAX
     int last_processor;
 };
diff -r b90fff753ca1 -r f9b91b850f7b xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h	Mon Jul 24 23:18:41 2006 +0900
+++ b/xen/include/asm-ia64/vhpt.h	Mon Jul 24 23:19:19 2006 +0900
@@ -42,11 +42,47 @@ extern void vhpt_multiple_insert(unsigne
                                  unsigned long logps);
 extern void vhpt_insert (unsigned long vadr, unsigned long pte,
                          unsigned long logps);
-void vhpt_flush(void);
+void local_vhpt_flush(void);

 /* Currently the VHPT is allocated per CPU.  */
 DECLARE_PER_CPU (unsigned long, vhpt_paddr);
 DECLARE_PER_CPU (unsigned long, vhpt_pend);

+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#if !VHPT_ENABLED
+#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT"
+#endif
+#include
+int pervcpu_vhpt_alloc(struct vcpu *v);
+void pervcpu_vhpt_free(struct vcpu *v);
+#endif
+
+static inline unsigned long
+vcpu_vhpt_maddr(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(v->domain)) {
+        return v->arch.vhpt_maddr;
+    }
+#endif
+
+#if 0
+    // referencing v->processor is racy.
+    return per_cpu(vhpt_paddr, v->processor);
+#endif
+    BUG_ON(v != current);
+    return __get_cpu_var(vhpt_paddr);
+}
+
+static inline unsigned long
+vcpu_pta(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    if (HAS_PERVCPU_VHPT(v->domain)) {
+        return v->arch.pta.val;
+    }
+#endif
+    return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
+}
+
 #endif /* !__ASSEMBLY */

 #endif
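A closing note on the two PTA encodings that vcpu_pta() switches between:
the per-vcpu path assembles the value through the PTA bitfield, while the
per-CPU path keeps the historical flat constant. Assuming the architected
cr.pta layout (ve = bit 0, size = bits 7:2, vf = bit 8, base = bits 63:15),
the two are the same encoding. A standalone sketch, with hypothetical
stand-in values for VHPT_ADDR and VHPT_SIZE_LOG2 (the real ones come from
vhpt.h):

    /* Sketch: the flat PTA constant used on the per-CPU path and the
     * bitfield built in pervcpu_vhpt_alloc() encode the same register
     * value, given the architected cr.pta layout. */
    #include <assert.h>
    #include <stdint.h>

    #define VHPT_ADDR      0xe800000000000000UL /* hypothetical, 32KB aligned */
    #define VHPT_SIZE_LOG2 24                   /* hypothetical 16MB VHPT */
    #define VHPT_ENABLED   1                    /* pta.ve */

    int main(void)
    {
        /* per-CPU path: flat constant, as in the old ia64_set_pta() call */
        uint64_t flat = VHPT_ADDR | (1UL << 8) | (VHPT_SIZE_LOG2 << 2)
                        | VHPT_ENABLED;

        /* per-vcpu path: field by field, as pervcpu_vhpt_alloc() does
         * through the PTA bitfield */
        uint64_t ve   = 1;                  /* walker enabled */
        uint64_t size = VHPT_SIZE_LOG2;     /* log2 of the VHPT size */
        uint64_t vf   = 1;                  /* long format */
        uint64_t base = VHPT_ADDR >> 15;    /* 32KB-aligned base */
        uint64_t val  = (base << 15) | (vf << 8) | (size << 2) | ve;

        assert(val == flat);
        return 0;
    }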