# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1155020300 -32400
# Node ID 44e6aea4077ead8633df36a7fa1b17f029307ad6
# Parent  9233bae3e316571c90176946bd3ba82355287905
implement per-vcpu VHPT option.
Allocate a VHPT per vcpu.
Added the compile-time option xen_ia64_pervcpu_vhpt=y to enable it.
Added the xen boot-time option pervcpu_vhpt=0 to disable it.

This patch focuses on vcpu migration between physical cpus, because
vcpus are migrated heavily under the credit scheduler. It tries to
reduce the vTLB flushing needed when a vcpu migrates.

PATCHNAME: pervcpu_vhpt

Signed-off-by: Isaku Yamahata

diff -r 9233bae3e316 -r 44e6aea4077e xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/arch/ia64/Rules.mk	Tue Aug 08 15:58:20 2006 +0900
@@ -3,6 +3,7 @@
 HAS_ACPI := y
 VALIDATE_VT ?= n
+xen_ia64_pervcpu_vhpt ?= y
 no_warns ?= n
 
 ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
@@ -35,6 +36,9 @@ ifeq ($(VALIDATE_VT),y)
 ifeq ($(VALIDATE_VT),y)
 CFLAGS += -DVALIDATE_VT
 endif
+ifeq ($(xen_ia64_pervcpu_vhpt),y)
+CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT
+endif
 ifeq ($(no_warns),y)
 CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
diff -r 9233bae3e316 -r 44e6aea4077e xen/arch/ia64/vmx/vmx_entry.S
--- a/xen/arch/ia64/vmx/vmx_entry.S	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/arch/ia64/vmx/vmx_entry.S	Tue Aug 08 15:58:20 2006 +0900
@@ -669,7 +669,7 @@
 1:
 	// re-pin mappings for guest_vhpt
-	mov r24=IA64_TR_PERVP_VHPT
+	mov r24=IA64_TR_VHPT
 	movl r25=PAGE_KERNEL
 	;;
 	or loc5 = r25,loc5	// construct PA | page properties
diff -r 9233bae3e316 -r 44e6aea4077e xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/arch/ia64/xen/domain.c	Tue Aug 08 15:58:20 2006 +0900
@@ -98,8 +98,10 @@ static void flush_vtlb_for_context_switc
 	if (VMX_DOMAIN(vcpu)) {
 		// currently the vTLB for a VT-i domain is per vcpu,
 		// so no flushing is needed.
+	} else if (HAS_PERVCPU_VHPT(vcpu->domain)) {
+		// nothing to do
 	} else {
-		vhpt_flush();
+		local_vhpt_flush();
 	}
 	local_flush_tlb_all();
 	perfc_incrc(flush_vtlb_for_context_switch);
@@ -115,9 +117,13 @@ void schedule_tail(struct vcpu *prev)
 		vmx_do_launch(current);
 	} else {
 		ia64_set_iva(&ia64_ivt);
-		ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-			     VHPT_ENABLED);
+		// Disable the VHPT. Without this, ia64_new_rr7() might
+		// cause a VHPT fault because it flushes dtr[IA64_TR_VHPT].
+		// (VHPT_SIZE_LOG2 << 2) is just to avoid a
+		// Reserved Register/Field fault.
+		ia64_set_pta(VHPT_SIZE_LOG2 << 2);
 		load_region_regs(current);
+		ia64_set_pta(vcpu_pta(current));
 		vcpu_load_kernel_regs(current);
 		__ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
 			shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
@@ -157,9 +163,13 @@ void context_switch(struct vcpu *prev, s
 	nd = current->domain;
 	if (!is_idle_domain(nd)) {
-		ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-			     VHPT_ENABLED);
+		// Disable the VHPT. Without this, ia64_new_rr7() might
+		// cause a VHPT fault because it changes dtr[IA64_TR_VHPT].
+		// (VHPT_SIZE_LOG2 << 2) is just to avoid a
+		// Reserved Register/Field fault.
+		ia64_set_pta(VHPT_SIZE_LOG2 << 2);
 		load_region_regs(current);
+		ia64_set_pta(vcpu_pta(current));
 		vcpu_load_kernel_regs(current);
 		vcpu_set_next_timer(current);
 		if (vcpu_timer_expired(current))
@@ -257,6 +267,13 @@ struct vcpu *alloc_vcpu_struct(struct do
 	if (!d->arch.is_vti) {
 		int order;
 		int i;
+		// a VT-i domain has its own VHPT policy.
+		if (HAS_PERVCPU_VHPT(d)) {
+			if (pervcpu_vhpt_alloc(v) < 0) {
+				free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
+				return NULL;
+			}
+		}
 
 		/* Create privregs page only if not VTi. */
 		order = get_order_from_shift(XMAPPEDREGS_SHIFT);
@@ -295,6 +312,8 @@ struct vcpu *alloc_vcpu_struct(struct do
 
 void relinquish_vcpu_resources(struct vcpu *v)
 {
+	if (HAS_PERVCPU_VHPT(v->domain))
+		pervcpu_vhpt_free(v);
 	if (v->arch.privregs != NULL) {
 		free_xenheap_pages(v->arch.privregs,
 				   get_order_from_shift(XMAPPEDREGS_SHIFT));
@@ -329,6 +348,11 @@ static void init_switch_stack(struct vcp
 	memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
 }
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+static int opt_pervcpu_vhpt = 1;
+integer_param("pervcpu_vhpt", opt_pervcpu_vhpt);
+#endif
+
 int arch_domain_create(struct domain *d)
 {
 	int i;
@@ -343,6 +367,11 @@ int arch_domain_create(struct domain *d)
 	if (is_idle_domain(d))
 		return 0;
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
+	DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
+		__func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
+#endif
 	d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
 	if (d->shared_info == NULL)
 		goto fail_nomem;
diff -r 9233bae3e316 -r 44e6aea4077e xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/arch/ia64/xen/regionreg.c	Tue Aug 08 15:58:20 2006 +0900
@@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne
 	} else if (rreg == 7) {
 		ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
 			     v->arch.privregs, v->domain->arch.shared_info_va,
-			     __get_cpu_var(vhpt_paddr));
+			     vcpu_vhpt_maddr(v));
 	} else {
 		set_rr(rr,newrrv.rrval);
 	}
diff -r 9233bae3e316 -r 44e6aea4077e xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/arch/ia64/xen/vhpt.c	Tue Aug 08 15:58:20 2006 +0900
@@ -3,6 +3,10 @@
  *
  * Copyright (C) 2004 Hewlett-Packard Co
  *	Dan Magenheimer
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *	per vcpu vhpt support
  */
 #include 
 #include 
@@ -23,18 +27,28 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd
 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
 DEFINE_PER_CPU (unsigned long, vhpt_pend);
 
-void vhpt_flush(void)
-{
-	struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+static void __vhpt_flush(unsigned long vhpt_maddr)
+{
+	struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)__va(vhpt_maddr);
 	int i;
 
 	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
 		v->ti_tag = INVALID_TI_TAG;
 }
 
-static void vhpt_erase(void)
-{
-	struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+void local_vhpt_flush(void)
+{
+	__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+}
+
+static void vcpu_vhpt_flush(struct vcpu* v)
+{
+	__vhpt_flush(vcpu_vhpt_maddr(v));
+}
+
+static void vhpt_erase(unsigned long vhpt_maddr)
+{
+	struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)__va(vhpt_maddr);
 	int i;
 
 	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
@@ -44,17 +58,6 @@ static void vhpt_erase(void)
 		v->ti_tag = INVALID_TI_TAG;
 	}
 	// initialize cache too???
-}
-
-
-static void vhpt_map(unsigned long pte)
-{
-	unsigned long psr;
-
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2);
-	ia64_set_psr(psr);
-	ia64_srlz_i();
 }
 
 void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
@@ -101,7 +104,7 @@ void vhpt_multiple_insert(unsigned long 
 
 void vhpt_init(void)
 {
-	unsigned long paddr, pte;
+	unsigned long paddr;
 	struct page_info *page;
 #if !VHPT_ENABLED
 	return;
@@ -121,22 +124,65 @@ void vhpt_init(void)
 	__get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
 	printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
 	       paddr, __get_cpu_var(vhpt_pend));
-	pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
-	vhpt_map(pte);
-	ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-		     VHPT_ENABLED);
-	vhpt_erase();
-}
-
-
+	vhpt_erase(paddr);
+	// we don't enable the VHPT here.
+	// context_switch() or schedule_tail() does it.
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+int
+pervcpu_vhpt_alloc(struct vcpu *v)
+{
+	unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+
+	v->arch.vhpt_entries =
+		(1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
+	v->arch.vhpt_page =
+		alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
+	if (!v->arch.vhpt_page)
+		return -ENOMEM;
+
+	v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
+	if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
+		panic("pervcpu_vhpt_init: bad VHPT alignment!\n");
+
+	v->arch.pta.val = 0; // to zero reserved bits
+	v->arch.pta.ve = 1; // enable vhpt
+	v->arch.pta.size = VHPT_SIZE_LOG2;
+	v->arch.pta.vf = 1; // long format
+	//v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15;
+	v->arch.pta.base = VHPT_ADDR >> 15;
+
+	vhpt_erase(v->arch.vhpt_maddr);
+	smp_mb(); // the per vcpu vhpt may be used by another physical cpu.
+	return 0;
+}
+
+void
+pervcpu_vhpt_free(struct vcpu *v)
+{
+	free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT);
+}
+#endif
+
+// SMP: we can't assume v == current; the vcpu may move to another
+// physical cpu, so the memory barriers are necessary.
+// If we could guarantee that the vcpu runs only on this physical cpu
+// (e.g. vcpu == current), smp_mb() would be unnecessary.
 void vcpu_flush_vtlb_all(struct vcpu *v)
 {
 	/* First VCPU tlb.  */
 	vcpu_purge_tr_entry(&PSCBX(v,dtlb));
 	vcpu_purge_tr_entry(&PSCBX(v,itlb));
+	smp_mb();
 
 	/* Then VHPT.  */
-	vhpt_flush ();
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		vcpu_vhpt_flush(v);
+	} else {
+		local_vhpt_flush();
+	}
+	smp_mb();
 
 	/* Then mTLB.  */
 	local_flush_tlb_all ();
@@ -165,6 +211,13 @@ void domain_flush_vtlb_all (void)
 		if (v->processor == cpu)
 			vcpu_flush_vtlb_all(v);
 		else
+			// SMP: it is racy to reference v->processor.
+			// The vcpu scheduler may move this vcpu to
+			// another physical processor and change the
+			// value with a plain store, so we may be seeing
+			// a stale value. In that case,
+			// flush_vtlb_for_context_switch() takes care of
+			// the mTLB flush.
 			smp_call_function_single(v->processor,
 						 __vcpu_flush_vtlb_all,
 						 v, 1, 1);
@@ -172,24 +225,41 @@ void domain_flush_vtlb_all (void)
 	perfc_incrc(domain_flush_vtlb_all);
 }
 
-static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
-{
-	void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+// Callers may need to call smp_mb() before/after calling this.
+// Be careful.
+static void __flush_vhpt_range(unsigned long vhpt_maddr,
+                               u64 vadr, u64 addr_range)
+{
+	void *vhpt_base = __va(vhpt_maddr);
 
 	while ((long)addr_range > 0) {
 		/* Get the VHPT entry.  */
 		unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
-		volatile struct vhpt_lf_entry *v;
-		v = vhpt_base + off;
+		struct vhpt_lf_entry *v = vhpt_base + off;
 		v->ti_tag = INVALID_TI_TAG;
 		addr_range -= PAGE_SIZE;
 		vadr += PAGE_SIZE;
 	}
 }
 
+static void cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
+{
+	__flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
+}
+
+static void vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
+{
+	__flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
+}
+
 void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
 {
-	cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+	if (HAS_PERVCPU_VHPT(current->domain)) {
+		vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
+	} else {
+		cpu_flush_vhpt_range(current->processor,
+				     vadr, 1UL << log_range);
+	}
 	ia64_ptcl(vadr, log_range << 2);
 	ia64_srlz_i();
 	perfc_incrc(vcpu_flush_tlb_vhpt_range);
@@ -222,8 +292,18 @@ void domain_flush_vtlb_range (struct dom
 		if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
 			continue;
 
-		/* Invalidate VHPT entries.  */
-		cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+		if (HAS_PERVCPU_VHPT(d)) {
+			vcpu_flush_vhpt_range(v, vadr, addr_range);
+		} else {
+			// SMP: it is racy to reference v->processor.
+			// The vcpu scheduler may move this vcpu to
+			// another physical processor and change the
+			// value with a plain store, so we may be seeing
+			// a stale value. In that case,
+			// flush_vtlb_for_context_switch() takes care of
+			// the mTLB flush.
+			/* Invalidate VHPT entries.  */
+			cpu_flush_vhpt_range(v->processor, vadr, addr_range);
+		}
 	}
 	// ptc.ga has release semantics.
 
@@ -235,7 +315,7 @@ static void flush_tlb_vhpt_all (struct d
 static void flush_tlb_vhpt_all (struct domain *d)
 {
 	/* First VHPT.  */
-	vhpt_flush ();
+	local_vhpt_flush ();
 
 	/* Then mTLB.  */
 	local_flush_tlb_all ();
@@ -244,7 +324,10 @@ void domain_flush_tlb_vhpt(struct domain
 void domain_flush_tlb_vhpt(struct domain *d)
 {
 	/* Very heavy... */
-	on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+	if (HAS_PERVCPU_VHPT(d))
+		on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+	else
+		on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
 	cpus_clear (d->domain_dirty_cpumask);
 }
diff -r 9233bae3e316 -r 44e6aea4077e xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/include/asm-ia64/domain.h	Tue Aug 08 15:58:20 2006 +0900
@@ -87,6 +87,9 @@ struct arch_domain {
         unsigned long flags;
         struct {
             unsigned int is_vti : 1;
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+            unsigned int has_pervcpu_vhpt : 1;
+#endif
         };
     };
 
@@ -142,6 +145,13 @@ struct arch_domain {
    (sizeof(vcpu_info_t) * (v)->vcpu_id + \
     offsetof(vcpu_info_t, evtchn_upcall_mask))
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#define HAS_PERVCPU_VHPT(d) ((d)->arch.has_pervcpu_vhpt)
+#else
+#define HAS_PERVCPU_VHPT(d) (0)
+#endif
+
+
 struct arch_vcpu {
     /* Save the state of vcpu.
       This is the first entry to speed up accesses.  */
@@ -191,6 +201,13 @@ struct arch_vcpu {
     fpswa_ret_t fpswa_ret;	/* save return values of FPSWA emulation */
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    PTA pta;
+    unsigned long vhpt_maddr;
+    struct page_info* vhpt_page;
+    unsigned long vhpt_entries;
+#endif
+
 #define INVALID_PROCESSOR	INT_MAX
     int last_processor;
 };
diff -r 9233bae3e316 -r 44e6aea4077e xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/include/asm-ia64/vhpt.h	Tue Aug 08 15:58:20 2006 +0900
@@ -37,11 +37,48 @@ extern void vhpt_multiple_insert(unsigne
                                  unsigned long logps);
 extern void vhpt_insert (unsigned long vadr, unsigned long pte,
                          unsigned long logps);
-void vhpt_flush(void);
+void local_vhpt_flush(void);
 
 /* Currently the VHPT is allocated per CPU.  */
 DECLARE_PER_CPU (unsigned long, vhpt_paddr);
 DECLARE_PER_CPU (unsigned long, vhpt_pend);
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#if !VHPT_ENABLED
+#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT"
+#endif
+#endif
+
+#include 
+int pervcpu_vhpt_alloc(struct vcpu *v);
+void pervcpu_vhpt_free(struct vcpu *v);
+static inline unsigned long
+vcpu_vhpt_maddr(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		return v->arch.vhpt_maddr;
+	}
+#endif
+
+#if 0
+	// referencing v->processor is racy.
+	return per_cpu(vhpt_paddr, v->processor);
+#endif
+	BUG_ON(v != current);
+	return __get_cpu_var(vhpt_paddr);
+}
+
+static inline unsigned long
+vcpu_pta(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		return v->arch.pta.val;
+	}
+#endif
+	return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
+}
+
 #endif /* !__ASSEMBLY */
 #endif
diff -r 9233bae3e316 -r 44e6aea4077e xen/include/asm-ia64/xenkregs.h
--- a/xen/include/asm-ia64/xenkregs.h	Mon Jul 24 21:25:32 2006 +0900
+++ b/xen/include/asm-ia64/xenkregs.h	Tue Aug 08 15:58:20 2006 +0900
@@ -7,7 +7,6 @@
 #define IA64_TR_SHARED_INFO	3	/* dtr3: page shared with domain */
 #define IA64_TR_VHPT		4	/* dtr4: vhpt */
 #define IA64_TR_MAPPED_REGS	5	/* dtr5: vcpu mapped regs */
-#define IA64_TR_PERVP_VHPT	6
 #define IA64_DTR_GUEST_KERNEL	7
 #define IA64_ITR_GUEST_KERNEL	2
 
 /* Processor status register bits: */
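---

Note on the cr.pta packing used above: vcpu_pta() builds the architected
Page Table Address register value field by field -- ve (walker enable)
at bit 0, size at bits 7:2, vf (long format) at bit 8, base at bits
63:15 -- which is why the per-cpu fallback returns
VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED.
The stand-alone sketch below shows the same packing; it is not part of
the patch, and the base address and size passed in main() are made-up
illustrative values, not the real VHPT_ADDR/VHPT_SIZE_LOG2.

/* sketch only -- compiles as plain C on any host */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PTA_VE          ((uint64_t)1 << 0)      /* VHPT walker enable */
#define PTA_VF          ((uint64_t)1 << 8)      /* 1 = long-format VHPT */
#define PTA_SIZE(log2)  ((uint64_t)(log2) << 2) /* log2 of VHPT size */
#define PTA_BASE(addr)  ((uint64_t)(addr) & ~(((uint64_t)1 << 15) - 1))

static uint64_t make_pta(uint64_t vhpt_base, unsigned int size_log2)
{
	/* same shape as VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
	 * VHPT_ENABLED in the patch */
	return PTA_BASE(vhpt_base) | PTA_VF | PTA_SIZE(size_log2) | PTA_VE;
}

int main(void)
{
	/* illustrative: a 16MB (2^24 byte) VHPT at a made-up base */
	printf("pta = 0x%016" PRIx64 "\n",
	       make_pta((uint64_t)0xf200000000000000, 24));
	return 0;
}

With this packing in mind, the build option only changes which base/size
pair is loaded into cr.pta at context-switch time: building with
xen_ia64_pervcpu_vhpt=y selects the per-vcpu table via v->arch.pta.val,
and booting with pervcpu_vhpt=0 falls back to the per-cpu table.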