# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Node ID 28e7938f2e6e1096c09ccdf7e8352dfeccba6584
# Parent b2abc70be89e02d0d380674096c8c1fb9e552431
implement per vcpu vhpt option.
allocate VHPT per vcpu.
added compile time option, xen_ia64_pervcpu_vhpt=n, to disable it.
added xen boot time option, pervcpu_vhpt=0, to disable it.
PATCHNAME: pervcpu_vhpt

Signed-off-by: Isaku Yamahata
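[Reading aid, not part of the change: the hunks below stop programming the
PTA register with a fixed per-physical-cpu value and instead load a per-vcpu
value via vcpu_pta() on every context switch.  The sketch below shows how
such an ia64 PTA value is composed; make_pta() is a hypothetical helper, but
the field positions mirror the patch's own formula,
VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED, and the
pta.ve/size/vf/base assignments in pervcpu_vhpt_alloc().

    static inline unsigned long
    make_pta(unsigned long vhpt_va, unsigned long size_log2)
    {
            return (vhpt_va & ~((1UL << 15) - 1)) /* pta.base, bits 15..63 */
                 | (1UL << 8)                     /* pta.vf: long format */
                 | (size_log2 << 2)               /* pta.size, bits 2..7 */
                 | 1UL;                           /* pta.ve: walker enabled */
    }

Note that the patch maps every vcpu's table at the same virtual address
(pta.base = VHPT_ADDR >> 15) and switches only the backing machine pages,
which is why vcpu_pta() can return a fixed base for each vcpu.]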
diff -r b2abc70be89e -r 28e7938f2e6e xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/arch/ia64/Rules.mk	Tue Jul 25 11:56:38 2006 +0900
@@ -4,6 +4,7 @@ HAS_ACPI := y
 HAS_ACPI := y
 VALIDATE_VT ?= n
 xen_ia64_dom0_virtual_physical ?= y
+xen_ia64_pervcpu_vhpt ?= y
 no_warns ?= n
 
 ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
@@ -39,6 +40,9 @@ ifeq ($(xen_ia64_dom0_virtual_physical),
 ifeq ($(xen_ia64_dom0_virtual_physical),y)
 CFLAGS += -DCONFIG_XEN_IA64_DOM0_VP
 endif
+ifeq ($(xen_ia64_pervcpu_vhpt),y)
+CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT
+endif
 ifeq ($(no_warns),y)
 CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
 endif
diff -r b2abc70be89e -r 28e7938f2e6e xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/arch/ia64/xen/domain.c	Tue Jul 25 11:56:38 2006 +0900
@@ -114,8 +114,10 @@ static void flush_vtlb_for_context_switc
 	if (VMX_DOMAIN(vcpu)) {
 		// currently vTLB for vt-i domian is per vcpu.
 		// so any flushing isn't needed.
+	} else if (HAS_PERVCPU_VHPT(vcpu->domain)) {
+		// nothing to do
 	} else {
-		vhpt_flush();
+		local_vhpt_flush();
 	}
 	local_flush_tlb_all();
 }
@@ -130,9 +132,13 @@ void schedule_tail(struct vcpu *prev)
 		vmx_do_launch(current);
 	} else {
 		ia64_set_iva(&ia64_ivt);
-		ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-			VHPT_ENABLED);
+		// disable the VHPT. ia64_new_rr7() might cause a VHPT
+		// fault without this because it flushes dtr[IA64_TR_VHPT].
+		// (VHPT_SIZE_LOG2 << 2) is just to avoid a
+		// Reserved Register/Field fault.
+		ia64_set_pta(VHPT_SIZE_LOG2 << 2);
 		load_region_regs(current);
+		ia64_set_pta(vcpu_pta(current));
 		vcpu_load_kernel_regs(current);
 		__ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
 		  shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
@@ -183,9 +189,13 @@ if (!i--) { i = 1000000; printk("+"); }
 
 	nd = current->domain;
 	if (!is_idle_domain(nd)) {
-		ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-			VHPT_ENABLED);
+		// disable the VHPT. ia64_new_rr7() might cause a VHPT
+		// fault without this because it changes dtr[IA64_TR_VHPT].
+		// (VHPT_SIZE_LOG2 << 2) is just to avoid a
+		// Reserved Register/Field fault.
+		ia64_set_pta(VHPT_SIZE_LOG2 << 2);
 		load_region_regs(current);
+		ia64_set_pta(vcpu_pta(current));
 		vcpu_load_kernel_regs(current);
 		vcpu_set_next_timer(current);
 		if (vcpu_timer_expired(current))
@@ -302,6 +312,18 @@ struct vcpu *alloc_vcpu_struct(struct do
 	    v->arch.ending_rid = d->arch.ending_rid;
 	    v->arch.breakimm = d->arch.breakimm;
 	    v->arch.last_processor = INVALID_PROCESSOR;
+
+	    DPRINTK("%s:%d allocating d 0x%p %d v 0x%p %d "
+		    "has_pervcpu_vhpt %d\n",
+		    __func__, __LINE__,
+		    d, d->domain_id, v, vcpu_id, HAS_PERVCPU_VHPT(d));
+	    if (HAS_PERVCPU_VHPT(d)) {
+		    if (pervcpu_vhpt_alloc(v) < 0) {
+			    free_xenheap_pages(v->arch.privregs, get_order(sizeof(mapped_regs_t)));
+			    free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
+			    return NULL;
+		    }
+	    }
 	}
 
 	return v;
@@ -312,6 +334,8 @@ void free_vcpu_struct(struct vcpu *v)
 	if (VMX_DOMAIN(v))
 		vmx_relinquish_vcpu_resources(v);
 	else {
+		if (HAS_PERVCPU_VHPT(v->domain))
+			pervcpu_vhpt_free(v);
 		if (v->arch.privregs != NULL)
 			free_xenheap_pages(v->arch.privregs,
 				get_order_from_shift(XMAPPEDREGS_SHIFT));
@@ -337,6 +361,11 @@ static void init_switch_stack(struct vcp
 	memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
 }
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+static int opt_pervcpu_vhpt = 1;
+integer_param("pervcpu_vhpt", opt_pervcpu_vhpt);
+#endif
+
 int arch_domain_create(struct domain *d)
 {
 	int i;
@@ -350,6 +379,11 @@ int arch_domain_create(struct domain *d)
 	if (is_idle_domain(d))
 		return 0;
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
+	DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
+		__func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
+#endif
 	d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
 	if (d->shared_info == NULL)
diff -r b2abc70be89e -r 28e7938f2e6e xen/arch/ia64/xen/regionreg.c
--- a/xen/arch/ia64/xen/regionreg.c	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/arch/ia64/xen/regionreg.c	Tue Jul 25 11:56:38 2006 +0900
@@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne
 	} else if (rreg == 7) {
 		ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info,
 			     v->arch.privregs, v->domain->arch.shared_info_va,
-			     __get_cpu_var(vhpt_paddr));
+			     vcpu_vhpt_maddr(v));
 	} else {
 		set_rr(rr,newrrv.rrval);
 	}
diff -r b2abc70be89e -r 28e7938f2e6e xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/arch/ia64/xen/vhpt.c	Tue Jul 25 11:56:38 2006 +0900
@@ -3,6 +3,10 @@
  *
  * Copyright (C) 2004 Hewlett-Packard Co
  *	Dan Magenheimer
+ *
+ * Copyright (c) 2006 Isaku Yamahata
+ *                    VA Linux Systems Japan K.K.
+ *                    per vcpu vhpt support
 */
 #include
 #include
@@ -23,18 +27,28 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd
 DEFINE_PER_CPU (unsigned long, vhpt_paddr);
 DEFINE_PER_CPU (unsigned long, vhpt_pend);
 
-void vhpt_flush(void)
-{
-	struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr));
+static void __vhpt_flush(unsigned long vhpt_maddr)
+{
+	struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
 	int i;
 
 	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
 		v->ti_tag = INVALID_TI_TAG;
 }
 
-static void vhpt_erase(void)
-{
-	struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR;
+void local_vhpt_flush(void)
+{
+	__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+}
+
+static void vcpu_vhpt_flush(struct vcpu* v)
+{
+	__vhpt_flush(vcpu_vhpt_maddr(v));
+}
+
+static void vhpt_erase(unsigned long vhpt_maddr)
+{
+	struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
 	int i;
 
 	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
@@ -44,17 +58,6 @@ static void vhpt_erase(void)
 		v->ti_tag = INVALID_TI_TAG;
 	}
 	// initialize cache too???
-}
-
-
-static void vhpt_map(unsigned long pte)
-{
-	unsigned long psr;
-
-	psr = ia64_clear_ic();
-	ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2);
-	ia64_set_psr(psr);
-	ia64_srlz_i();
 }
 
 void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps)
@@ -101,7 +104,7 @@ void vhpt_multiple_insert(unsigned long
 
 void vhpt_init(void)
 {
-	unsigned long paddr, pte;
+	unsigned long paddr;
 	struct page_info *page;
 #if !VHPT_ENABLED
 	return;
@@ -121,22 +124,73 @@ void vhpt_init(void)
 	__get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
 	printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
 	       paddr, __get_cpu_var(vhpt_pend));
-	pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL));
-	vhpt_map(pte);
-	ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
-		     VHPT_ENABLED);
-	vhpt_erase();
-}
-
-
+	vhpt_erase(paddr);
+	// we don't enable the VHPT here;
+	// context_switch() or schedule_tail() does it.
+}
+
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+int
+pervcpu_vhpt_alloc(struct vcpu *v)
+{
+	unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+
+	v->arch.vhpt_entries =
+		(1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
+	v->arch.vhpt_page =
+		alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0);
+	if (!v->arch.vhpt_page)
+		return -ENOMEM;
+
+	v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page);
+	DPRINTK("%s:%d allocating v 0x%p maddr 0x%lx\n",
+		__func__, __LINE__, v, v->arch.vhpt_maddr);
+	if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1))
+		panic("pervcpu_vhpt_alloc: bad VHPT alignment!\n");
+
+	v->arch.pta.val = 0; // to zero reserved bits
+	v->arch.pta.ve = 1; // enable vhpt
+	v->arch.pta.size = VHPT_SIZE_LOG2;
+	v->arch.pta.vf = 1; // long format
+	//v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15;
+	v->arch.pta.base = VHPT_ADDR >> 15;
+
+	vhpt_erase(v->arch.vhpt_maddr);
+	smp_mb(); // per vcpu vhpt may be used by another physical cpu.
+	return 0;
+}
+
+void
+pervcpu_vhpt_free(struct vcpu *v)
+{
+	unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
+	DPRINTK("%s:%d freeing d 0x%p %d v 0x%p %d\n",
+		__func__, __LINE__,
+		v->domain, v->domain->domain_id,
+		v, v->vcpu_id);
+
+	free_domheap_pages(v->arch.vhpt_page, vhpt_size_log2 - PAGE_SHIFT);
+}
+#endif
+
+// SMP: we can't assume v == current; the vcpu might move to another physical cpu.
+// So a memory barrier is necessary.
+// If we could guarantee that the vcpu runs only on this physical cpu
+// (e.g. vcpu == current), smp_mb() would be unnecessary.
 void vcpu_flush_vtlb_all(struct vcpu *v)
 {
 	/* First VCPU tlb.  */
 	vcpu_purge_tr_entry(&PSCBX(v,dtlb));
 	vcpu_purge_tr_entry(&PSCBX(v,itlb));
+	smp_mb();
 
 	/* Then VHPT.  */
-	vhpt_flush ();
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		vcpu_vhpt_flush(v);
+	} else {
+		local_vhpt_flush();
+	}
+	smp_mb();
 
 	/* Then mTLB.  */
 	local_flush_tlb_all ();
@@ -163,30 +217,54 @@ void domain_flush_vtlb_all (void)
 		if (v->processor == cpu)
 			vcpu_flush_vtlb_all(v);
 		else
+			// SMP: it is racy to reference v->processor.
+			// The vcpu scheduler may move this vcpu to another
+			// physical processor, and change the value
+			// using a plain store.
+			// We may be seeing the old value of it.
+			// In such a case, flush_vtlb_for_context_switch()
+			// takes care of the mTLB flush.
			smp_call_function_single(v->processor,
						 __vcpu_flush_vtlb_all,
						 v, 1, 1);
 	}
 }
 
-static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range)
-{
-	void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu));
+// Callers may need to call smp_mb() before/after calling this.
+// Be careful.
+static void __flush_vhpt_range(unsigned long vhpt_maddr,
+			       u64 vadr, u64 addr_range)
+{
+	void *vhpt_base = __va(vhpt_maddr);
 
 	while ((long)addr_range > 0) {
 		/* Get the VHPT entry.  */
 		unsigned int off = ia64_thash(vadr) - VHPT_ADDR;
-		volatile struct vhpt_lf_entry *v;
-		v = vhpt_base + off;
+		struct vhpt_lf_entry *v = vhpt_base + off;
 		v->ti_tag = INVALID_TI_TAG;
 		addr_range -= PAGE_SIZE;
 		vadr += PAGE_SIZE;
 	}
 }
 
+static void cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range)
+{
+	__flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range);
+}
+
+static void vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range)
+{
+	__flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range);
+}
+
 void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range)
 {
-	cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range);
+	if (HAS_PERVCPU_VHPT(current->domain)) {
+		vcpu_flush_vhpt_range(current, vadr, 1UL << log_range);
+	} else {
+		cpu_flush_vhpt_range(current->processor,
+				     vadr, 1UL << log_range);
+	}
 	ia64_ptcl(vadr, log_range << 2);
 	ia64_srlz_i();
 }
@@ -218,8 +296,18 @@ void domain_flush_vtlb_range (struct dom
 		if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
 			continue;
 
-		/* Invalidate VHPT entries.  */
-		cpu_flush_vhpt_range (v->processor, vadr, addr_range);
+		if (HAS_PERVCPU_VHPT(d)) {
+			vcpu_flush_vhpt_range(v, vadr, addr_range);
+		} else {
+			// SMP: it is racy to reference v->processor.
+			// The vcpu scheduler may move this vcpu to another
+			// physical processor, and change the value
+			// using a plain store.
+			// We may be seeing the old value of it.
+			// In such a case, flush_vtlb_for_context_switch() takes care of it.
+			/* Invalidate VHPT entries.  */
+			cpu_flush_vhpt_range(v->processor, vadr, addr_range);
+		}
 	}
 
 	// ptc.ga has release semantics.
@@ -230,7 +318,7 @@ static void flush_tlb_vhpt_all (struct d
 static void flush_tlb_vhpt_all (struct domain *d)
 {
 	/* First VHPT.  */
-	vhpt_flush ();
+	local_vhpt_flush ();
 
 	/* Then mTLB.  */
 	local_flush_tlb_all ();
@@ -239,7 +327,10 @@ void domain_flush_destroy (struct domain
 void domain_flush_destroy (struct domain *d)
 {
 	/* Very heavy...  */
-	on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
+	if (HAS_PERVCPU_VHPT(d))
+		on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+	else
+		on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1);
 
 	cpus_clear (d->domain_dirty_cpumask);
 }
diff -r b2abc70be89e -r 28e7938f2e6e xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/include/asm-ia64/domain.h	Tue Jul 25 11:56:38 2006 +0900
@@ -81,6 +81,9 @@ struct arch_domain {
     unsigned long flags;
     struct {
         unsigned int is_vti : 1;
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+        unsigned int has_pervcpu_vhpt : 1;
+#endif
     };
 };
 
@@ -123,6 +126,13 @@ struct arch_domain {
   (sizeof(vcpu_info_t) * (v)->vcpu_id + \
    offsetof(vcpu_info_t, evtchn_upcall_mask))
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#define HAS_PERVCPU_VHPT(d)	((d)->arch.has_pervcpu_vhpt)
+#else
+#define HAS_PERVCPU_VHPT(d)	(0)
+#endif
+
+
 struct arch_vcpu {
     /* Save the state of vcpu.
       This is the first entry to speed up accesses.  */
@@ -172,6 +182,13 @@ struct arch_vcpu {
     fpswa_ret_t fpswa_ret;	/* save return values of FPSWA emulation */
     struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+    PTA pta;
+    unsigned long vhpt_maddr;
+    struct page_info* vhpt_page;
+    unsigned long vhpt_entries;
+#endif
+
 #define INVALID_PROCESSOR	INT_MAX
     int last_processor;
 };
diff -r b2abc70be89e -r 28e7938f2e6e xen/include/asm-ia64/vhpt.h
--- a/xen/include/asm-ia64/vhpt.h	Wed Jul 19 07:17:54 2006 -0600
+++ b/xen/include/asm-ia64/vhpt.h	Tue Jul 25 11:56:38 2006 +0900
@@ -42,11 +42,48 @@ extern void vhpt_multiple_insert(unsigne
 				 unsigned long logps);
 extern void vhpt_insert (unsigned long vadr, unsigned long pte,
 			 unsigned long logps);
-void vhpt_flush(void);
+void local_vhpt_flush(void);
 
 /* Currently the VHPT is allocated per CPU.  */
 DECLARE_PER_CPU (unsigned long, vhpt_paddr);
 DECLARE_PER_CPU (unsigned long, vhpt_pend);
 
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+#if !VHPT_ENABLED
+#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT"
+#endif
+#endif
+
+#include <xen/sched.h>
+int pervcpu_vhpt_alloc(struct vcpu *v);
+void pervcpu_vhpt_free(struct vcpu *v);
+static inline unsigned long
+vcpu_vhpt_maddr(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		return v->arch.vhpt_maddr;
+	}
+#endif
+
+#if 0
+	// referencing v->processor is racy.
+	return per_cpu(vhpt_paddr, v->processor);
+#endif
+	BUG_ON(v != current);
+	return __get_cpu_var(vhpt_paddr);
+}
+
+static inline unsigned long
+vcpu_pta(struct vcpu* v)
+{
+#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
+	if (HAS_PERVCPU_VHPT(v->domain)) {
+		return v->arch.pta.val;
+	}
+#endif
+	return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
+}
+
 #endif /* !__ASSEMBLY */
 
 #endif
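[Usage note, not part of the change: the feature defaults to on.  The
Rules.mk hunk above makes it a build-time default (xen_ia64_pervcpu_vhpt ?= y,
so a make-variable override takes effect), and integer_param("pervcpu_vhpt",
...) in domain.c registers the Xen boot option.  The exact invocations below
are illustrative; the boot-loader syntax depends on your configuration:

    # build with per-vcpu VHPT support compiled out
    make xen_ia64_pervcpu_vhpt=n

    # or keep it compiled in but disable it for all domains, by appending
    # the option to the xen command line (e.g. an elilo.conf append line):
    pervcpu_vhpt=0

The trade-off is memory for flush scalability: each vcpu pins an extra
(1 << VHPT_SIZE_LOG2) bytes of domheap for its private table (see
pervcpu_vhpt_alloc() above), but in exchange a VHPT flush for one vcpu no
longer has to reach the per-physical-cpu table of whichever processor that
vcpu last ran on, avoiding the racy v->processor references noted in the
comments on the non-per-vcpu paths.]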