Allocate the CPU masks embedded in struct domain and struct vcpu dynamically: masks embedded by value prevent sizing these structures independently of NR_CPUS. The basic concept (cpumask_var_t, in xen/include/xen/cpumask.h) is taken from recent Linux.

For scalability, many other uses of cpumask_t should likewise be replaced by cpumask_var_t, particularly function-local variables. This implies that no function should take a cpumask_t parameter by value, and that the whole old cpumask interface (cpus_...()) should go away in favor of the new (cpumask_...()) one. A brief usage sketch is appended after the patch.

Signed-off-by: Jan Beulich

--- a/xen/arch/ia64/xen/mm.c
+++ b/xen/arch/ia64/xen/mm.c
@@ -3191,8 +3191,9 @@ int get_page_type(struct page_info *page
 * may be unnecessary (e.g., page was GDT/LDT) but those
 * circumstances should be very rare.
 */
- cpumask_t mask =
- page_get_owner(page)->domain_dirty_cpumask;
+ cpumask_t mask;
+
+ cpumask_copy(&mask, page_get_owner(page)->domain_dirty_cpumask);
 tlbflush_filter(mask, page->tlbflush_timestamp);
 if ( unlikely(!cpus_empty(mask)) )
--- a/xen/arch/ia64/xen/vhpt.c
+++ b/xen/arch/ia64/xen/vhpt.c
@@ -516,7 +516,7 @@ void domain_flush_tlb_vhpt(struct domain
 on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
 else
 on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1);
- cpus_clear (d->domain_dirty_cpumask);
+ cpumask_clear(d->domain_dirty_cpumask);
 }
 void flush_tlb_for_log_dirty(struct domain *d)
@@ -545,7 +545,7 @@ void flush_tlb_for_log_dirty(struct doma
 } else {
 on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1);
 }
- cpus_clear (d->domain_dirty_cpumask);
+ cpumask_clear(d->domain_dirty_cpumask);
 }
 void flush_tlb_mask(const cpumask_t *mask)
--- a/xen/arch/x86/cpu/mcheck/vmce.c
+++ b/xen/arch/x86/cpu/mcheck/vmce.c
@@ -321,8 +321,8 @@ int inject_vmce(struct domain *d)
 d->domain_id);
 if ( guest_has_trap_callback(d, 0, TRAP_machine_check) )
 {
- d->vcpu[0]->cpu_affinity_tmp =
- d->vcpu[0]->cpu_affinity;
+ cpumask_copy(d->vcpu[0]->cpu_affinity_tmp,
+ d->vcpu[0]->cpu_affinity);
 cpus_clear(affinity);
 cpu_set(cpu, affinity);
 mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n",
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -132,8 +132,8 @@ void startup_cpu_idle_loop(void)
 struct vcpu *v = current;
 ASSERT(is_idle_vcpu(v));
- cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
- cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
+ cpumask_set_cpu(v->processor, v->domain->domain_dirty_cpumask);
+ cpumask_set_cpu(v->processor, v->vcpu_dirty_cpumask);
 reset_stack_and_jump(idle_loop);
 }
@@ -1391,7 +1391,7 @@ static void __context_switch(void)
 struct desc_ptr gdt_desc;
 ASSERT(p != n);
- ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
+ ASSERT(cpumask_empty(n->vcpu_dirty_cpumask));
 if ( !is_idle_vcpu(p) )
 {
@@ -1408,8 +1408,8 @@ static void __context_switch(void)
 * which is synchronised on that function.
*/ if ( p->domain != n->domain ) - cpu_set(cpu, n->domain->domain_dirty_cpumask); - cpu_set(cpu, n->vcpu_dirty_cpumask); + cpumask_set_cpu(cpu, n->domain->domain_dirty_cpumask); + cpumask_set_cpu(cpu, n->vcpu_dirty_cpumask); if ( !is_idle_vcpu(n) ) { @@ -1452,8 +1452,8 @@ static void __context_switch(void) } if ( p->domain != n->domain ) - cpu_clear(cpu, p->domain->domain_dirty_cpumask); - cpu_clear(cpu, p->vcpu_dirty_cpumask); + cpumask_clear_cpu(cpu, p->domain->domain_dirty_cpumask); + cpumask_clear_cpu(cpu, p->vcpu_dirty_cpumask); per_cpu(curr_vcpu, cpu) = n; } @@ -1462,10 +1462,11 @@ static void __context_switch(void) void context_switch(struct vcpu *prev, struct vcpu *next) { unsigned int cpu = smp_processor_id(); - cpumask_t dirty_mask = next->vcpu_dirty_cpumask; + cpumask_t dirty_mask; ASSERT(local_irq_is_enabled()); + cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask); /* Allow at most one CPU at a time to be dirty. */ ASSERT(cpus_weight(dirty_mask) <= 1); if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) ) @@ -1557,11 +1558,11 @@ void sync_local_execstate(void) void sync_vcpu_execstate(struct vcpu *v) { - if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) ) + if ( cpumask_test_cpu(smp_processor_id(), v->vcpu_dirty_cpumask) ) sync_local_execstate(); /* Other cpus call __sync_local_execstate from flush ipi handler. */ - flush_tlb_mask(&v->vcpu_dirty_cpumask); + flush_tlb_mask(v->vcpu_dirty_cpumask); } #define next_arg(fmt, args) ({ \ @@ -1922,7 +1923,7 @@ int domain_relinquish_resources(struct d int ret; struct vcpu *v; - BUG_ON(!cpus_empty(d->domain_dirty_cpumask)); + BUG_ON(!cpumask_empty(d->domain_dirty_cpumask)); switch ( d->arch.relmem ) { --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -3089,7 +3089,7 @@ static int hvmop_flush_tlb_all(void) paging_update_cr3(v); /* Flush all dirty TLBs. */ - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); /* Done. */ for_each_vcpu ( d, v ) --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1348,7 +1348,7 @@ static int svm_is_erratum_383(struct cpu wrmsrl(MSR_IA32_MCG_STATUS, msr_content & ~(1ULL << 2)); /* flush TLB */ - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); return 1; } --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -88,8 +88,14 @@ static int vmx_domain_initialise(struct d->arch.hvm_domain.vmx.ept_control.asr = pagetable_get_pfn(p2m_get_pagetable(p2m_get_hostp2m(d))); + if ( !zalloc_cpumask_var(&d->arch.hvm_domain.vmx.ept_synced) ) + return -ENOMEM; + if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 ) + { + free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); return rc; + } return 0; } @@ -98,6 +104,7 @@ static void vmx_domain_destroy(struct do { if ( paging_mode_hap(d) ) on_each_cpu(__ept_sync_domain, d, 1); + free_cpumask_var(d->arch.hvm_domain.vmx.ept_synced); vmx_free_vlapic_mapping(d); } @@ -660,8 +667,9 @@ static void vmx_ctxt_switch_to(struct vc { unsigned int cpu = smp_processor_id(); /* Test-and-test-and-set this CPU in the EPT-is-synced mask. 
*/ - if ( !cpu_isset(cpu, d->arch.hvm_domain.vmx.ept_synced) && - !cpu_test_and_set(cpu, d->arch.hvm_domain.vmx.ept_synced) ) + if ( !cpumask_test_cpu(cpu, d->arch.hvm_domain.vmx.ept_synced) && + !cpumask_test_and_set_cpu(cpu, + d->arch.hvm_domain.vmx.ept_synced) ) __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(d), 0); } @@ -1217,10 +1225,10 @@ void ept_sync_domain(struct domain *d) * the ept_synced mask before on_selected_cpus() reads it, resulting in * unnecessary extra flushes, to avoid allocating a cpumask_t on the stack. */ - cpus_and(d->arch.hvm_domain.vmx.ept_synced, - d->domain_dirty_cpumask, cpu_online_map); + cpumask_and(d->arch.hvm_domain.vmx.ept_synced, + d->domain_dirty_cpumask, &cpu_online_map); - on_selected_cpus(&d->arch.hvm_domain.vmx.ept_synced, + on_selected_cpus(d->arch.hvm_domain.vmx.ept_synced, __ept_sync_domain, d, 1); } --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -612,7 +612,7 @@ static void invalidate_shadow_ldt(struct /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ if ( flush ) - flush_tlb_mask(&v->vcpu_dirty_cpumask); + flush_tlb_mask(v->vcpu_dirty_cpumask); out: spin_unlock(&v->arch.shadow_ldt_lock); @@ -1338,7 +1338,7 @@ static void pae_flush_pgd( if ( pagetable_get_pfn(v->arch.guest_table) == mfn ) { paging_update_cr3(v); - cpus_or(m, m, v->vcpu_dirty_cpumask); + cpumask_or(&m, &m, v->vcpu_dirty_cpumask); } flush_tlb_mask(&m); } @@ -1365,7 +1365,7 @@ static void pae_flush_pgd( spin_unlock(&cache->lock); } - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } #else # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) @@ -2421,7 +2421,9 @@ static int __get_page_type(struct page_i * may be unnecessary (e.g., page was GDT/LDT) but those * circumstances should be very rare. 
*/ - cpumask_t mask = d->domain_dirty_cpumask; + cpumask_t mask; + + cpumask_copy(&mask, d->domain_dirty_cpumask); /* Don't flush if the timestamp is old enough */ tlbflush_filter(mask, page->tlbflush_timestamp); @@ -2903,7 +2905,7 @@ static inline int vcpumask_to_pcpumask( if ( (vcpu_id >= d->max_vcpus) ) return 0; if ( ((v = d->vcpu[vcpu_id]) != NULL) ) - cpus_or(*pmask, *pmask, v->vcpu_dirty_cpumask); + cpumask_or(pmask, pmask, v->vcpu_dirty_cpumask); } } } @@ -3161,11 +3163,11 @@ int do_mmuext_op( } case MMUEXT_TLB_FLUSH_ALL: - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); break; case MMUEXT_INVLPG_ALL: - flush_tlb_one_mask(&d->domain_dirty_cpumask, op.arg1.linear_addr); + flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); break; case MMUEXT_FLUSH_CACHE: @@ -4345,7 +4347,7 @@ static int __do_update_va_mapping( flush_tlb_local(); break; case UVMF_ALL: - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); break; default: rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr, @@ -4365,7 +4367,7 @@ static int __do_update_va_mapping( flush_tlb_one_local(va); break; case UVMF_ALL: - flush_tlb_one_mask(&d->domain_dirty_cpumask, va); + flush_tlb_one_mask(d->domain_dirty_cpumask, va); break; default: rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr, --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -72,7 +72,7 @@ static int hap_enable_vram_tracking(stru for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); return 0; } @@ -92,7 +92,7 @@ static int hap_disable_vram_tracking(str for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_logdirty, p2m_ram_rw); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); return 0; } @@ -108,7 +108,7 @@ static void hap_clean_vram_tracking(stru for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } static void hap_vram_tracking_init(struct domain *d) @@ -202,7 +202,7 @@ static int hap_enable_log_dirty(struct d /* set l1e entries of P2M table to be read-only. */ p2m_change_entry_type_global(p2m_get_hostp2m(d), p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); return 0; } @@ -223,7 +223,7 @@ static void hap_clean_dirty_bitmap(struc /* set l1e entries of P2M table to be read-only. 
*/ p2m_change_entry_type_global(p2m_get_hostp2m(d), p2m_ram_rw, p2m_ram_logdirty); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } void hap_logdirty_init(struct domain *d) @@ -842,7 +842,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi safe_write_pte(p, new); if ( (old_flags & _PAGE_PRESENT) && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); #if CONFIG_PAGING_LEVELS == 3 /* install P2M in monitor table for PAE Xen */ --- a/xen/arch/x86/mm/shadow/common.c +++ b/xen/arch/x86/mm/shadow/common.c @@ -703,7 +703,7 @@ static int oos_remove_write_access(struc } if ( ftlb ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); return 0; } @@ -1153,7 +1153,7 @@ sh_validate_guest_pt_write(struct vcpu * rc = sh_validate_guest_entry(v, gmfn, entry, size); if ( rc & SHADOW_SET_FLUSH ) /* Need to flush TLBs to pick up shadow PT changes */ - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); if ( rc & SHADOW_SET_ERROR ) { /* This page is probably not a pagetable any more: tear it out of the @@ -1369,7 +1369,7 @@ static void _shadow_prealloc( /* See if that freed up enough space */ if ( d->arch.paging.shadow.free_pages >= pages ) { - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); return; } } @@ -1422,7 +1422,7 @@ static void shadow_blow_tables(struct do pagetable_get_mfn(v->arch.shadow_table[i]), 0); /* Make sure everyone sees the unshadowings */ - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } void shadow_blow_tables_per_domain(struct domain *d) @@ -1535,7 +1535,7 @@ mfn_t shadow_alloc(struct domain *d, sp = page_list_remove_head(&d->arch.paging.shadow.freelist); /* Before we overwrite the old contents of this page, * we need to be sure that no TLB holds a pointer to it. */ - mask = d->domain_dirty_cpumask; + cpumask_copy(&mask, d->domain_dirty_cpumask); tlbflush_filter(mask, sp->tlbflush_timestamp); if ( unlikely(!cpus_empty(mask)) ) { @@ -2767,7 +2767,7 @@ void sh_remove_shadows(struct vcpu *v, m /* Need to flush TLBs now, so that linear maps are safe next time we * take a fault. 
*/ - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); if ( do_locking ) shadow_unlock(v->domain); } @@ -3474,7 +3474,7 @@ static void sh_unshadow_for_p2m_change(s { sh_remove_all_shadows_and_parents(v, mfn); if ( sh_remove_all_mappings(v, mfn) ) - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } } @@ -3509,7 +3509,8 @@ static void sh_unshadow_for_p2m_change(s /* This GFN->MFN mapping has gone away */ sh_remove_all_shadows_and_parents(v, omfn); if ( sh_remove_all_mappings(v, omfn) ) - cpus_or(flushmask, flushmask, d->domain_dirty_cpumask); + cpumask_or(&flushmask, &flushmask, + d->domain_dirty_cpumask); } omfn = _mfn(mfn_x(omfn) + 1); } @@ -3806,7 +3807,7 @@ int shadow_track_dirty_vram(struct domai } } if ( flush_tlb ) - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); goto out; out_sl1ma: --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -3248,7 +3248,7 @@ static int sh_page_fault(struct vcpu *v, */ perfc_incr(shadow_rm_write_flush_tlb); atomic_inc(&d->arch.paging.shadow.gtable_dirty_version); - flush_tlb_mask(&d->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); } #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) @@ -4294,7 +4294,7 @@ sh_update_cr3(struct vcpu *v, int do_loc * (old) shadow linear maps in the writeable mapping heuristics. */ #if GUEST_PAGING_LEVELS == 2 if ( sh_remove_write_access(v, gmfn, 2, 0) != 0 ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow); #elif GUEST_PAGING_LEVELS == 3 /* PAE guests have four shadow_table entries, based on the @@ -4317,7 +4317,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } } if ( flush ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); /* Now install the new shadows. */ for ( i = 0; i < 4; i++ ) { @@ -4338,7 +4338,7 @@ sh_update_cr3(struct vcpu *v, int do_loc } #elif GUEST_PAGING_LEVELS == 4 if ( sh_remove_write_access(v, gmfn, 4, 0) != 0 ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(d->domain_dirty_cpumask); sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow); #else #error This should never happen @@ -4755,7 +4755,7 @@ static void sh_pagetable_dying(struct vc } } if ( flush ) - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); /* Remember that we've seen the guest use this interface, so we * can rely on it using it in future, instead of guessing at @@ -4788,7 +4788,7 @@ static void sh_pagetable_dying(struct vc mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying; shadow_unhook_mappings(v, smfn, 1/* user pages only */); /* Now flush the TLB: we removed toplevel mappings. */ - flush_tlb_mask(&v->domain->domain_dirty_cpumask); + flush_tlb_mask(v->domain->domain_dirty_cpumask); } /* Remember that we've seen the guest use this interface, so we --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2965,7 +2965,7 @@ static void nmi_mce_softirq(void) /* Set the tmp value unconditionally, so that * the check in the iret hypercall works. */ - st->vcpu->cpu_affinity_tmp = st->vcpu->cpu_affinity; + cpumask_copy(st->vcpu->cpu_affinity_tmp, st->vcpu->cpu_affinity); if ((cpu != st->processor) || (st->processor != st->vcpu->processor)) @@ -2996,11 +2996,11 @@ void async_exception_cleanup(struct vcpu return; /* Restore affinity. 
*/ - if ( !cpus_empty(curr->cpu_affinity_tmp) && - !cpus_equal(curr->cpu_affinity_tmp, curr->cpu_affinity) ) + if ( !cpumask_empty(curr->cpu_affinity_tmp) && + !cpumask_equal(curr->cpu_affinity_tmp, curr->cpu_affinity) ) { - vcpu_set_affinity(curr, &curr->cpu_affinity_tmp); - cpus_clear(curr->cpu_affinity_tmp); + vcpu_set_affinity(curr, curr->cpu_affinity_tmp); + cpumask_clear(curr->cpu_affinity_tmp); } if ( !(curr->async_exception_mask & (curr->async_exception_mask - 1)) ) @@ -3048,7 +3048,7 @@ void async_exception_cleanup(struct vcpu int cpu = smp_processor_id(); cpumask_t affinity; - curr->cpu_affinity_tmp = curr->cpu_affinity; + cpumask_copy(curr->cpu_affinity_tmp, curr->cpu_affinity); cpus_clear(affinity); cpu_set(cpu, affinity); printk(XENLOG_DEBUG "MCE: CPU%d set affinity, old %d\n", --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -151,6 +151,11 @@ struct vcpu *alloc_vcpu( tasklet_init(&v->continue_hypercall_tasklet, NULL, 0); + if ( !zalloc_cpumask_var(&v->cpu_affinity) || + !zalloc_cpumask_var(&v->cpu_affinity_tmp) || + !zalloc_cpumask_var(&v->vcpu_dirty_cpumask) ) + goto fail_free; + if ( is_idle_domain(d) ) { v->runstate.state = RUNSTATE_running; @@ -167,16 +172,17 @@ struct vcpu *alloc_vcpu( } if ( sched_init_vcpu(v, cpu_id) != 0 ) - { - destroy_waitqueue_vcpu(v); - free_vcpu_struct(v); - return NULL; - } + goto fail_wq; if ( vcpu_initialise(v) != 0 ) { sched_destroy_vcpu(v); + fail_wq: destroy_waitqueue_vcpu(v); + fail_free: + free_cpumask_var(v->cpu_affinity); + free_cpumask_var(v->cpu_affinity_tmp); + free_cpumask_var(v->vcpu_dirty_cpumask); free_vcpu_struct(v); return NULL; } @@ -246,6 +252,9 @@ struct domain *domain_create( spin_lock_init(&d->shutdown_lock); d->shutdown_code = -1; + if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) ) + goto fail; + if ( domcr_flags & DOMCRF_hvm ) d->is_hvm = 1; @@ -346,6 +355,7 @@ struct domain *domain_create( xsm_free_security_domain(d); xfree(d->pirq_mask); xfree(d->pirq_to_evtchn); + free_cpumask_var(d->domain_dirty_cpumask); free_domain_struct(d); return NULL; } @@ -361,7 +371,7 @@ void domain_update_node_affinity(struct spin_lock(&d->node_affinity_lock); for_each_vcpu ( d, v ) - cpus_or(cpumask, cpumask, v->cpu_affinity); + cpumask_or(&cpumask, &cpumask, v->cpu_affinity); for_each_online_node ( node ) if ( cpus_intersects(node_to_cpumask(node), cpumask) ) @@ -658,7 +668,12 @@ static void complete_domain_destroy(stru for ( i = d->max_vcpus - 1; i >= 0; i-- ) if ( (v = d->vcpu[i]) != NULL ) + { + free_cpumask_var(v->cpu_affinity); + free_cpumask_var(v->cpu_affinity_tmp); + free_cpumask_var(v->vcpu_dirty_cpumask); free_vcpu_struct(v); + } if ( d->target != NULL ) put_domain(d->target); @@ -669,6 +684,7 @@ static void complete_domain_destroy(stru xfree(d->pirq_to_evtchn); xsm_free_security_domain(d); + free_cpumask_var(d->domain_dirty_cpumask); free_domain_struct(d); send_guest_global_virq(dom0, VIRQ_DOM_EXC); @@ -789,7 +805,7 @@ void vcpu_reset(struct vcpu *v) v->async_exception_mask = 0; memset(v->async_exception_state, 0, sizeof(v->async_exception_state)); #endif - cpus_clear(v->cpu_affinity_tmp); + cpumask_clear(v->cpu_affinity_tmp); clear_bit(_VPF_blocked, &v->pause_flags); domain_unlock(v->domain); --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -589,7 +589,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc else { ret = cpumask_to_xenctl_cpumap( - &op->u.vcpuaffinity.cpumap, &v->cpu_affinity); + &op->u.vcpuaffinity.cpumap, v->cpu_affinity); } vcpuaffinity_out: --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c 
@@ -1013,7 +1013,7 @@ gnttab_unmap_grant_ref(
 goto fault;
 }
- flush_tlb_mask(&current->domain->domain_dirty_cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
 for ( i = 0; i < partial_done; i++ )
 __gnttab_unmap_common_complete(&(common[i]));
@@ -1028,7 +1028,7 @@ gnttab_unmap_grant_ref(
 return 0;
 fault:
- flush_tlb_mask(&current->domain->domain_dirty_cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
 for ( i = 0; i < partial_done; i++ )
 __gnttab_unmap_common_complete(&(common[i]));
@@ -1075,7 +1075,7 @@ gnttab_unmap_and_replace(
 goto fault;
 }
- flush_tlb_mask(&current->domain->domain_dirty_cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
 for ( i = 0; i < partial_done; i++ )
 __gnttab_unmap_common_complete(&(common[i]));
@@ -1090,7 +1090,7 @@ gnttab_unmap_and_replace(
 return 0;
 fault:
- flush_tlb_mask(&current->domain->domain_dirty_cpumask);
+ flush_tlb_mask(current->domain->domain_dirty_cpumask);
 for ( i = 0; i < partial_done; i++ )
 __gnttab_unmap_common_complete(&(common[i]));
@@ -1496,7 +1496,7 @@ gnttab_transfer(
 #ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */
 guest_physmap_remove_page(d, gop.mfn, mfn, 0);
 #endif
- flush_tlb_mask(&d->domain_dirty_cpumask);
+ flush_tlb_mask(d->domain_dirty_cpumask);
 /* Find the target domain. */
 if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) )
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -243,7 +243,7 @@ static void dump_domains(unsigned char k
 {
 unsigned int i;
 printk("General information for domain %u:\n", d->domain_id);
- cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
+ cpuset_print(tmpstr, sizeof(tmpstr), *d->domain_dirty_cpumask);
 printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
 "dirty_cpus=%s max_pages=%u\n",
 atomic_read(&d->refcnt), d->is_dying,
@@ -277,9 +277,9 @@ static void dump_domains(unsigned char k
 v->pause_flags, v->poll_evtchn,
 vcpu_info(v, evtchn_upcall_pending),
 vcpu_info(v, evtchn_upcall_mask));
- cpuset_print(tmpstr, sizeof(tmpstr), v->vcpu_dirty_cpumask);
+ cpuset_print(tmpstr, sizeof(tmpstr), *v->vcpu_dirty_cpumask);
 printk("dirty_cpus=%s ", tmpstr);
- cpuset_print(tmpstr, sizeof(tmpstr), v->cpu_affinity);
+ cpuset_print(tmpstr, sizeof(tmpstr), *v->cpu_affinity);
 printk("cpu_affinity=%s\n", tmpstr);
 arch_dump_vcpu_info(v);
 periodic_timer_print(tmpstr, sizeof(tmpstr), v->periodic_period);
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -292,7 +292,7 @@ __runq_tickle(unsigned int cpu, struct c
 {
 cpumask_t idle_mask;
- cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
+ cpumask_and(&idle_mask, &prv->idlers, new->vcpu->cpu_affinity);
 if ( !cpus_empty(idle_mask) )
 {
 CSCHED_STAT_CRANK(tickle_idlers_some);
@@ -305,7 +305,7 @@ __runq_tickle(unsigned int cpu, struct c
 else
 cpus_or(mask, mask, idle_mask);
 }
- cpus_and(mask, mask, new->vcpu->cpu_affinity);
+ cpumask_and(&mask, &mask, new->vcpu->cpu_affinity);
 }
 }
@@ -455,7 +455,7 @@ __csched_vcpu_is_migrateable(struct vcpu
 */
 return !vc->is_running &&
 !__csched_vcpu_is_cache_hot(vc) &&
- cpu_isset(dest_cpu, vc->cpu_affinity);
+ cpumask_test_cpu(dest_cpu, vc->cpu_affinity);
 }
 static int
@@ -472,7 +472,7 @@ _csched_cpu_pick(const struct scheduler
 * preference to its current processor if it's in there.
 */
 online = CSCHED_CPUONLINE(vc->domain->cpupool);
- cpus_and(cpus, *online, vc->cpu_affinity);
+ cpumask_and(&cpus, online, vc->cpu_affinity);
 cpu = cpu_isset(vc->processor, cpus) ?
vc->processor : cycle_cpu(vc->processor, cpus); --- a/xen/common/sched_sedf.c +++ b/xen/common/sched_sedf.c @@ -448,7 +448,7 @@ static int sedf_pick_cpu(const struct sc cpumask_t *online; online = SEDF_CPUONLINE(v->domain->cpupool); - cpus_and(online_affinity, v->cpu_affinity, *online); + cpumask_and(&online_affinity, v->cpu_affinity, online); return first_cpu(online_affinity); } --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -196,9 +196,9 @@ int sched_init_vcpu(struct vcpu *v, unsi */ v->processor = processor; if ( is_idle_domain(d) || d->is_pinned ) - v->cpu_affinity = cpumask_of_cpu(processor); + cpumask_copy(v->cpu_affinity, cpumask_of(processor)); else - cpus_setall(v->cpu_affinity); + cpumask_setall(v->cpu_affinity); /* Initialise the per-vcpu timers. */ init_timer(&v->periodic_timer, vcpu_periodic_timer_fn, @@ -273,7 +273,7 @@ int sched_move_domain(struct domain *d, SCHED_OP(VCPU2OP(v), remove_vcpu, v); SCHED_OP(VCPU2OP(v), free_vdata, v->sched_priv); - cpus_setall(v->cpu_affinity); + cpumask_setall(v->cpu_affinity); v->processor = new_p; v->sched_priv = vcpu_priv[v->vcpu_id]; evtchn_move_pirqs(v); @@ -435,7 +435,7 @@ static void vcpu_migrate(struct vcpu *v) */ if ( pick_called && (new_lock == per_cpu(schedule_data, new_cpu).schedule_lock) && - cpu_isset(new_cpu, v->cpu_affinity) && + cpumask_test_cpu(new_cpu, v->cpu_affinity) && cpu_isset(new_cpu, v->domain->cpupool->cpu_valid) ) break; @@ -550,13 +550,13 @@ int cpu_disable_scheduler(unsigned int c { vcpu_schedule_lock_irq(v); - cpus_and(online_affinity, v->cpu_affinity, c->cpu_valid); + cpumask_and(&online_affinity, v->cpu_affinity, &c->cpu_valid); if ( cpus_empty(online_affinity) && - cpu_isset(cpu, v->cpu_affinity) ) + cpumask_test_cpu(cpu, v->cpu_affinity) ) { printk("Breaking vcpu affinity for domain %d vcpu %d\n", v->domain->domain_id, v->vcpu_id); - cpus_setall(v->cpu_affinity); + cpumask_setall(v->cpu_affinity); affinity_broken = 1; } @@ -602,10 +602,10 @@ int vcpu_set_affinity(struct vcpu *v, cp vcpu_schedule_lock_irq(v); - old_affinity = v->cpu_affinity; - v->cpu_affinity = *affinity; - *affinity = old_affinity; - if ( !cpu_isset(v->processor, v->cpu_affinity) ) + cpumask_copy(&old_affinity, v->cpu_affinity); + cpumask_copy(v->cpu_affinity, affinity); + cpumask_copy(affinity, &old_affinity); + if ( !cpumask_test_cpu(v->processor, v->cpu_affinity) ) set_bit(_VPF_migrating, &v->pause_flags); vcpu_schedule_unlock_irq(v); --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -68,7 +68,7 @@ struct vmx_domain { }; u64 eptp; } ept_control; - cpumask_t ept_synced; + cpumask_var_t ept_synced; }; #define ept_get_wl(d) \ --- a/xen/include/xen/cpumask.h +++ b/xen/include/xen/cpumask.h @@ -81,24 +81,26 @@ typedef struct cpumask{ DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) -static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +#define cpu_set(cpu, dst) cpumask_set_cpu(cpu, &(dst)) +static inline void cpumask_set_cpu(int cpu, volatile cpumask_t *dstp) { set_bit(cpu, dstp->bits); } -#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) -static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +#define cpu_clear(cpu, dst) cpumask_clear_cpu(cpu, &(dst)) +static inline void cpumask_clear_cpu(int cpu, volatile cpumask_t *dstp) { clear_bit(cpu, dstp->bits); } +#define cpumask_setall(dst) __cpus_setall(dst, NR_CPUS) #define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) static inline void __cpus_setall(cpumask_t *dstp, int 
nbits) { bitmap_fill(dstp->bits, nbits); } +#define cpumask_clear(dst) __cpus_clear(dst, NR_CPUS) #define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) static inline void __cpus_clear(cpumask_t *dstp, int nbits) { @@ -109,18 +111,21 @@ static inline void __cpus_clear(cpumask_ #define cpumask_test_cpu(cpu, cpumask) test_bit(cpu, (cpumask)->bits) #define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) -#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) -static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) +#define cpu_test_and_set(cpu, cpumask) \ + cpumask_test_and_set_cpu(cpu, &(cpumask)) +static inline int cpumask_test_and_set_cpu(int cpu, cpumask_t *addr) { return test_and_set_bit(cpu, addr->bits); } -#define cpu_test_and_clear(cpu, cpumask) __cpu_test_and_clear((cpu), &(cpumask)) -static inline int __cpu_test_and_clear(int cpu, cpumask_t *addr) +#define cpu_test_and_clear(cpu, cpumask) \ + cpumask_test_and_clear_cpu(cpu, &(cpumask)) +static inline int cpumask_test_and_clear_cpu(int cpu, cpumask_t *addr) { return test_and_clear_bit(cpu, addr->bits); } +#define cpumask_and(dst, src1, src2) __cpus_and(dst, src1, src2, NR_CPUS) #define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -128,6 +133,7 @@ static inline void __cpus_and(cpumask_t bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define cpumask_or(dst, src1, src2) __cpus_or(dst, src1, src2, NR_CPUS) #define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -135,6 +141,7 @@ static inline void __cpus_or(cpumask_t * bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define cpumask_xor(dst, src1, src2) __cpus_xor(dst, src1, src2, NR_CPUS) #define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) @@ -142,6 +149,7 @@ static inline void __cpus_xor(cpumask_t bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define cpumask_andnot(dst, src1, src2) __cpus_andnot(dst, src1, src2, NR_CPUS) #define cpus_andnot(dst, src1, src2) \ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, @@ -150,6 +158,7 @@ static inline void __cpus_andnot(cpumask bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define cpumask_complement(dst, src) __cpus_complement(dst, src, NR_CPUS) #define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) static inline void __cpus_complement(cpumask_t *dstp, const cpumask_t *srcp, int nbits) @@ -186,6 +195,7 @@ static inline int __cpus_empty(const cpu return bitmap_empty(srcp->bits, nbits); } +#define cpumask_full(cpumask) __cpus_full(cpumask, NR_CPUS) #define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) static inline int __cpus_full(const cpumask_t *srcp, int nbits) { @@ -199,8 +209,8 @@ static inline int __cpus_weight(const cp return bitmap_weight(srcp->bits, nbits); } -#define cpus_copy(dest, src) __cpus_copy(&(dest), &(src)) -static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp) +#define cpus_copy(dest, src) cpumask_copy(&(dest), &(src)) +static inline void cpumask_copy(cpumask_t *dstp, const cpumask_t *srcp) { bitmap_copy(dstp->bits, srcp->bits, 
NR_CPUS);
 }
@@ -322,6 +332,57 @@ static inline int __cpulist_scnprintf(ch
 return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
 }
+/*
+ * cpumask_var_t: struct cpumask for stack usage.
+ *
+ * Oh, the wicked games we play! In order to make kernel coding a
+ * little more difficult, we typedef cpumask_var_t to an array or a
+ * pointer: doing &mask on an array is a noop, so it still works.
+ *
+ * ie.
+ * cpumask_var_t tmpmask;
+ * if (!alloc_cpumask_var(&tmpmask))
+ * return -ENOMEM;
+ *
+ * ... use 'tmpmask' like a normal struct cpumask * ...
+ *
+ * free_cpumask_var(tmpmask);
+ */
+#if NR_CPUS > 2 * BITS_PER_LONG
+#include <xen/xmalloc.h>
+
+typedef cpumask_t *cpumask_var_t;
+
+static inline bool_t alloc_cpumask_var(cpumask_var_t *mask)
+{
+ return (*mask = xmalloc(cpumask_t)) != NULL;
+}
+
+static inline void free_cpumask_var(cpumask_var_t mask)
+{
+ xfree(mask);
+}
+#else
+typedef cpumask_t cpumask_var_t[1];
+
+static inline bool_t alloc_cpumask_var(cpumask_var_t *mask)
+{
+ return 1;
+}
+
+static inline void free_cpumask_var(cpumask_var_t mask)
+{
+}
+#endif
+
+static inline bool_t zalloc_cpumask_var(cpumask_var_t *mask)
+{
+ if (!alloc_cpumask_var(mask))
+ return 0;
+ cpumask_clear(*mask);
+ return 1;
+}
+
 #if NR_CPUS > 1
 #define for_each_cpu_mask(cpu, mask) \
 for ((cpu) = first_cpu(mask); \
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -161,12 +161,12 @@ struct vcpu
 spinlock_t virq_lock;
 /* Bitmask of CPUs on which this VCPU may run. */
- cpumask_t cpu_affinity;
+ cpumask_var_t cpu_affinity;
 /* Used to change affinity temporarily. */
- cpumask_t cpu_affinity_tmp;
+ cpumask_var_t cpu_affinity_tmp;
 /* Bitmask of CPUs which are holding onto this VCPU's state. */
- cpumask_t vcpu_dirty_cpumask;
+ cpumask_var_t vcpu_dirty_cpumask;
 /* Tasklet for continue_hypercall_on_cpu(). */
 struct tasklet continue_hypercall_tasklet;
@@ -289,7 +289,7 @@ struct domain
 struct vcpu **vcpu;
 /* Bitmask of CPUs which are holding onto this domain's state. */
- cpumask_t domain_dirty_cpumask;
+ cpumask_var_t domain_dirty_cpumask;
 struct arch_domain arch;
@@ -641,7 +641,7 @@ void watchdog_domain_destroy(struct doma
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v) (is_hvm_domain(v->domain))
 #define is_pinned_vcpu(v) ((v)->domain->is_pinned || \
- cpus_weight((v)->cpu_affinity) == 1)
+ cpumask_weight((v)->cpu_affinity) == 1)
 #define need_iommu(d) ((d)->need_iommu)
 void set_vcpu_migration_delay(unsigned int delay);
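
Usage sketch (illustrative only, not part of the patch): a subsystem that embeds a CPU mask in one of its own structures would follow the same pattern as the struct domain / struct vcpu changes above -- zalloc_cpumask_var() at initialisation, the pointer-based cpumask_*() accessors at run time, and free_cpumask_var() on teardown. The struct foo and foo_*() names below are made up for the example; only the cpumask interface itself comes from this patch.

#include <xen/cpumask.h>
#include <xen/errno.h>

/* Hypothetical object carrying a dynamically sized CPU mask. */
struct foo {
    cpumask_var_t dirty_mask;  /* cpumask_t * or cpumask_t[1], depending on NR_CPUS */
};

static int foo_init(struct foo *f)
{
    /* Allocate and zero the mask; only the NR_CPUS > 2*BITS_PER_LONG case can fail. */
    if ( !zalloc_cpumask_var(&f->dirty_mask) )
        return -ENOMEM;
    return 0;
}

static void foo_mark_cpu(struct foo *f, unsigned int cpu)
{
    /* The cpumask_*() accessors take pointers, so no &(...) is needed. */
    cpumask_set_cpu(cpu, f->dirty_mask);
}

static void foo_destroy(struct foo *f)
{
    free_cpumask_var(f->dirty_mask);
}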