These cpu maps get read from all CPUs, so apart from addressing the
square(nr_cpus) growth of these objects, they also get moved into the
previously introduced read-mostly sub-section of the per-CPU section,
so as not to waste a full cacheline to align (and properly pad) them,
which would be undue overhead on systems with low NR_CPUS.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- 2009-07-10.orig/xen/arch/ia64/linux-xen/setup.c 2008-08-15
16:18:55.000000000 +0200
+++ 2009-07-10/xen/arch/ia64/linux-xen/setup.c 2009-07-10 13:57:41.000000000
+0200
@@ -577,8 +577,8 @@ late_setup_arch (char **cmdline_p)
cpu_physical_id(0) = hard_smp_processor_id();
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
check_for_logical_procs();
if (smp_num_cpucores > 1)
--- 2009-07-10.orig/xen/arch/ia64/linux-xen/smpboot.c 2009-05-27
13:54:05.000000000 +0200
+++ 2009-07-10/xen/arch/ia64/linux-xen/smpboot.c 2009-07-10
13:57:41.000000000 +0200
@@ -144,8 +144,8 @@ EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
int smp_num_siblings = 1;
int smp_num_cpucores = 1;
@@ -686,13 +686,13 @@ clear_cpu_sibling_map(int cpu)
{
int i;
- for_each_cpu_mask(i, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[i]);
- for_each_cpu_mask(i, cpu_core_map[cpu])
- cpu_clear(cpu, cpu_core_map[i]);
+ for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+ for_each_cpu_mask(i, per_cpu(cpu_core_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_core_map, i));
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
}
static void
@@ -702,12 +702,12 @@ remove_siblinginfo(int cpu)
if (cpu_data(cpu)->threads_per_core == 1 &&
cpu_data(cpu)->cores_per_socket == 1) {
- cpu_clear(cpu, cpu_core_map[cpu]);
- cpu_clear(cpu, cpu_sibling_map[cpu]);
+ cpu_clear(cpu, per_cpu(cpu_core_map, cpu));
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
return;
}
- last = (cpus_weight(cpu_core_map[cpu]) == 1 ? 1 : 0);
+ last = (cpus_weight(per_cpu(cpu_core_map, cpu)) == 1 ? 1 : 0);
/* remove it from all sibling map's */
clear_cpu_sibling_map(cpu);
@@ -800,11 +800,11 @@ set_cpu_sibling_map(int cpu)
for_each_online_cpu(i) {
if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
}
}
}
@@ -835,8 +835,8 @@ __cpu_up (unsigned int cpu)
if (cpu_data(cpu)->threads_per_core == 1 &&
cpu_data(cpu)->cores_per_socket == 1) {
- cpu_set(cpu, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_core_map[cpu]);
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, cpu));
return 0;
}
--- 2009-07-10.orig/xen/arch/ia64/xen/dom0_ops.c 2009-07-03
10:20:56.000000000 +0200
+++ 2009-07-10/xen/arch/ia64/xen/dom0_ops.c 2009-07-10 13:57:41.000000000
+0200
@@ -711,9 +711,9 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
memset(pi, 0, sizeof(*pi));
pi->cpu_to_node = cpu_to_node_arr;
- pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
+ pi->threads_per_core = cpus_weight(per_cpu(cpu_sibling_map, 0));
pi->cores_per_socket =
- cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+ cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
pi->total_pages = total_pages;
--- 2009-07-10.orig/xen/arch/x86/oprofile/op_model_p4.c 2009-01-26
09:39:13.000000000 +0100
+++ 2009-07-10/xen/arch/x86/oprofile/op_model_p4.c 2009-07-10
13:57:41.000000000 +0200
@@ -385,7 +385,7 @@ static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
- return (cpu != first_cpu(cpu_sibling_map[cpu]));
+ return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
#endif
return 0;
}
--- 2009-07-10.orig/xen/arch/x86/smpboot.c 2009-07-06 15:07:20.000000000
+0200
+++ 2009-07-10/xen/arch/x86/smpboot.c 2009-07-10 13:57:41.000000000 +0200
@@ -70,12 +70,9 @@ int phys_proc_id[NR_CPUS] __read_mostly
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
/* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
-
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_core_map);
/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
@@ -419,35 +416,35 @@ set_cpu_sibling_map(int cpu)
for_each_cpu_mask(i, cpu_sibling_setup_map) {
if (phys_proc_id[cpu] == phys_proc_id[i] &&
cpu_core_id[cpu] == cpu_core_id[i]) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
}
}
} else {
- cpu_set(cpu, cpu_sibling_map[cpu]);
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
}
if (c[cpu].x86_max_cores == 1) {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
c[cpu].booted_cores = 1;
return;
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
/*
* Does this new cpu bringup a new core?
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
*/
- if (first_cpu(cpu_sibling_map[i]) == i)
+ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
c[cpu].booted_cores++;
/*
* increment the core count for all
@@ -1052,8 +1049,8 @@ static void __init smp_boot_cpus(unsigne
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
map_cpu_to_logical_apicid();
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
return;
}
@@ -1163,16 +1160,16 @@ static void __init smp_boot_cpus(unsigne
Dprintk("Boot done.\n");
/*
- * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * construct cpu_sibling_map, so that we can tell sibling CPUs
* efficiently.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
}
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
if (nmi_watchdog == NMI_LOCAL_APIC)
check_nmi_watchdog();
@@ -1215,19 +1212,19 @@ remove_siblinginfo(int cpu)
int sibling;
struct cpuinfo_x86 *c = cpu_data;
- for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
- cpu_clear(cpu, cpu_core_map[sibling]);
+ for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+ cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
/*
* last thread sibling in this cpu core going down
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
c[sibling].booted_cores--;
}
- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
+ for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
phys_proc_id[cpu] = BAD_APICID;
cpu_core_id[cpu] = BAD_APICID;
cpu_clear(cpu, cpu_sibling_setup_map);
--- 2009-07-10.orig/xen/arch/x86/sysctl.c 2009-07-03 10:20:57.000000000
+0200
+++ 2009-07-10/xen/arch/x86/sysctl.c 2009-07-10 13:57:41.000000000 +0200
@@ -60,9 +60,9 @@ long arch_do_sysctl(
memset(pi, 0, sizeof(*pi));
pi->cpu_to_node = cpu_to_node_arr;
pi->threads_per_core =
- cpus_weight(cpu_sibling_map[0]);
+ cpus_weight(per_cpu(cpu_sibling_map, 0));
pi->cores_per_socket =
- cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+ cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
pi->total_pages = total_pages;
--- 2009-07-10.orig/xen/common/domctl.c 2009-06-29 11:58:15.000000000 +0200
+++ 2009-07-10/xen/common/domctl.c 2009-07-10 13:57:41.000000000 +0200
@@ -167,18 +167,18 @@ static unsigned int default_vcpu0_locati
* If we're on a HT system, we only auto-allocate to a non-primary HT. We
* favour high numbered CPUs in the event of a tie.
*/
- cpu = first_cpu(cpu_sibling_map[0]);
- if ( cpus_weight(cpu_sibling_map[0]) > 1 )
- cpu = next_cpu(cpu, cpu_sibling_map[0]);
- cpu_exclude_map = cpu_sibling_map[0];
+ cpu = first_cpu(per_cpu(cpu_sibling_map, 0));
+ if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
+ cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
+ cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
for_each_online_cpu ( i )
{
if ( cpu_isset(i, cpu_exclude_map) )
continue;
- if ( (i == first_cpu(cpu_sibling_map[i])) &&
- (cpus_weight(cpu_sibling_map[i]) > 1) )
+ if ( (i == first_cpu(per_cpu(cpu_sibling_map, i))) &&
+ (cpus_weight(per_cpu(cpu_sibling_map, i)) > 1) )
continue;
- cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]);
+ cpus_or(cpu_exclude_map, cpu_exclude_map, per_cpu(cpu_sibling_map, i));
if ( !cnt || cnt[i] <= cnt[cpu] )
cpu = i;
}
--- 2009-07-10.orig/xen/common/sched_credit.c 2009-04-09 14:05:36.000000000
+0200
+++ 2009-07-10/xen/common/sched_credit.c 2009-07-10 13:57:41.000000000
+0200
@@ -402,17 +402,17 @@ csched_cpu_pick(struct vcpu *vc)
nxt = cycle_cpu(cpu, cpus);
- if ( cpu_isset(cpu, cpu_core_map[nxt]) )
+ if ( cpu_isset(cpu, per_cpu(cpu_core_map, nxt)) )
{
- ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
- cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
- cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
+ ASSERT( cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
+ cpus_and(cpu_idlers, idlers, per_cpu(cpu_sibling_map, cpu));
+ cpus_and(nxt_idlers, idlers, per_cpu(cpu_sibling_map, nxt));
}
else
{
- ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
- cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
- cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
+ ASSERT( !cpu_isset(nxt, per_cpu(cpu_core_map, cpu)) );
+ cpus_and(cpu_idlers, idlers, per_cpu(cpu_core_map, cpu));
+ cpus_and(nxt_idlers, idlers, per_cpu(cpu_core_map, nxt));
}
weight_cpu = cpus_weight(cpu_idlers);
@@ -1205,9 +1205,9 @@ csched_dump_pcpu(int cpu)
spc = CSCHED_PCPU(cpu);
runq = &spc->runq;
- cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_sibling_map[cpu]);
+ cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map, cpu));
printk(" sort=%d, sibling=%s, ", spc->runq_sort_last, cpustr);
- cpumask_scnprintf(cpustr, sizeof(cpustr), cpu_core_map[cpu]);
+ cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_map, cpu));
printk("core=%s\n", cpustr);
/* current VCPU */
--- 2009-07-10.orig/xen/include/asm-ia64/linux-xen/asm/smp.h 2009-05-27
13:54:07.000000000 +0200
+++ 2009-07-10/xen/include/asm-ia64/linux-xen/asm/smp.h 2009-07-10
13:57:41.000000000 +0200
@@ -60,8 +60,8 @@ extern struct smp_boot_data {
extern char no_int_routing __devinitdata;
extern cpumask_t cpu_online_map;
-extern cpumask_t cpu_core_map[NR_CPUS];
-extern cpumask_t cpu_sibling_map[NR_CPUS];
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
extern int smp_num_siblings;
extern int smp_num_cpucores;
extern void __iomem *ipi_base_addr;
--- 2009-07-10.orig/xen/include/asm-x86/smp.h 2009-05-27 13:54:07.000000000
+0200
+++ 2009-07-10/xen/include/asm-x86/smp.h 2009-07-10 13:57:41.000000000
+0200
@@ -32,8 +32,8 @@
extern void smp_alloc_memory(void);
extern int pic_mode;
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
void smp_send_nmi_allbutself(void);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|