With the larger default NR_CPUS config setting (and even more so with
build-time settings exceeding this default), the amount of memory (and
potentially other resources) wasted just because cpu_possible_map doesn't
get set up properly increases. Use Linux's prefill_possible_map()
(modified accordingly to fit Xen) to overcome this.
This necessitates an adjustment to tasklet initialization, which must
not happen before cpu_possible_map is guaranteed to be fully set up
(according to my static code analysis this was a problem on ia64
anyway).
Tracing code also needed a minor adjustment, as it still had a simple
counted loop accessing per-CPU data in its body.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
--- 2010-05-04.orig/xen/arch/ia64/linux-xen/smpboot.c 2010-05-04
16:04:09.000000000 +0200
+++ 2010-05-04/xen/arch/ia64/linux-xen/smpboot.c 2010-05-04
13:22:06.000000000 +0200
@@ -776,7 +776,7 @@ void __cpu_die(unsigned int cpu)
#endif /* CONFIG_HOTPLUG_CPU */
void
-smp_cpus_done (unsigned int dummy)
+smp_cpus_done(void)
{
int cpu;
unsigned long bogosum = 0;
--- 2010-05-04.orig/xen/arch/ia64/xen/xensetup.c 2010-05-04
16:04:09.000000000 +0200
+++ 2010-05-04/xen/arch/ia64/xen/xensetup.c 2010-05-04 16:42:36.000000000
+0200
@@ -562,10 +562,12 @@ skip_move:
end_boot_allocator();
softirq_init();
- tasklet_subsys_init();
+ tasklet_early_init();
late_setup_arch(&cmdline);
+ tasklet_subsys_init();
+
scheduler_init();
idle_vcpu[0] = (struct vcpu*) ia64_r13;
idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
@@ -626,7 +628,7 @@ printk("num_online_cpus=%d, max_cpus=%d\
local_irq_disable();
printk("Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
+ smp_cpus_done();
#endif
initialise_gdb(); /* could be moved earlier */
--- 2010-05-04.orig/xen/arch/x86/mpparse.c 2010-05-04 16:04:09.000000000
+0200
+++ 2010-05-04/xen/arch/x86/mpparse.c 2010-05-04 13:22:06.000000000 +0200
@@ -35,7 +35,6 @@
/* Have we found an MP table */
int smp_found_config;
-unsigned int __devinitdata maxcpus = NR_CPUS;
/*
* Various Linux-internal data structures created from the
@@ -66,7 +65,7 @@ unsigned int def_to_bigsmp = 0;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __devinitdata num_processors;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
@@ -105,8 +104,10 @@ static int __devinit MP_processor_info (
int ver, apicid, cpu = 0;
physid_mask_t phys_cpu;
- if (!(m->mpc_cpuflag & CPU_ENABLED))
+ if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+ ++disabled_cpus;
return -EINVAL;
+ }
apicid = mpc_apic_id(m, translation_table[mpc_record]);
@@ -185,9 +186,9 @@ static int __devinit MP_processor_info (
return -ENOSPC;
}
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
+ if (max_cpus && num_processors >= max_cpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %u reached."
+ " Processor ignored.\n", max_cpus);
return -ENOSPC;
}
--- 2010-05-04.orig/xen/arch/x86/setup.c 2010-05-04 16:04:09.000000000
+0200
+++ 2010-05-04/xen/arch/x86/setup.c 2010-05-04 16:43:22.000000000 +0200
@@ -61,7 +61,7 @@ static int __initdata opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);
/* maxcpus: maximum number of CPUs to activate. */
-static unsigned int __initdata max_cpus = NR_CPUS;
+unsigned int __devinitdata max_cpus;
integer_param("maxcpus", max_cpus);
/* opt_watchdog: If true, run a watchdog NMI on each processor. */
@@ -568,6 +568,11 @@ void __init __start_xen(unsigned long mb
if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
EARLY_FAIL("Misaligned CPU0 stack.\n");
+ if ( opt_nosmp )
+ max_cpus = prefill_possible_map(1);
+ else if ( max_cpus )
+ max_cpus = prefill_possible_map(max_cpus);
+
if ( e820_raw_nr != 0 )
{
memmap_type = "Xen-e820";
@@ -978,7 +983,7 @@ void __init __start_xen(unsigned long mb
#endif
softirq_init();
- tasklet_subsys_init();
+ tasklet_early_init();
early_cpu_init();
@@ -1017,6 +1022,11 @@ void __init __start_xen(unsigned long mb
zap_low_mappings();
#endif
+ if ( !max_cpus )
+ max_cpus = prefill_possible_map(0);
+
+ tasklet_subsys_init();
+
init_apic_mappings();
percpu_free_unused_areas();
@@ -1049,12 +1059,9 @@ void __init __start_xen(unsigned long mb
vesa_mtrr_init();
#endif
- if ( opt_nosmp )
- max_cpus = 0;
-
iommu_setup(); /* setup iommu if available */
- smp_prepare_cpus(max_cpus);
+ smp_prepare_cpus(!opt_nosmp * max_cpus);
spin_debug_enable();
@@ -1087,7 +1094,7 @@ void __init __start_xen(unsigned long mb
}
printk("Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
+ smp_cpus_done();
initialise_gdb(); /* could be moved earlier */
--- 2010-05-04.orig/xen/arch/x86/smpboot.c 2010-05-04 16:04:09.000000000
+0200
+++ 2010-05-04/xen/arch/x86/smpboot.c 2010-05-04 13:22:06.000000000 +0200
@@ -83,10 +83,12 @@ EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
+cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
+unsigned int __devinitdata disabled_cpus;
+
/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
* is no way to resync one AP against BP. TBD: for prescott and above, we
* should use IA64's algorithm
@@ -829,7 +831,11 @@ int alloc_cpu_id(void)
{
cpumask_t tmp_map;
int cpu;
- cpus_complement(tmp_map, cpu_present_map);
+
+ if (max_cpus)
+ cpus_andnot(tmp_map, cpu_possible_map, cpu_present_map);
+ else
+ cpus_complement(tmp_map, cpu_present_map);
cpu = first_cpu(tmp_map);
if (cpu >= NR_CPUS)
return -ENODEV;
@@ -1243,6 +1249,52 @@ void __devinit smp_prepare_boot_cpu(void
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
+/*
+ * cpu_possible_mask should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_mask on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - The user can override it with maxcpus=NUM
+ * - Otherwise don't reserve additional CPUs.
+ * We do this because additional CPUs waste a lot of memory.
+ * -AK
+ */
+unsigned int __init prefill_possible_map(unsigned int max_cpus)
+{
+ unsigned int i, possible;
+
+ /* no processor from mptable or madt */
+ if (!num_processors)
+ num_processors = 1;
+
+ if (!max_cpus)
+ possible = num_processors + disabled_cpus;
+ else
+ possible = max_cpus;
+
+ if (possible > NR_CPUS) {
+ printk(KERN_WARNING
+ "%u processors exceeds NR_CPUS limit of %d\n",
+ possible, NR_CPUS);
+ possible = NR_CPUS;
+ }
+
+ printk(KERN_INFO "SMP: Allowing %u CPUs, %d hotplug CPUs\n",
+ possible, max_t(int, possible - num_processors, 0));
+
+ for (i = 0; i < possible; i++)
+ cpu_set(i, cpu_possible_map);
+
+ return possible;
+}
+
static void
remove_siblinginfo(int cpu)
{
@@ -1568,7 +1620,7 @@ int __devinit __cpu_up(unsigned int cpu)
}
-void __init smp_cpus_done(unsigned int max_cpus)
+void __init smp_cpus_done(void)
{
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
--- 2010-05-04.orig/xen/common/tasklet.c 2010-04-22 14:43:25.000000000
+0200
+++ 2010-05-04/xen/common/tasklet.c 2010-05-04 16:46:03.000000000 +0200
@@ -18,7 +18,7 @@
#include <xen/tasklet.h>
/* Some subsystems call into us before we are initialised. We ignore them. */
-static bool_t tasklets_initialised;
+static unsigned int __read_mostly tasklets_initialised = UINT_MAX;
/*
* NB. Any modification to a tasklet_list requires the scheduler to run
@@ -35,7 +35,8 @@ void tasklet_schedule_on_cpu(struct task
spin_lock_irqsave(&tasklet_lock, flags);
- if ( tasklets_initialised && !t->is_dead )
+ if ( (tasklets_initialised == NR_CPUS || tasklets_initialised == cpu) &&
+ !t->is_dead )
{
t->scheduled_on = cpu;
if ( !t->is_running )
@@ -161,14 +162,24 @@ void tasklet_init(
t->data = data;
}
+void __init tasklet_early_init(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
+
+ tasklets_initialised = cpu;
+}
+
void __init tasklet_subsys_init(void)
{
unsigned int cpu;
for_each_possible_cpu ( cpu )
- INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
+ if ( cpu != tasklets_initialised )
+ INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
- tasklets_initialised = 1;
+ tasklets_initialised = NR_CPUS;
}
/*
--- 2010-05-04.orig/xen/common/trace.c 2010-04-22 14:43:25.000000000 +0200
+++ 2010-05-04/xen/common/trace.c 2010-05-04 16:26:50.000000000 +0200
@@ -289,7 +289,7 @@ void __init init_trace_bufs(void)
return;
}
- for(i = 0; i < NR_CPUS; i++)
+ for_each_possible_cpu(i)
spin_lock_init(&per_cpu(t_lock, i));
for(i=0; i<T_INFO_PAGES; i++)
--- 2010-05-04.orig/xen/include/asm-x86/smp.h 2010-05-04 16:04:09.000000000
+0200
+++ 2010-05-04/xen/include/asm-x86/smp.h 2010-05-04 13:22:06.000000000
+0200
@@ -34,6 +34,10 @@ extern void smp_alloc_memory(void);
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+extern unsigned int max_cpus, num_processors, disabled_cpus;
+
+unsigned int prefill_possible_map(unsigned int max_cpus);
+
void smp_send_nmi_allbutself(void);
void send_IPI_mask(const cpumask_t *mask, int vector);
--- 2010-05-04.orig/xen/include/xen/smp.h 2010-05-04 16:04:09.000000000
+0200
+++ 2010-05-04/xen/include/xen/smp.h 2010-05-04 13:22:06.000000000 +0200
@@ -26,7 +26,7 @@ extern int __cpu_up(unsigned int cpunum)
/*
* Final polishing of CPUs
*/
-extern void smp_cpus_done(unsigned int max_cpus);
+extern void smp_cpus_done(void);
/*
* Call a function on all other processors
--- 2010-05-04.orig/xen/include/xen/tasklet.h 2010-04-22 14:43:25.000000000
+0200
+++ 2010-05-04/xen/include/xen/tasklet.h 2010-05-04 16:42:13.000000000
+0200
@@ -32,6 +32,7 @@ void tasklet_kill(struct tasklet *t);
void migrate_tasklets_from_cpu(unsigned int cpu);
void tasklet_init(
struct tasklet *t, void (*func)(unsigned long), unsigned long data);
+void tasklet_early_init(void);
void tasklet_subsys_init(void);
#endif /* __XEN_TASKLET_H__ */
x86-count-disabled-cpus.patch
Description: Text document
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|