WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Tue, 04 May 2010 17:00:35 +0100
Cc: xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Tue, 04 May 2010 09:01:39 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
With the larger default NR_CPUS config setting (and the more with
build time settings exceeding this default) the wasting of memory (and
potentially other resources) just because cpu_possible_map doesn't get
set up properly increases. Use Linux' (accordingly modified to fit
Xen) prefill_possible_map() to overcome this.

This makes necessary an adjustment to tasklet initialization (which
must not happen before cpu_possible_map is guaranteed to be fully set
up - according to my static code analysis this was a problem on ia64
anyway).

Tracing code also needed a minor adjustment, as it still had a simple
counted loop accessing per-CPU data in its body.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- 2010-05-04.orig/xen/arch/ia64/linux-xen/smpboot.c   2010-05-04 
16:04:09.000000000 +0200
+++ 2010-05-04/xen/arch/ia64/linux-xen/smpboot.c        2010-05-04 
13:22:06.000000000 +0200
@@ -776,7 +776,7 @@ void __cpu_die(unsigned int cpu)
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void
-smp_cpus_done (unsigned int dummy)
+smp_cpus_done(void)
 {
        int cpu;
        unsigned long bogosum = 0;
--- 2010-05-04.orig/xen/arch/ia64/xen/xensetup.c        2010-05-04 
16:04:09.000000000 +0200
+++ 2010-05-04/xen/arch/ia64/xen/xensetup.c     2010-05-04 16:42:36.000000000 
+0200
@@ -562,10 +562,12 @@ skip_move:
     end_boot_allocator();
 
     softirq_init();
-    tasklet_subsys_init();
+    tasklet_early_init();
 
     late_setup_arch(&cmdline);
 
+    tasklet_subsys_init();
+
     scheduler_init();
     idle_vcpu[0] = (struct vcpu*) ia64_r13;
     idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
@@ -626,7 +628,7 @@ printk("num_online_cpus=%d, max_cpus=%d\
     local_irq_disable();
 
     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
-    smp_cpus_done(max_cpus);
+    smp_cpus_done();
 #endif
 
     initialise_gdb(); /* could be moved earlier */
--- 2010-05-04.orig/xen/arch/x86/mpparse.c      2010-05-04 16:04:09.000000000 
+0200
+++ 2010-05-04/xen/arch/x86/mpparse.c   2010-05-04 13:22:06.000000000 +0200
@@ -35,7 +35,6 @@
 
 /* Have we found an MP table */
 int smp_found_config;
-unsigned int __devinitdata maxcpus = NR_CPUS;
 
 /*
  * Various Linux-internal data structures created from the
@@ -66,7 +65,7 @@ unsigned int def_to_bigsmp = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __devinitdata num_processors;
 
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
@@ -105,8 +104,10 @@ static int __devinit MP_processor_info (
        int ver, apicid, cpu = 0;
        physid_mask_t phys_cpu;
        
-       if (!(m->mpc_cpuflag & CPU_ENABLED))
+       if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+               ++disabled_cpus;
                return -EINVAL;
+       }
 
        apicid = mpc_apic_id(m, translation_table[mpc_record]);
 
@@ -185,9 +186,9 @@ static int __devinit MP_processor_info (
                return -ENOSPC;
        }
 
-       if (num_processors >= maxcpus) {
-               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                       " Processor ignored.\n", maxcpus);
+       if (max_cpus && num_processors >= max_cpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %u reached."
+                      " Processor ignored.\n", max_cpus);
                return -ENOSPC;
        }
 
--- 2010-05-04.orig/xen/arch/x86/setup.c        2010-05-04 16:04:09.000000000 
+0200
+++ 2010-05-04/xen/arch/x86/setup.c     2010-05-04 16:43:22.000000000 +0200
@@ -61,7 +61,7 @@ static int __initdata opt_nosmp = 0;
 boolean_param("nosmp", opt_nosmp);
 
 /* maxcpus: maximum number of CPUs to activate. */
-static unsigned int __initdata max_cpus = NR_CPUS;
+unsigned int __devinitdata max_cpus;
 integer_param("maxcpus", max_cpus);
 
 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
@@ -568,6 +568,11 @@ void __init __start_xen(unsigned long mb
     if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
         EARLY_FAIL("Misaligned CPU0 stack.\n");
 
+    if ( opt_nosmp )
+        max_cpus = prefill_possible_map(1);
+    else if ( max_cpus )
+        max_cpus = prefill_possible_map(max_cpus);
+
     if ( e820_raw_nr != 0 )
     {
         memmap_type = "Xen-e820";
@@ -978,7 +983,7 @@ void __init __start_xen(unsigned long mb
 #endif
 
     softirq_init();
-    tasklet_subsys_init();
+    tasklet_early_init();
 
     early_cpu_init();
 
@@ -1017,6 +1022,11 @@ void __init __start_xen(unsigned long mb
     zap_low_mappings();
 #endif
 
+    if ( !max_cpus )
+        max_cpus = prefill_possible_map(0);
+
+    tasklet_subsys_init();
+
     init_apic_mappings();
 
     percpu_free_unused_areas();
@@ -1049,12 +1059,9 @@ void __init __start_xen(unsigned long mb
     vesa_mtrr_init();
 #endif
 
-    if ( opt_nosmp )
-        max_cpus = 0;
-
     iommu_setup();    /* setup iommu if available */
 
-    smp_prepare_cpus(max_cpus);
+    smp_prepare_cpus(!opt_nosmp * max_cpus);
 
     spin_debug_enable();
 
@@ -1087,7 +1094,7 @@ void __init __start_xen(unsigned long mb
     }
 
     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
-    smp_cpus_done(max_cpus);
+    smp_cpus_done();
 
     initialise_gdb(); /* could be moved earlier */
 
--- 2010-05-04.orig/xen/arch/x86/smpboot.c      2010-05-04 16:04:09.000000000 
+0200
+++ 2010-05-04/xen/arch/x86/smpboot.c   2010-05-04 13:22:06.000000000 +0200
@@ -83,10 +83,12 @@ EXPORT_SYMBOL(cpu_online_map);
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 EXPORT_SYMBOL(cpu_callout_map);
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
+cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
+unsigned int __devinitdata disabled_cpus;
+
 /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
  * is no way to resync one AP against BP. TBD: for prescott and above, we
  * should use IA64's algorithm
@@ -829,7 +831,11 @@ int alloc_cpu_id(void)
 {
        cpumask_t       tmp_map;
        int cpu;
-       cpus_complement(tmp_map, cpu_present_map);
+
+       if (max_cpus)
+               cpus_andnot(tmp_map, cpu_possible_map, cpu_present_map);
+       else
+               cpus_complement(tmp_map, cpu_present_map);
        cpu = first_cpu(tmp_map);
        if (cpu >= NR_CPUS)
                return -ENODEV;
@@ -1243,6 +1249,52 @@ void __devinit smp_prepare_boot_cpu(void
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
+/*
+ * cpu_possible_mask should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_mask on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - The user can overwrite it with max_cpus=NUM
+ * - Otherwise don't reserve additional CPUs.
+ * We do this because additional CPUs waste a lot of memory.
+ * -AK
+ */
+unsigned int __init prefill_possible_map(unsigned int max_cpus)
+{
+       unsigned int i, possible;
+
+       /* no processor from mptable or madt */
+       if (!num_processors)
+               num_processors = 1;
+
+       if (!max_cpus)
+               possible = num_processors + disabled_cpus;
+       else
+               possible = max_cpus;
+
+       if (possible > NR_CPUS) {
+               printk(KERN_WARNING
+                      "%u processors exceeds NR_CPUS limit of %d\n",
+                      possible, NR_CPUS);
+               possible = NR_CPUS;
+       }
+
+       printk(KERN_INFO "SMP: Allowing %u CPUs, %d hotplug CPUs\n",
+              possible, max_t(int, possible - num_processors, 0));
+
+       for (i = 0; i < possible; i++)
+               cpu_set(i, cpu_possible_map);
+
+       return possible;
+}
+
 static void
 remove_siblinginfo(int cpu)
 {
@@ -1568,7 +1620,7 @@ int __devinit __cpu_up(unsigned int cpu)
 }
 
 
-void __init smp_cpus_done(unsigned int max_cpus)
+void __init smp_cpus_done(void)
 {
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
--- 2010-05-04.orig/xen/common/tasklet.c        2010-04-22 14:43:25.000000000 
+0200
+++ 2010-05-04/xen/common/tasklet.c     2010-05-04 16:46:03.000000000 +0200
@@ -18,7 +18,7 @@
 #include <xen/tasklet.h>
 
 /* Some subsystems call into us before we are initialised. We ignore them. */
-static bool_t tasklets_initialised;
+static unsigned int __read_mostly tasklets_initialised = UINT_MAX;
 
 /*
  * NB. Any modification to a tasklet_list requires the scheduler to run
@@ -35,7 +35,8 @@ void tasklet_schedule_on_cpu(struct task
 
     spin_lock_irqsave(&tasklet_lock, flags);
 
-    if ( tasklets_initialised && !t->is_dead )
+    if ( (tasklets_initialised == NR_CPUS || tasklets_initialised == cpu) &&
+         !t->is_dead )
     {
         t->scheduled_on = cpu;
         if ( !t->is_running )
@@ -161,14 +162,24 @@ void tasklet_init(
     t->data = data;
 }
 
+void __init tasklet_early_init(void)
+{
+    unsigned int cpu = smp_processor_id();
+
+    INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
+
+    tasklets_initialised = cpu;
+}
+
 void __init tasklet_subsys_init(void)
 {
     unsigned int cpu;
 
     for_each_possible_cpu ( cpu )
-        INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
+        if ( cpu != tasklets_initialised )
+            INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
 
-    tasklets_initialised = 1;
+    tasklets_initialised = NR_CPUS;
 }
 
 /*
--- 2010-05-04.orig/xen/common/trace.c  2010-04-22 14:43:25.000000000 +0200
+++ 2010-05-04/xen/common/trace.c       2010-05-04 16:26:50.000000000 +0200
@@ -289,7 +289,7 @@ void __init init_trace_bufs(void)
         return;
     }
 
-    for(i = 0; i < NR_CPUS; i++)
+    for_each_possible_cpu(i)
         spin_lock_init(&per_cpu(t_lock, i));
 
     for(i=0; i<T_INFO_PAGES; i++)
--- 2010-05-04.orig/xen/include/asm-x86/smp.h   2010-05-04 16:04:09.000000000 
+0200
+++ 2010-05-04/xen/include/asm-x86/smp.h        2010-05-04 13:22:06.000000000 
+0200
@@ -34,6 +34,10 @@ extern void smp_alloc_memory(void);
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 
+extern unsigned int max_cpus, num_processors, disabled_cpus;
+
+unsigned int prefill_possible_map(unsigned int max_cpus);
+
 void smp_send_nmi_allbutself(void);
 
 void  send_IPI_mask(const cpumask_t *mask, int vector);
--- 2010-05-04.orig/xen/include/xen/smp.h       2010-05-04 16:04:09.000000000 
+0200
+++ 2010-05-04/xen/include/xen/smp.h    2010-05-04 13:22:06.000000000 +0200
@@ -26,7 +26,7 @@ extern int __cpu_up(unsigned int cpunum)
 /*
  * Final polishing of CPUs
  */
-extern void smp_cpus_done(unsigned int max_cpus);
+extern void smp_cpus_done(void);
 
 /*
  * Call a function on all other processors
--- 2010-05-04.orig/xen/include/xen/tasklet.h   2010-04-22 14:43:25.000000000 
+0200
+++ 2010-05-04/xen/include/xen/tasklet.h        2010-05-04 16:42:13.000000000 
+0200
@@ -32,6 +32,7 @@ void tasklet_kill(struct tasklet *t);
 void migrate_tasklets_from_cpu(unsigned int cpu);
 void tasklet_init(
     struct tasklet *t, void (*func)(unsigned long), unsigned long data);
+void tasklet_early_init(void);
 void tasklet_subsys_init(void);
 
 #endif /* __XEN_TASKLET_H__ */


Attachment: x86-count-disabled-cpus.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>