WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()

To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()
From: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Date: Fri, 14 May 2010 17:32:41 +0100
Cc: "xen-ia64-devel@xxxxxxxxxxxxxxxxxxx" <xen-ia64-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Fri, 14 May 2010 09:33:45 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4BE060C3020000780000135E@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcrroxFmIk9bAoJJR/ahee3OFGkXyQH4AEZi
Thread-topic: [Xen-devel] [PATCH] x86: introduce and use prefill_possible_map()
User-agent: Microsoft-Entourage/12.24.0.100205
Jan,

If you look in the current xen-unstable staging tree, you'll see I've added
a cpu notifier chain to Xen. We can use this to dynamically
allocate/initialise/de-allocate subsystem state in response to CPU hotplug
events, which is better than static initialisation off cpu_possible_map.

My aim is to get rid of cpu_possible_map from the x86 build altogether (so
it is not used in common or x86 code) and to restrict cpu_present_map only
to the CPU hotplug subsystem.

Next thing in my sights is to improve the percpu memory area management, but
any other uses of cpu_possible_map, cpu_present_map, or bad for-loops
0...NR_CPUS, please do provide patches to switch to cpu notifiers if you
have time!

 -- Keir

On 04/05/2010 17:00, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> With the larger default NR_CPUS config setting (and the more with
> build time settings exceeding this default) the wasting of memory (and
> potentially other resources) just because cpu_possible_map doesn't get
> set up properly increases. Use Linux' (accordingly modified to fit
> Xen) prefill_possible_map() to overcome this.
> 
> This makes necessary an adjustment to tasklet initialization (which
> must not happen before cpu_possible_map is guaranteed to be fully set
> up - according to my static code analysis this was a problem on ia64
> anyway).
> 
> Tracing code also needed a minor adjustment, as it still had a simple
> counted loop accessing per-CPU data in its body.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- 2010-05-04.orig/xen/arch/ia64/linux-xen/smpboot.c   2010-05-04
> 16:04:09.000000000 +0200
> +++ 2010-05-04/xen/arch/ia64/linux-xen/smpboot.c        2010-05-04
> 13:22:06.000000000 +0200
> @@ -776,7 +776,7 @@ void __cpu_die(unsigned int cpu)
>  #endif /* CONFIG_HOTPLUG_CPU */
> 
>  void
> -smp_cpus_done (unsigned int dummy)
> +smp_cpus_done(void)
>  {
>         int cpu;
>         unsigned long bogosum = 0;
> --- 2010-05-04.orig/xen/arch/ia64/xen/xensetup.c        2010-05-04
> 16:04:09.000000000 +0200
> +++ 2010-05-04/xen/arch/ia64/xen/xensetup.c     2010-05-04 16:42:36.000000000
> +0200
> @@ -562,10 +562,12 @@ skip_move:
>      end_boot_allocator();
> 
>      softirq_init();
> -    tasklet_subsys_init();
> +    tasklet_early_init();
> 
>      late_setup_arch(&cmdline);
> 
> +    tasklet_subsys_init();
> +
>      scheduler_init();
>      idle_vcpu[0] = (struct vcpu*) ia64_r13;
>      idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
> @@ -626,7 +628,7 @@ printk("num_online_cpus=%d, max_cpus=%d\
>      local_irq_disable();
> 
>      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
> -    smp_cpus_done(max_cpus);
> +    smp_cpus_done();
>  #endif
> 
>      initialise_gdb(); /* could be moved earlier */
> --- 2010-05-04.orig/xen/arch/x86/mpparse.c      2010-05-04 16:04:09.000000000
> +0200
> +++ 2010-05-04/xen/arch/x86/mpparse.c   2010-05-04 13:22:06.000000000 +0200
> @@ -35,7 +35,6 @@
> 
>  /* Have we found an MP table */
>  int smp_found_config;
> -unsigned int __devinitdata maxcpus = NR_CPUS;
> 
>  /*
>   * Various Linux-internal data structures created from the
> @@ -66,7 +65,7 @@ unsigned int def_to_bigsmp = 0;
>  /* Processor that is doing the boot up */
>  unsigned int boot_cpu_physical_apicid = -1U;
>  /* Internal processor count */
> -static unsigned int __devinitdata num_processors;
> +unsigned int __devinitdata num_processors;
> 
>  /* Bitmask of physically existing CPUs */
>  physid_mask_t phys_cpu_present_map;
> @@ -105,8 +104,10 @@ static int __devinit MP_processor_info (
>         int ver, apicid, cpu = 0;
>         physid_mask_t phys_cpu;
> 
> -       if (!(m->mpc_cpuflag & CPU_ENABLED))
> +       if (!(m->mpc_cpuflag & CPU_ENABLED)) {
> +               ++disabled_cpus;
>                 return -EINVAL;
> +       }
> 
>         apicid = mpc_apic_id(m, translation_table[mpc_record]);
> 
> @@ -185,9 +186,9 @@ static int __devinit MP_processor_info (
>                 return -ENOSPC;
>         }
> 
> -       if (num_processors >= maxcpus) {
> -               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
> -                       " Processor ignored.\n", maxcpus);
> +       if (max_cpus && num_processors >= max_cpus) {
> +               printk(KERN_WARNING "WARNING: maxcpus limit of %u reached."
> +                      " Processor ignored.\n", max_cpus);
>                 return -ENOSPC;
>         }
> 
> --- 2010-05-04.orig/xen/arch/x86/setup.c        2010-05-04 16:04:09.000000000
> +0200
> +++ 2010-05-04/xen/arch/x86/setup.c     2010-05-04 16:43:22.000000000 +0200
> @@ -61,7 +61,7 @@ static int __initdata opt_nosmp = 0;
>  boolean_param("nosmp", opt_nosmp);
> 
>  /* maxcpus: maximum number of CPUs to activate. */
> -static unsigned int __initdata max_cpus = NR_CPUS;
> +unsigned int __devinitdata max_cpus;
>  integer_param("maxcpus", max_cpus);
> 
>  /* opt_watchdog: If true, run a watchdog NMI on each processor. */
> @@ -568,6 +568,11 @@ void __init __start_xen(unsigned long mb
>      if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
>          EARLY_FAIL("Misaligned CPU0 stack.\n");
> 
> +    if ( opt_nosmp )
> +        max_cpus = prefill_possible_map(1);
> +    else if ( max_cpus )
> +        max_cpus = prefill_possible_map(max_cpus);
> +
>      if ( e820_raw_nr != 0 )
>      {
>          memmap_type = "Xen-e820";
> @@ -978,7 +983,7 @@ void __init __start_xen(unsigned long mb
>  #endif
> 
>      softirq_init();
> -    tasklet_subsys_init();
> +    tasklet_early_init();
> 
>      early_cpu_init();
> 
> @@ -1017,6 +1022,11 @@ void __init __start_xen(unsigned long mb
>      zap_low_mappings();
>  #endif
> 
> +    if ( !max_cpus )
> +        max_cpus = prefill_possible_map(0);
> +
> +    tasklet_subsys_init();
> +
>      init_apic_mappings();
> 
>      percpu_free_unused_areas();
> @@ -1049,12 +1059,9 @@ void __init __start_xen(unsigned long mb
>      vesa_mtrr_init();
>  #endif
> 
> -    if ( opt_nosmp )
> -        max_cpus = 0;
> -
>      iommu_setup();    /* setup iommu if available */
> 
> -    smp_prepare_cpus(max_cpus);
> +    smp_prepare_cpus(!opt_nosmp * max_cpus);
> 
>      spin_debug_enable();
> 
> @@ -1087,7 +1094,7 @@ void __init __start_xen(unsigned long mb
>      }
> 
>      printk("Brought up %ld CPUs\n", (long)num_online_cpus());
> -    smp_cpus_done(max_cpus);
> +    smp_cpus_done();
> 
>      initialise_gdb(); /* could be moved earlier */
> 
> --- 2010-05-04.orig/xen/arch/x86/smpboot.c      2010-05-04 16:04:09.000000000
> +0200
> +++ 2010-05-04/xen/arch/x86/smpboot.c   2010-05-04 13:22:06.000000000 +0200
> @@ -83,10 +83,12 @@ EXPORT_SYMBOL(cpu_online_map);
>  cpumask_t cpu_callin_map;
>  cpumask_t cpu_callout_map;
>  EXPORT_SYMBOL(cpu_callout_map);
> -cpumask_t cpu_possible_map = CPU_MASK_ALL;
> +cpumask_t cpu_possible_map;
>  EXPORT_SYMBOL(cpu_possible_map);
>  static cpumask_t smp_commenced_mask;
> 
> +unsigned int __devinitdata disabled_cpus;
> +
>  /* TSC's upper 32 bits can't be written in eariler CPU (before prescott),
> there
>   * is no way to resync one AP against BP. TBD: for prescott and above, we
>   * should use IA64's algorithm
> @@ -829,7 +831,11 @@ int alloc_cpu_id(void)
>  {
>         cpumask_t       tmp_map;
>         int cpu;
> -       cpus_complement(tmp_map, cpu_present_map);
> +
> +       if (max_cpus)
> +               cpus_andnot(tmp_map, cpu_possible_map, cpu_present_map);
> +       else
> +               cpus_complement(tmp_map, cpu_present_map);
>         cpu = first_cpu(tmp_map);
>         if (cpu >= NR_CPUS)
>                 return -ENODEV;
> @@ -1243,6 +1249,52 @@ void __devinit smp_prepare_boot_cpu(void
>         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
>  }
> 
> +/*
> + * cpu_possible_mask should be static, it cannot change as cpu's
> + * are onlined, or offlined. The reason is per-cpu data-structures
> + * are allocated by some modules at init time, and dont expect to
> + * do this dynamically on cpu arrival/departure.
> + * cpu_present_mask on the other hand can change dynamically.
> + * In case when cpu_hotplug is not compiled, then we resort to current
> + * behaviour, which is cpu_possible == cpu_present.
> + * - Ashok Raj
> + *
> + * Three ways to find out the number of additional hotplug CPUs:
> + * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
> + * - The user can overwrite it with max_cpus=NUM
> + * - Otherwise don't reserve additional CPUs.
> + * We do this because additional CPUs waste a lot of memory.
> + * -AK
> + */
> +unsigned int __init prefill_possible_map(unsigned int max_cpus)
> +{
> +       unsigned int i, possible;
> +
> +       /* no processor from mptable or madt */
> +       if (!num_processors)
> +               num_processors = 1;
> +
> +       if (!max_cpus)
> +               possible = num_processors + disabled_cpus;
> +       else
> +               possible = max_cpus;
> +
> +       if (possible > NR_CPUS) {
> +               printk(KERN_WARNING
> +                      "%u processors exceeds NR_CPUS limit of %d\n",
> +                      possible, NR_CPUS);
> +               possible = NR_CPUS;
> +       }
> +
> +       printk(KERN_INFO "SMP: Allowing %u CPUs, %d hotplug CPUs\n",
> +              possible, max_t(int, possible - num_processors, 0));
> +
> +       for (i = 0; i < possible; i++)
> +               cpu_set(i, cpu_possible_map);
> +
> +       return possible;
> +}
> +
>  static void
>  remove_siblinginfo(int cpu)
>  {
> @@ -1568,7 +1620,7 @@ int __devinit __cpu_up(unsigned int cpu)
>  }
> 
> 
> -void __init smp_cpus_done(unsigned int max_cpus)
> +void __init smp_cpus_done(void)
>  {
>  #ifdef CONFIG_X86_IO_APIC
>         setup_ioapic_dest();
> --- 2010-05-04.orig/xen/common/tasklet.c        2010-04-22 14:43:25.000000000
> +0200
> +++ 2010-05-04/xen/common/tasklet.c     2010-05-04 16:46:03.000000000 +0200
> @@ -18,7 +18,7 @@
>  #include <xen/tasklet.h>
> 
>  /* Some subsystems call into us before we are initialised. We ignore them. */
> -static bool_t tasklets_initialised;
> +static unsigned int __read_mostly tasklets_initialised = UINT_MAX;
> 
>  /*
>   * NB. Any modification to a tasklet_list requires the scheduler to run
> @@ -35,7 +35,8 @@ void tasklet_schedule_on_cpu(struct task
> 
>      spin_lock_irqsave(&tasklet_lock, flags);
> 
> -    if ( tasklets_initialised && !t->is_dead )
> +    if ( (tasklets_initialised == NR_CPUS || tasklets_initialised == cpu) &&
> +         !t->is_dead )
>      {
>          t->scheduled_on = cpu;
>          if ( !t->is_running )
> @@ -161,14 +162,24 @@ void tasklet_init(
>      t->data = data;
>  }
> 
> +void __init tasklet_early_init(void)
> +{
> +    unsigned int cpu = smp_processor_id();
> +
> +    INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
> +
> +    tasklets_initialised = cpu;
> +}
> +
>  void __init tasklet_subsys_init(void)
>  {
>      unsigned int cpu;
> 
>      for_each_possible_cpu ( cpu )
> -        INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
> +        if ( cpu != tasklets_initialised )
> +            INIT_LIST_HEAD(&per_cpu(tasklet_list, cpu));
> 
> -    tasklets_initialised = 1;
> +    tasklets_initialised = NR_CPUS;
>  }
> 
>  /*
> --- 2010-05-04.orig/xen/common/trace.c  2010-04-22 14:43:25.000000000 +0200
> +++ 2010-05-04/xen/common/trace.c       2010-05-04 16:26:50.000000000 +0200
> @@ -289,7 +289,7 @@ void __init init_trace_bufs(void)
>          return;
>      }
> 
> -    for(i = 0; i < NR_CPUS; i++)
> +    for_each_possible_cpu(i)
>          spin_lock_init(&per_cpu(t_lock, i));
> 
>      for(i=0; i<T_INFO_PAGES; i++)
> --- 2010-05-04.orig/xen/include/asm-x86/smp.h   2010-05-04 16:04:09.000000000
> +0200
> +++ 2010-05-04/xen/include/asm-x86/smp.h        2010-05-04 13:22:06.000000000
> +0200
> @@ -34,6 +34,10 @@ extern void smp_alloc_memory(void);
>  DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
>  DECLARE_PER_CPU(cpumask_t, cpu_core_map);
> 
> +extern unsigned int max_cpus, num_processors, disabled_cpus;
> +
> +unsigned int prefill_possible_map(unsigned int max_cpus);
> +
>  void smp_send_nmi_allbutself(void);
> 
>  void  send_IPI_mask(const cpumask_t *mask, int vector);
> --- 2010-05-04.orig/xen/include/xen/smp.h       2010-05-04 16:04:09.000000000
> +0200
> +++ 2010-05-04/xen/include/xen/smp.h    2010-05-04 13:22:06.000000000 +0200
> @@ -26,7 +26,7 @@ extern int __cpu_up(unsigned int cpunum)
>  /*
>   * Final polishing of CPUs
>   */
> -extern void smp_cpus_done(unsigned int max_cpus);
> +extern void smp_cpus_done(void);
> 
>  /*
>   * Call a function on all other processors
> --- 2010-05-04.orig/xen/include/xen/tasklet.h   2010-04-22 14:43:25.000000000
> +0200
> +++ 2010-05-04/xen/include/xen/tasklet.h        2010-05-04 16:42:13.000000000
> +0200
> @@ -32,6 +32,7 @@ void tasklet_kill(struct tasklet *t);
>  void migrate_tasklets_from_cpu(unsigned int cpu);
>  void tasklet_init(
>      struct tasklet *t, void (*func)(unsigned long), unsigned long data);
> +void tasklet_early_init(void);
>  void tasklet_subsys_init(void);
> 
>  #endif /* __XEN_TASKLET_H__ */
> 
> 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

[Prev in Thread] Current Thread [Next in Thread]