WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] Rendezvous selected cpus

To: "Tian, Kevin" <kevin.tian@xxxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] Rendezvous selected cpus
From: Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>
Date: Sun, 03 Feb 2008 17:44:24 +0000
Delivery-date: Sun, 03 Feb 2008 09:44:23 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
In-reply-to: <D470B4E54465E3469E2ABBC5AFAC390F024D8F2D@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Achlcy9d/rIcXbNERJmDq57BBZNC3QBGTqlD
Thread-topic: [Xen-devel] [PATCH] Rendezvous selected cpus
User-agent: Microsoft-Entourage/11.3.6.070618
This is a good thing to have, and my main comments are stylistic. Firstly, I
think the name and interface change compared with Linux's stop_machine
mechanism is gratuitous. I would prefer us to have the same interface (and
also probably use the same or similar names throughout the implementation,
where appropriate). I don't think the more flexible interface expressed here
is useful -- certainly finding an application for rendezvousing a set of
CPUs and running a function on a subset of those, with optional irq disable,
is a bit of a brain bender. :-) So, stick to stop_machine interface,
semantics, and naming....

Also, I'd prefer this to be implemented in common/stop_machine.c if at all
possible. It's not really x86 specific. Certainly I do not want it in smp.c,
as that file is full enough already of random cruft with no other home.

Oh, also I think you are missing local_irq_disable() on the CPU that calls
on_rendezvous_cpus(). Like the Linux implementation, you should probably do
it at the same time you signal other CPUs to do so.

Apart from that it's a good idea, and I'll look more closely at how you tie
it in to CPU hotplug when you resubmit it.

 Thanks,
 Keir

On 2/2/08 08:11, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:

> Rendezvous selected cpus in softirq
> 
> This is similar to stop_machine_run stub from Linux, to pull
> selected cpus in rendezvous point and then do some batch work
> under a safe environment. Current one usage is from S3 path,
> where individual cpu is pulled down with related online
> footprints being cleared. It's dangerous to have other cpus
> checking clobbered data structure in the middle, such as
> cpu_online_map, cpu_sibling_map, etc.
> 
> Signed-off-by Kevin Tian <kevin.tian@xxxxxxxxx>
> 
> diff -r 667d1a73d2ca xen/arch/x86/domain.c
> --- a/xen/arch/x86/domain.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/domain.c Sat Feb 02 14:11:45 2008 +0800
> @@ -82,7 +82,6 @@ static void default_idle(void)
>  
>  static void play_dead(void)
>  {
> -    __cpu_disable();
>      /* This must be done before dead CPU ack */
>      cpu_exit_clear();
>      hvm_cpu_down();
> diff -r 667d1a73d2ca xen/arch/x86/smp.c
> --- a/xen/arch/x86/smp.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smp.c Sat Feb 02 14:11:30 2008 +0800
> @@ -22,6 +22,7 @@
>  #include <asm/ipi.h>
>  #include <asm/hvm/support.h>
>  #include <mach_apic.h>
> +#include <xen/softirq.h>
>  
>  /*
>   * Some notes on x86 processor bugs affecting SMP operation:
> @@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
>  
>      irq_exit();
>  }
> +
> +enum rendezvous_action {
> +    RENDEZVOUS_START,
> +    RENDEZVOUS_DISABLE_IRQ,
> +    RENDEZVOUS_INVOKE,
> +    RENDEZVOUS_EXIT,
> +};
> +
> +struct rendezvous_data {
> +    void (*fn) (void *);
> +    void *data;
> +    cpumask_t selected;
> +    cpumask_t fn_mask;
> +    atomic_t  done;
> +    enum rendezvous_action action;
> +};
> +
> +static struct rendezvous_data *rdz_data;
> +static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
> +static void rendezvous_set_action(enum rendezvous_action action, int
> cpus)
> +{
> +    atomic_set(&rdz_data->done, 0);
> +    smp_wmb();
> +    rdz_data->action = action;
> +    while ( atomic_read(&rdz_data->done) != cpus )
> +        cpu_relax();
> +}
> +
> +/* Rendezvous selected cpus in softirq context and call (*fn) set in
> fn_mask */ 
> +int on_rendezvous_cpus (
> +    cpumask_t selected,
> +    cpumask_t fn_mask,
> +    void (*fn) (void *),
> +    void *data,
> +    int disable_irq)
> +{
> +    struct rendezvous_data rdz;
> +    unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur =
> smp_processor_id();
> +
> +    ASSERT(local_irq_is_enabled());
> +    ASSERT(cpus_subset(fn_mask, selected));
> +
> +    if ( cpus_weight(fn_mask) && !fn )
> +        return -1;
> +
> +    /* current cpu conducts the rendezvous process */
> +    cpu_clear(cur, selected);
> +    self = cpu_test_and_clear(cur, fn_mask);
> +    nr_cpus = cpus_weight(selected);
> +    nr_fn_cpus = cpus_weight(fn_mask);
> +
> +    if ( nr_cpus == 0 )
> +    {
> +        if ( self )
> +            (*fn)(data);
> +        return 0;
> +    }
> +
> +    rdz.fn = fn;
> +    rdz.data = data;
> +    rdz.selected = selected;
> +    rdz.fn_mask = fn_mask;
> +    atomic_set(&rdz.done, 0);
> +    rdz.action = RENDEZVOUS_START;
> +
> +    /* Note: We shouldn't spin on lock when it's held by others since
> others
> +     * is expecting this cpus to enter softirq context. Or else
> deadlock
> +     * is caused.
> +     */
> +    if ( !spin_trylock(&rendezvous_lock) )
> +        return -1;
> +
> +    rdz_data = &rdz;
> +    smp_wmb();
> +
> +    for_each_cpu_mask(cpu, selected)
> +        cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
> +
> +    /* Wait all concerned cpu to enter rendezvous point */
> +    while ( atomic_read(&rdz_data->done) != nr_cpus )
> +        cpu_relax();
> +
> +    if ( disable_irq )
> +        rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
> +
> +    if ( self )
> +        (*fn)(data);
> +
> +    /* Wait cpus to finish work if applicable */
> +    if ( nr_fn_cpus != 0 )
> +        rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
> +
> +    rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
> +    spin_unlock(&rendezvous_lock);
> +    return 0;
> +}
> +
> +static void rendezvous_softirq(void)
> +{
> +    int irq_disabled = 0;
> +    int invoked = 0;
> +    int required;
> +
> +    ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
> +
> +    required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
> +    smp_mb();
> +    atomic_inc(&rdz_data->done);
> +
> +    while ( rdz_data->action != RENDEZVOUS_EXIT )
> +    {
> +        switch ( rdz_data->action )
> +        {
> +        case RENDEZVOUS_DISABLE_IRQ:
> +            if ( !irq_disabled )
> +            {
> +                local_irq_disable();
> +                irq_disabled = 1;
> +                smp_mb();
> +                atomic_inc(&rdz_data->done);
> +            }
> +            break;
> +        case RENDEZVOUS_INVOKE:
> +            if ( required && !invoked )
> +            {
> +                rdz_data->fn(rdz_data->data);
> +                invoked = 1;
> +                smp_mb();
> +                atomic_inc(&rdz_data->done);
> +            }
> +            break;
> +        default:
> +            break;
> +        }
> +
> +        cpu_relax();
> +    }
> +
> +    smp_mb();
> +    atomic_inc(&rdz_data->done);
> +}
> +
> +static int __init cpu_rendezvous_init(void)
> +{
> +    open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
> +    return 0;
> +}
> +__initcall(cpu_rendezvous_init);
> diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
> --- a/xen/arch/x86/smpboot.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smpboot.c Sat Feb 02 14:11:30 2008 +0800
> @@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
>  }
>  
>  extern void fixup_irqs(cpumask_t map);
> -int __cpu_disable(void)
> +void __cpu_disable(void *data)
>  {
> cpumask_t map = cpu_online_map;
> int cpu = smp_processor_id();
> @@ -1206,7 +1206,15 @@ int __cpu_disable(void)
> * Especially so if we're not using an IOAPIC -zwane
> */
> if (cpu == 0)
> -  return -EBUSY;
> +  return;
> +
> + /* By far only S3 is using this path, and thus only idle vcpus
> +  * are running on all APs when it's called. To support full
> +  * cpu hotplug, other notification mechanisms should be
> introduced
> +  * like to migrate vcpus out of this one before rendezvous point
> +  */
> + if (!is_idle_vcpu(current))
> +  return;
>  
> local_irq_disable();
> clear_local_APIC();
> @@ -1223,7 +1231,6 @@ int __cpu_disable(void)
> fixup_irqs(map);
> /* It's now safe to remove this processor from the online map */
> cpu_clear(cpu, cpu_online_map);
> - return 0;
>  }
>  
>  void __cpu_die(unsigned int cpu)
> @@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
>  int cpu_down(unsigned int cpu)
>  {
> int err = 0;
> - cpumask_t mask;
> + cpumask_t rmask = cpu_online_map;
> + cpumask_t mask = CPU_MASK_NONE;
>  
> spin_lock(&cpu_add_remove_lock);
> if (num_online_cpus() == 1) {
> @@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
> }
>  
> printk("Prepare to bring CPU%d down...\n", cpu);
> - /* Send notification to remote idle vcpu */
> - cpus_clear(mask);
> - cpu_set(cpu, mask);
> - per_cpu(cpu_state, cpu) = CPU_DYING;
> - smp_send_event_check_mask(mask);
> + cpu_clear(smp_processor_id(), rmask); /* all except self */
> + cpu_set(cpu, mask); /* cpu to be die */
> + on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
>  
> __cpu_die(cpu);
>  
> @@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
> cpus_clear(frozen_cpus);
>  }
>  #else /* ... !CONFIG_HOTPLUG_CPU */
> -int __cpu_disable(void)
> -{
> - return -ENOSYS;
> +void __cpu_disable(void *data)
> +{
>  }
>  
>  void __cpu_die(unsigned int cpu)
> diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
> --- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
> @@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
>  
>  /* State of each CPU. */
>  #define CPU_ONLINE 0x0002 /* CPU is up */
> -#define CPU_DYING 0x0003 /* CPU is requested to die */
>  #define CPU_DEAD 0x0004 /* CPU is dead */
>  DECLARE_PER_CPU(int, cpu_state);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> -#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) ==
> CPU_DYING)
> +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
>  extern int cpu_down(unsigned int cpu);
>  extern int cpu_up(unsigned int cpu);
>  extern void cpu_exit_clear(void);
> @@ -102,8 +101,10 @@ static __inline int logical_smp_processo
>  
>  #endif
>  
> -extern int __cpu_disable(void);
> +extern void __cpu_disable(void *data);
>  extern void __cpu_die(unsigned int cpu);
> +extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
> +    void (*fn) (void *), void *data, int disable_irq);
>  #endif /* !__ASSEMBLY__ */
>  
>  #else /* CONFIG_SMP */
> diff -r 667d1a73d2ca xen/include/xen/softirq.h
> --- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
> @@ -10,8 +10,9 @@
>  #define PAGE_SCRUB_SOFTIRQ                5
>  #define TRACE_SOFTIRQ                     6
>  #define RCU_SOFTIRQ                       7
> +#define RENDEZVOUS_SOFTIRQ                8
>  
> -#define NR_COMMON_SOFTIRQS                8
> +#define NR_COMMON_SOFTIRQS                9
>  
>  #include <asm/softirq.h>
>  
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel