This is a good thing to have, and my main comments are stylistic. Firstly, I
think the name and interface change compared with Linux's stop_machine
mechanism is gratuitous. I would prefer us to have the same interface (and
also probably use the same or similar names throughout the implementation,
where appropriate). I don't think the more flexible interface expressed here
is useful -- certainly finding an application for rendezvousing a set of
CPUs and running a function on a subset of those, with optional irq disable,
is a bit of a brain bender. :-) So, stick to stop_machine interface,
semantics, and naming....
Also, I'd prefer this to be implemented in common/stop_machine.c if at all
possible. It's not really x86 specific. Certainly I do not want it in smp.c,
as that file is full enough already of random cruft with no other home.
Oh, also I think you are missing local_irq_disable() on the CPU that calls
on_rendezvous_cpus(). Like the Linux implementation you should probably do
it at the same time you signal other CPUs to do so.
Apart from that it's a good idea, and I'll look more closely at how you tie
it in to CPU hotplug when you resubmit it.
Thanks,
Keir
On 2/2/08 08:11, "Tian, Kevin" <kevin.tian@xxxxxxxxx> wrote:
> Rendezvous selected cpus in softirq
>
> This is similar to stop_machine_run stub from Linux, to pull
> selected cpus in rendezvous point and then do some batch work
> under a safe environment. Current one usage is from S3 path,
> where individual cpu is pulled down with related online
> footprints being cleared. It's dangerous to have other cpus
> checking clobbered data structure in the middle, such as
> cpu_online_map, cpu_sibling_map, etc.
>
> Signed-off-by Kevin Tian <kevin.tian@xxxxxxxxx>
>
> diff -r 667d1a73d2ca xen/arch/x86/domain.c
> --- a/xen/arch/x86/domain.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/domain.c Sat Feb 02 14:11:45 2008 +0800
> @@ -82,7 +82,6 @@ static void default_idle(void)
>
> static void play_dead(void)
> {
> - __cpu_disable();
> /* This must be done before dead CPU ack */
> cpu_exit_clear();
> hvm_cpu_down();
> diff -r 667d1a73d2ca xen/arch/x86/smp.c
> --- a/xen/arch/x86/smp.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smp.c Sat Feb 02 14:11:30 2008 +0800
> @@ -22,6 +22,7 @@
> #include <asm/ipi.h>
> #include <asm/hvm/support.h>
> #include <mach_apic.h>
> +#include <xen/softirq.h>
>
> /*
> * Some notes on x86 processor bugs affecting SMP operation:
> @@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
>
> irq_exit();
> }
> +
> +enum rendezvous_action {
> + RENDEZVOUS_START,
> + RENDEZVOUS_DISABLE_IRQ,
> + RENDEZVOUS_INVOKE,
> + RENDEZVOUS_EXIT,
> +};
> +
> +struct rendezvous_data {
> + void (*fn) (void *);
> + void *data;
> + cpumask_t selected;
> + cpumask_t fn_mask;
> + atomic_t done;
> + enum rendezvous_action action;
> +};
> +
> +static struct rendezvous_data *rdz_data;
> +static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
> +static void rendezvous_set_action(enum rendezvous_action action, int
> cpus)
> +{
> + atomic_set(&rdz_data->done, 0);
> + smp_wmb();
> + rdz_data->action = action;
> + while ( atomic_read(&rdz_data->done) != cpus )
> + cpu_relax();
> +}
> +
> +/* Rendezouvs selected cpus in softirq context and call (*fn) set in
> fn_mask */
> +int on_rendezvous_cpus (
> + cpumask_t selected,
> + cpumask_t fn_mask,
> + void (*fn) (void *),
> + void *data,
> + int disable_irq)
> +{
> + struct rendezvous_data rdz;
> + unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur =
> smp_processor_id();
> +
> + ASSERT(local_irq_is_enabled());
> + ASSERT(cpus_subset(fn_mask, selected));
> +
> + if ( cpus_weight(fn_mask) && !fn )
> + return -1;
> +
> + /* current cpu conducts the rendezvous process */
> + cpu_clear(cur, selected);
> + self = cpu_test_and_clear(cur, fn_mask);
> + nr_cpus = cpus_weight(selected);
> + nr_fn_cpus = cpus_weight(fn_mask);
> +
> + if ( nr_cpus == 0 )
> + {
> + if ( self )
> + (*fn)(data);
> + return 0;
> + }
> +
> + rdz.fn = fn;
> + rdz.data = data;
> + rdz.selected = selected;
> + rdz.fn_mask = fn_mask;
> + atomic_set(&rdz.done, 0);
> + rdz.action = RENDEZVOUS_START;
> +
> + /* Note: We shouldn't spin on lock when it's held by others since
> others
> + * is expecting this cpus to enter softirq context. Or else
> deadlock
> + * is caused.
> + */
> + if ( !spin_trylock(&rendezvous_lock) )
> + return -1;
> +
> + rdz_data = &rdz;
> + smp_wmb();
> +
> + for_each_cpu_mask(cpu, selected)
> + cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
> +
> + /* Wait all concerned cpu to enter rendezvous point */
> + while ( atomic_read(&rdz_data->done) != nr_cpus )
> + cpu_relax();
> +
> + if ( disable_irq )
> + rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
> +
> + if ( self )
> + (*fn)(data);
> +
> + /* Wait cpus to finish work if applicable */
> + if ( nr_fn_cpus != 0 )
> + rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
> +
> + rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
> + spin_unlock(&rendezvous_lock);
> + return 0;
> +}
> +
> +static void rendezvous_softirq(void)
> +{
> + int irq_disabled = 0;
> + int invoked = 0;
> + int required;
> +
> + ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
> +
> + required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
> + smp_mb();
> + atomic_inc(&rdz_data->done);
> +
> + while ( rdz_data->action != RENDEZVOUS_EXIT )
> + {
> + switch ( rdz_data->action )
> + {
> + case RENDEZVOUS_DISABLE_IRQ:
> + if ( !irq_disabled )
> + {
> + local_irq_disable();
> + irq_disabled = 1;
> + smp_mb();
> + atomic_inc(&rdz_data->done);
> + }
> + break;
> + case RENDEZVOUS_INVOKE:
> + if ( required && !invoked )
> + {
> + rdz_data->fn(rdz_data->data);
> + invoked = 1;
> + smp_mb();
> + atomic_inc(&rdz_data->done);
> + }
> + break;
> + default:
> + break;
> + }
> +
> + cpu_relax();
> + }
> +
> + smp_mb();
> + atomic_inc(&rdz_data->done);
> +}
> +
> +static int __init cpu_rendezvous_init(void)
> +{
> + open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
> + return 0;
> +}
> +__initcall(cpu_rendezvous_init);
> diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
> --- a/xen/arch/x86/smpboot.c Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/arch/x86/smpboot.c Sat Feb 02 14:11:30 2008 +0800
> @@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
> }
>
> extern void fixup_irqs(cpumask_t map);
> -int __cpu_disable(void)
> +void __cpu_disable(void *data)
> {
> cpumask_t map = cpu_online_map;
> int cpu = smp_processor_id();
> @@ -1206,7 +1206,15 @@ int __cpu_disable(void)
> * Especially so if we're not using an IOAPIC -zwane
> */
> if (cpu == 0)
> - return -EBUSY;
> + return;
> +
> + /* By far only S3 is using this path, and thus only idle vcpus
> + * are running on all APs when it's called. To support full
> + * cpu hotplug, other notification mechanisms should be
> introduced
> + * like to migrate vcpus out of this one before rendezvous point
> + */
> + if (!is_idle_vcpu(current))
> + return;
>
> local_irq_disable();
> clear_local_APIC();
> @@ -1223,7 +1231,6 @@ int __cpu_disable(void)
> fixup_irqs(map);
> /* It's now safe to remove this processor from the online map */
> cpu_clear(cpu, cpu_online_map);
> - return 0;
> }
>
> void __cpu_die(unsigned int cpu)
> @@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
> int cpu_down(unsigned int cpu)
> {
> int err = 0;
> - cpumask_t mask;
> + cpumask_t rmask = cpu_online_map;
> + cpumask_t mask = CPU_MASK_NONE;
>
> spin_lock(&cpu_add_remove_lock);
> if (num_online_cpus() == 1) {
> @@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
> }
>
> printk("Prepare to bring CPU%d down...\n", cpu);
> - /* Send notification to remote idle vcpu */
> - cpus_clear(mask);
> - cpu_set(cpu, mask);
> - per_cpu(cpu_state, cpu) = CPU_DYING;
> - smp_send_event_check_mask(mask);
> + cpu_clear(smp_processor_id(), rmask); /* all except self */
> + cpu_set(cpu, mask); /* cpu to be die */
> + on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
>
> __cpu_die(cpu);
>
> @@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
> cpus_clear(frozen_cpus);
> }
> #else /* ... !CONFIG_HOTPLUG_CPU */
> -int __cpu_disable(void)
> -{
> - return -ENOSYS;
> +void __cpu_disable(void *data)
> +{
> }
>
> void __cpu_die(unsigned int cpu)
> diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
> --- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
> @@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
>
> /* State of each CPU. */
> #define CPU_ONLINE 0x0002 /* CPU is up */
> -#define CPU_DYING 0x0003 /* CPU is requested to die */
> #define CPU_DEAD 0x0004 /* CPU is dead */
> DECLARE_PER_CPU(int, cpu_state);
>
> #ifdef CONFIG_HOTPLUG_CPU
> -#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) ==
> CPU_DYING)
> +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
> extern int cpu_down(unsigned int cpu);
> extern int cpu_up(unsigned int cpu);
> extern void cpu_exit_clear(void);
> @@ -102,8 +101,10 @@ static __inline int logical_smp_processo
>
> #endif
>
> -extern int __cpu_disable(void);
> +extern void __cpu_disable(void *data);
> extern void __cpu_die(unsigned int cpu);
> +extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
> + void (*fn) (void *), void *data, int disable_irq);
> #endif /* !__ASSEMBLY__ */
>
> #else /* CONFIG_SMP */
> diff -r 667d1a73d2ca xen/include/xen/softirq.h
> --- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
> +++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
> @@ -10,8 +10,9 @@
> #define PAGE_SCRUB_SOFTIRQ 5
> #define TRACE_SOFTIRQ 6
> #define RCU_SOFTIRQ 7
> +#define RENDEZVOUS_SOFTIRQ 8
>
> -#define NR_COMMON_SOFTIRQS 8
> +#define NR_COMMON_SOFTIRQS 9
>
> #include <asm/softirq.h>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|