[Xen-devel] [PATCH] Rendezvous selected cpus

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] Rendezvous selected cpus
From: "Tian, Kevin" <kevin.tian@xxxxxxxxx>
Date: Sat, 2 Feb 2008 16:11:17 +0800
Delivery-date: Sat, 02 Feb 2008 00:13:16 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Achlcy9d/rIcXbNERJmDq57BBZNC3Q==
Thread-topic: [PATCH] Rendezvous selected cpus
Rendezvous selected cpus in softirq

This is similar to the stop_machine_run stub in Linux: it pulls the
selected cpus into a rendezvous point and then does some batch work
in a safe environment. The only current user is the S3 path, where
individual cpus are pulled down and their related online footprints
cleared. It is dangerous to have other cpus inspecting clobbered
data structures, such as cpu_online_map, cpu_sibling_map, etc., in
the middle of that.

Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
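
As a quick illustration (not part of the patch), here is a minimal sketch
of how a caller might use the new on_rendezvous_cpus() interface. The
names flush_state() and example_rendezvous() are hypothetical; the real
caller introduced by this patch is cpu_down(), which passes
__cpu_disable() as the callback.

/* Illustrative sketch only: a hypothetical caller of on_rendezvous_cpus(). */
static void flush_state(void *data)
{
    /* Runs on the cpus named in fn_mask while every selected cpu is
     * parked in the rendezvous softirq (with interrupts disabled here,
     * since the example passes disable_irq = 1).
     */
}

static int example_rendezvous(void)
{
    cpumask_t selected = cpu_online_map;      /* cpus to park */
    cpumask_t fn_mask  = CPU_MASK_NONE;       /* cpus that run the callback */

    cpu_clear(smp_processor_id(), selected);  /* park everyone but self */
    if ( cpus_empty(selected) )
        return 0;                             /* uniprocessor: nothing to do */
    cpu_set(first_cpu(selected), fn_mask);    /* fn_mask must be a subset
                                                 of selected */

    /* Park 'selected', disable interrupts on them (last argument = 1),
     * have the cpus in 'fn_mask' invoke flush_state(NULL), then release
     * everyone once the work is done.
     */
    return on_rendezvous_cpus(selected, fn_mask, flush_state, NULL, 1);
}

Note that the caller must have interrupts enabled, and because
on_rendezvous_cpus() only try-locks the rendezvous lock, it may return -1
if another rendezvous is already in progress.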

diff -r 667d1a73d2ca xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/domain.c     Sat Feb 02 14:11:45 2008 +0800
@@ -82,7 +82,6 @@ static void default_idle(void)
 
 static void play_dead(void)
 {
-    __cpu_disable();
     /* This must be done before dead CPU ack */
     cpu_exit_clear();
     hvm_cpu_down();
diff -r 667d1a73d2ca xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smp.c        Sat Feb 02 14:11:30 2008 +0800
@@ -22,6 +22,7 @@
 #include <asm/ipi.h>
 #include <asm/hvm/support.h>
 #include <mach_apic.h>
+#include <xen/softirq.h>
 
 /*
  *     Some notes on x86 processor bugs affecting SMP operation:
@@ -374,3 +375,151 @@ fastcall void smp_call_function_interrup
 
     irq_exit();
 }
+
+enum rendezvous_action {
+    RENDEZVOUS_START,
+    RENDEZVOUS_DISABLE_IRQ,
+    RENDEZVOUS_INVOKE,
+    RENDEZVOUS_EXIT,
+};
+
+struct rendezvous_data {
+    void (*fn) (void *);
+    void *data;
+    cpumask_t selected;
+    cpumask_t fn_mask;
+    atomic_t  done;
+    enum rendezvous_action action;
+};
+
+static struct rendezvous_data *rdz_data;
+static spinlock_t rendezvous_lock = SPIN_LOCK_UNLOCKED;
+static void rendezvous_set_action(enum rendezvous_action action, int cpus)
+{
+    atomic_set(&rdz_data->done, 0);
+    smp_wmb();
+    rdz_data->action = action;
+    while ( atomic_read(&rdz_data->done) != cpus )
+        cpu_relax();
+}
+
+/* Rendezvous selected cpus in softirq context and call (*fn) on cpus set in fn_mask */
+int on_rendezvous_cpus (
+    cpumask_t selected,
+    cpumask_t fn_mask,
+    void (*fn) (void *),
+    void *data,
+    int disable_irq)
+{
+    struct rendezvous_data rdz;
+    unsigned int nr_cpus, nr_fn_cpus, self, cpu, cur = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+    ASSERT(cpus_subset(fn_mask, selected));
+
+    if ( cpus_weight(fn_mask) && !fn )
+        return -1;
+
+    /* current cpu conducts the rendezvous process */
+    cpu_clear(cur, selected);
+    self = cpu_test_and_clear(cur, fn_mask);
+    nr_cpus = cpus_weight(selected);
+    nr_fn_cpus = cpus_weight(fn_mask);
+
+    if ( nr_cpus == 0 )
+    {
+        if ( self )
+            (*fn)(data);
+        return 0;
+    }
+
+    rdz.fn = fn;
+    rdz.data = data;
+    rdz.selected = selected;
+    rdz.fn_mask = fn_mask;
+    atomic_set(&rdz.done, 0);
+    rdz.action = RENDEZVOUS_START;
+
+    /* Note: we must not spin on the lock when another cpu holds it, since
+     * the holder expects this cpu to enter softirq context; spinning here
+     * would deadlock.
+     */
+    if ( !spin_trylock(&rendezvous_lock) )
+        return -1;
+
+    rdz_data = &rdz;
+    smp_wmb();
+
+    for_each_cpu_mask(cpu, selected)
+        cpu_raise_softirq(cpu, RENDEZVOUS_SOFTIRQ);
+
+    /* Wait for all concerned cpus to enter the rendezvous point */
+    while ( atomic_read(&rdz_data->done) != nr_cpus )
+        cpu_relax();
+
+    if ( disable_irq )
+        rendezvous_set_action(RENDEZVOUS_DISABLE_IRQ, nr_cpus);
+
+    if ( self )
+        (*fn)(data);
+
+    /* Wait for cpus to finish their work, if applicable */
+    if ( nr_fn_cpus != 0 )
+        rendezvous_set_action(RENDEZVOUS_INVOKE, nr_fn_cpus);
+
+    rendezvous_set_action(RENDEZVOUS_EXIT, nr_cpus);
+    spin_unlock(&rendezvous_lock);
+    return 0;
+}
+
+static void rendezvous_softirq(void)
+{
+    int irq_disabled = 0;
+    int invoked = 0;
+    int required;
+
+    ASSERT(cpu_isset(smp_processor_id(), rdz_data->selected));
+
+    required = cpu_isset(smp_processor_id(), rdz_data->fn_mask);
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+
+    while ( rdz_data->action != RENDEZVOUS_EXIT )
+    {
+        switch ( rdz_data->action )
+        {
+        case RENDEZVOUS_DISABLE_IRQ:
+            if ( !irq_disabled )
+            {
+                local_irq_disable();
+                irq_disabled = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        case RENDEZVOUS_INVOKE:
+            if ( required && !invoked )
+            {
+                rdz_data->fn(rdz_data->data);
+                invoked = 1;
+                smp_mb();
+                atomic_inc(&rdz_data->done);
+            }
+            break;
+        default:
+            break;
+        }
+
+        cpu_relax();
+    }
+
+    smp_mb();
+    atomic_inc(&rdz_data->done);
+}
+
+static int __init cpu_rendezvous_init(void)
+{
+    open_softirq(RENDEZVOUS_SOFTIRQ, rendezvous_softirq);
+    return 0;
+}
+__initcall(cpu_rendezvous_init);
diff -r 667d1a73d2ca xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/arch/x86/smpboot.c    Sat Feb 02 14:11:30 2008 +0800
@@ -1192,7 +1192,7 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(cpumask_t map);
-int __cpu_disable(void)
+void __cpu_disable(void *data)
 {
        cpumask_t map = cpu_online_map;
        int cpu = smp_processor_id();
@@ -1206,7 +1206,15 @@ int __cpu_disable(void)
         * Especially so if we're not using an IOAPIC   -zwane
         */
        if (cpu == 0)
-               return -EBUSY;
+               return;
+
+       /* So far only S3 uses this path, and thus only idle vcpus are
+        * running on the APs when it is called. To support full cpu
+        * hotplug, other notification mechanisms (e.g. migrating vcpus
+        * off this cpu before the rendezvous point) would be needed.
+        */
+       if (!is_idle_vcpu(current))
+               return;
 
        local_irq_disable();
        clear_local_APIC();
@@ -1223,7 +1231,6 @@ int __cpu_disable(void)
        fixup_irqs(map);
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
-       return 0;
 }
 
 void __cpu_die(unsigned int cpu)
@@ -1269,7 +1276,8 @@ int cpu_down(unsigned int cpu)
 int cpu_down(unsigned int cpu)
 {
        int err = 0;
-       cpumask_t mask;
+       cpumask_t rmask = cpu_online_map;
+       cpumask_t mask = CPU_MASK_NONE;
 
        spin_lock(&cpu_add_remove_lock);
        if (num_online_cpus() == 1) {
@@ -1283,11 +1291,9 @@ int cpu_down(unsigned int cpu)
        }
 
        printk("Prepare to bring CPU%d down...\n", cpu);
-       /* Send notification to remote idle vcpu */
-       cpus_clear(mask);
-       cpu_set(cpu, mask);
-       per_cpu(cpu_state, cpu) = CPU_DYING;
-       smp_send_event_check_mask(mask);
+       cpu_clear(smp_processor_id(), rmask); /* all except self */
+       cpu_set(cpu, mask); /* cpu to be brought down */
+       on_rendezvous_cpus(rmask, mask, __cpu_disable, NULL, 1);
 
        __cpu_die(cpu);
 
@@ -1364,9 +1370,8 @@ void enable_nonboot_cpus(void)
        cpus_clear(frozen_cpus);
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
-       return -ENOSYS;
+void __cpu_disable(void *data)
+{
 }
 
 void __cpu_die(unsigned int cpu)
diff -r 667d1a73d2ca xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/asm-x86/smp.h Sat Feb 02 14:11:30 2008 +0800
@@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
 
 /* State of each CPU. */
 #define CPU_ONLINE     0x0002  /* CPU is up */
-#define CPU_DYING      0x0003  /* CPU is requested to die */
 #define CPU_DEAD       0x0004  /* CPU is dead */
 DECLARE_PER_CPU(int, cpu_state);
 
 #ifdef CONFIG_HOTPLUG_CPU
-#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) == CPU_DYING)
+#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 extern int cpu_down(unsigned int cpu);
 extern int cpu_up(unsigned int cpu);
 extern void cpu_exit_clear(void);
@@ -102,8 +101,10 @@ static __inline int logical_smp_processo
 
 #endif
 
-extern int __cpu_disable(void);
+extern void __cpu_disable(void *data);
 extern void __cpu_die(unsigned int cpu);
+extern int on_rendezvous_cpus(cpumask_t selected, cpumask_t fn_mask,
+    void (*fn) (void *), void *data, int disable_irq);
 #endif /* !__ASSEMBLY__ */
 
 #else /* CONFIG_SMP */
diff -r 667d1a73d2ca xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Sat Feb 02 11:06:02 2008 +0800
+++ b/xen/include/xen/softirq.h Sat Feb 02 14:12:06 2008 +0800
@@ -10,8 +10,9 @@
 #define PAGE_SCRUB_SOFTIRQ                5
 #define TRACE_SOFTIRQ                     6
 #define RCU_SOFTIRQ                       7
+#define RENDEZVOUS_SOFTIRQ                8
 
-#define NR_COMMON_SOFTIRQS                8
+#define NR_COMMON_SOFTIRQS                9
 
 #include <asm/softirq.h>
 

Attachment: rendezvous_cpu.patch
Description: rendezvous_cpu.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel