diff -r f5e72cbfbb17 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Wed Sep 10 11:26:16 2008 +0100
+++ b/xen/arch/x86/irq.c	Thu Sep 11 15:48:05 2008 +0800
@@ -739,6 +739,7 @@
 {
     unsigned int irq;
     static int warned;
+    irq_guest_action_t *action;
 
     for ( irq = 0; irq < NR_IRQS; irq++ )
     {
@@ -756,6 +757,16 @@
             irq_desc[irq].handler->set_affinity(irq, mask);
         else if ( irq_desc[irq].action && !(warned++) )
             printk("Cannot set affinity for irq %i\n", irq);
+
+        if ( !(irq_desc[irq].status & IRQ_GUEST) )
+            continue;
+        action = (irq_guest_action_t *)irq_desc[irq].action;
+        if ( cpu_isset(smp_processor_id(), action->cpu_eoi_map) )
+        {
+            ack_APIC_irq();
+            cpu_clear(smp_processor_id(), action->cpu_eoi_map);
+            printk("Flushing pending eoi for irq %i\n", irq);
+        }
     }
 
     local_irq_enable();
diff -r f5e72cbfbb17 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Wed Sep 10 11:26:16 2008 +0100
+++ b/xen/arch/x86/smpboot.c	Thu Sep 11 15:48:05 2008 +0800
@@ -1225,15 +1225,6 @@
 	if (cpu == 0)
 		return -EBUSY;
 
-	/*
-	 * Only S3 is using this path, and thus idle vcpus are running on all
-	 * APs when we are called. To support full cpu hotplug, other 
-	 * notification mechanisms should be introduced (e.g., migrate vcpus
-	 * off this physical cpu before rendezvous point).
-	 */
-	if (!is_idle_vcpu(current))
-		return -EINVAL;
-
 	local_irq_disable();
 	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
@@ -1249,6 +1240,8 @@
 	fixup_irqs(map);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
+
+    migrate_all_vcpus_on_cpu(smp_processor_id());
 	return 0;
 }
 
@@ -1275,28 +1268,6 @@
     return __cpu_disable();
 }
 
-/* 
- * XXX: One important thing missed here is to migrate vcpus
- * from dead cpu to other online ones and then put whole
- * system into a stop state. It assures a safe environment
- * for a cpu hotplug/remove at normal running state.
- *
- * However for xen PM case, at this point:
- * 	-> All other domains should be notified with PM event,
- *	   and then in following states:
- *		* Suspend state, or
- *		* Paused state, which is a force step to all
- *		  domains if they do nothing to suspend
- *	-> All vcpus of dom0 (except vcpu0) have already beem
- *	   hot removed
- * with the net effect that all other cpus only have idle vcpu
- * running. In this special case, we can avoid vcpu migration
- * then and system can be considered in a stop state.
- *
- * So current cpu hotplug is a special version for PM specific
- * usage, and need more effort later for full cpu hotplug.
- * (ktian1)
- */
 int cpu_down(unsigned int cpu)
 {
 	int err = 0;
@@ -1306,6 +1277,13 @@
 		err = -EBUSY;
 		goto out;
 	}
+
+    /* Can not offline BSP */
+    if ( cpu == 0 )
+    {
+        err = -EINVAL;
+        goto out;
+    }
 
 	if (!cpu_online(cpu)) {
 		err = -EINVAL;
diff -r f5e72cbfbb17 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Sep 10 11:26:16 2008 +0100
+++ b/xen/common/sched_credit.c	Thu Sep 11 15:48:05 2008 +0800
@@ -407,11 +407,14 @@
 static inline int
 __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
 {
+    cpumask_t mask; /* vc's affinity restricted to currently online CPUs */
+
     /*
      * Don't pick up work that's in the peer's scheduling tail. Also only pick
      * up work that's allowed to run on our CPU.
      */
-    return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity);
+    cpus_and(mask, vc->cpu_affinity, cpu_online_map);
+    return !vc->is_running && cpu_isset(dest_cpu, mask);
 }
 
 static int
diff -r f5e72cbfbb17 xen/common/schedule.c
--- a/xen/common/schedule.c	Wed Sep 10 11:26:16 2008 +0100
+++ b/xen/common/schedule.c	Thu Sep 11 15:48:05 2008 +0800
@@ -286,6 +286,67 @@
         vcpu_sleep_nosync(v);
         vcpu_migrate(v);
     }
+}
+
+/*
+ * Push @v off @cpu_from. If @v is still assigned to that cpu, flag it as
+ * migrating under its schedule lock and, if the flag is still set after
+ * unlocking, call vcpu_sleep_nosync() and vcpu_migrate(). Otherwise no-op.
+ */
+static void vcpu_force_migrate(struct vcpu *v, int cpu_from)
+{
+    unsigned long flags;
+    int on_source;
+
+    vcpu_schedule_lock_irqsave(v, flags);
+    on_source = (v->processor == cpu_from);
+    if ( on_source )
+        set_bit(_VPF_migrating, &v->pause_flags);
+    vcpu_schedule_unlock_irqrestore(v, flags);
+
+    if ( !on_source || !test_bit(_VPF_migrating, &v->pause_flags) )
+        return;
+    vcpu_sleep_nosync(v);
+    vcpu_migrate(v);
+}
+
+/* Used by the cpu hotplug code: migrate every vcpu except the idle vcpus
+ * off @cpu. The caller must already have prevented migration to this cpu.
+ */
+void migrate_all_vcpus_on_cpu(int cpu)
+{
+    struct domain *d = NULL;
+    struct vcpu *v = NULL;
+    unsigned long flags;
+    cpumask_t usable;
+
+    for_each_domain(d)
+        for_each_vcpu(d, v)
+        {
+            if ( is_idle_vcpu(v) )
+                continue;
+
+            /* If the dying cpu is the last online cpu in the vcpu's affinity,
+             * warn and let the vcpu run on any cpu.
+             */
+            cpus_and(usable, v->cpu_affinity, cpu_online_map);
+            cpu_clear(cpu, usable);
+            if ( cpus_empty(usable) && cpu_isset(cpu, v->cpu_affinity) )
+            {
+                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
+                        v->domain->domain_id, v->vcpu_id);
+                vcpu_schedule_lock_irqsave(v, flags);
+                cpus_setall(v->cpu_affinity);
+                vcpu_schedule_unlock_irqrestore(v, flags);
+            }
+            /* A single-shot timer may be left active on this cpu: move it to
+             * the BSP. A new cpu is chosen when the timer is set again.
+             */
+            if ( v->singleshot_timer.cpu == cpu )
+                migrate_timer(&v->singleshot_timer, 0);
+
+            vcpu_force_migrate(v, cpu);
+        }
 }
 
 static int __vcpu_set_affinity(
diff -r f5e72cbfbb17 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Wed Sep 10 11:26:16 2008 +0100
+++ b/xen/include/xen/sched.h	Thu Sep 11 15:48:05 2008 +0800
@@ -524,6 +524,7 @@
 void cpu_init(void);
 
 void vcpu_force_reschedule(struct vcpu *v);
+void migrate_all_vcpus_on_cpu(int cpu);
 int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
 int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
 void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);