diff -r f5e72cbfbb17 xen/arch/x86/irq.c --- a/xen/arch/x86/irq.c Wed Sep 10 11:26:16 2008 +0100 +++ b/xen/arch/x86/irq.c Thu Sep 11 15:48:05 2008 +0800 @@ -739,6 +739,7 @@ { unsigned int irq; static int warned; + irq_guest_action_t *action; for ( irq = 0; irq < NR_IRQS; irq++ ) { @@ -756,6 +757,16 @@ irq_desc[irq].handler->set_affinity(irq, mask); else if ( irq_desc[irq].action && !(warned++) ) printk("Cannot set affinity for irq %i\n", irq); + + if ( !(irq_desc[irq].status & IRQ_GUEST) ) + continue; + action = (irq_guest_action_t *)irq_desc[irq].action; + if ( cpu_isset(smp_processor_id(), action->cpu_eoi_map) ) + { + ack_APIC_irq(); + cpu_clear(smp_processor_id(), action->cpu_eoi_map); + printk("Flushing pending eoi for irq %i\n", irq); + } } local_irq_enable(); diff -r f5e72cbfbb17 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Wed Sep 10 11:26:16 2008 +0100 +++ b/xen/arch/x86/smpboot.c Thu Sep 11 15:48:05 2008 +0800 @@ -1225,15 +1225,6 @@ if (cpu == 0) return -EBUSY; - /* - * Only S3 is using this path, and thus idle vcpus are running on all - * APs when we are called. To support full cpu hotplug, other - * notification mechanisms should be introduced (e.g., migrate vcpus - * off this physical cpu before rendezvous point). - */ - if (!is_idle_vcpu(current)) - return -EINVAL; - local_irq_disable(); clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ @@ -1249,6 +1240,8 @@ fixup_irqs(map); /* It's now safe to remove this processor from the online map */ cpu_clear(cpu, cpu_online_map); + + migrate_all_vcpus_on_cpu(smp_processor_id()); return 0; } @@ -1275,28 +1268,6 @@ return __cpu_disable(); } -/* - * XXX: One important thing missed here is to migrate vcpus - * from dead cpu to other online ones and then put whole - * system into a stop state. It assures a safe environment - * for a cpu hotplug/remove at normal running state. 
- * - * However for xen PM case, at this point: - * -> All other domains should be notified with PM event, - * and then in following states: - * * Suspend state, or - * * Paused state, which is a force step to all - * domains if they do nothing to suspend - * -> All vcpus of dom0 (except vcpu0) have already beem - * hot removed - * with the net effect that all other cpus only have idle vcpu - * running. In this special case, we can avoid vcpu migration - * then and system can be considered in a stop state. - * - * So current cpu hotplug is a special version for PM specific - * usage, and need more effort later for full cpu hotplug. - * (ktian1) - */ int cpu_down(unsigned int cpu) { int err = 0; @@ -1306,6 +1277,13 @@ err = -EBUSY; goto out; } + + /* Can not offline BSP */ + if ( cpu == 0 ) + { + err = -EINVAL; + goto out; + } if (!cpu_online(cpu)) { err = -EINVAL; diff -r f5e72cbfbb17 xen/common/sched_credit.c --- a/xen/common/sched_credit.c Wed Sep 10 11:26:16 2008 +0100 +++ b/xen/common/sched_credit.c Thu Sep 11 15:48:05 2008 +0800 @@ -407,11 +407,14 @@ static inline int __csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu) { + cpumask_t mask; + /* * Don't pick up work that's in the peer's scheduling tail. Also only pick * up work that's allowed to run on our CPU. 
*/ - return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity); + cpus_and(mask, vc->cpu_affinity, cpu_online_map); + return !vc->is_running && cpu_isset(dest_cpu, mask); } static int diff -r f5e72cbfbb17 xen/common/schedule.c --- a/xen/common/schedule.c Wed Sep 10 11:26:16 2008 +0100 +++ b/xen/common/schedule.c Thu Sep 11 15:48:05 2008 +0800 @@ -286,6 +286,67 @@ vcpu_sleep_nosync(v); vcpu_migrate(v); } +} + +static void vcpu_force_migrate(struct vcpu *v, int cpu_from) +{ + unsigned long flags; + + vcpu_schedule_lock_irqsave(v, flags); + + if ( v->processor != cpu_from ) + { + vcpu_schedule_unlock_irqrestore(v, flags); + return; + } + + set_bit(_VPF_migrating, &v->pause_flags); + vcpu_schedule_unlock_irqrestore(v, flags); + + if ( test_bit(_VPF_migrating, &v->pause_flags) ) + { + vcpu_sleep_nosync(v); + vcpu_migrate(v); + } +} + +/* This function is used by cpu_hotplug code. All vcpus but the idle vcpu are + * migrated to other cpus. The caller should have already prevented migration + * to this dying cpu. + */ +void migrate_all_vcpus_on_cpu(int cpu) +{ + struct domain *d = NULL; + struct vcpu *v = NULL; + unsigned long flags; + + for_each_domain(d) + for_each_vcpu(d, v) + { + if ( is_idle_vcpu(v) ) + continue; + + /* If vcpu is pinned on dying cpu, warn here and let the vcpu + * continue on any cpu. + */ + if ( cpus_weight(v->cpu_affinity) == 1 + && cpu_isset(cpu, v->cpu_affinity) ) + { + printk("Breaking vcpu affinity for domain %d vcpu %d\n", + v->domain->domain_id, v->vcpu_id); + vcpu_schedule_lock_irqsave(v, flags); + cpus_setall(v->cpu_affinity); + vcpu_schedule_unlock_irqrestore(v, flags); + } + /* Single shot timer might be left active on this cpu, migrate it + * to bsp. A new cpu will be automatically chosen when the timer + * is set again. 
+ */ + if ( v->singleshot_timer.cpu == cpu ) + migrate_timer(&v->singleshot_timer, 0); + + vcpu_force_migrate(v, cpu); + } } static int __vcpu_set_affinity( diff -r f5e72cbfbb17 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Wed Sep 10 11:26:16 2008 +0100 +++ b/xen/include/xen/sched.h Thu Sep 11 15:48:05 2008 +0800 @@ -524,6 +524,7 @@ void cpu_init(void); void vcpu_force_reschedule(struct vcpu *v); +void migrate_all_vcpus_on_cpu(int cpu); int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity); int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity); void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);