Utilize the cpu hotplug infrastructure to bring down all cpus
except cpu0 before starting the suspend sequence. One tricky
point is that cpu0 is a bit special, so we'd better do the
suspend on it. However, vcpu0/dom0 is the one that triggers the
power event, and it may not be bound to cpu0. So a new softirq
is introduced to switch the flow to the idle vcpu on cpu0 if
that case happens, as sketched below.
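
In outline, the hand-off works as follows (a simplified sketch of
the acpi_enter_sleep()/pm_softirq() pair added by this patch; the
*_outline names are illustrative, and locking, error paths and the
_VPF_need_sync wait are omitted):

    /* Requesting side: runs in the hypercall context of vcpu0/dom0. */
    int acpi_enter_sleep_outline(u32 state)
    {
        freeze_domains();              /* pause all domains except dom0 */

        if ( current->processor == 0 )
        {
            /* Already on cpu0: suspend directly. */
            int ret = enter_state(state);
            thaw_domains();
            return ret;
        }

        /* Not on cpu0: hand over to cpu0's idle vcpu and pause self. */
        cpu_raise_softirq(0, PM_SOFTIRQ);
        vcpu_pause_self();
        return 0;                      /* eax is fixed up by cpu0 later */
    }

    /* cpu0 side: runs from the idle vcpu via the new PM_SOFTIRQ. */
    static void pm_softirq_outline(void)
    {
        struct vcpu *v = dom0->vcpu[0];

        while ( !atomic_read(&v->pause_count) )  /* wait for vcpu0 pause */
            cpu_relax();

        __sync_lazy_execstate();                 /* flush lazy state */
        v->arch.guest_context.user_regs.eax =
            enter_state(acpi_sinfo.sleep_state);
        vcpu_unpause(v);
        thaw_domains();
    }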
Be careful that the lazy context on all processors has to be
synced before the suspend flow, or else it may be lost or cause
incorrect behavior, since guest settings (like fs/gs) would be
recovered from stale xen context. A sketch of the sync follows.
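
The sync mirrors the enter_state()/pm_softirq() changes below: the
patch relies on flush_tlb_mask() to force the lazy state sync on
the APs, and __sync_lazy_execstate() for cpu0 itself. Roughly:

    /* Force every AP to flush lazily-held guest state back into
     * its guest context before the APs are torn down. */
    cpumask_t mask = cpu_online_map;
    cpu_clear(0, mask);       /* cpu0 is synced separately below */
    flush_tlb_mask(mask);     /* IPI also syncs lazy state on APs */

    /* cpu0 (the suspending cpu) flushes its own lazy state so that
     * stale fs/gs etc. from the last scheduled vcpu are not kept. */
    __sync_lazy_execstate();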
Also restore S5 support.
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
diff -r 5ee7cfab745e xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon May 14 22:51:52 2007 -0400
+++ b/xen/arch/x86/acpi/power.c Tue May 15 00:27:37 2007 -0400
@@ -24,6 +24,7 @@
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/console.h>
+#include <xen/softirq.h>
u8 sleep_states[ACPI_S_STATE_COUNT];
DEFINE_SPINLOCK(pm_lock);
@@ -134,27 +135,57 @@ static void device_power_up(void)
console_resume();
}
-/* Main interface to do xen specific suspend/resume */
-int enter_state(u32 state)
+static void freeze_domains(void)
{
struct domain *d;
- unsigned long flags;
- int error;
-
- if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
- return -EINVAL;
-
- if (!spin_trylock(&pm_lock))
- return -EBUSY;
-
+
for_each_domain(d)
if (d->domain_id != 0) {
domain_pause(d);
arch_domain_suspend(d);
}
-
+}
+
+static void thaw_domains(void)
+{
+ struct domain *d;
+
+ for_each_domain(d)
+ if (d->domain_id != 0)
+ domain_unpause(d);
+}
+
+/* Main interface to do xen specific suspend/resume */
+int enter_state(u32 state)
+{
+ unsigned long flags;
+ cpumask_t mask = cpu_online_map;
+ int error;
+
+ if (smp_processor_id() != 0)
+ return -EPERM;
+
+ if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+ return -EINVAL;
+
+ if (!spin_trylock(&pm_lock))
+ return -EBUSY;
+
pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
acpi_states[state]);
+
+ /* Sync all lazy state on other cpus, since APs will be
+ * re-initialized as on a fresh boot and any stale context
+ * would be lost
+ */
+ cpu_clear(0, mask);
+ flush_tlb_mask(mask);
+ pmprintk(XENLOG_INFO, "Finish lazy state sync\n");
+
+ disable_nonboot_cpus();
+ if (num_online_cpus() != 1) {
+ error = -EBUSY;
+ goto Enable_cpu;
+ }
local_irq_save(flags);
@@ -188,12 +219,10 @@ int enter_state(u32 state)
device_power_up();
pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");
- for_each_domain(d)
- if (d->domain_id!=0)
- domain_unpause(d);
-
Done:
local_irq_restore(flags);
+ Enable_cpu:
+ enable_nonboot_cpus();
spin_unlock(&pm_lock);
return error;
@@ -221,6 +250,14 @@ int set_acpi_sleep_info(struct xenpf_set
acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_evt,
info->xen_waking_vec);
return 0;
+}
+
+static void acpi_power_off(void)
+{
+ printk("%s called\n", __FUNCTION__);
+ local_irq_disable();
+ /* Some SMP machines can only power off from the boot CPU */
+ acpi_enter_sleep_state(ACPI_STATE_S5);
}
/*
@@ -262,7 +299,29 @@ int acpi_enter_sleep(struct xenpf_enter_
acpi_video_flags = sleep->video_flags;
saved_videomode = sleep->video_mode;
- return enter_state(acpi_sinfo.sleep_state);
+ /* acpi power off method */
+ if (acpi_sinfo.sleep_state == ACPI_STATE_S5) {
+ acpi_power_off();
+ /* Shouldn't return */
+ while(1);
+ }
+
+ freeze_domains();
+ if (current->processor == 0) {
+ int ret;
+
+ pmprintk(XENLOG_INFO, "vcpu0 on cpu0, sleep direclty\n");
+ ret = enter_state(acpi_sinfo.sleep_state);
+ thaw_domains();
+ return ret;
+ }
+
+ pmprintk(XENLOG_INFO, "vcpu0 on cpu%d, pause self and notify
cpu0\n",
+ current->processor);
+ cpu_raise_softirq(0, PM_SOFTIRQ);
+ vcpu_pause_self();
+ /* return value doesn't matter here. */
+ return 0;
}
static int acpi_get_wake_status(void)
@@ -288,6 +347,59 @@ acpi_status asmlinkage acpi_enter_sleep_
/* Wait until we enter sleep state, and spin until we wake */
while (!acpi_get_wake_status());
return_ACPI_STATUS(AE_OK);
+}
+
+/*
+ * Power management related softirq, handled on cpu0 only.
+ *
+ * The reason for introducing this softirq is that cpu0 is a bit
+ * special as the last one to be brought down. However, the sleep
+ * request is issued from vcpu0 of dom0, and this vcpu may not be
+ * bound to cpu0.
+ *
+ * So if the above case happens, the CPU receiving the sleep request
+ * raises a softirq to cpu0, and the idle vcpu on cpu0 then executes
+ * this handler immediately.
+ *
+ * If vcpu0 is already running on cpu0, this softirq is not triggered.
+ */
+static void pm_softirq(void)
+{
+ int cpu = smp_processor_id();
+ struct vcpu *v = dom0->vcpu[0];
+ struct cpu_user_regs *regs;
+
+ pmprintk(XENLOG_DEBUG, "In pm_softirq\n");
+ /* Only cpu0 handles this softirq for now */
+ if (cpu != 0)
+ return;
+
+ pmprintk(XENLOG_DEBUG, "handled by cpu0\n");
+ /* Wait for vcpu0/dom0 to be paused */
+ while ( !atomic_read(&v->pause_count) )
+ cpu_relax();
+
+ /* Then wait for the context of vcpu0/dom0 to be synced */
+ while ( test_bit(_VPF_need_sync, &v->pause_flags) )
+ cpu_relax();
+
+ pmprintk(XENLOG_INFO, "vcpu0/dom0 has been paused\n");
+
+ /* Sync lazy state on this cpu, to avoid any stale context from a
+ * previous domain crashing the system after resume. For example,
+ * ds/es/fs/gs won't be restored after resume on x86-64, so this
+ * step ensures they are flushed into the guest context.
+ */
+ __sync_lazy_execstate();
+ pmprintk(XENLOG_INFO, "Flush lazy state\n");
+
+ /* Now safe to suspend the whole system from cpu0 */
+ regs = &v->arch.guest_context.user_regs;
+ regs->eax = enter_state(acpi_sinfo.sleep_state);
+
+ /* Now unpause vcpu0/dom0 */
+ vcpu_unpause(v);
+
+ thaw_domains();
}
static int __init acpi_sleep_init(void)
@@ -307,6 +419,8 @@ static int __init acpi_sleep_init(void)
printk(")\n");
acpi_reserve_bootmem();
+
+ open_softirq(PM_SOFTIRQ, pm_softirq);
return 0;
}
__initcall(acpi_sleep_init);
diff -r 5ee7cfab745e xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon May 14 22:51:52 2007 -0400
+++ b/xen/include/asm-x86/smp.h Mon May 14 22:51:53 2007 -0400
@@ -70,6 +70,8 @@ extern void enable_nonboot_cpus(void);
extern void enable_nonboot_cpus(void);
#else
static inline int cpu_is_offline(int cpu) {return 0;}
+static inline void disable_nonboot_cpus(void) {}
+static inline void enable_nonboot_cpus(void) {}
#endif
/*
diff -r 5ee7cfab745e xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Mon May 14 22:51:52 2007 -0400
+++ b/xen/include/xen/softirq.h Mon May 14 22:51:53 2007 -0400
@@ -10,8 +10,9 @@
#define PAGE_SCRUB_SOFTIRQ 5
#define TRACE_SOFTIRQ 6
#define RCU_SOFTIRQ 7
+#define PM_SOFTIRQ 8
-#define NR_COMMON_SOFTIRQS 8
+#define NR_COMMON_SOFTIRQS 9
#include <asm/softirq.h>