Add basic infrastructure for Xen power management. Currently only
S3 (suspend-to-RAM) is supported.
Signed-off-by: Ke Yu <ke.yu@xxxxxxxxx>
Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx>
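
A minimal sketch of how the new interface is meant to be driven
(illustrative only: this patch wires up no control path yet, and the
handler name and request check below are hypothetical):

    #include <xen/types.h>
    #include <xen/errno.h>
    #include <xen/acpi.h>

    extern int enter_state(u32 state);   /* xen/arch/x86/acpi/power.c */

    /* Hypothetical dom0-triggered entry point. */
    static int do_pm_request(unsigned int sleep_state)
    {
        /* enter_state() handles only S3 so far: it pauses all domUs,
         * quiesces the console, time and interrupt controllers, and
         * then calls do_suspend_lowlevel(). */
        if ( sleep_state != ACPI_STATE_S3 )
            return -EINVAL;
        return enter_state(ACPI_STATE_S3);
    }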
diff -r 84c103f8881a xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/acpi/Makefile Mon May 14 16:34:31 2007 -0400
@@ -1,1 +1,2 @@ obj-y += boot.o
obj-y += boot.o
+obj-y += power.o
diff -r 84c103f8881a xen/arch/x86/acpi/power.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/power.c Mon May 14 20:49:36 2007 -0400
@@ -0,0 +1,209 @@
+/* drivers/acpi/sleep/power.c - PM core functionality for Xen
+ *
+ * Copyrights from Linux side:
+ * Copyright (c) 2000-2003 Patrick Mochel
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@xxxxxxx>
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 David Shaohua Li <shaohua.li@xxxxxxxxx>
+ * Copyright (c) 2005 Alexey Starikovskiy <alexey.y.starikovskiy@xxxxxxxxx>
+ *
+ * Slimmed with Xen specific support.
+ */
+
+#include <asm/io.h>
+#define CONFIG_ACPI_SLEEP
+#include <asm/acpi.h>
+#include <xen/acpi.h>
+#include <xen/errno.h>
+#include <xen/iocap.h>
+#include <xen/sched.h>
+#include <asm/irq.h>
+#include <asm/init.h>
+#include <xen/spinlock.h>
+#include <xen/domain.h>
+#include <xen/console.h>
+
+u8 sleep_states[ACPI_S_STATE_COUNT];
+DEFINE_SPINLOCK(pm_lock);
+
+extern void do_suspend_lowlevel(void);
+
+static char *acpi_states[ACPI_S_STATE_COUNT] =
+{
+ [ACPI_STATE_S1] = "standby",
+ [ACPI_STATE_S3] = "mem",
+ [ACPI_STATE_S4] = "disk",
+};
+
+/* address in low memory of the wakeup routine. */
+unsigned long acpi_wakeup_address;
+unsigned long acpi_video_flags;
+extern char wakeup_start, wakeup_end;
+unsigned long saved_videomode;
+extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
+
+/**
+ * acpi_save_state_mem - save kernel state
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
+ */
+int acpi_save_state_mem(void)
+{
+ if (!acpi_wakeup_address)
+ return 1;
+
+ init_low_mappings();
+ memcpy((void *)acpi_wakeup_address, &wakeup_start,
+ &wakeup_end - &wakeup_start);
+ acpi_copy_wakeup_routine(acpi_wakeup_address);
+ return 0;
+}
+
+/*
+ * acpi_restore_state - undo effects of acpi_save_state_mem
+ */
+void acpi_restore_state_mem(void)
+{
+#ifdef CONFIG_X86_64
+ zap_low_mappings();
+#else
+ zap_low_mappings(idle_pg_table_l2);
+#endif
+}
+
+/**
+ * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ *
+ * We allocate a page from the first 1MB of memory for the wakeup
+ * routine for when we come back from a sleep state. The
+ * runtime allocator allows specification of <16MB pages, but not
+ * <1MB pages.
+ */
+void __init acpi_reserve_bootmem(void)
+{
+ if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
+ pmprintk(XENLOG_ERR, "ACPI: Wakeup code way too big, S3 disabled.\n");
+ return;
+ }
+
+ /* 0-640K is not used by anyone else, except that 0x9000 holds the
+ * SMP trampoline code, so place the Xen ACPI wakeup code at 0x7000.
+ */
+ acpi_wakeup_address = (unsigned long)__va(0x7000);
+}
+
+/* TODO: add suspend-failure recovery later. */
+static int device_power_down(void)
+{
+ console_suspend();
+ time_suspend();
+ i8259A_suspend();
+ ioapic_suspend();
+ lapic_suspend();
+
+ return 0;
+}
+
+static void device_power_up(void)
+{
+ lapic_resume();
+ ioapic_resume();
+ i8259A_resume();
+ time_resume();
+ console_resume();
+}
+
+int enter_state(u32 state)
+{
+ struct domain *d;
+ unsigned long flags;
+ int error;
+
+ if (state <= ACPI_STATE_S0 || state > ACPI_S_STATES_MAX)
+ return -EINVAL;
+
+ if (!spin_trylock(&pm_lock))
+ return -EBUSY;
+
+ for_each_domain(d)
+ if (d->domain_id != 0)
+ domain_pause(d);
+
+ pmprintk(XENLOG_INFO, "PM: Preparing system for %s sleep\n",
+ acpi_states[state]);
+
+ local_irq_save(flags);
+
+ if ((error = device_power_down())) {
+ printk(XENLOG_ERR "Some devices failed to power down\n");
+ goto Done;
+ }
+
+ ACPI_FLUSH_CPU_CACHE();
+
+ /* Do arch specific saving of state. */
+ if (state > ACPI_STATE_S1) {
+ error = acpi_save_state_mem();
+ if (error)
+ goto Powerup;
+ }
+
+ switch (state) {
+ case ACPI_STATE_S3:
+ do_suspend_lowlevel();
+ break;
+ default:
+ error = -EINVAL;
+ goto Powerup;
+ }
+
+ pmprintk(XENLOG_INFO, "Back to C!\n");
+ if (state > ACPI_STATE_S1)
+ acpi_restore_state_mem();
+
+ Powerup:
+ device_power_up();
+
+ pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");
+ for_each_domain(d)
+ if (d->domain_id != 0)
+ domain_unpause(d);
+
+ Done:
+ local_irq_restore(flags);
+ spin_unlock(&pm_lock);
+ return error;
+}
+
+static int __init acpi_sleep_init(void)
+{
+ int i = 0;
+
+ pmprintk(XENLOG_INFO, "ACPI (supports");
+ for (i = 0; i < ACPI_S_STATE_COUNT; i++) {
+ if (i == ACPI_STATE_S3) {
+ sleep_states[i] = 1;
+ printk(" S%d", i);
+ } else {
+ sleep_states[i] = 0;
+ }
+ }
+ printk(")\n");
+
+ acpi_reserve_bootmem();
+ return 0;
+}
+__initcall(acpi_sleep_init);
diff -r 84c103f8881a xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_32.S Mon May 14 16:34:31 2007 -0400
@@ -146,6 +146,8 @@ start_paging:
rdmsr
bts $_EFER_NX,%eax
wrmsr
+ mov $1,%eax
+ mov %eax, nx_enabled-__PAGE_OFFSET
no_execute_disable:
pop %ebx
#endif
diff -r 84c103f8881a xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/boot/x86_64.S Mon May 14 16:34:31 2007 -0400
@@ -198,6 +198,7 @@ multiboot_ptr:
.long 0
.word 0
+ .global nopaging_gdt_descr
nopaging_gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.quad gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
@@ -207,6 +208,7 @@ cpuid_ext_features:
.word 0
gdt_descr:
+ .global gdt_descr
.word LAST_RESERVED_GDT_BYTE
.quad gdt_table - FIRST_RESERVED_GDT_BYTE
diff -r 84c103f8881a xen/arch/x86/x86_32/Makefile
--- a/xen/arch/x86/x86_32/Makefile Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/Makefile Mon May 14 16:34:31 2007 -0400
@@ -6,3 +6,5 @@ obj-y += traps.o
obj-y += traps.o
obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
+subdir-y += acpi
+subdir-y += power
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/acpi/Makefile Mon May 14 16:34:31 2007 -0400
@@ -0,0 +1,1 @@
+obj-y += wakeup.o
diff -r 84c103f8881a xen/arch/x86/x86_32/acpi/wakeup.S
--- a/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/acpi/wakeup.S Mon May 14 16:34:31 2007 -0400
@@ -1,6 +1,11 @@
.text
+#ifndef __XEN__
#include <linux/linkage.h>
#include <asm/segment.h>
+#else
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+#endif
#include <asm/page.h>
#
@@ -56,7 +61,11 @@ 1:
1:
# set up page table
+#ifndef __XEN__
movl $swsusp_pg_dir-__PAGE_OFFSET, %eax
+#else
+ movl $idle_pg_table-__PAGE_OFFSET, %eax
+#endif
movl %eax, %cr3
testl $1, real_efer_save_restore - wakeup_code
@@ -88,7 +97,11 @@ 1:
cmpl $0x12345678, %eax
jne bogus_real_magic
+#ifndef __XEN__
ljmpl $__KERNEL_CS,$wakeup_pmode_return
+#else
+ ljmpl $(__HYPERVISOR_CS),$wakeup_pmode_return
+#endif
real_save_gdt: .word 0
.long 0
@@ -184,7 +197,11 @@ ENTRY(wakeup_end)
.org 0x1000
wakeup_pmode_return:
+#ifndef __XEN__
movw $__KERNEL_DS, %ax
+#else
+ movw $__HYPERVISOR_DS, %ax
+#endif
movw %ax, %ss
movw %ax, %ds
movw %ax, %es
@@ -196,7 +213,11 @@ wakeup_pmode_return:
lgdt saved_gdt
lidt saved_idt
lldt saved_ldt
+#ifndef __XEN__
ljmp $(__KERNEL_CS),$1f
+#else
+ ljmp $(__HYPERVISOR_CS),$1f
+#endif
1:
movl %cr3, %eax
movl %eax, %cr3
diff -r 84c103f8881a xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/mm.c Mon May 14 16:34:31 2007 -0400
@@ -34,6 +34,7 @@ unsigned int PAGE_HYPERVISOR_NOCACHE = _
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
static unsigned long mpt_size;
+int nx_enabled = 0;
void *alloc_xen_pagetable(void)
{
@@ -133,7 +134,7 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
-void __init zap_low_mappings(l2_pgentry_t *base)
+void zap_low_mappings(l2_pgentry_t *base)
{
int i;
u32 addr;
@@ -147,6 +148,18 @@ void __init zap_low_mappings(l2_pgentry_
continue;
l2e_write(&base[i], l2e_empty());
}
+
+ flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+ int sz = ((DIRECTMAP_MBYTES << 20) >> L2_PAGETABLE_SHIFT) *
+ sizeof(l2_pgentry_t);
+
+ memcpy(idle_pg_table_l2,
+ idle_pg_table_l2 + (DIRECTMAP_VIRT_START >> L2_PAGETABLE_SHIFT),
+ sz);
flush_tlb_all_pge();
}
diff -r 84c103f8881a xen/arch/x86/x86_32/power/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_32/power/Makefile Mon May 14 16:34:31 2007 -0400
@@ -0,0 +1,1 @@
+obj-y += cpu.o
diff -r 84c103f8881a xen/arch/x86/x86_32/power/cpu.c
--- a/xen/arch/x86/x86_32/power/cpu.c Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_32/power/cpu.c Mon May 14 20:49:34 2007 -0400
@@ -7,10 +7,91 @@
* Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
*/
+#ifndef __XEN__
#include <linux/module.h>
#include <linux/suspend.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#else
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+
+/* image of the saved processor state */
+struct saved_context {
+ u16 es, fs, gs, ss;
+ unsigned long cr0, cr2, cr3, cr4;
+ u16 gdt_pad;
+ u16 gdt_limit;
+ unsigned long gdt_base;
+ u16 idt_pad;
+ u16 idt_limit;
+ unsigned long idt_base;
+ u16 ldt;
+ u16 tss;
+ unsigned long tr;
+ unsigned long safety;
+ unsigned long return_address;
+} __attribute__((packed));
+
+#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
+
+#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
+
+#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
+#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value) \
+ asm volatile("\n" \
+ "1:\t" \
+ "mov %0,%%" #seg "\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3:\t" \
+ "pushl $0\n\t" \
+ "popl %%" #seg "\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,3b\n" \
+ ".previous" \
+ : :"rm" (value))
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+ asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+#define set_debugreg(value, register) \
+ __asm__("movl %0,%%db" #register \
+ : /* no output */ \
+ :"r" (value))
+
+void kernel_fpu_begin(void)
+{
+ clts();
+}
+
+void kernel_fpu_end(void)
+{
+ stts();
+}
+#endif
static struct saved_context saved_context;
@@ -34,8 +115,10 @@ void __save_processor_state(struct saved
* segment registers
*/
savesegment(es, ctxt->es);
+#ifndef __XEN__
savesegment(fs, ctxt->fs);
savesegment(gs, ctxt->gs);
+#endif
savesegment(ss, ctxt->ss);
/*
@@ -60,6 +143,7 @@ static void do_fpu_end(void)
kernel_fpu_end();
}
+#ifndef __XEN__
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
@@ -84,6 +168,32 @@ static void fix_processor_context(void)
}
}
+#else
+static void fix_processor_context(void)
+{
+ int cpu = smp_processor_id();
+ struct tss_struct *t = &init_tss[cpu];
+
+ if ( supervisor_mode_kernel && cpu_has_sep )
+ wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0);
+
+ /* This just modifies memory; it should not be necessary, but it
+ * is: 386 hardware has the concept of a busy TSS (or some similar
+ * stupidity). */
+ set_tss_desc(cpu, t);
+
+ load_TR(cpu); /* This does ltr */
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (0) ); /* This does lldt */
+
+ /*
+ * Now maybe reset the debug registers
+ */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ /* no 4 and 5 */
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
+}
+#endif
void __restore_processor_state(struct saved_context *ctxt)
{
@@ -106,15 +216,19 @@ void __restore_processor_state(struct sa
* segment registers
*/
loadsegment(es, ctxt->es);
+#ifndef __XEN__
loadsegment(fs, ctxt->fs);
loadsegment(gs, ctxt->gs);
+#endif
loadsegment(ss, ctxt->ss);
+#ifndef __XEN__
/*
* sysenter MSRs
*/
if (boot_cpu_has(X86_FEATURE_SEP))
enable_sep_cpu();
+#endif
fix_processor_context();
do_fpu_end();
@@ -127,6 +241,8 @@ void restore_processor_state(void)
__restore_processor_state(&saved_context);
}
+#ifndef __XEN__
/* Needed by apm.c */
EXPORT_SYMBOL(save_processor_state);
EXPORT_SYMBOL(restore_processor_state);
+#endif
diff -r 84c103f8881a xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/Makefile Mon May 14 16:34:31 2007 -0400
@@ -5,6 +5,8 @@ obj-y += gpr_switch.o
obj-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o
+
+subdir-y += power
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_COMPAT) += domain.o
diff -r 84c103f8881a xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/mm.c Mon May 14 16:34:31 2007 -0400
@@ -191,9 +191,16 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
-void __init zap_low_mappings(void)
+void zap_low_mappings(void)
{
l4e_write(&idle_pg_table[0], l4e_empty());
+ flush_tlb_all_pge();
+}
+
+void init_low_mappings(void)
+{
+ l4e_write(&idle_pg_table[0],
+ l4e_from_paddr(__pa(idle_pg_table_l3),
__PAGE_HYPERVISOR));
flush_tlb_all_pge();
}
diff -r 84c103f8881a xen/arch/x86/x86_64/power/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/power/Makefile Mon May 14 16:34:31 2007 -0400
@@ -0,0 +1,2 @@
+obj-y += wakeup.o
+obj-y += suspend.o
diff -r 84c103f8881a xen/arch/x86/x86_64/power/suspend.c
--- a/xen/arch/x86/x86_64/power/suspend.c Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/power/suspend.c Mon May 14 21:03:02 2007 -0400
@@ -6,12 +6,17 @@
* Copyright (c) 2002 Pavel Machek <pavel@xxxxxxx>
* Copyright (c) 2001 Patrick Mochel <mochel@xxxxxxxx>
*/
-
-#include <linux/smp.h>
-#include <linux/suspend.h>
-#include <asm/proto.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <xen/config.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/x86_64/suspend.h>
+#include <asm/x86_64/asm_defns.h>
+#include <asm/ldt.h>
struct saved_context saved_context;
@@ -21,6 +26,44 @@ unsigned long saved_context_r12, saved_c
unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15;
unsigned long saved_context_eflags;
+#ifdef __XEN__
+unsigned long saved_context_msr_cstar, saved_context_msr_lstar;
+unsigned long saved_video_mode;
+
+#define MSR_KERNEL_GS_BASE MSR_SHADOW_GS_BASE
+
+static inline void kernel_fpu_begin(void)
+{
+ clts();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ stts();
+}
+
+static inline void syscall_init(void)
+{
+ wrmsrl(MSR_LSTAR, saved_context_msr_lstar);
+ wrmsrl(MSR_CSTAR, saved_context_msr_cstar);
+ wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
+ wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
+}
+
+static inline void load_gs_index(unsigned base)
+{
+ __asm__ __volatile__ (
+ " swapgs \n"
+ "1: movl %k0,%%gs \n"
+ " "safe_swapgs" \n"
+ ".section .fixup,\"ax\" \n"
+ "2: xorl %k0,%k0 \n"
+ " jmp 1b \n"
+ ".previous \n"
+ ".section __ex_table,\"a\"\n"
+ " .align 8 \n"
+ " .quad 1b,2b \n"
+ ".previous "
+ : : "r" (base&0xffff) );
+}
+#endif /* __XEN__*/
+
void __save_processor_state(struct saved_context *ctxt)
{
kernel_fpu_begin();
@@ -55,6 +98,9 @@ void __save_processor_state(struct saved
asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3));
asm volatile ("movq %%cr4, %0" : "=r" (ctxt->cr4));
asm volatile ("movq %%cr8, %0" : "=r" (ctxt->cr8));
+
+ rdmsrl(MSR_CSTAR, saved_context_msr_cstar);
+ rdmsrl(MSR_LSTAR, saved_context_msr_lstar);
}
void save_processor_state(void)
@@ -91,10 +137,24 @@ void __restore_processor_state(struct sa
/*
* segment registers
*/
+#ifndef __XEN__
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
load_gs_index(ctxt->gs);
+#else
+ /* Xen does not care about these selectors. However, if the
+ * suspend happened on an idle context, we must avoid restoring
+ * them: the idle page table maps only the Xen portion of the GDT,
+ * so the load could fault badly on the guest portion.
+ */
+ if (!is_idle_vcpu(current)) {
+ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
+ asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
+ asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
+ load_gs_index(ctxt->gs);
+ }
+#endif
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
@@ -114,6 +174,28 @@ void restore_processor_state(void)
void fix_processor_context(void)
{
+#ifdef __XEN__
+ int cpu = smp_processor_id();
+ struct tss_struct *t = &init_tss[cpu];
+
+ /* This just modifies memory; it should not be necessary, but it
+ * is: 386 hardware has the concept of a busy TSS (or some similar
+ * stupidity). */
+ set_tss_desc(cpu, t);
+ syscall_init(); /* This sets MSR_*STAR and related MSRs */
+ load_TR(cpu); /* This does ltr */
+ load_LDT(current); /* This does lldt */
+
+ /*
+ * Now maybe reset the debug registers
+ */
+ set_debugreg(current, 0UL, 0);
+ set_debugreg(current, 0UL, 1);
+ set_debugreg(current, 0UL, 2);
+ set_debugreg(current, 0UL, 3);
+ /* no 4 and 5 */
+ set_debugreg(current, 0UL, 6);
+ set_debugreg(current, 0UL, 7);
+#else
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
@@ -137,85 +219,6 @@ void fix_processor_context(void)
loaddebug(&current->thread, 6);
loaddebug(&current->thread, 7);
}
-}
-
-#ifdef CONFIG_SOFTWARE_SUSPEND
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-pgd_t *temp_level4_pgt;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address,
unsigned long end)
-{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- unsigned long paddr;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end)
- break;
-
- pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end)
- break;
- pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- }
- return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
- unsigned long start, end, next;
- int error;
-
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
- return -ENOMEM;
-
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
- /* Set up the direct mapping from scratch */
- start = (unsigned long)pfn_to_kaddr(0);
- end = (unsigned long)pfn_to_kaddr(end_pfn);
-
- for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!pud)
- return -ENOMEM;
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
- return error;
- set_pgd(temp_level4_pgt + pgd_index(start),
- mk_kernel_pgd(__pa(pud)));
- }
- return 0;
-}
-
-int swsusp_arch_resume(void)
-{
- int error;
-
- /* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
- return error;
- restore_image();
- return 0;
-}
-#endif /* CONFIG_SOFTWARE_SUSPEND */
+#endif /* __XEN__ */
+}
+
diff -r 84c103f8881a xen/arch/x86/x86_64/power/wakeup.S
--- a/xen/arch/x86/x86_64/power/wakeup.S Mon May 14 15:12:50 2007 -0400
+++ b/xen/arch/x86/x86_64/power/wakeup.S Mon May 14 16:34:31 2007 -0400
@@ -1,8 +1,16 @@
.text
+
+#ifdef __XEN__
+#include <asm/page.h>
+#include <asm/msr.h>
+#include <xen/config.h>
+#include <asm/config.h>
+#else
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/msr.h>
+#endif /* __XEN__ */
# Copyright 2003 Pavel Machek <pavel@xxxxxxx>, distribute under GPLv2
#
@@ -15,6 +23,13 @@
# cs = 0x1234, eip = 0x05
#
+#ifdef __XEN__
+/* A wakeup GDT is used before the CPU context is restored. Clean this up later. */
+#define __KERNEL_CS 0x10
+#define __KERNEL_DS 0x18
+#define __START_KERNEL_map __PAGE_OFFSET
+#define SYM_PHYS(sym) (sym - __PAGE_OFFSET)
+#endif
ALIGN
.align 16
@@ -121,7 +136,13 @@ wakeup_32:
movl %eax, %cr4
/* Setup early boot stage 4 level pagetables */
+
+#ifdef __XEN__
+ movl $SYM_PHYS(idle_pg_table), %eax
+#else
movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax
+#endif /* __XEN__ */
+
movl %eax, %cr3
/* Setup EFER (Extended Feature Enable Register) */
@@ -178,8 +199,12 @@ reach_compatibility_mode:
movb $0xa9, %al ; outb %al, $0x80
/* Load new GDT with the 64bit segment using 32bit descriptor */
+#ifdef __XEN__
+ lgdt %cs:SYM_PHYS(nopaging_gdt_descr)
+#else
movl $(pGDT32 - __START_KERNEL_map), %eax
lgdt (%eax)
+#endif /* __XEN__ */
movl $(wakeup_jumpvector - __START_KERNEL_map), %eax
/* Finally jump in 64bit mode */
@@ -187,7 +212,11 @@ reach_compatibility_mode:
wakeup_jumpvector:
.long wakeup_long64 - __START_KERNEL_map
+#ifdef __XEN__
+ .word __HYPERVISOR_CS
+#else
.word __KERNEL_CS
+#endif
.code64
@@ -199,20 +228,44 @@ wakeup_long64:
* addresses where we're currently running on. We have to do that
here
* because in 32bit we couldn't load a 64bit linear address.
*/
+#ifdef __XEN__
+ lgdt SYM_PHYS(nopaging_gdt_descr)
+#else
lgdt cpu_gdt_descr - __START_KERNEL_map
+#endif
movw $0x0e00 + 'u', %ds:(0xb8016)
nop
nop
+#ifdef __XEN__
+ movw $__HYPERVISOR_DS, %ax
+#else
movw $__KERNEL_DS, %ax
+#endif
movw %ax, %ss
movw %ax, %ds
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
+
+#ifdef __XEN__
+ /* Xen does not use the large memory model, so access the saved
+ registers via their physical (identity-mapped) addresses. */
+ movq SYM_PHYS(saved_esp), %rsp
+
+ movw $0x0e00 + 'x', %ds:(0xb8018)
+ movq SYM_PHYS(saved_ebx), %rbx
+ movq SYM_PHYS(saved_edi), %rdi
+ movq SYM_PHYS(saved_esi), %rsi
+ movq SYM_PHYS(saved_ebp), %rbp
+
+ movw $0x0e00 + '!', %ds:(0xb801a)
+ movq SYM_PHYS(saved_eip), %rax
+
+#else
+
movq saved_esp, %rsp
-
+
movw $0x0e00 + 'x', %ds:(0xb8018)
movq saved_ebx, %rbx
movq saved_edi, %rdi
@@ -221,6 +274,8 @@ wakeup_long64:
movw $0x0e00 + '!', %ds:(0xb801a)
movq saved_eip, %rax
+
+#endif /* __XEN__ */
jmp *%rax
.code32
@@ -355,7 +410,6 @@ bogus_magic2:
movw $0x0e00 + '2', %ds:(0xb8018)
jmp bogus_magic2
-
wakeup_stack_begin: # Stack grows down
.org 0xff0
@@ -378,6 +432,35 @@ ENTRY(acpi_copy_wakeup_routine)
pushq %rcx
pushq %rdx
+#ifdef __XEN__
+
+ sgdt saved_gdt(%rip)
+ sidt saved_idt(%rip)
+ sldt saved_ldt(%rip)
+ str saved_tss(%rip)
+
+ movq %cr3, %rdx
+ movq %rdx, saved_cr3(%rip)
+ movq %cr4, %rdx
+ movq %rdx, saved_cr4(%rip)
+ movq %cr0, %rdx
+ movq %rdx, saved_cr0(%rip)
+ sgdt real_save_gdt - wakeup_start (,%rdi)
+ movl $MSR_EFER, %ecx
+ rdmsr
+ movl %eax, saved_efer(%rip)
+ movl %edx, saved_efer2(%rip)
+
+ movl saved_video_mode(%rip), %edx
+ movl %edx, video_mode - wakeup_start (,%rdi)
+ movl acpi_video_flags(%rip), %edx
+ movl %edx, video_flags - wakeup_start (,%rdi)
+ movq $0x12345678, real_magic - wakeup_start (,%rdi)
+ movq $0x123456789abcdef0, %rdx
+ movq %rdx, saved_magic(%rip)
+
+#else
+
sgdt saved_gdt
sidt saved_idt
sldt saved_ldt
@@ -415,6 +498,8 @@ ENTRY(acpi_copy_wakeup_routine)
movq %rax, %cr0
jmp 1f # Flush pipelines
1:
+#endif /* __XEN__ */
+
# restore the regs we used
popq %rdx
popq %rcx
@@ -450,6 +535,19 @@ do_suspend_lowlevel:
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
+#ifdef __XEN__
+/* Xen does not use the large memory model, so make this code RIP-relative. */
+
+ lea .L97(%rip), %rax
+ movq %rax, saved_eip(%rip)
+
+ movq %rsp,saved_esp(%rip)
+ movq %rbp,saved_ebp(%rip)
+ movq %rbx,saved_ebx(%rip)
+ movq %rdi,saved_edi(%rip)
+ movq %rsi,saved_esi(%rip)
+
+#else
movq $.L97, saved_eip(%rip)
movq %rsp,saved_esp
@@ -458,6 +556,8 @@ do_suspend_lowlevel:
movq %rdi,saved_edi
movq %rsi,saved_esi
+#endif /* __XEN__ */
+
addq $8, %rsp
movl $3, %edi
xorl %eax, %eax
@@ -466,7 +566,11 @@ do_suspend_lowlevel:
.p2align 4,,7
.L99:
.align 4
+#ifdef __XEN__
+ movl $__HYPERVISOR_DS32, %eax
+#else
movl $24, %eax
+#endif
movw %ax, %ds
movq saved_context+58(%rip), %rax
movq %rax, %cr4
@@ -525,3 +629,4 @@ saved_cr4: .quad 0
saved_cr4: .quad 0
saved_efer: .quad 0
saved_efer2: .quad 0
+
diff -r 84c103f8881a xen/include/asm-x86/acpi.h
--- a/xen/include/asm-x86/acpi.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/acpi.h Mon May 14 18:14:26 2007 -0400
@@ -178,4 +178,6 @@ extern u8 x86_acpiid_to_apicid[];
extern u8 x86_acpiid_to_apicid[];
#define MAX_LOCAL_APIC 256
+#define pmprintk(_l, _f, _a...) \
+ printk(_l "<PM>" _f, ## _a )
#endif /*_ASM_ACPI_H*/
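
(For reference, illustrative only and not part of the patch: the XENLOG_*
level markers are string literals, so pmprintk() relies on compile-time
string concatenation. For example,

    pmprintk(XENLOG_INFO, "PM: Finishing wakeup.\n");

expands to

    printk(XENLOG_INFO "<PM>" "PM: Finishing wakeup.\n");

i.e. every power-management message is tagged "<PM>" at the given log
level.)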
diff -r 84c103f8881a xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/config.h Mon May 14 20:49:27 2007 -0400
@@ -367,4 +367,6 @@ extern unsigned long xenheap_phys_end; /
#define ELFSIZE 32
#endif
+#define FASTCALL(x) x __attribute__((regparm(3)))
+
#endif /* __X86_CONFIG_H__ */
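
(Aside, illustrative only: on x86-32, regparm(3) makes the first three
integer arguments arrive in %eax, %edx and %ecx, so the declaration in
power.c,

    extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));

is shorthand for

    extern unsigned long acpi_copy_wakeup_routine(unsigned long)
        __attribute__((regparm(3)));

which lets the assembly implementation receive its argument in a register
rather than on the stack.)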
diff -r 84c103f8881a xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/page.h Mon May 14 16:34:31 2007 -0400
@@ -287,6 +287,9 @@ extern l2_pgentry_t idle_pg_table_l2[R
#else
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#if CONFIG_PAGING_LEVELS == 4
+extern l3_pgentry_t idle_pg_table_l3[L3_PAGETABLE_ENTRIES];
+#endif
#ifdef CONFIG_COMPAT
extern l2_pgentry_t *compat_idle_pg_table_l2;
extern unsigned int m2p_compat_vstart;
diff -r 84c103f8881a xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/processor.h Mon May 14 16:34:31 2007 -0400
@@ -297,6 +297,11 @@ static inline unsigned long read_cr2(voi
return __cr2;
}
+static inline void write_cr2(unsigned long val)
+{
+ __asm__("mov %0,%%cr2": :"r" ((unsigned long)val));
+}
+
static inline unsigned long read_cr4(void)
{
unsigned long __cr4;
diff -r 84c103f8881a xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/smp.h Mon May 14 20:49:27 2007 -0400
@@ -45,6 +45,7 @@ extern void zap_low_mappings(l2_pgentry_
extern void zap_low_mappings(l2_pgentry_t *base);
#endif
+extern void init_low_mappings(void);
#define MAX_APICID 256
extern u8 x86_cpu_to_apicid[];
diff -r 84c103f8881a xen/include/asm-x86/x86_64/suspend.h
--- a/xen/include/asm-x86/x86_64/suspend.h Mon May 14 15:12:50 2007 -0400
+++ b/xen/include/asm-x86/x86_64/suspend.h Mon May 14 16:34:31 2007 -0400
@@ -39,11 +39,12 @@ extern unsigned long saved_context_eflag
extern unsigned long saved_context_eflags;
#define loaddebug(thread,register) \
- set_debugreg((thread)->debugreg##register, register)
+ __asm__("movq %0,%%db" #register \
+ : /* no output */ \
+ :"r" ((thread)->debugreg##register))
extern void fix_processor_context(void);
-#ifdef CONFIG_ACPI_SLEEP
extern unsigned long saved_eip;
extern unsigned long saved_esp;
extern unsigned long saved_ebp;
@@ -53,4 +54,3 @@ extern unsigned long saved_edi;
/* routines for saving/restoring kernel state */
extern int acpi_save_state_mem(void);
-#endif