kexec: framework This is an implementation of kexec for dom0/xen that allows kexecing of the physical machine from xen. The approach taken is to move the architecture-dependent kexec code into a new hypercall. This patch only includes the framework; it cannot be used without architecture-dependent hooks, but the code should compile as is. linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/core/crash.c | 48 ++ linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 100 ++++++ linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 4 linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h | 27 + patches/linux-2.6.16.13/kexec.patch | 274 +++++++++++++++++ xen/arch/x86/Makefile | 2 xen/arch/x86/crash.c | 26 + xen/arch/x86/machine_kexec.c | 51 +++ xen/common/Makefile | 1 xen/common/kexec.c | 183 +++++++++++ xen/common/page_alloc.c | 33 +- xen/drivers/char/console.c | 3 xen/include/asm-x86/kexec.h | 32 + xen/include/public/kexec.h | 48 ++ xen/include/public/xen.h | 12 xen/include/xen/elfcore.h | 73 ++++ xen/include/xen/kexec.h | 33 ++ xen/include/xen/mm.h | 1 19 files changed, 941 insertions(+), 11 deletions(-) --- x/linux-2.6-xen-sparse/drivers/xen/core/Makefile +++ x/linux-2.6-xen-sparse/drivers/xen/core/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_NET) += skbuff.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o --- /dev/null +++ x/linux-2.6-xen-sparse/drivers/xen/core/crash.c @@ -0,0 +1,48 @@ +/* + * Architecture independent functions for kexec based crash dumps in xen. + * + * Created by: Horms + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This passes the registers down to the hypervisor and has it kexec() + * This is a bit different to the linux implementation which + * has this call save registers and stop CPUs and then goes into + * machine_kexec() later. 
But for Xen it makes more sense to + * have the kexec hypercall do everything, and this call + * has the registers parameter that is needed to hand them + * to the hypervisor, which can also kdump itself + * on an internal panic + */ +void machine_crash_shutdown(struct pt_regs *regs) +{ + kexec_arg_t hypercall_arg; + printk("machine_crash_shutdown: %d\n", smp_processor_id()); + local_irq_disable(); +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + crash_translate_regs(regs, &hypercall_arg.u.regs); + HYPERVISOR_kexec(KEXEC_CMD_kexec, KEXEC_TYPE_CRASH, &hypercall_arg); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- /dev/null +++ x/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c @@ -0,0 +1,104 @@ +/* + * drivers/xen/core/machine_kexec.c + * handle transition of Linux booting another kernel + * + * Created By: Horms + * + * Loosely based on arch/i386/kernel/machine_kexec.c + */ + +#include +#include +#include +#include +#include + +extern const unsigned char relocate_new_kernel[]; +extern unsigned int relocate_new_kernel_size; + +/* + * An architecture hook called to validate the + * proposed image and prepare the control pages + * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * have been allocated, but the segments have not yet + * been copied into the kernel. + * + * Do whatever setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + * + * Currently this only forwards the request to the hypervisor. + */ +int machine_kexec_prepare(struct kimage *image) +{ + return HYPERVISOR_kexec(KEXEC_CMD_kexec_prepare, image->type, NULL); +} + +/* + * Undo anything leftover by machine_kexec_prepare + * when an image is freed. 
+ */ +void machine_kexec_cleanup(struct kimage *image) +{ + HYPERVISOR_kexec(KEXEC_CMD_kexec_cleanup, image->type, NULL); +} + + +/* + * Load the image into xen so xen can kdump itself + * This might have been done in prepare, but prepare + * is currently called too early. It might make sense + * to move prepare, but for now, just add an extra hook. + */ +int machine_kexec_load(struct kimage *image) +{ + kexec_arg_t hypercall_arg; + hypercall_arg.u.image.indirection_page = image->head; + hypercall_arg.u.image.reboot_code_buffer = + pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT; + hypercall_arg.u.image.start_address = image->start; + hypercall_arg.u.image.relocate_new_kernel = relocate_new_kernel; + hypercall_arg.u.image.relocate_new_kernel_size = + relocate_new_kernel_size; + hypercall_arg.u.image.type = image->type; + return HYPERVISOR_kexec(KEXEC_CMD_kexec_load, image->type, + &hypercall_arg); +} + +/* + * Unload the image that was stored by machine_kexec_load() + * This might have been done in machine_kexec_cleanup() but it + * is called too late, and it's possible xen could try and kdump + * using resources that have been freed. + */ +void machine_kexec_unload(struct kimage *image) +{ + HYPERVISOR_kexec(KEXEC_CMD_kexec_unload, image->type, NULL); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + * + * This has the hypervisor move to the preferred reboot CPU, + * stop all CPUs and kexec. That is, it combines machine_shutdown() + * and machine_kexec() in Linux kexec terms. 
+ * + * The hypercall must not return; panic() if it does (NORET_TYPE). + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + printk("machine_kexec\n"); + HYPERVISOR_kexec(KEXEC_CMD_kexec, image->type, NULL); + panic("KEXEC_CMD_kexec hypercall should not return\n"); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c +++ x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c @@ -66,6 +66,10 @@ void machine_power_off(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); } +#ifdef CONFIG_KEXEC +void machine_shutdown(void) { } +#endif + int reboot_thru_bios = 0; /* for dmi_scan.c */ EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_halt); --- /dev/null +++ x/linux-2.6-xen-sparse/include/asm-i386/kexec-xen.h @@ -0,0 +1,27 @@ +/* + * include/asm-i386/kexec-xen.h + * + * Created By: Horms + */ + +#ifndef _I386_KEXEC_XEN_H +#define _I386_KEXEC_XEN_H + +static inline void crash_translate_regs(struct pt_regs *linux_regs, + struct cpu_user_regs *xen_regs) +{ + printk("STUB: include/asm-i386/kexec-xen.h: crash_translate_regs: " + "not implemented\n"); +} + +#endif /* _I386_KEXEC_XEN_H */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- x/xen/arch/x86/Makefile +++ x/xen/arch/x86/Makefile @@ -39,6 +39,8 @@ obj-y += trampoline.o obj-y += traps.o obj-y += usercopy.o obj-y += x86_emulate.o +obj-y += machine_kexec.o +obj-y += crash.o ifneq ($(pae),n) obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o --- /dev/null +++ x/xen/arch/x86/crash.c @@ -0,0 +1,26 @@ +/****************************************************************************** + * arch/x86/crash.c + * + * Created By: Horms + * + * Should be based heavily on arch/i386/kernel/crash.c from Linux 2.6.16 + */ + +#include /* for printk() used in stub */ +#include +#include + +void machine_crash_shutdown(struct cpu_user_regs *regs) +{ + printk("STUB: arch/x86/crash.c: machine_crash_shutdown: 
not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/arch/x86/machine_kexec.c @@ -0,0 +1,51 @@ +/****************************************************************************** + * arch/x86/machine_kexec.c + * + * Created By: Horms + * + */ + +#include /* for printk() used in stubs */ +#include +#include + +int machine_kexec_prepare(int type, struct kexec_arg *arg) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_prepare: " + "not implemented\n"); + return -1; +} + +void machine_kexec_cleanup(int type, struct kexec_arg *arg) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_cleanup: " + "not implemented\n"); +} + +void machine_kexec_reserved(struct kexec_arg *arg) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_reserved: " + "not implemented\n"); +} + +void machine_kexec(struct kexec_arg *arg) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec: " + "not implemented\n"); +} + +void machine_shutdown(struct kexec_arg *arg) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_shutdown: " + "not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/common/Makefile +++ x/xen/common/Makefile @@ -7,6 +7,7 @@ obj-y += event_channel.o obj-y += grant_table.o obj-y += kernel.o obj-y += keyhandler.o +obj-y += kexec.o obj-y += lib.o obj-y += memory.o obj-y += multicall.o --- /dev/null +++ x/xen/common/kexec.c @@ -0,0 +1,195 @@ +/* + * common/kexec.c - Architecture independent kexec code for Xen + * + * Created By: Horms + * + * Based in part on Linux 2.6.16's kernel/kexec.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU (note_buf_t, crash_notes); + +static struct kexec_arg kexec_image; +static int kexec_image_set = 0; +static struct 
kexec_arg kexec_crash_image; +static int kexec_crash_image_set = 0; +static int kexec_crash_lock = 0; + +/* Must call with kexec_crash_lock held */ +void __crash_kexec(struct cpu_user_regs *regs) +{ + struct cpu_user_regs fixed_regs; + + if (!kexec_crash_image_set) + return; + crash_setup_regs(&fixed_regs, regs); + machine_crash_shutdown(&fixed_regs); + machine_kexec(&kexec_crash_image); /* Does not return */ +} + +void crash_kexec(struct cpu_user_regs *regs) +{ + int locked; + + locked = xchg(&kexec_crash_lock, 1); + if (locked) + return; + __crash_kexec(regs); + xchg(&kexec_crash_lock, 0); +} + +static int get_crash_note(XEN_GUEST_HANDLE(kexec_arg_t) uarg) +{ + struct kexec_arg arg; + int locked, cpu; + + if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) ) + { + printk("do_kexec: get_crash_note: copy_from_guest failed\n"); + return -EFAULT; + } + + cpu = (int)arg.u.crash_note; + if (cpu < 0) + return -EINVAL; + + if (cpu >= num_booting_cpus()) + arg.u.crash_note = 0L; + else { + locked = xchg(&kexec_crash_lock, 1); + if (locked) + { + printk("do_kexec: get_crash_note: in xen-generated kdump\n"); + return -EFAULT; + } + arg.u.crash_note = __pa((unsigned long)per_cpu(crash_notes, cpu)); + xchg(&kexec_crash_lock, 0); + } + + if ( unlikely(copy_to_guest(uarg, &arg, 1) != 0) ) + { + printk("do_kexec: get_crash_note: copy_to_guest failed\n"); + return -EFAULT; + } + + return 0; +} + + +int do_kexec(unsigned long op, int type, XEN_GUEST_HANDLE(kexec_arg_t) uarg) +{ + struct kexec_arg *image, tmp_arg; + int locked; + int *image_set; + int status = -EINVAL; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + switch (op) + { + case KEXEC_CMD_kexec_crash_note: + return get_crash_note(uarg); + case KEXEC_CMD_kexec_reserve: + /* Do not leak uninitialised hypervisor stack to the guest */ + memset(&tmp_arg, 0, sizeof(tmp_arg)); + machine_kexec_reserved(&tmp_arg); + if ( unlikely(copy_to_guest(uarg, &tmp_arg, 1) != 0) ) + { + printk("do_kexec (CMD_kexec_reserve): copy_to_guest failed\n"); + return -EFAULT; + } + return 0; + } + + if (type == KEXEC_TYPE_CRASH) + { + image = 
&kexec_crash_image; + image_set = &kexec_crash_image_set; + locked = xchg(&kexec_crash_lock, 1); + if (locked) + { + printk("do_kexec: dump is locked\n"); + return -EFAULT; + } + } + else + { + image = &kexec_image; + image_set = &kexec_image_set; + } + + switch(op) { + case KEXEC_CMD_kexec: + /* Guest-controlled state: fail the call, do not BUG() the host */ + if (!*image_set) { + status = -ENOENT; + break; + } + if (type == KEXEC_TYPE_CRASH) { + if ( unlikely(copy_from_guest(&tmp_arg, uarg, 1) != 0) ) + { + printk("do_kexec (CMD_kexec): copy_from_guest failed\n"); + status = -EFAULT; + break; + } + __crash_kexec(&tmp_arg.u.regs); + } + else + machine_shutdown(image); /* Does not return */ + break; + case KEXEC_CMD_kexec_prepare: + /* Might need to tighten up kexec_crash_lock semantics, + * but this currently does nothing. ditto for cleanup */ + status = machine_kexec_prepare(type, image); + break; + case KEXEC_CMD_kexec_cleanup: + machine_kexec_cleanup(type, image); + status = 0; + break; + case KEXEC_CMD_kexec_load: + if (*image_set) { + status = -EBUSY; + break; + } + if ( unlikely(copy_from_guest(image, uarg, 1) != 0) ) + { + printk("do_kexec (CMD_kexec_load): copy_from_guest failed\n"); + status = -EFAULT; + break; + } + *image_set = 1; + status = 0; + break; + case KEXEC_CMD_kexec_unload: + if (!*image_set) { + status = -ENOENT; + break; + } + *image_set = 0; + status = 0; + break; + } + + if (type == KEXEC_TYPE_CRASH) + xchg(&kexec_crash_lock, 0); + return status; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/common/page_alloc.c +++ x/xen/common/page_alloc.c @@ -212,24 +212,35 @@ void init_boot_pages(paddr_t ps, paddr_t } } +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at) +{ + unsigned long i; + + for ( i = 0; i < nr_pfns; i++ ) + if ( allocated_in_map(pfn_at + i) ) + break; + + if ( i == nr_pfns ) + { + map_alloc(pfn_at, nr_pfns); + return pfn_at; + } + + return 0; +} + unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align) { - unsigned long pg, i; + 
unsigned long pg, i = 0; for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align ) { - for ( i = 0; i < nr_pfns; i++ ) - if ( allocated_in_map(pg + i) ) - break; - - if ( i == nr_pfns ) - { - map_alloc(pg, nr_pfns); - return pg; - } + i = alloc_boot_pages_at(nr_pfns, pg); + if (i != 0) + break; } - return 0; + return i; } --- x/xen/drivers/char/console.c +++ x/xen/drivers/char/console.c @@ -677,6 +677,7 @@ void panic(const char *fmt, ...) unsigned long flags; static spinlock_t lock = SPIN_LOCK_UNLOCKED; extern void machine_restart(char *); + extern void crash_kexec(struct cpu_user_regs *regs); debugtrace_dump(); @@ -696,6 +697,8 @@ void panic(const char *fmt, ...) debugger_trap_immediate(); + crash_kexec(NULL); + watchdog_disable(); mdelay(5000); machine_restart(0); --- /dev/null +++ x/xen/include/asm-x86/kexec.h @@ -0,0 +1,32 @@ +/****************************************************************************** + * include/asm-x86/kexec.h + * + * Created By: Horms + * + */ + +#ifndef __X86_KEXEC_H__ +#define __X86_KEXEC_H__ + +#include /* for printk() used in stub */ +#include +#include + +static inline void crash_setup_regs(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/kexec.h: crash_setup_regs: " + "not implemented\n"); +} + +#endif /* __X86_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/public/kexec.h @@ -0,0 +1,48 @@ +/* + * kexec.h - Public portion + * + * Created By: Horms + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + +#include "xen.h" + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + +/* + * Scratch space for passing arguments to the kexec hypercall + */ +typedef struct kexec_arg { + union { + struct { + unsigned long indirection_page; + unsigned long reboot_code_buffer; + unsigned long start_address; + const char *relocate_new_kernel; + 
unsigned int relocate_new_kernel_size; + unsigned int type; + } image; + struct cpu_user_regs regs; + struct { + unsigned long size; + unsigned long start; + } reserve; + unsigned long crash_note; + } u; +} kexec_arg_t; +DEFINE_XEN_GUEST_HANDLE(kexec_arg_t); + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/include/public/xen.h +++ x/xen/include/public/xen.h @@ -64,6 +64,7 @@ #define __HYPERVISOR_xenoprof_op 31 #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_kexec_op 34 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -238,6 +239,17 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 +/* + * Operations for kexec. + */ +#define KEXEC_CMD_kexec 0 +#define KEXEC_CMD_kexec_prepare 1 +#define KEXEC_CMD_kexec_cleanup 2 +#define KEXEC_CMD_kexec_load 3 +#define KEXEC_CMD_kexec_unload 4 +#define KEXEC_CMD_kexec_reserve 5 +#define KEXEC_CMD_kexec_crash_note 6 + #ifndef __ASSEMBLY__ typedef uint16_t domid_t; --- /dev/null +++ x/xen/include/xen/elfcore.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * include/xen/elfcore.h + * + * Created By: Horms + * + * Based heavily on include/linux/elfcore.h from Linux 2.6.16 + * Naming scheeme based on include/xen/elf.h (not include/linux/elfcore.h) + * + */ + +#ifndef __ELFCOREC_H__ +#define __ELFCOREC_H__ + +#include +#include +#include + +#define NT_PRSTATUS 1 + +typedef struct +{ + int signo; /* signal number */ + int code; /* extra code */ + int errno; /* errno */ +} ELF_Signifo; + +/* These seem to be the same length on all architectures on Linux */ +typedef int ELF_Pid; +typedef struct { + long tv_sec; + long tv_usec; +} ELF_Timeval; +typedef unsigned long ELF_Greg; +#define ELF_NGREG (sizeof (struct cpu_user_regs) / 
sizeof(ELF_Greg)) +typedef ELF_Greg ELF_Gregset[ELF_NGREG]; + +/* + * Definitions to generate Intel SVR4-like core files. + * These mostly have the same names as the SVR4 types with "elf_" + * tacked on the front to prevent clashes with linux definitions, + * and the typedef forms have been avoided. This is mostly like + * the SVR4 structure, but more Linuxy, with things that Linux does + * not support and which gdb doesn't really use excluded. + */ +typedef struct +{ + ELF_Signifo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned long pr_sigpend; /* Set of pending signals */ + unsigned long pr_sighold; /* Set of held signals */ + ELF_Pid pr_pid; + ELF_Pid pr_ppid; + ELF_Pid pr_pgrp; + ELF_Pid pr_sid; + ELF_Timeval pr_utime; /* User time */ + ELF_Timeval pr_stime; /* System time */ + ELF_Timeval pr_cutime; /* Cumulative user time */ + ELF_Timeval pr_cstime; /* Cumulative system time */ + ELF_Gregset pr_reg; /* GP registers */ + int pr_fpvalid; /* True if math co-processor being used. 
*/ +} ELF_Prstatus; + +#endif /* __ELFCOREC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/xen/kexec.h @@ -0,0 +1,38 @@ +/* + * include/xen/kexec.h - Internal architecture independent portion + * + * Created By: Horms + * + */ + +#ifndef __XEN_KEXEC_H__ +#define __XEN_KEXEC_H__ + +#include + +#define MAX_NOTE_BYTES 1024 + +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; +DECLARE_PER_CPU (note_buf_t, crash_notes); + +int machine_kexec_prepare(int type, struct kexec_arg *arg); +void machine_kexec_cleanup(int type, struct kexec_arg *arg); +void machine_kexec_reserved(struct kexec_arg *arg); +void machine_kexec(struct kexec_arg *arg); +void machine_shutdown(struct kexec_arg *arg); +void machine_crash_shutdown(struct cpu_user_regs *regs); + +extern unsigned int opt_kdump_megabytes; +extern unsigned int opt_kdump_megabytes_base; + +#endif /* __XEN_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/include/xen/mm.h +++ x/xen/include/xen/mm.h @@ -40,6 +40,7 @@ struct page_info; paddr_t init_boot_allocator(paddr_t bitmap_start); void init_boot_pages(paddr_t ps, paddr_t pe); unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align); +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at); void end_boot_allocator(void); /* Generic allocator. These functions are *not* interrupt-safe. */ --- /dev/null 2006-05-09 15:32:30.399072192 +0900 +++ x/patches/linux-2.6.16.13/kexec.patch 2006-05-17 18:37:45.000000000 +0900 @@ -0,0 +1,274 @@ +--- x/drivers/base/cpu.c ++++ x/drivers/base/cpu.c +@@ -80,12 +80,30 @@ void unregister_cpu(struct cpu *cpu, str + #else /* ... 
!CONFIG_HOTPLUG_CPU */ + static inline void register_cpu_control(struct cpu *cpu) + { ++#ifdef CONFIG_XEN ++#include ++ kexec_arg_t hypercall_arg; ++ hypercall_arg.u.crash_note = (unsigned long)cpu; ++#endif + } + #endif /* CONFIG_HOTPLUG_CPU */ + + #ifdef CONFIG_KEXEC + #include + ++#ifdef CONFIG_XEN ++static unsigned long get_crash_notes(int cpu) ++{ ++ kexec_arg_t hypercall_arg; ++ ++ hypercall_arg.u.crash_note = (unsigned long)cpu; ++ if (HYPERVISOR_kexec(KEXEC_CMD_kexec_crash_note, 0, &hypercall_arg) < 0) ++ return 0L; ++ return hypercall_arg.u.crash_note; ++} ++#endif ++ ++/* XXX: This only finds dom0's CPU's */ + static ssize_t show_crash_notes(struct sys_device *dev, char *buf) + { + struct cpu *cpu = container_of(dev, struct cpu, sysdev); +@@ -101,7 +119,11 @@ static ssize_t show_crash_notes(struct s + * boot up and this data does not change there after. Hence this + * operation should be safe. No locking required. + */ ++#ifndef CONFIG_XEN + addr = __pa(per_cpu_ptr(crash_notes, cpunum)); ++#else ++ addr = (unsigned long long)get_crash_notes(cpunum); ++#endif + rc = sprintf(buf, "%Lx\n", addr); + return rc; + } +--- x/include/linux/kexec.h ++++ x/include/linux/kexec.h +@@ -91,6 +91,10 @@ struct kimage { + extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; + extern int machine_kexec_prepare(struct kimage *image); + extern void machine_kexec_cleanup(struct kimage *image); ++#ifdef CONFIG_XEN ++extern int machine_kexec_load(struct kimage *image); ++extern void machine_kexec_unload(struct kimage *image); ++#endif + extern asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, +--- x/kernel/kexec.c ++++ x/kernel/kexec.c +@@ -27,8 +27,10 @@ + #include + #include + ++#ifndef CONFIG_XEN + /* Per cpu memory for storing cpu states in case of system crash. 
*/ + note_buf_t* crash_notes; ++#endif + + /* Location of the reserved area for the crash kernel */ + struct resource crashk_res = { +@@ -38,6 +40,20 @@ struct resource crashk_res = { + .flags = IORESOURCE_BUSY | IORESOURCE_MEM + }; + ++/* Kexec needs to know about the actual physical address. ++ * But in xen, a physical address is a pseudo-physical address. */ ++#ifndef CONFIG_XEN ++#define kexec_page_to_pfn(page) page_to_pfn(page) ++#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) ++#define kexec_virt_to_phys(addr) virt_to_phys(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(addr) ++#else ++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) ++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) ++#define kexec_virt_to_phys(addr) virt_to_machine(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) ++#endif ++ + int kexec_should_crash(struct task_struct *p) + { + if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) +@@ -403,7 +419,7 @@ static struct page *kimage_alloc_normal_ + pages = kimage_alloc_pages(GFP_KERNEL, order); + if (!pages) + break; +- pfn = page_to_pfn(pages); ++ pfn = kexec_page_to_pfn(pages); + epfn = pfn + count; + addr = pfn << PAGE_SHIFT; + eaddr = epfn << PAGE_SHIFT; +@@ -437,6 +453,7 @@ static struct page *kimage_alloc_normal_ + return pages; + } + ++#ifndef CONFIG_XEN + static struct page *kimage_alloc_crash_control_pages(struct kimage *image, + unsigned int order) + { +@@ -490,7 +507,7 @@ static struct page *kimage_alloc_crash_c + } + /* If I don't overlap any segments I have found my hole! 
*/ + if (i == image->nr_segments) { +- pages = pfn_to_page(hole_start >> PAGE_SHIFT); ++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); + break; + } + } +@@ -517,6 +534,13 @@ struct page *kimage_alloc_control_pages( + + return pages; + } ++#else /* CONFIG_XEN */ ++struct page *kimage_alloc_control_pages(struct kimage *image, ++ unsigned int order) ++{ ++ return kimage_alloc_normal_control_pages(image, order); ++} ++#endif + + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) + { +@@ -532,7 +556,7 @@ static int kimage_add_entry(struct kimag + return -ENOMEM; + + ind_page = page_address(page); +- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; ++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; + image->entry = ind_page; + image->last_entry = ind_page + + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); +@@ -593,13 +617,13 @@ static int kimage_terminate(struct kimag + #define for_each_kimage_entry(image, ptr, entry) \ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ + ptr = (entry & IND_INDIRECTION)? \ +- phys_to_virt((entry & PAGE_MASK)): ptr +1) ++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) + + static void kimage_free_entry(kimage_entry_t entry) + { + struct page *page; + +- page = pfn_to_page(entry >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT); + kimage_free_pages(page); + } + +@@ -611,6 +635,10 @@ static void kimage_free(struct kimage *i + if (!image) + return; + ++#ifdef CONFIG_XEN ++ machine_kexec_unload(image); ++#endif ++ + kimage_free_extra_pages(image); + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_INDIRECTION) { +@@ -686,7 +714,7 @@ static struct page *kimage_alloc_page(st + * have a match. 
+ */ + list_for_each_entry(page, &image->dest_pages, lru) { +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + if (addr == destination) { + list_del(&page->lru); + return page; +@@ -701,12 +729,12 @@ static struct page *kimage_alloc_page(st + if (!page) + return NULL; + /* If the page cannot be used file it away */ +- if (page_to_pfn(page) > ++ if (kexec_page_to_pfn(page) > + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { + list_add(&page->lru, &image->unuseable_pages); + continue; + } +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + + /* If it is the destination page we want use it */ + if (addr == destination) +@@ -729,7 +757,7 @@ static struct page *kimage_alloc_page(st + struct page *old_page; + + old_addr = *old & PAGE_MASK; +- old_page = pfn_to_page(old_addr >> PAGE_SHIFT); ++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); + copy_highpage(page, old_page); + *old = addr | (*old & ~PAGE_MASK); + +@@ -779,7 +807,7 @@ static int kimage_load_normal_segment(st + result = -ENOMEM; + goto out; + } +- result = kimage_add_page(image, page_to_pfn(page) ++ result = kimage_add_page(image, kexec_page_to_pfn(page) + << PAGE_SHIFT); + if (result < 0) + goto out; +@@ -811,6 +839,7 @@ out: + return result; + } + ++#ifndef CONFIG_XEN + static int kimage_load_crash_segment(struct kimage *image, + struct kexec_segment *segment) + { +@@ -833,7 +862,7 @@ static int kimage_load_crash_segment(str + char *ptr; + size_t uchunk, mchunk; + +- page = pfn_to_page(maddr >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); + if (page == 0) { + result = -ENOMEM; + goto out; +@@ -881,6 +910,13 @@ static int kimage_load_segment(struct ki + + return result; + } ++#else /* CONFIG_XEN */ ++static int kimage_load_segment(struct kimage *image, ++ struct kexec_segment *segment) ++{ ++ return kimage_load_normal_segment(image, segment); ++} ++#endif + + /* + * Exec Kernel system call: for obvious 
reasons only root may call it. +@@ -991,6 +1027,11 @@ asmlinkage long sys_kexec_load(unsigned + if (result) + goto out; + } ++#ifdef CONFIG_XEN ++ result = machine_kexec_load(image); ++ if (result) ++ goto out; ++#endif + /* Install the new kernel, and Uninstall the old */ + image = xchg(dest_image, image); + +@@ -1045,7 +1086,6 @@ void crash_kexec(struct pt_regs *regs) + struct kimage *image; + int locked; + +- + /* Take the kexec_lock here to prevent sys_kexec_load + * running on one cpu from replacing the crash kernel + * we are using after a panic on a different cpu. +@@ -1067,6 +1107,7 @@ void crash_kexec(struct pt_regs *regs) + } + } + ++#ifndef CONFIG_XEN + static int __init crash_notes_memory_init(void) + { + /* Allocate memory for saving cpu registers. */ +@@ -1079,3 +1120,4 @@ static int __init crash_notes_memory_ini + return 0; + } + module_init(crash_notes_memory_init) ++#endif