kexec: framework This is an implementation of kexec for dom0/xen, that allows kexecing of the physical machine from xen. The approach taken is to move the architecture-dependent kexec code into a new hypercall. This patch only includes the framework, it cannot be used without architecture-dependent hooks, however the code should compile as is. Signed-Off-By: Horms Signed-Off-By: Magnus Damm linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1 linux-2.6-xen-sparse/drivers/xen/core/crash.c | 48 + linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 84 ++ linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 4 patches/linux-2.6.16.13/0-linux-2.6.16-kexec_page_table_a_stubs.patch | 85 ++ patches/linux-2.6.16.13/kexec-generic.patch | 294 ++++++++++ xen/arch/x86/Makefile | 2 xen/arch/x86/crash.c | 26 xen/arch/x86/machine_kexec.c | 51 + xen/common/Makefile | 1 xen/common/kexec.c | 188 ++++++ xen/common/page_alloc.c | 33 - xen/drivers/char/console.c | 3 xen/include/asm-x86/kexec.h | 32 + xen/include/public/kexec.h | 85 ++ xen/include/public/xen.h | 1 xen/include/xen/elfcore.h | 73 ++ xen/include/xen/kexec.h | 33 + xen/include/xen/mm.h | 1 19 files changed, 1034 insertions(+), 11 deletions(-) --- x/linux-2.6-xen-sparse/drivers/xen/core/Makefile +++ x/linux-2.6-xen-sparse/drivers/xen/core/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o --- /dev/null +++ x/linux-2.6-xen-sparse/drivers/xen/core/crash.c @@ -0,0 +1,48 @@ +/* + * Architecture independent functions for kexec based crash dumps in xen. 
+ * + * Created by: Horms + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This passes the registers's down to the hypervisor and has it kexec() + * This is a bit different to the linux implementation which + * has this call save registers and stop CPUs and then goes into + * machine_kexec() later. But for Xen it makes more sense to + * have the kexec hypercall do everything, and this call + * has the registers parameter that is needed. + * to the hypervisor to allow the hypervisor to kdump itself + * on an internal panic + */ +void machine_crash_shutdown(struct pt_regs *regs) +{ + struct cpu_user_regs xen_regs; + printk("machine_crash_shutdown: %d\n", smp_processor_id()); + local_irq_disable(); +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + crash_translate_regs(regs, &xen_regs); + HYPERVISOR_kexec(KEXEC_CMD_kexec, KEXEC_TYPE_CRASH, &xen_regs); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- /dev/null +++ x/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c @@ -0,0 +1,84 @@ +/* + * drivers/xen/core/machine_kexec.c + * handle transition of Linux booting another kernel + * + * Created By: Horms + * + * Losely based on arch/i386/kernel/machine_kexec.c + */ + +#include +#include +#include +#include +#include + +const extern unsigned char relocate_new_kernel[]; +extern unsigned int relocate_new_kernel_size; + +static inline unsigned long machine_address(struct page *page) +{ + return pfn_to_mfn(page_to_pfn(page)) << PAGE_SHIFT; +} + +static void setup_hypercall_arg(xen_kexec_image_t *xki, struct kimage *image) +{ + memset(xki, 0, sizeof(*xki)); + + xki->indirection_page = image->head; + xki->reboot_code_buffer = + machine_address(image->control_code_page); + xki->start_address = image->start; +} + +/* + * Load the image into xen so xen can kdump itself + * This might have been done in 
prepare, but prepare + * is currently called too early. It might make sense + * to move prepare, but for now, just add an extra hook. + */ +int xen_machine_kexec_load(struct kimage *image) +{ + xen_kexec_image_t xki; + + setup_hypercall_arg(&xki, image); + return HYPERVISOR_kexec(KEXEC_CMD_kexec_load, image->type, &xki); +} + +/* + * Unload the image that was stored by machine_kexec_load() + * This might have been done in machine_kexec_cleanup() but it + * is called too late, and it's possible xen could try and kdump + * using resources that have been freed. + */ +void xen_machine_kexec_unload(struct kimage *image) +{ + xen_kexec_image_t xki; + + setup_hypercall_arg(&xki, image); + HYPERVISOR_kexec(KEXEC_CMD_kexec_unload, image->type, &xki); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + * + * This has the hypervisor move to the preferred reboot CPU, + * stop all CPUs and kexec. That is, it combines machine_shutdown() + * and machine_kexec() in Linux kexec terms. 
+ */ +NORET_TYPE void xen_machine_kexec(struct kimage *image) +{ + HYPERVISOR_kexec(KEXEC_CMD_kexec, image->type, NULL); + panic("KEXEC_CMD_kexec hypercall should not return\n"); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ --- x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c +++ x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c @@ -67,6 +67,10 @@ void machine_power_off(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); } +#ifdef CONFIG_KEXEC +void machine_shutdown(void) { } +#endif + int reboot_thru_bios = 0; /* for dmi_scan.c */ EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_halt); --- x/xen/arch/x86/Makefile +++ x/xen/arch/x86/Makefile @@ -39,6 +39,8 @@ obj-y += trampoline.o obj-y += traps.o obj-y += usercopy.o obj-y += x86_emulate.o +obj-y += machine_kexec.o +obj-y += crash.o ifneq ($(pae),n) obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o --- /dev/null +++ x/xen/arch/x86/crash.c @@ -0,0 +1,26 @@ +/****************************************************************************** + * arch/x86/crash.c + * + * Created By: Horms + * + * Should be based heavily on arch/i386/kernel/crash.c from Linux 2.6.16 + */ + +#include /* for printk() used in stub */ +#include +#include + +void machine_crash_shutdown(struct cpu_user_regs *regs) +{ + printk("STUB: arch/x86/crash.c: machine_crash_shutdown: not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/arch/x86/machine_kexec.c @@ -0,0 +1,51 @@ +/****************************************************************************** + * arch/x86/machine_kexec.c + * + * Created By: Horms + * + */ + +#include /* for printk() used in stubs */ +#include +#include + +int machine_kexec_load(int type, xen_kexec_image_t *image) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_load: 
" + "not implemented\n"); + return -1; +} + +void machine_kexec_unload(int type, xen_kexec_image_t *image) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_unload: " + "not implemented\n"); +} + +void machine_kexec_reserved(xen_kexec_reserve_t *reservation) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec_reserved: " + "not implemented\n"); +} + +void machine_kexec(xen_kexec_image_t *image) +{ + printk("STUB: arch/x86/machine_kexec.c: machine_kexec: " + "not implemented\n"); +} + +void machine_shutdown(xen_kexec_image_t *image) +{ + printk("STUB: arch/x86/machine_shutdown.c: machine_shutdown: " + "not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/common/Makefile +++ x/xen/common/Makefile @@ -7,6 +7,7 @@ obj-y += event_channel.o obj-y += grant_table.o obj-y += kernel.o obj-y += keyhandler.o +obj-y += kexec.o obj-y += lib.o obj-y += memory.o obj-y += multicall.o --- /dev/null +++ x/xen/common/kexec.c @@ -0,0 +1,188 @@ +/* + * common/kexec.c - Achitecture independent kexec code for Xen + * + * Created By: Horms + * + * Based in part on Linux 2.6.16's kernel/kexec.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU (note_buf_t, crash_notes); + +static xen_kexec_image_t kexec_image; +static int kexec_image_set = 0; +static xen_kexec_image_t kexec_crash_image; +static int kexec_crash_image_set = 0; +static int kexec_crash_lock = 0; + +/* Must call with kexec_crash_lock held */ +void __crash_kexec(struct cpu_user_regs *regs) +{ + struct cpu_user_regs fixed_regs; + + if (!kexec_crash_image_set) + return; + crash_setup_regs(&fixed_regs, regs); + machine_crash_shutdown(&fixed_regs); + machine_kexec(&kexec_crash_image); /* Does not return */ +} + +void crash_kexec(struct cpu_user_regs *regs) +{ + int locked; + + locked = xchg(&kexec_crash_lock, 1); + if (locked) + return; + 
__crash_kexec(regs); + xchg(&kexec_crash_lock, 0); +} + +static int get_crash_note(int vcpuid, XEN_GUEST_HANDLE(void) uarg) +{ + struct domain *domain = current->domain; + unsigned long crash_note; + struct vcpu *vcpu; + int locked; + + if (vcpuid < 0 || vcpuid >= MAX_VIRT_CPUS) + return -EINVAL; /* index must stay below MAX_VIRT_CPUS */ + + if ( ! (vcpu = domain->vcpu[vcpuid]) ) + return -EINVAL; + + locked = xchg(&kexec_crash_lock, 1); + if (locked) + { + printk("do_kexec: (CMD_kexec_crash_note): dump is locked\n"); + return -EFAULT; + } + crash_note = __pa((unsigned long)per_cpu(crash_notes, vcpu->processor)); + xchg(&kexec_crash_lock, 0); + + if ( unlikely(copy_to_guest(uarg, &crash_note, 1) != 0) ) + { + printk("do_kexec: (CMD_kexec_crash_note): copy_to_guest failed\n"); + return -EFAULT; + } + + return 0; +} + +static int get_reserve(XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_reserve_t reservation; + + machine_kexec_reserved(&reservation); + if ( unlikely(copy_to_guest(uarg, &reservation, 1) != 0) ) + { + printk("do_kexec (CMD_kexec_reserve): copy_to_guest failed\n"); + return -EFAULT; + } + + return 0; +} + +static int __do_kexec(unsigned long type, XEN_GUEST_HANDLE(void) uarg, + xen_kexec_image_t *image) +{ + cpu_user_regs_t regs; + + if (type == KEXEC_TYPE_DEFAULT) + machine_shutdown(image); /* Does not return */ + else + { + if ( unlikely(copy_from_guest(&regs, uarg, 1) != 0) ) + { + printk("do_kexec (CMD_kexec): copy_from_guest failed\n"); + return -EFAULT; + } + __crash_kexec(&regs); /* Does not return */ + } + + return -EINVAL; +} + +int do_kexec(unsigned long op, int arg1, XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_image_t *image; + int locked; + int *image_set; + int status = -EINVAL; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + switch (op) + { + case KEXEC_CMD_kexec_crash_note: + return get_crash_note(arg1, uarg); + case KEXEC_CMD_kexec_reserve: + return get_reserve(uarg); + } + + /* For all other ops, arg1 is the type of kexec, that is + * KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH */ 
+ if (arg1 == KEXEC_TYPE_CRASH) + { + image = &kexec_crash_image; + image_set = &kexec_crash_image_set; + locked = xchg(&kexec_crash_lock, 1); + if (locked) + { + printk("do_kexec: dump is locked\n"); + return -EFAULT; + } + } + else + { + image = &kexec_image; + image_set = &kexec_image_set; + } + + switch(op) { + case KEXEC_CMD_kexec: + BUG_ON(!*image_set); + status = __do_kexec(arg1, uarg, image); + break; + case KEXEC_CMD_kexec_load: + BUG_ON(*image_set); + if ( unlikely(copy_from_guest(image, uarg, 1) != 0) ) + { + printk("do_kexec (CMD_kexec_load): copy_from_guest failed\n"); + status = -EFAULT; + break; + } + *image_set = 1; + status = machine_kexec_load(arg1, image); + break; + case KEXEC_CMD_kexec_unload: + BUG_ON(!*image_set); + *image_set = 0; + machine_kexec_unload(arg1, image); + status = 0; + break; + } + + if (arg1 == KEXEC_TYPE_CRASH) + xchg(&kexec_crash_lock, 0); + return status; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/common/page_alloc.c +++ x/xen/common/page_alloc.c @@ -212,24 +212,35 @@ void init_boot_pages(paddr_t ps, paddr_t } } +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at) +{ + unsigned long i; + + for ( i = 0; i < nr_pfns; i++ ) + if ( allocated_in_map(pfn_at + i) ) + break; + + if ( i == nr_pfns ) + { + map_alloc(pfn_at, nr_pfns); + return pfn_at; + } + + return 0; +} + unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align) { - unsigned long pg, i; + unsigned long pg, i = 0; for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align ) { - for ( i = 0; i < nr_pfns; i++ ) - if ( allocated_in_map(pg + i) ) - break; - - if ( i == nr_pfns ) - { - map_alloc(pg, nr_pfns); - return pg; - } + i = alloc_boot_pages_at(nr_pfns, pg); + if (i != 0) + break; } - return 0; + return i; } --- x/xen/drivers/char/console.c +++ x/xen/drivers/char/console.c @@ -677,6 +677,7 @@ void panic(const 
char *fmt, ...) unsigned long flags; static spinlock_t lock = SPIN_LOCK_UNLOCKED; extern void machine_restart(char *); + extern void crash_kexec(struct cpu_user_regs *regs); debugtrace_dump(); @@ -696,6 +697,8 @@ void panic(const char *fmt, ...) debugger_trap_immediate(); + crash_kexec(NULL); + watchdog_disable(); mdelay(5000); machine_restart(0); --- /dev/null +++ x/xen/include/asm-x86/kexec.h @@ -0,0 +1,32 @@ +/****************************************************************************** + * include/asm-x86/kexec.h + * + * Created By: Horms + * + */ + +#ifndef __X86_KEXEC_H__ +#define __X86_KEXEC_H__ + +#include /* for printk() used in stub */ +#include +#include + +static void crash_setup_regs(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/kexec.h: crash_setup_regs: " + "not implemented\n"); +} + +#endif /* __X86_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/public/kexec.h @@ -0,0 +1,85 @@ +/* + * kexec.h - Public portion + * + * Created By: Horms + * + * Types based on those in ./vcpu.h on request from Keir Fraser + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, int type, void *extra_args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @arg1 == Operation-specific unsigned long argument + * This could be in extra_args, but by putting it here + * copy_from_user can be avoided, in particular in + * KEXEC_CMD_kexec during a crash dump, which is a fairly + * critical section of code. If this turns out not to be + * important then it can be collapsed into extra_args. + * @extra_args == Operation-specific extra arguments (NULL if none). 
+ */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * @arg1 == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH + * @extra_arg == pointer to cpu_user_regs_t structure. + */ +#define KEXEC_CMD_kexec 0 + +/* + * Load kernel image in preparation for kexec or kdump. + * @arg1 == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH + * @extra_arg == pointer to xen_kexec_image_t structure. + */ +#define KEXEC_CMD_kexec_load 1 +typedef struct xen_kexec_image { + unsigned long indirection_page; + unsigned long reboot_code_buffer; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Clean up image loaded by KEXEC_CMD_kexec_load + * @arg1 == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH + */ +#define KEXEC_CMD_kexec_unload 2 + +/* + * Find the base pointer and size of the area that xen has + * reserved for use by the crash kernel. + * @extra_arg == pointer to xen_kexec_reserve_t structure. + */ +#define KEXEC_CMD_kexec_reserve 3 +typedef struct xen_kexec_reserve { + unsigned long size; + unsigned long start; +} xen_kexec_reserve_t; + +/* + * Find the base pointer of the area that xen has + * reserved for use by a crash note for a given VCPU + * @extra_arg == pointer to unsigned long. + */ +#define KEXEC_CMD_kexec_crash_note 4 + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/include/public/xen.h +++ x/xen/include/public/xen.h @@ -64,6 +64,7 @@ #define __HYPERVISOR_xenoprof_op 31 #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_kexec_op 34 /* Architecture-specific hypercall definitions. 
*/ #define __HYPERVISOR_arch_0 48 --- /dev/null +++ x/xen/include/xen/elfcore.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * include/xen/elfcore.h + * + * Created By: Horms + * + * Based heavily on include/linux/elfcore.h from Linux 2.6.16 + * Naming scheeme based on include/xen/elf.h (not include/linux/elfcore.h) + * + */ + +#ifndef __ELFCOREC_H__ +#define __ELFCOREC_H__ + +#include +#include +#include + +#define NT_PRSTATUS 1 + +typedef struct +{ + int signo; /* signal number */ + int code; /* extra code */ + int errno; /* errno */ +} ELF_Signifo; + +/* These seem to be the same length on all architectures on Linux */ +typedef int ELF_Pid; +typedef struct { + long tv_sec; + long tv_usec; +} ELF_Timeval; +typedef unsigned long ELF_Greg; +#define ELF_NGREG (sizeof (struct cpu_user_regs) / sizeof(ELF_Greg)) +typedef ELF_Greg ELF_Gregset[ELF_NGREG]; + +/* + * Definitions to generate Intel SVR4-like core files. + * These mostly have the same names as the SVR4 types with "elf_" + * tacked on the front to prevent clashes with linux definitions, + * and the typedef forms have been avoided. This is mostly like + * the SVR4 structure, but more Linuxy, with things that Linux does + * not support and which gdb doesn't really use excluded. + */ +typedef struct +{ + ELF_Signifo pr_info; /* Info associated with signal */ + short pr_cursig; /* Current signal */ + unsigned long pr_sigpend; /* Set of pending signals */ + unsigned long pr_sighold; /* Set of held signals */ + ELF_Pid pr_pid; + ELF_Pid pr_ppid; + ELF_Pid pr_pgrp; + ELF_Pid pr_sid; + ELF_Timeval pr_utime; /* User time */ + ELF_Timeval pr_stime; /* System time */ + ELF_Timeval pr_cutime; /* Cumulative user time */ + ELF_Timeval pr_cstime; /* Cumulative system time */ + ELF_Gregset pr_reg; /* GP registers */ + int pr_fpvalid; /* True if math co-processor being used. 
*/ +} ELF_Prstatus; + +#endif /* __ELFCOREC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/xen/kexec.h @@ -0,0 +1,33 @@ +/* + * include/xen/kexec.h - Internal architecture-independent portion + * + * Created By: Horms + * + */ + +#include + +#define MAX_NOTE_BYTES 1024 + +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; +DECLARE_PER_CPU (note_buf_t, crash_notes); + +int machine_kexec_load(int type, xen_kexec_image_t *image); +void machine_kexec_unload(int type, xen_kexec_image_t *image); +void machine_kexec_reserved(xen_kexec_reserve_t *reservation); +void machine_kexec(xen_kexec_image_t *image); +void machine_shutdown(xen_kexec_image_t *image); +void machine_crash_shutdown(cpu_user_regs_t *regs); + +extern unsigned int opt_kdump_megabytes; +extern unsigned int opt_kdump_megabytes_base; + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/include/xen/mm.h +++ x/xen/include/xen/mm.h @@ -40,6 +40,7 @@ struct page_info; paddr_t init_boot_allocator(paddr_t bitmap_start); void init_boot_pages(paddr_t ps, paddr_t pe); unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align); +unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at); void end_boot_allocator(void); /* Generic allocator. These functions are *not* interrupt-safe. 
*/ --- /dev/null +++ x/patches/linux-2.6.16.13/kexec-generic.patch @@ -0,0 +1,294 @@ + drivers/base/cpu.c | 20 ++++++++++++++ + include/linux/kexec.h | 5 +++ + kernel/kexec.c | 68 ++++++++++++++++++++++++++++++++++++++++--------- + kernel/sys.c | 4 ++ + 4 files changed, 85 insertions(+), 12 deletions(-) + +--- x/drivers/base/cpu.c ++++ x/drivers/base/cpu.c +@@ -11,6 +11,10 @@ + + #include "base.h" + ++#ifdef CONFIG_XEN ++#include ++#endif ++ + struct sysdev_class cpu_sysdev_class = { + set_kset_name("cpu"), + }; +@@ -86,6 +90,18 @@ static inline void register_cpu_control( + #ifdef CONFIG_KEXEC + #include + ++#ifdef CONFIG_XEN ++static unsigned long get_crash_notes(int cpu) ++{ ++ unsigned long crash_note; ++ ++ if (HYPERVISOR_kexec(KEXEC_CMD_kexec_crash_note, cpu, &crash_note) < 0) ++ return 0UL; ++ return crash_note; ++} ++#endif ++ ++/* XXX: This only finds dom0's CPU's */ + static ssize_t show_crash_notes(struct sys_device *dev, char *buf) + { + struct cpu *cpu = container_of(dev, struct cpu, sysdev); +@@ -101,7 +117,11 @@ static ssize_t show_crash_notes(struct s + * boot up and this data does not change there after. Hence this + * operation should be safe. No locking required. 
+ */ ++#ifndef CONFIG_XEN + addr = __pa(per_cpu_ptr(crash_notes, cpunum)); ++#else ++ addr = (unsigned long long)get_crash_notes(cpunum); ++#endif + rc = sprintf(buf, "%Lx\n", addr); + return rc; + } +--- x/include/linux/kexec.h ++++ x/include/linux/kexec.h +@@ -91,6 +91,11 @@ struct kimage { + extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET; + extern int machine_kexec_prepare(struct kimage *image); + extern void machine_kexec_cleanup(struct kimage *image); ++#ifdef CONFIG_XEN ++extern int xen_machine_kexec_load(struct kimage *image); ++extern void xen_machine_kexec_unload(struct kimage *image); ++extern NORET_TYPE void xen_machine_kexec(struct kimage *image) ATTRIB_NORET; ++#endif + extern asmlinkage long sys_kexec_load(unsigned long entry, + unsigned long nr_segments, + struct kexec_segment __user *segments, +--- x/kernel/kexec.c ++++ x/kernel/kexec.c +@@ -38,6 +38,20 @@ struct resource crashk_res = { + .flags = IORESOURCE_BUSY | IORESOURCE_MEM + }; + ++/* Kexec needs to know about the actual physical address. ++ * But in xen, a physical address is a pseudo-physical address. 
*/ ++#ifndef CONFIG_XEN ++#define kexec_page_to_pfn(page) page_to_pfn(page) ++#define kexec_pfn_to_page(pfn) pfn_to_page(pfn) ++#define kexec_virt_to_phys(addr) virt_to_phys(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(addr) ++#else ++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page)) ++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn)) ++#define kexec_virt_to_phys(addr) virt_to_machine(addr) ++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr)) ++#endif ++ + int kexec_should_crash(struct task_struct *p) + { + if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) +@@ -403,7 +417,7 @@ static struct page *kimage_alloc_normal_ + pages = kimage_alloc_pages(GFP_KERNEL, order); + if (!pages) + break; +- pfn = page_to_pfn(pages); ++ pfn = kexec_page_to_pfn(pages); + epfn = pfn + count; + addr = pfn << PAGE_SHIFT; + eaddr = epfn << PAGE_SHIFT; +@@ -437,6 +451,7 @@ static struct page *kimage_alloc_normal_ + return pages; + } + ++#ifndef CONFIG_XEN + static struct page *kimage_alloc_crash_control_pages(struct kimage *image, + unsigned int order) + { +@@ -490,7 +505,7 @@ static struct page *kimage_alloc_crash_c + } + /* If I don't overlap any segments I have found my hole! 
*/ + if (i == image->nr_segments) { +- pages = pfn_to_page(hole_start >> PAGE_SHIFT); ++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT); + break; + } + } +@@ -517,6 +532,13 @@ struct page *kimage_alloc_control_pages( + + return pages; + } ++#else /* !CONFIG_XEN */ ++struct page *kimage_alloc_control_pages(struct kimage *image, ++ unsigned int order) ++{ ++ return kimage_alloc_normal_control_pages(image, order); ++} ++#endif + + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) + { +@@ -532,7 +554,7 @@ static int kimage_add_entry(struct kimag + return -ENOMEM; + + ind_page = page_address(page); +- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; ++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION; + image->entry = ind_page; + image->last_entry = ind_page + + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); +@@ -593,13 +615,13 @@ static int kimage_terminate(struct kimag + #define for_each_kimage_entry(image, ptr, entry) \ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ + ptr = (entry & IND_INDIRECTION)? \ +- phys_to_virt((entry & PAGE_MASK)): ptr +1) ++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1) + + static void kimage_free_entry(kimage_entry_t entry) + { + struct page *page; + +- page = pfn_to_page(entry >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT); + kimage_free_pages(page); + } + +@@ -611,6 +633,10 @@ static void kimage_free(struct kimage *i + if (!image) + return; + ++#ifdef CONFIG_XEN ++ xen_machine_kexec_unload(image); ++#endif ++ + kimage_free_extra_pages(image); + for_each_kimage_entry(image, ptr, entry) { + if (entry & IND_INDIRECTION) { +@@ -686,7 +712,7 @@ static struct page *kimage_alloc_page(st + * have a match. 
+ */ + list_for_each_entry(page, &image->dest_pages, lru) { +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + if (addr == destination) { + list_del(&page->lru); + return page; +@@ -701,12 +727,12 @@ static struct page *kimage_alloc_page(st + if (!page) + return NULL; + /* If the page cannot be used file it away */ +- if (page_to_pfn(page) > ++ if (kexec_page_to_pfn(page) > + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { + list_add(&page->lru, &image->unuseable_pages); + continue; + } +- addr = page_to_pfn(page) << PAGE_SHIFT; ++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT; + + /* If it is the destination page we want use it */ + if (addr == destination) +@@ -729,7 +755,7 @@ static struct page *kimage_alloc_page(st + struct page *old_page; + + old_addr = *old & PAGE_MASK; +- old_page = pfn_to_page(old_addr >> PAGE_SHIFT); ++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT); + copy_highpage(page, old_page); + *old = addr | (*old & ~PAGE_MASK); + +@@ -779,7 +805,7 @@ static int kimage_load_normal_segment(st + result = -ENOMEM; + goto out; + } +- result = kimage_add_page(image, page_to_pfn(page) ++ result = kimage_add_page(image, kexec_page_to_pfn(page) + << PAGE_SHIFT); + if (result < 0) + goto out; +@@ -811,6 +837,7 @@ out: + return result; + } + ++#ifndef CONFIG_XEN + static int kimage_load_crash_segment(struct kimage *image, + struct kexec_segment *segment) + { +@@ -833,7 +860,7 @@ static int kimage_load_crash_segment(str + char *ptr; + size_t uchunk, mchunk; + +- page = pfn_to_page(maddr >> PAGE_SHIFT); ++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT); + if (page == 0) { + result = -ENOMEM; + goto out; +@@ -881,6 +908,13 @@ static int kimage_load_segment(struct ki + + return result; + } ++#else /* CONFIG_XEN */ ++static int kimage_load_segment(struct kimage *image, ++ struct kexec_segment *segment) ++{ ++ return kimage_load_normal_segment(image, segment); ++} ++#endif + + /* + * Exec Kernel system call: for obvious 
reasons only root may call it. +@@ -991,6 +1025,11 @@ asmlinkage long sys_kexec_load(unsigned + if (result) + goto out; + } ++#ifdef CONFIG_XEN ++ result = xen_machine_kexec_load(image); ++ if (result) ++ goto out; ++#endif + /* Install the new kernel, and Uninstall the old */ + image = xchg(dest_image, image); + +@@ -1045,7 +1084,6 @@ void crash_kexec(struct pt_regs *regs) + struct kimage *image; + int locked; + +- + /* Take the kexec_lock here to prevent sys_kexec_load + * running on one cpu from replacing the crash kernel + * we are using after a panic on a different cpu. +@@ -1061,12 +1099,17 @@ void crash_kexec(struct pt_regs *regs) + struct pt_regs fixed_regs; + crash_setup_regs(&fixed_regs, regs); + machine_crash_shutdown(&fixed_regs); ++#ifdef CONFIG_XEN ++ xen_machine_kexec(image); ++#else + machine_kexec(image); ++#endif + } + xchg(&kexec_lock, 0); + } + } + ++#ifndef CONFIG_XEN + static int __init crash_notes_memory_init(void) + { + /* Allocate memory for saving cpu registers. */ +@@ -1079,3 +1122,4 @@ static int __init crash_notes_memory_ini + return 0; + } + module_init(crash_notes_memory_init) ++#endif +--- x/kernel/sys.c ++++ x/kernel/sys.c +@@ -435,8 +435,12 @@ void kernel_kexec(void) + kernel_restart_prepare(NULL); + printk(KERN_EMERG "Starting new kernel\n"); + machine_shutdown(); ++#ifdef CONFIG_XEN ++ xen_machine_kexec(image); ++#else + machine_kexec(image); + #endif ++#endif + } + EXPORT_SYMBOL_GPL(kernel_kexec); + --- /dev/null +++ x/patches/linux-2.6.16.13/0-linux-2.6.16-kexec_page_table_a_stubs.patch @@ -0,0 +1,85 @@ +kexec: Avoid overwriting the current pgd (V2, stubs) + +This patch adds an architecture specific structure "struct kimage_arch" to +struct kimage. This structure is filled in with members by the architecture +specific patches followed by this one. + +Signed-off-by: Magnus Damm +--- + + Applies on top of 2.6.16 and 2.6.17-rc4. 
+ + include/asm-i386/kexec.h | 2 ++ + include/asm-powerpc/kexec.h | 2 ++ + include/asm-s390/kexec.h | 2 ++ + include/asm-sh/kexec.h | 2 ++ + include/asm-x86_64/kexec.h | 2 ++ + include/linux/kexec.h | 2 ++ + 6 files changed, 12 insertions(+) + +--- x/include/asm-i386/kexec.h ++++ x/include/asm-i386/kexec.h +@@ -29,6 +29,8 @@ + + #define MAX_NOTE_BYTES 1024 + ++struct kimage_arch {}; ++ + /* CPU does not save ss and esp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. +--- x/include/asm-powerpc/kexec.h ++++ x/include/asm-powerpc/kexec.h +@@ -108,6 +108,8 @@ static inline void crash_setup_regs(stru + + #define MAX_NOTE_BYTES 1024 + ++struct kimage_arch {}; ++ + #ifdef __powerpc64__ + extern void kexec_smp_wait(void); /* get and clear naca physid, wait for + master to copy new code to 0 */ +--- x/include/asm-s390/kexec.h ++++ x/include/asm-s390/kexec.h +@@ -36,6 +36,8 @@ + + #define MAX_NOTE_BYTES 1024 + ++struct kimage_arch {}; ++ + /* Provide a dummy definition to avoid build failures. */ + static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) { } +--- x/include/asm-sh/kexec.h ++++ x/include/asm-sh/kexec.h +@@ -25,6 +25,8 @@ + + #ifndef __ASSEMBLY__ + ++struct kimage_arch {}; ++ + extern void machine_shutdown(void); + extern void *crash_notes; + +--- x/include/asm-x86_64/kexec.h ++++ x/include/asm-x86_64/kexec.h +@@ -29,6 +29,8 @@ + + #define MAX_NOTE_BYTES 1024 + ++struct kimage_arch {}; ++ + /* + * Saving the registers of the cpu on which panic occured in + * crash_kexec to save a valid sp. The registers of other cpus +--- x/include/linux/kexec.h ++++ x/include/linux/kexec.h +@@ -69,6 +69,8 @@ struct kimage { + unsigned long start; + struct page *control_code_page; + ++ struct kimage_arch arch_data; ++ + unsigned long nr_segments; + struct kexec_segment segment[KEXEC_SEGMENT_MAX]; +