kexec: x86 This is the x86 component of kexec for xen. The generic component is a prerequisite for this patch. The x86_64 or x86_32 (i386) patch is also needed in order to use this code, however the code should compile as is. xen/arch/x86/crash.c | 171 ++++++++++++++++++++++++++++++++++- xen/arch/x86/dom0_ops.c | 3 xen/arch/x86/machine_kexec.c | 58 +++++++++-- xen/arch/x86/setup.c | 75 +++++++++++++-- xen/arch/x86/x86_32/Makefile | 1 xen/arch/x86/x86_32/machine_kexec.c | 27 +++++ xen/arch/x86/x86_64/Makefile | 1 xen/arch/x86/x86_64/machine_kexec.c | 28 +++++ xen/common/kexec.c | 3 xen/include/asm-x86/elf.h | 27 +++++ xen/include/asm-x86/hypercall.h | 6 + xen/include/asm-x86/kexec.h | 14 +- xen/include/asm-x86/x86_32/elf.h | 30 ++++++ xen/include/asm-x86/x86_32/kexec.h | 51 ++++++++++ xen/include/asm-x86/x86_64/elf.h | 30 ++++++ xen/include/asm-x86/x86_64/kexec.h | 50 ++++++++++ 16 files changed, 542 insertions(+), 33 deletions(-) --- x/xen/arch/x86/crash.c +++ x/xen/arch/x86/crash.c @@ -3,16 +3,181 @@ * * Created By: Horms * - * Should be based heavily on arch/i386/kernel/crash.c from Linux 2.6.16 + * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16 */ -#include /* for printk() used in stub */ +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +static int crashing_cpu; + +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + Elf_Note note; + + note.namesz = strlen(name) + 1; + note.descsz = data_len; + note.type = type; + memcpy(buf, &note, sizeof(note)); + buf += (sizeof(note) +3)/4; + memcpy(buf, name, note.namesz); + buf += (note.namesz + 3)/4; + memcpy(buf, data, note.descsz); + buf += (note.descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + Elf_Note note; + + note.namesz = 0; + note.descsz = 0; + note.type = 0; + memcpy(buf, &note, sizeof(note)); +} + +static void 
crash_save_this_cpu(struct cpu_user_regs *regs, int cpu) +{ + ELF_Prstatus prstatus; + uint32_t *buf; + + printk("crash_save_this_cpu: %d\n", cpu); + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * A well defined structure format with tags is needed + * ELF notes happen to provide this and there is infrastructure + * in the Linux kernel to support them. In order to make + * crash dumps produced by xen the same, the same + * technique is used here. + */ + + /* It should be safe to use per_cpu() here instead of per_cpu_ptr() + * (which does not exist in xen) as kexecing_lock must be held in + * order to get anywhere near here */ + buf = (uint32_t *)per_cpu(crash_notes, cpu); + if (!buf) /* XXX: Can this ever occur? */ + return; + memset(&prstatus, 0, sizeof(prstatus)); + /* XXX: Xen does not have processes. For the crashing CPU on a dom0 + * crash this could be passed down from dom0, but is this + * necessary? + * prstatus.pr_pid = current->pid; */ + ELF_CORE_COPY_REGS(prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + +static void crash_save_self(struct cpu_user_regs *regs) +{ + crash_save_this_cpu(regs, smp_processor_id()); +} + +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +static int crash_nmi_callback(struct cpu_user_regs *regs, int cpu) +{ + struct cpu_user_regs fixed_regs; + + /* Don't do anything if this handler is invoked on crashing cpu. + * Otherwise, system will completely hang. Crashing cpu can get + * an NMI if system was initially booted with nmi_watchdog parameter. 
+ */ + if (cpu == crashing_cpu) + return 1; + local_irq_disable(); + + if (!user_mode(regs)) { + crash_fixup_ss_esp(&fixed_regs, regs); + regs = &fixed_regs; + } + crash_save_this_cpu(regs, cpu); + disable_local_APIC(); + atomic_dec(&waiting_for_crash_ipi); + /* Assume hlt works */ + __asm__ __volatile__ ( "hlt" ); + for(;;); + + return 1; + + /* Need to use this somewhere as Xen builds with -Werror */ + crash_setup_regs(&fixed_regs, regs); +} + +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +static void smp_send_nmi_allbutself(void) +{ + cpumask_t allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + send_IPI_mask(allbutself, APIC_DM_NMI); +} + +static void nmi_shootdown_cpus(void) +{ + unsigned long msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + /* Would it be better to replace the trap vector here? 
*/ + set_nmi_callback(crash_nmi_callback); + /* Ensure the new callback function is set before sending + * out the NMI + */ + wmb(); + + smp_send_nmi_allbutself(); + + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + + /* Leave the nmi callback set */ + disable_local_APIC(); +} +#else +static void nmi_shootdown_cpus(void) +{ + /* There are no cpus to shootdown */ +} +#endif + void machine_crash_shutdown(struct cpu_user_regs *regs) { - printk("STUB: arch/x86/crash.c: machine_crash_shutdown: not implemented\n"); + printk("machine_crash_shutdown: %d\n", smp_processor_id()); + local_irq_disable(); + + crashing_cpu = smp_processor_id(); + nmi_shootdown_cpus(); +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + crash_save_self(regs); } /* --- x/xen/arch/x86/dom0_ops.c +++ x/xen/arch/x86/dom0_ops.c @@ -29,6 +29,9 @@ #include #include "cpu/mtrr/mtrr.h" +extern unsigned int opt_kdump_megabytes; +extern unsigned int opt_kdump_megabytes_base; + #define TRC_DOM0OP_ENTER_BASE 0x00020000 #define TRC_DOM0OP_LEAVE_BASE 0x00030000 --- x/xen/arch/x86/machine_kexec.c +++ x/xen/arch/x86/machine_kexec.c @@ -5,39 +5,71 @@ * */ -#include /* for printk() used in stubs */ +#include +#include +#include +#include +#include +#include #include +#include +#include #include int machine_kexec_prepare(int type, struct kexec_arg *arg) { - printk("STUB: arch/x86/machine_kexec.c: machine_kexec_prepare: " - "not implemented\n"); - return -1; + return 0; } void machine_kexec_cleanup(int type, struct kexec_arg *arg) { - printk("STUB: arch/x86/machine_kexec.c: machine_kexec_cleanup: " - "not implemented\n"); } void machine_kexec_reserved(struct kexec_arg *arg) { - printk("STUB: arch/x86/machine_kexec.c: machine_kexec_reserved: " - "not implemented\n"); + arg->u.reserve.size = opt_kdump_megabytes << 20; + arg->u.reserve.start = opt_kdump_megabytes_base << 20; } -void machine_kexec(struct 
kexec_arg *arg) +static void __machine_shutdown(void *data) { - printk("STUB: arch/x86/machine_kexec.c: machine_kexec: " - "not implemented\n"); + struct kexec_arg *arg = (struct kexec_arg *)data; + + printk("__machine_shutdown: cpu=%u\n", smp_processor_id()); + + watchdog_disable(); + console_start_sync(); + + smp_send_stop(); + +#ifdef CONFIG_X86_IO_APIC + disable_IO_APIC(); +#endif + + machine_kexec(arg); } void machine_shutdown(struct kexec_arg *arg) { - printk("STUB: arch/x86/machine_shutdown.c: machine_shutdown: " - "not implemented\n"); + int reboot_cpu_id; + cpumask_t reboot_cpu; + + + reboot_cpu_id = 0; + + if (!cpu_isset(reboot_cpu_id, cpu_online_map)) + reboot_cpu_id = smp_processor_id(); + + if (reboot_cpu_id != smp_processor_id()) { + cpus_clear(reboot_cpu); + cpu_set(reboot_cpu_id, reboot_cpu); + on_selected_cpus(reboot_cpu, __machine_shutdown, arg, 1, 0); + for (;;) + ; /* nothing */ + } + else + __machine_shutdown(arg); + BUG(); } /* --- x/xen/arch/x86/setup.c +++ x/xen/arch/x86/setup.c @@ -38,6 +38,11 @@ static unsigned int opt_xenheap_megabyte integer_param("xenheap_megabytes", opt_xenheap_megabytes); #endif +unsigned int opt_kdump_megabytes = 0; +integer_param("kdump_megabytes", opt_kdump_megabytes); +unsigned int opt_kdump_megabytes_base = 0; +integer_param("kdump_megabytes_base", opt_kdump_megabytes_base); + /* opt_nosmp: If true, secondary processors are ignored. 
*/ static int opt_nosmp = 0; boolean_param("nosmp", opt_nosmp); @@ -192,6 +197,20 @@ static void percpu_free_unused_areas(voi __pa(__per_cpu_end)); } +void __init move_memory(unsigned long dst, + unsigned long src_start, unsigned long src_end) +{ +#if defined(CONFIG_X86_32) + memmove((void *)dst, /* use low mapping */ + (void *)src_start, /* use low mapping */ + src_end - src_start); +#elif defined(CONFIG_X86_64) + memmove(__va(dst), + __va(src_start), + src_end - src_start); +#endif +} + void __init __start_xen(multiboot_info_t *mbi) { char __cmdline[] = "", *cmdline = __cmdline; @@ -327,15 +346,8 @@ void __init __start_xen(multiboot_info_t initial_images_start = xenheap_phys_end; initial_images_end = initial_images_start + modules_length; -#if defined(CONFIG_X86_32) - memmove((void *)initial_images_start, /* use low mapping */ - (void *)mod[0].mod_start, /* use low mapping */ - mod[mbi->mods_count-1].mod_end - mod[0].mod_start); -#elif defined(CONFIG_X86_64) - memmove(__va(initial_images_start), - __va(mod[0].mod_start), - mod[mbi->mods_count-1].mod_end - mod[0].mod_start); -#endif + move_memory(initial_images_start, + mod[0].mod_start, mod[mbi->mods_count-1].mod_end); /* Initialise boot-time allocator with all RAM situated after modules. 
*/ xenheap_phys_start = init_boot_allocator(__pa(&_end)); @@ -383,6 +395,51 @@ void __init __start_xen(multiboot_info_t #endif } + if (opt_kdump_megabytes) { + unsigned long kdump_start, kdump_size, k; + + /* mark images pages as free for now */ + + init_boot_pages(initial_images_start, initial_images_end); + + kdump_start = opt_kdump_megabytes_base << 20; + kdump_size = opt_kdump_megabytes << 20; + + printk("Kdump: %luMB (%lukB) at 0x%lx\n", + kdump_size >> 20, + kdump_size >> 10, + kdump_start); + + if ((kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK)) + panic("Kdump parameters not page aligned\n"); + + kdump_start >>= PAGE_SHIFT; + kdump_size >>= PAGE_SHIFT; + + /* allocate pages for Kdump memory area */ + + k = alloc_boot_pages_at(kdump_size, kdump_start); + + if (k != kdump_start) + panic("Unable to reserve Kdump memory\n"); + + /* allocate pages for relocated initial images */ + + k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0; + k += (initial_images_end - initial_images_start) >> PAGE_SHIFT; + + k = alloc_boot_pages(k, 1); + + if (!k) + panic("Unable to allocate initial images memory\n"); + + move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end); + + initial_images_end -= initial_images_start; + initial_images_start = k << PAGE_SHIFT; + initial_images_end += initial_images_start; + } + memguard_init(); printk("System RAM: %luMB (%lukB)\n", --- x/xen/arch/x86/x86_32/Makefile +++ x/xen/arch/x86/x86_32/Makefile @@ -3,5 +3,6 @@ obj-y += entry.o obj-y += mm.o obj-y += seg_fixup.o obj-y += traps.o +obj-y += machine_kexec.o obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o --- /dev/null +++ x/xen/arch/x86/x86_32/machine_kexec.c @@ -0,0 +1,27 @@ +/* + * arch/x86/x86_32/machine_kexec.c + * Handle transition of Linux booting another kernel + * + * Created By: Horms + * + * Should be loosely based on arch/i386/kernel/machine_kexec.c + */ + +#include /* for printk() used in stub */ +#include + +void 
machine_kexec(struct kexec_arg *arg) +{ + printk("STUB: arch/x86/x86_32/machine_kexec.c: machine_kexec: " + "not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/arch/x86/x86_64/Makefile +++ x/xen/arch/x86/x86_64/Makefile @@ -1,3 +1,4 @@ obj-y += entry.o obj-y += mm.o obj-y += traps.o +obj-y += machine_kexec.o --- /dev/null +++ x/xen/arch/x86/x86_64/machine_kexec.c @@ -0,0 +1,28 @@ +/* + * arch/x86/x86_64/machine_kexec.c + * Handle transition of Linux booting another kernel + * + * Created By: Horms + * + * Should be loosely based on arch/x86_64/kernel/machine_kexec.c + */ + +#include /* for printk() used in stubs */ +#include +#include + +void machine_kexec(struct kexec_arg *arg) +{ + printk("STUB: arch/x86/x86_64/machine_kexec.c: machine_kexec: " + "not implemented\n"); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- x/xen/common/kexec.c +++ x/xen/common/kexec.c @@ -99,7 +99,8 @@ int do_kexec(unsigned long op, int type, case KEXEC_CMD_kexec_crash_note: return get_crash_note(uarg); case KEXEC_CMD_kexec_reserve: - machine_kexec_reserved(&tmp_arg); + tmp_arg.u.reserve.size = opt_kdump_megabytes << 20; + tmp_arg.u.reserve.start = opt_kdump_megabytes_base << 20; if ( unlikely(copy_to_guest(uarg, &tmp_arg, 1) != 0) ) { printk("do_kexec (CMD_kexec_reserve): copy_to_guest failed\n"); --- /dev/null +++ x/xen/include/asm-x86/elf.h @@ -0,0 +1,27 @@ +/****************************************************************************** + * include/asm-x86/elf.h + * + * Created By: Horms + * + */ + +#ifndef __X86_ELF_H__ +#define __X86_ELF_H__ + +#ifdef __x86_64__ +#include +#else +#include +#endif + +#endif /* __X86_ELF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ 
--- x/xen/include/asm-x86/hypercall.h +++ x/xen/include/asm-x86/hypercall.h @@ -6,6 +6,8 @@ #define __ASM_X86_HYPERCALL_H__ #include +#include +#include extern long do_event_channel_op_compat( @@ -87,6 +89,10 @@ extern long arch_do_vcpu_op( int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg); +extern int +do_kexec( + unsigned long op, XEN_GUEST_HANDLE(kexec_arg_t) uarg); + #ifdef __x86_64__ extern long --- x/xen/include/asm-x86/kexec.h +++ x/xen/include/asm-x86/kexec.h @@ -8,16 +8,16 @@ #ifndef __X86_KEXEC_H__ #define __X86_KEXEC_H__ -#include /* for printk() used in stub */ +#include #include +#include #include -static void crash_setup_regs(struct cpu_user_regs *newregs, - struct cpu_user_regs *oldregs) -{ - printk("STUB: include/asm-x86/kexec.h: crash_setup_regs: " - "not implemented\n"); -} +#ifdef __x86_64__ +#include +#else +#include +#endif #endif /* __X86_KEXEC_H__ */ --- /dev/null +++ x/xen/include/asm-x86/x86_32/elf.h @@ -0,0 +1,30 @@ +/****************************************************************************** + * include/asm-x86/x86_32/elf.h + * + * Created By: Horms + * + * Should be based on include/asm-i386/elf.h:ELF_CORE_COPY_REGS + * from Linux 2.6.16 + */ + +#ifndef __X86_ELF_X86_32_H__ +#define __X86_ELF_X86_32_H__ + +#include /* for printk() used in stub */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + printk("STUB: include/asm-x86/x86_32/kexec.h: ELF_CORE_COPY_REGS: " \ + "not implemented\n") + + +#endif /* __X86_ELF_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/asm-x86/x86_32/kexec.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * include/asm-x86/x86_32/kexec.h + * + * Created By: Horms + * + * Should be based heavily on include/asm-i386/kexec.h from Linux 2.6.16 + * + */ + +#ifndef __X86_32_KEXEC_H__ +#define __X86_32_KEXEC_H__ + +#include 
/* for printk() used in stub */ +#include +#include + +static void crash_fixup_ss_esp(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/x86_32/kexec.h: crash_fixup_ss_esp: " + "not implemented\n"); + return; + crash_fixup_ss_esp(newregs, oldregs); +} + +static void crash_setup_regs(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/x86_32/kexec.h: crash_setup_regs: " + "not implemented\n"); +} + +static inline int user_mode(struct cpu_user_regs *regs) +{ + printk("STUB: include/asm-x86/x86_32/kexec.h: user_mode: " + "not implemented\n"); + return -1; +} + + +#endif /* __X86_32_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/asm-x86/x86_64/elf.h @@ -0,0 +1,30 @@ +/****************************************************************************** + * include/asm-x86/x86_64/elf.h + * + * Created By: Horms + * + * Should be based on include/asm-x86_64/elf.h:ELF_CORE_COPY_REGS + * from Linux 2.6.16 + */ + +#ifndef __X86_ELF_X86_64_H__ +#define __X86_ELF_X86_64_H__ + +#include /* for printk() used in stub */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + printk("STUB: include/asm-x86/x86_64/kexec.h: ELF_CORE_COPY_REGS: " \ + "not implemented\n") + + +#endif /* __X86_ELF_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- /dev/null +++ x/xen/include/asm-x86/x86_64/kexec.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * include/asm-x86/x86_64/kexec.h + * + * Created By: Horms + * + * Should be based heavily on include/asm-x86_64/kexec.h from Linux 2.6.16 + * + */ + +#ifndef __X86_64_KEXEC_H__ +#define __X86_64_KEXEC_H__ + +#include /* for printk() used in stub */ +#include +#include 
+ +static void crash_fixup_ss_esp(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/x86_64/kexec.h: crash_fixup_ss_esp: " + "not implemented\n"); +} + +static void crash_setup_regs(struct cpu_user_regs *newregs, + struct cpu_user_regs *oldregs) +{ + printk("STUB: include/asm-x86/x86_64/kexec.h: crash_setup_regs: " + "not implemented\n"); + return; + crash_fixup_ss_esp(newregs, oldregs); +} + +static inline int user_mode(struct cpu_user_regs *regs) +{ + printk("STUB: include/asm-x86/x86_64/kexec.h: user_mode: " + "not implemented\n"); + return -1; +} + +#endif /* __X86_64_KEXEC_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */