kexec: framework and i386
Here is a first cut of kexec for dom0/xen, which will actually
kexec the physical machine from xen. The approach taken is
to move the architecture-dependent kexec code into a new hypercall.
Some notes:
* machine_kexec_cleanup() and machine_kexec_prepare() don't do
anything in i386. So while this patch adds a framework for them,
I am not sure what parameters are needed at this stage.
* Only works for UP, as machine_shutdown is not implemented yet
* kexecing into xen does not seem to work, I think that
kexec-tools needs updating, but I have not investigated yet
* I don't believe that kdump works yet
* This patch was prepared against xen-unstable.hg 9514
As of today (9574) two new hypercalls have been added.
I rediffed and moved the kexec hypercall to 33. However
this exceeds NR_hypercalls, which is currently 32.
I tried increasing this, but the dom0 now crashes
in entry.S on init, even after rebuilding both xen and the kernel
completely from scratch after a make distclean. Help!!
Prepared with the assistance of my colleague Magnus Damm
Signed-Off-By: Horms <horms@xxxxxxxxxxxx>
--- from-0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ to-work/linux-2.6-xen-sparse/arch/i386/Kconfig 2006-04-03
15:13:38.000000000 +0900
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_XEN
+ depends on EXPERIMENTAL
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/crash-xen.c 2006-04-03
15:13:38.000000000 +0900
@@ -0,0 +1,15 @@
+/*
+ * Architecture specific (i386-xen) functions for kexec based crash dumps.
+ *
+ * Created by: Horms <horms@xxxxxxxxxxxx>
+ *
+ */
+
+#include <linux/kernel.h> /* For printk */
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* XXX: Stub. regs is unused and nothing is shut down; only a reminder is logged. */
+ printk("xen-kexec: Need to turn of other CPUS in "
+ "machine_crash_shutdown()\n");
+}
--- /dev/null
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/machine_kexec-xen.c
2006-04-07 12:59:51.000000000 +0900
@@ -0,0 +1,80 @@
+/*
+ * machine_kexec-xen.c - handle transition of Linux booting another kernel
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ *
+ * Loosely based on arch/i386/kernel/machine_kexec.c
+ */
+
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <asm/hypercall.h>
+/* Relocation stub and its size (declared here, defined elsewhere); argument buffer handed to the kexec hypercall. */
+extern const unsigned char relocate_new_kernel[];
+extern unsigned int relocate_new_kernel_size;
+static kexec_arg_t hypercall_arg;
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have not yet
+ * been copied into the kernel.
+ *
+ * Do whatever setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing is done and 0 is always returned.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+/*
+ * Undo anything left over by machine_kexec_prepare
+ * when an image is freed. Currently a no-op.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ kimage_entry_t *ptr, entry;
+
+ /*
+ * Translate addresses inside head from physical to machine.
+ * Entries without IND_DESTINATION are rewritten in place; the walk
+ * follows indirection pages through the untranslated copy in entry.
+ */
+ ptr = &image->head;
+ while ((entry = *ptr) && !(entry & IND_DONE)) {
+ if (!(entry & IND_DESTINATION))
+ *ptr = phys_to_machine(entry & PAGE_MASK) |
+ (entry & ~PAGE_MASK);
+
+ if (entry & IND_INDIRECTION)
+ ptr = __va(entry & PAGE_MASK);
+ else
+ ptr++;
+ }
+
+ /* Set up arguments to hypercall; reboot_code_buffer is built from the control page's mfn */
+ hypercall_arg.u.kexec.indirection_page = image->head;
+ hypercall_arg.u.kexec.reboot_code_buffer =
+ pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT;
+ hypercall_arg.u.kexec.start_address = image->start;
+ hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel;
+ hypercall_arg.u.kexec.relocate_new_kernel_size =
+ relocate_new_kernel_size;
+
+ /* Let Xen do the rest of the work. NOTE(review): the hypercall's return value is ignored. */
+ HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg);
+}
--- from-0001/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ to-work/linux-2.6-xen-sparse/drivers/xen/core/reboot.c 2006-04-03
15:13:38.000000000 +0900
@@ -38,6 +38,11 @@ extern void ctrl_alt_del(void);
*/
#define SHUTDOWN_HALT 4
+void machine_shutdown(void)
+{
+ printk("machine_shutdown: does nothing\n"); /* stub: only logs this message */
+}
+
void machine_emergency_restart(void)
{
/* We really want to get pending console data out before we die. */
--- from-0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ to-work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
2006-04-06 11:00:03.000000000 +0900
@@ -37,6 +37,8 @@
# error "please don't include this file directly"
#endif
+#include <xen/interface/kexec.h>
+
#define __STR(x) #x
#define STR(x) __STR(x)
@@ -329,6 +331,13 @@ HYPERVISOR_nmi_op(
return _hypercall2(int, nmi_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec(
+ unsigned long op, kexec_arg_t * arg)
+{
+ return _hypercall2(int, kexec_op, op, arg); /* two-argument hypercall; the visible caller passes a KEXEC_CMD_* value as op */
+}
+
#endif /* __HYPERCALL_H__ */
/*
Binary files /dev/null and to-work/linux-2.6.16-xen/kernel/.kexec.c.swp differ
--- from-0001/xen/arch/x86/x86_32/Makefile
+++ to-work/xen/arch/x86/x86_32/Makefile 2006-04-03 16:25:31.000000000
+0900
@@ -5,6 +5,7 @@ obj-y += entry.o
obj-y += mm.o
obj-y += seg_fixup.o
obj-y += traps.o
+obj-y += machine_kexec.o
obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
--- from-0001/xen/arch/x86/x86_32/entry.S
+++ to-work/xen/arch/x86/x86_32/entry.S 2006-04-04 13:02:36.000000000 +0900
@@ -648,6 +648,7 @@ ENTRY(hypercall_table)
.long do_acm_op
.long do_nmi_op
.long do_arch_sched_op
+ .long do_kexec /* 30 */
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -683,6 +684,7 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_acm_op */
.byte 2 /* do_nmi_op */
.byte 2 /* do_arch_sched_op */
+ .byte 2 /* do_kexec */ /* 30 */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
--- /dev/null
+++ to-work/xen/arch/x86/x86_32/machine_kexec.c 2006-04-07 12:44:16.000000000
+0900
@@ -0,0 +1,168 @@
+/******************************************************************************
+ * arch/x86/x86_32/machine_kexec.c
+ *
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/domain_page.h>
+#include <xen/timer.h>
+#include <xen/sched.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+/* Signature used to call the relocation stub after it is copied into the reboot code buffer. */
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae);
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+/* Entry attributes: L0_ATTR is OR-ed into level-1 entries, L1_ATTR into level-2 entries, L2_ATTR into level-3 entries. */
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#ifndef CONFIG_X86_PAE
+
+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+/* Non-PAE: map address to itself in the page table whose frame is currently in CR3. */
+static void identity_map_page(unsigned long address)
+{
+ unsigned long mfn;
+ u32 *pgtable_level2;
+
+ /* Find the current page table: take its frame number from CR3 and map it */
+ mfn = read_cr3() >> PAGE_SHIFT;
+ pgtable_level2 = map_domain_page(mfn);
+
+ /* Identity map the page: fill the static level-1 slot, then overwrite the live level-2 slot to point at it */
+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ write_cr3(mfn << PAGE_SHIFT);
+
+ unmap_domain_page(pgtable_level2);
+}
+
+#else
+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+/* PAE: map address to itself in the page table whose frame is currently in CR3. */
+static void identity_map_page(unsigned long address)
+{
+    unsigned long mfn; /* unsigned: "mfn << PAGE_SHIFT" must not overflow a signed int */
+    intpte_t *pgtable_level3;
+
+    /* Find the current page table: take its frame number from CR3 and map it */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level3 = map_domain_page(mfn);
+
+    /* Identity map the page: chain the static level-1 and level-2 tables into the live level-3 slot */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+    set_64bit(&pgtable_level3[l3_table_offset(address)],
+              __pa(pgtable_level2) | L2_ATTR);
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    load_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level3);
+}
+#endif
+/* Reload CS with __HYPERVISOR_CS via a far jump, then DS/ES/FS/GS/SS with __HYPERVISOR_DS. */
+static void kexec_load_segments(void)
+{
+#define __SSTR(X) #X
+#define SSTR(X) __SSTR(X)
+ __asm__ __volatile__ (
+ "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n"
+ "\tmovl %%eax,%%ds\n"
+ "\tmovl %%eax,%%es\n"
+ "\tmovl %%eax,%%fs\n"
+ "\tmovl %%eax,%%gs\n"
+ "\tmovl %%eax,%%ss\n"
+ ::: "eax", "memory");
+#undef SSTR
+#undef __SSTR
+}
+
+#define kexec_load_idt(dtr) __asm__ __volatile__("lidt %0"::"m" (*(dtr)))
+/* Load the IDT register with base newidt and the given limit. */
+static void kexec_set_idt(void *newidt, __u16 limit)
+{
+    struct Xgt_desc_struct curidt;
+
+    /* ia32 supports unaligned loads & stores */
+    curidt.size = limit;
+    curidt.address = (unsigned long)newidt;
+
+    kexec_load_idt(&curidt);
+}
+
+#define kexec_load_gdt(dtr) __asm__ __volatile__("lgdt %0"::"m" (*(dtr)))
+/* Load the GDT register with base newgdt and the given limit. */
+static void kexec_set_gdt(void *newgdt, __u16 limit)
+{
+    struct Xgt_desc_struct curgdt;
+    /* ia32 supports unaligned loads & stores */
+    curgdt.size = limit;
+    curgdt.address = (unsigned long)newgdt;
+
+    kexec_load_gdt(&curgdt);
+}
+/* Stub: arg is not examined and no preparation is performed; always returns 0. */
+int machine_kexec_prepare(struct kexec_arg *arg)
+{
+ return 0;
+}
+/* Stub: arg is not examined and nothing is cleaned up. */
+void machine_kexec_cleanup(struct kexec_arg *arg)
+{
+}
+/* Copy the guest's relocation stub into the reboot code buffer and jump to it; returns only if the stub is rejected. */
+void machine_kexec(struct kexec_arg *arg)
+{
+    relocate_new_kernel_t rnk;
+    /* identity_map_page() maps a single page, so a larger stub cannot be copied safely. */
+    if ( arg->u.kexec.relocate_new_kernel_size > PAGE_SIZE )
+        return;
+    local_irq_disable();
+    identity_map_page(arg->u.kexec.reboot_code_buffer);
+    /* Never jump into a buffer the copy failed to fill (the identity mapping is left in place). */
+    if ( copy_from_user((void *)arg->u.kexec.reboot_code_buffer,
+                        arg->u.kexec.relocate_new_kernel,
+                        arg->u.kexec.relocate_new_kernel_size) != 0 ) {
+        local_irq_enable();
+        return;
+    }
+    kexec_load_segments();
+    kexec_set_gdt(__va(0),0);
+    kexec_set_idt(__va(0),0);
+
+    rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer;
+    (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer,
+           arg->u.kexec.start_address, cpu_has_pae);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/common/Makefile
+++ to-work/xen/common/Makefile 2006-04-03 15:13:38.000000000 +0900
@@ -24,6 +24,7 @@ obj-y += trace.o
obj-y += timer.o
obj-y += vsprintf.o
obj-y += xmalloc.o
+obj-y += kexec.o
obj-$(perfc) += perfc.o
obj-$(crash_debug) += gdbstub.o
--- /dev/null
+++ to-work/xen/common/kexec.c 2006-04-07 13:06:54.000000000 +0900
@@ -0,0 +1,54 @@
+/*
+ * Architecture independent kexec code for Xen
+ *
+ * At this stage, just a switch for the kexec hypercall into
+ * architecture dependent code.
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+extern int machine_kexec_prepare(struct kexec_arg *arg);
+extern void machine_kexec_cleanup(struct kexec_arg *arg);
+extern void machine_kexec(struct kexec_arg *arg);
+/* Kexec hypercall entry: copy the guest argument in, then dispatch on op (KEXEC_CMD_*).
+ * Returns -EFAULT on a failed copy, -EINVAL for an unknown op. NOTE(review): no caller privilege check. */
+int do_kexec(unsigned long op,
+             GUEST_HANDLE(kexec_arg_t) uarg)
+{
+    struct kexec_arg arg;
+    if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) )
+    {
+        printk("do_kexec: copy_from_guest failed\n");
+        return -EFAULT;
+    }
+    switch(op) {
+    case KEXEC_CMD_kexec:
+        machine_kexec(&arg);
+        return -EINVAL; /* Only reached if machine_kexec() returns */
+    case KEXEC_CMD_kexec_prepare:
+        return machine_kexec_prepare(&arg);
+    case KEXEC_CMD_kexec_cleanup:
+        machine_kexec_cleanup(&arg);
+        return 0;
+    default:
+        break;
+    }
+    return -EINVAL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
--- from-0001/xen/include/asm-x86/hypercall.h
+++ to-work/xen/include/asm-x86/hypercall.h 2006-04-07 13:05:06.000000000
+0900
@@ -6,6 +6,7 @@
#define __ASM_X86_HYPERCALL_H__
#include <public/physdev.h>
+#include <public/kexec.h>
extern long
do_set_trap_table(
@@ -79,6 +80,11 @@ extern long
arch_do_vcpu_op(
int cmd, struct vcpu *v, GUEST_HANDLE(void) arg);
+extern int
+do_kexec(
+ unsigned long op,
+ GUEST_HANDLE(kexec_arg_t) uarg);
+
#ifdef __x86_64__
extern long
--- /dev/null
+++ to-work/xen/include/public/kexec.h 2006-04-07 12:44:43.000000000 +0900
@@ -0,0 +1,39 @@
+/*
+ * kexec.h: Xen kexec
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+/*
+ * Scratch space for passing arguments to the kexec hypercall:
+ * u.helper is a placeholder, u.kexec carries the KEXEC_CMD_kexec parameters.
+ */
+typedef struct kexec_arg {
+ union {
+ struct {
+ unsigned long data; /* Not sure what this should be yet */
+ } helper;
+ struct {
+ unsigned long indirection_page; /* passed to the relocation stub */
+ unsigned long reboot_code_buffer; /* where Xen copies the stub, then jumps */
+ unsigned long start_address; /* passed to the relocation stub */
+ const char *relocate_new_kernel; /* guest pointer to the stub code */
+ unsigned int relocate_new_kernel_size; /* bytes of stub code to copy */
+ } kexec;
+ } u;
+} kexec_arg_t;
+DEFINE_GUEST_HANDLE(kexec_arg_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/include/public/xen.h
+++ to-work/xen/include/public/xen.h 2006-04-04 13:29:54.000000000 +0900
@@ -60,6 +60,7 @@
#define __HYPERVISOR_acm_op 27
#define __HYPERVISOR_nmi_op 28
#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_kexec_op 30
/*
* VIRTUAL INTERRUPTS
@@ -206,6 +207,13 @@ DEFINE_GUEST_HANDLE(mmuext_op_t);
#define VMASST_TYPE_writable_pagetables 2
#define MAX_VMASST_TYPE 2
+/*
+ * Commands to HYPERVISOR_kexec().
+ */
+#define KEXEC_CMD_kexec 0
+#define KEXEC_CMD_kexec_prepare 1
+#define KEXEC_CMD_kexec_cleanup 2
+
#ifndef __ASSEMBLY__
typedef uint16_t domid_t;
diff -r 0010df11836d buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32 Fri Apr 7 00:32:54 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Apr 7 14:54:45 2006 +0900
@@ -184,6 +184,7 @@ CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|