kexec: framework and i386
Hi,
here is a second take at this patch. The main changes over
the predecessor are that kdump now works, mfns are used instead
of pfns (was wrong before), and some code has been moved about.
The code still uses the basic approach of moving architecture
specific operations into the hypervisor.
Some notes:
* machine_kexec_cleanup() and machine_kexec_prepare() don't do
anything in i386. So while this patch adds a framework for them,
I am not sure what parameters are needed at this stage.
* Only works for UP, as machine_shutdown is not implemented yet
* kexecing into xen does not seem to work, I think that
kexec-tools needs updating, but I have not investigated yet
* I don't believe that kdump works yet
* This patch was prepared against xen-unstable.hg 9514
As of today (9574) two new hypercalls have been added.
I rediffed and moved the kexec hypercall to 33. However
this exceeds NR_hypercalls, which is currently 32.
I tried increasing this, but the dom0 now crashes
in entry.S on init. Even after rebuilding both xen and the kernel
completely from scratch after a make distclean. Help!!
Prepared with the assistance of my colleague Magnus Damm
Signed-Off-By: Horms <horms@xxxxxxxxxxxx>
--- from-0002/buildconfigs/linux-defconfig_xen_x86_32
+++ to-work/buildconfigs/linux-defconfig_xen_x86_32 2006-04-10
12:29:46.000000000 +0900
@@ -183,6 +183,7 @@ CONFIG_HZ_100=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
+CONFIG_KEXEC=y
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
--- from-0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ to-work/linux-2.6-xen-sparse/arch/i386/Kconfig 2006-04-10
12:29:46.000000000 +0900
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_XEN
+ depends on EXPERIMENTAL
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
--- from-0001/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
+++ to-work/linux-2.6-xen-sparse/arch/i386/kernel/Makefile 2006-04-10
12:29:46.000000000 +0900
@@ -92,7 +92,7 @@ include $(srctree)/scripts/Makefile.xen
obj-y += fixup.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
-n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o machine_kexec.o crash.o
obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
obj-y := $(call cherrypickxen, $(obj-y))
--- from-0001/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
+++ to-work/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile 2006-04-10
12:29:46.000000000 +0900
@@ -59,7 +59,7 @@ pci-dma-y += ../../i386/kernel/pci-dma
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
quirks-y := ../../i386/kernel/quirks-xen.o
-n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
+n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o machine_kexec.o crash.o
include $(srctree)/scripts/Makefile.xen
--- from-0001/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ to-work/linux-2.6-xen-sparse/drivers/xen/core/reboot.c 2006-04-10
12:29:46.000000000 +0900
@@ -17,6 +17,11 @@
#include <linux/kthread.h>
#include <xen/gnttab.h>
#include <xen/xencons.h>
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <asm/hypercall.h>
+
#if defined(__i386__) || defined(__x86_64__)
/*
@@ -38,6 +43,86 @@ extern void ctrl_alt_del(void);
*/
#define SHUTDOWN_HALT 4
+void machine_shutdown(void)
+{
+ printk("machine_shutdown: does nothing\n");
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* XXX: This should do something */
+ printk("xen-kexec: Need to turn of other CPUS in "
+ "machine_crash_shutdown()\n");
+}
+
+extern const unsigned char relocate_new_kernel[]; /* relocation stub handed to Xen */
+extern unsigned int relocate_new_kernel_size; /* length of that stub */
+static kexec_arg_t hypercall_arg; /* argument block for HYPERVISOR_kexec() */
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have not yet
+ * been copied into the kernel.
+ *
+ * Do whatever setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ return 0;
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ kimage_entry_t *ptr, entry;
+
+ /*
+ * Translate addresses inside head from physical to machine.
+ * Every entry not flagged IND_DESTINATION is rewritten in place;
+ * IND_INDIRECTION entries are followed via their original address.
+ */
+ ptr = &image->head;
+ while ((entry = *ptr) && !(entry & IND_DONE)) {
+ if (!(entry & IND_DESTINATION))
+ *ptr = phys_to_machine(entry & PAGE_MASK) |
+ (entry & ~PAGE_MASK);
+
+ if (entry & IND_INDIRECTION)
+ ptr = __va(entry & PAGE_MASK);
+ else
+ ptr++;
+ }
+
+ /* Set up arguments to hypercall (head was translated by the loop above) */
+ hypercall_arg.u.kexec.indirection_page = image->head;
+ hypercall_arg.u.kexec.reboot_code_buffer = /* machine address of control page */
+ pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT;
+ hypercall_arg.u.kexec.start_address = image->start;
+ hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel;
+ hypercall_arg.u.kexec.relocate_new_kernel_size =
+ relocate_new_kernel_size;
+
+ /* Let Xen do the rest; NOTE(review): the hypercall result is ignored */
+ HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg);
+}
+
void machine_emergency_restart(void)
{
/* We really want to get pending console data out before we die. */
--- from-0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ to-work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
2006-04-10 12:29:46.000000000 +0900
@@ -37,6 +37,8 @@
# error "please don't include this file directly"
#endif
+#include <xen/interface/kexec.h>
+
#define __STR(x) #x
#define STR(x) __STR(x)
@@ -329,6 +331,13 @@ HYPERVISOR_nmi_op(
return _hypercall2(int, nmi_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec(
+ unsigned long op, kexec_arg_t * arg)
+{
+ return _hypercall2(int, kexec_op, op, arg);
+}
+
#endif /* __HYPERCALL_H__ */
/*
--- from-0001/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
+++ to-work/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
2006-04-10 12:29:46.000000000 +0900
@@ -41,6 +41,8 @@
# error "please don't include this file directly"
#endif
+#include <xen/interface/kexec.h>
+
#define __STR(x) #x
#define STR(x) __STR(x)
@@ -330,6 +332,13 @@ HYPERVISOR_nmi_op(
return _hypercall2(int, nmi_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec(
+ unsigned long op, kexec_arg_t * arg)
+{
+ return _hypercall2(int, kexec_op, op, arg);
+}
+
#endif /* __HYPERCALL_H__ */
/*
--- from-0001/xen/arch/x86/x86_32/Makefile
+++ to-work/xen/arch/x86/x86_32/Makefile 2006-04-10 12:29:46.000000000
+0900
@@ -5,6 +5,7 @@ obj-y += entry.o
obj-y += mm.o
obj-y += seg_fixup.o
obj-y += traps.o
+obj-y += machine_kexec.o
obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
--- from-0001/xen/arch/x86/x86_32/entry.S
+++ to-work/xen/arch/x86/x86_32/entry.S 2006-04-10 12:29:46.000000000 +0900
@@ -648,6 +648,7 @@ ENTRY(hypercall_table)
.long do_acm_op
.long do_nmi_op
.long do_arch_sched_op
+ .long do_kexec /* 30 */
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -683,6 +684,7 @@ ENTRY(hypercall_args_table)
.byte 1 /* do_acm_op */
.byte 2 /* do_nmi_op */
.byte 2 /* do_arch_sched_op */
+ .byte 2 /* do_kexec */ /* 30 */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
--- /dev/null
+++ to-work/xen/arch/x86/x86_32/machine_kexec.c 2006-04-10 12:29:46.000000000
+0900
@@ -0,0 +1,168 @@
+/******************************************************************************
+ * arch/x86/x86_32/machine_kexec.c
+ *
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/domain_page.h>
+#include <xen/timer.h>
+#include <xen/sched.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae);
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#ifndef CONFIG_X86_PAE
+
+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long mfn;
+ u32 *pgtable_level2;
+
+ /* Find the current page table: map the frame that CR3 points at */
+ mfn = read_cr3() >> PAGE_SHIFT;
+ pgtable_level2 = map_domain_page(mfn);
+
+ /* Map address onto itself: fill its L1 slot, then hook the static L1 table into L2 */
+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ write_cr3(mfn << PAGE_SHIFT);
+
+ unmap_domain_page(pgtable_level2);
+}
+
+#else
+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+    unsigned long mfn; /* unsigned so that mfn << PAGE_SHIFT cannot overflow */
+    intpte_t *pgtable_level3;
+
+    /* Find the current page table: map the frame that CR3 points at */
+    mfn = read_cr3() >> PAGE_SHIFT;
+    pgtable_level3 = map_domain_page(mfn);
+
+    /* Map address onto itself: fill L1, chain L1 into L2, then L2 into the live L3 */
+    pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+    pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+    set_64bit(&pgtable_level3[l3_table_offset(address)],
+              __pa(pgtable_level2) | L2_ATTR);
+
+    /* Flush the tlb so the new mapping takes effect.
+     * Global tlb entries are not flushed but that is not an issue.
+     */
+    load_cr3(mfn << PAGE_SHIFT);
+
+    unmap_domain_page(pgtable_level3);
+}
+#endif
+
+static void kexec_load_segments(void)
+{
+#define __SSTR(X) #X
+#define SSTR(X) __SSTR(X)
+ __asm__ __volatile__ (
+ "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n"
+ "\tmovl %%eax,%%ds\n"
+ "\tmovl %%eax,%%es\n"
+ "\tmovl %%eax,%%fs\n"
+ "\tmovl %%eax,%%gs\n"
+ "\tmovl %%eax,%%ss\n"
+ ::: "eax", "memory");
+#undef SSTR
+#undef __SSTR
+}
+
+#define kexec_load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+static void kexec_set_idt(void *newidt, __u16 limit)
+{
+    struct Xgt_desc_struct curidt;
+
+    /* ia32 supports unaligned loads & stores */
+    curidt.size = limit;
+    curidt.address = (unsigned long)newidt;
+
+    /* Load the IDT register from the descriptor built above (lidt). */
+    kexec_load_idt(&curidt);
+}
+
+#define kexec_load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+static void kexec_set_gdt(void *newgdt, __u16 limit)
+{
+    struct Xgt_desc_struct curgdt;
+
+    /* Build the descriptor (ia32 supports unaligned loads & stores), then lgdt it */
+    curgdt.size = limit;
+    curgdt.address = (unsigned long)newgdt;
+
+    kexec_load_gdt(&curgdt);
+}
+
+int machine_kexec_prepare(struct kexec_arg *arg)
+{
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kexec_arg *arg)
+{
+}
+
+void machine_kexec(struct kexec_arg *arg)
+{
+    relocate_new_kernel_t rnk;
+
+    /* Only one page gets identity mapped below, so the stub must fit in it. */
+    if ( arg->u.kexec.relocate_new_kernel_size > PAGE_SIZE )
+        return;
+
+    local_irq_disable();
+    identity_map_page(arg->u.kexec.reboot_code_buffer);
+    /* NOTE(review): copy result is unchecked - decide how a fault should be handled */
+    copy_from_user((void *)arg->u.kexec.reboot_code_buffer,
+                   arg->u.kexec.relocate_new_kernel,
+                   arg->u.kexec.relocate_new_kernel_size);
+
+    kexec_load_segments();
+    kexec_set_gdt(__va(0),0);
+    kexec_set_idt(__va(0),0);
+
+    /* Jump into the copied stub through its identity-mapped address. */
+    rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer;
+    (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer,
+           arg->u.kexec.start_address, cpu_has_pae);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/common/Makefile
+++ to-work/xen/common/Makefile 2006-04-10 12:29:46.000000000 +0900
@@ -24,6 +24,7 @@ obj-y += trace.o
obj-y += timer.o
obj-y += vsprintf.o
obj-y += xmalloc.o
+obj-y += kexec.o
obj-$(perfc) += perfc.o
obj-$(crash_debug) += gdbstub.o
--- /dev/null
+++ to-work/xen/common/kexec.c 2006-04-10 12:38:29.000000000 +0900
@@ -0,0 +1,58 @@
+/*
+ * Architecture independent kexec code for Xen
+ *
+ * At this stage, just a switch for the kexec hypercall into
+ * architecture dependent code.
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <public/xen.h>
+#include <public/kexec.h>
+
+extern int machine_kexec_prepare(struct kexec_arg *arg);
+extern void machine_kexec_cleanup(struct kexec_arg *arg);
+extern void machine_kexec(struct kexec_arg *arg);
+
+int do_kexec(unsigned long op,
+             GUEST_HANDLE(kexec_arg_t) uarg)
+{
+    struct kexec_arg arg;
+
+    if ( !IS_PRIV(current->domain) )
+        return -EPERM;
+    /* Take a private copy of the guest's argument block before dispatching. */
+    if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) )
+    {
+        printk("do_kexec: copy_from_guest failed\n");
+        return -EFAULT;
+    }
+
+    switch(op) {
+    case KEXEC_CMD_kexec:
+        machine_kexec(&arg);
+        return -EINVAL; /* only reached if machine_kexec() returns */
+    case KEXEC_CMD_kexec_prepare:
+        return machine_kexec_prepare(&arg);
+    case KEXEC_CMD_kexec_cleanup:
+        machine_kexec_cleanup(&arg);
+        return 0;
+    }
+
+    return -EINVAL; /* unknown command */
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
--- from-0001/xen/include/asm-x86/hypercall.h
+++ to-work/xen/include/asm-x86/hypercall.h 2006-04-10 12:29:46.000000000
+0900
@@ -6,6 +6,7 @@
#define __ASM_X86_HYPERCALL_H__
#include <public/physdev.h>
+#include <public/kexec.h>
extern long
do_set_trap_table(
@@ -79,6 +80,11 @@ extern long
arch_do_vcpu_op(
int cmd, struct vcpu *v, GUEST_HANDLE(void) arg);
+extern int
+do_kexec(
+ unsigned long op,
+ GUEST_HANDLE(kexec_arg_t) uarg);
+
#ifdef __x86_64__
extern long
--- /dev/null
+++ to-work/xen/include/public/kexec.h 2006-04-10 12:29:46.000000000 +0900
@@ -0,0 +1,39 @@
+/*
+ * kexec.h: Xen kexec
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+/*
+ * Scratch space for passing arguments to the kexec hypercall
+ */
+typedef struct kexec_arg {
+ union {
+ struct {
+ unsigned long data; /* Not sure what this should be yet */
+ } helper;
+ struct {
+ unsigned long indirection_page;
+ unsigned long reboot_code_buffer;
+ unsigned long start_address;
+ const char *relocate_new_kernel;
+ unsigned int relocate_new_kernel_size;
+ } kexec;
+ } u;
+} kexec_arg_t;
+DEFINE_GUEST_HANDLE(kexec_arg_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- from-0001/xen/include/public/xen.h
+++ to-work/xen/include/public/xen.h 2006-04-10 12:29:46.000000000 +0900
@@ -60,6 +60,7 @@
#define __HYPERVISOR_acm_op 27
#define __HYPERVISOR_nmi_op 28
#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_kexec_op 30
/*
* VIRTUAL INTERRUPTS
@@ -206,6 +207,13 @@ DEFINE_GUEST_HANDLE(mmuext_op_t);
#define VMASST_TYPE_writable_pagetables 2
#define MAX_VMASST_TYPE 2
+/*
+ * Commands to HYPERVISOR_kexec().
+ */
+#define KEXEC_CMD_kexec 0
+#define KEXEC_CMD_kexec_prepare 1
+#define KEXEC_CMD_kexec_cleanup 2
+
#ifndef __ASSEMBLY__
typedef uint16_t domid_t;
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|