On Tue, May 09, 2006 at 01:16:32PM +0900, Horms wrote:
> On Sun, May 07, 2006 at 01:45:22PM +0900, Horms wrote:
> > On Sat, May 06, 2006 at 05:44:44PM +0900, Akio Takebe wrote:
> >
> > > I think you can make a patch in patches/linux-2.6.16/ if you would
> > > modify these.
> >
> > Yes, that is probably the best way forward, I'll work on breaking it
> > out in that manner.
>
> Hi Takebe-san,
>
> here is an updated version of the patch which moves portions into
> patches/linux-2.6.16/ as you suggested. It also moves to
> xen-unstable 9969 / Linux 2.6.16.13 and has some minor build fixes,
> for problems that crept into the previous patch.
Sorry, this morning's patch had the internal patch in the wrong location
and with the wrong diff level.
--
Horms http://www.vergenet.net/~horms/
kexec: framework and i386
This is an implementation of kexec for dom0/xen, that allows
kexecing of the physical machine from xen. The approach taken is
to move the architecture-dependent kexec code into a new hypercall.
Some notes:
* machine_kexec_cleanup() and machine_kexec_prepare() don't do
anything in i386. So while this patch adds a framework for them,
I am not sure what parameters are needed at this stage.
* Only works for UP, as machine_shutdown is not implemented yet
* kexecing into xen does not seem to work, I think that
kexec-tools needs updating, but I have not investigated yet
* Kdump works by first copying the kernel into dom0 segments
and relocating them later in xen, the same way that kexec does.
The only difference is that the relocation is made into
an area reserved by xen
* Kdump reservation is made using the xen command line parameters,
kdump_megabytes and kdump_megabytes_base, rather than
the linux option crashkernel, which is now ignored.
Two parameters are used instead of one to simplify parsing.
This can be cleaned up later if desired. But the reservation
seems to need to be made by xen to make sure that it happens
early enough.
The tested values are kdump_megabytes=16, kdump_megabytes_base=32
(kdump_megabytes_base=16 does not seem to work)
* This patch uses a new kexec hypercall
* SMP Kexec works, Kdump is next on the list
Highlights since the previous posted version:
* Diff now applies to a xen checkout from hg
(previously it assumed that the kernel was unpacked)
- xen-unstable-hg 9660 / Linux 2.6.16.13
* Added machine_shutdown, which disappeared in the previous release of
this patch
* Fixed include problems in kexec.h
Prepared by Horms and Magnus Damm
Signed-Off-By: Magnus Damm <magnus@xxxxxxxxxxxxx>
Signed-Off-By: Horms <horms@xxxxxxxxxxxx>
buildconfigs/linux-defconfig_xen_x86_32 | 1
linux-2.6-xen-sparse/arch/i386/Kconfig | 2
linux-2.6-xen-sparse/arch/i386/kernel/Makefile | 2
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 24 +
linux-2.6-xen-sparse/drivers/xen/core/Makefile | 1
linux-2.6-xen-sparse/drivers/xen/core/crash.c | 98 ++++
linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c | 73 +++
linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 4
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 10
patches/linux-2.6.16.13/kexec.patch | 175 ++++++++
xen/arch/x86/Makefile | 1
xen/arch/x86/dom0_ops.c | 3
xen/arch/x86/machine_kexec.c | 28 +
xen/arch/x86/setup.c | 75 +++
xen/arch/x86/x86_32/Makefile | 1
xen/arch/x86/x86_32/entry.S | 2
xen/arch/x86/x86_32/machine_kexec.c | 205 ++++++++++
xen/arch/x86/x86_64/Makefile | 1
xen/arch/x86/x86_64/machine_kexec.c | 25 +
xen/common/Makefile | 1
xen/common/kexec.c | 73 +++
xen/common/page_alloc.c | 33 +
xen/include/asm-x86/hypercall.h | 6
xen/include/public/kexec.h | 45 ++
xen/include/public/xen.h | 9
xen/include/xen/mm.h | 1
26 files changed, 877 insertions(+), 22 deletions(-)
--- x/buildconfigs/linux-defconfig_xen_x86_32
+++ x/buildconfigs/linux-defconfig_xen_x86_32
@@ -184,6 +184,7 @@ CONFIG_MTRR=y
CONFIG_REGPARM=y
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
+CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
--- x/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ x/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_XEN
+ depends on EXPERIMENTAL
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
--- x/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
+++ x/linux-2.6-xen-sparse/arch/i386/kernel/Makefile
@@ -89,7 +89,7 @@ include $(srctree)/scripts/Makefile.xen
obj-y += fixup.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
-n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o machine_kexec.o crash.o
obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
obj-y := $(call cherrypickxen, $(obj-y))
--- x/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ x/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -68,6 +68,10 @@
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
/* Forward Declaration. */
void __init find_max_pfn(void);
@@ -932,6 +936,7 @@ static void __init parse_cmdline_early (
* after a kernel panic.
*/
else if (!memcmp(from, "crashkernel=", 12)) {
+#ifndef CONFIG_XEN
unsigned long size, base;
size = memparse(from+12, &from);
if (*from == '@') {
@@ -942,6 +947,10 @@ static void __init parse_cmdline_early (
crashk_res.start = base;
crashk_res.end = base + size - 1;
}
+#else
+ printk("Ignoring crashkernel command line, "
+ "parameter will be supplied by xen\n");
+#endif
}
#endif
#ifdef CONFIG_PROC_VMCORE
@@ -1318,9 +1327,21 @@ void __init setup_bootmem_allocator(void
}
#endif
#ifdef CONFIG_KEXEC
+#ifndef CONFIG_XEN
if (crashk_res.start != crashk_res.end)
reserve_bootmem(crashk_res.start,
crashk_res.end - crashk_res.start + 1);
+#else
+ {
+ struct kexec_arg xen_kexec_arg;
+ BUG_ON(HYPERVISOR_kexec(KEXEC_CMD_reserve, &xen_kexec_arg));
+ if (xen_kexec_arg.u.reserve.size) {
+ crashk_res.start = xen_kexec_arg.u.reserve.start;
+ crashk_res.end = xen_kexec_arg.u.reserve.start +
+ xen_kexec_arg.u.reserve.size - 1;
+ }
+ }
+#endif
#endif
if (!xen_feature(XENFEAT_auto_translated_physmap))
@@ -1395,6 +1416,9 @@ legacy_init_iomem_resources(struct resou
res->end = map[i].end - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#endif
}
free_bootmem(__pa(map), PAGE_SIZE);
--- x/linux-2.6-xen-sparse/drivers/xen/core/Makefile
+++ x/linux-2.6-xen-sparse/drivers/xen/core/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_NET) += skbuff.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o
--- /dev/null
+++ x/linux-2.6-xen-sparse/drivers/xen/core/crash.c
@@ -0,0 +1,98 @@
+/*
+ * Architecture specific (i386-xen) functions for kexec based crash dumps.
+ *
+ * Created by: Horms <horms@xxxxxxxxxxxx>
+ *
+ */
+
+#include <linux/kernel.h> /* For printk */
+
+/* XXX: final_note(), crash_save_this_cpu() and crash_save_self()
+ * are copied from arch/i386/kernel/crash.c. It might be good to either
+ * make the original functions non-static and use them, or just
+ * merge this into that file.
+ */
+#include <linux/elf.h> /* For struct elf_note */
+#include <linux/elfcore.h> /* For struct elf_prstatus */
+#include <linux/kexec.h> /* crash_notes */
+
+static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
+ size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) +3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+memcpy(buf, &note, sizeof(note));
+}
+
+static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
+{
+ struct elf_prstatus prstatus;
+ u32 *buf;
+
+ if ((cpu < 0) || (cpu >= NR_CPUS))
+ return;
+
+ /* Using ELF notes here is opportunistic.
+ * I need a well defined structure format
+ * for the data I pass, and I need tags
+ * on the data to indicate what information I have
+ * squirrelled away. ELF notes happen to provide
+ * all of that, so there is no need to invent something new.
+ */
+ buf = (u32*)per_cpu_ptr(crash_notes, cpu);
+ if (!buf)
+ return;
+ memset(&prstatus, 0, sizeof(prstatus));
+ prstatus.pr_pid = current->pid;
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ final_note(buf);
+}
+
+static void crash_save_self(struct pt_regs *regs)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+ crash_save_this_cpu(regs, cpu);
+}
+
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* XXX: This should do something */
+ printk("xen-kexec: Need to turn of other CPUS in "
+ "machine_crash_shutdown()\n");
+ crash_save_self(regs);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
--- /dev/null
+++ x/linux-2.6-xen-sparse/drivers/xen/core/machine_kexec.c
@@ -0,0 +1,73 @@
+/*
+ * machine_kexec.c - handle transition of Linux booting another kernel
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ *
+ * Loosely based on arch/i386/kernel/machine_kexec.c
+ */
+
+#include <linux/kexec.h>
+#include <xen/interface/kexec.h>
+#include <linux/mm.h>
+#include <asm/hypercall.h>
+
+const extern unsigned char relocate_new_kernel[];
+extern unsigned int relocate_new_kernel_size;
+
+/*
+ * An architecture hook called to validate the
+ * proposed image and prepare the control pages
+ * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
+ * have been allocated, but the segments have yet
+ * to be copied into the kernel.
+ *
+ * Do whatever setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ *
+ * Currently nothing.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+ kexec_arg_t hypercall_arg;
+ hypercall_arg.u.helper.data = NULL;
+ return HYPERVISOR_kexec(KEXEC_CMD_kexec_prepare, &hypercall_arg);
+}
+
+/*
+ * Undo anything leftover by machine_kexec_prepare
+ * when an image is freed.
+ */
+void machine_kexec_cleanup(struct kimage *image)
+{
+ kexec_arg_t hypercall_arg;
+ hypercall_arg.u.helper.data = NULL;
+ HYPERVISOR_kexec(KEXEC_CMD_kexec_cleanup, &hypercall_arg);
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+ kexec_arg_t hypercall_arg;
+ hypercall_arg.u.kexec.indirection_page = image->head;
+ hypercall_arg.u.kexec.reboot_code_buffer =
+ pfn_to_mfn(page_to_pfn(image->control_code_page)) << PAGE_SHIFT;
+ hypercall_arg.u.kexec.start_address = image->start;
+ hypercall_arg.u.kexec.relocate_new_kernel = relocate_new_kernel;
+ hypercall_arg.u.kexec.relocate_new_kernel_size =
+ relocate_new_kernel_size;
+ HYPERVISOR_kexec(KEXEC_CMD_kexec, &hypercall_arg);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
--- x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ x/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
@@ -66,6 +66,10 @@ void machine_power_off(void)
HYPERVISOR_shutdown(SHUTDOWN_poweroff);
}
+#ifdef CONFIG_KEXEC
+void machine_shutdown(void) { }
+#endif
+
int reboot_thru_bios = 0; /* for dmi_scan.c */
EXPORT_SYMBOL(machine_restart);
EXPORT_SYMBOL(machine_halt);
--- x/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ x/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
@@ -39,6 +39,8 @@
# error "please don't include this file directly"
#endif
+#include <xen/interface/kexec.h>
+
#define __STR(x) #x
#define STR(x) __STR(x)
@@ -359,6 +361,14 @@ HYPERVISOR_xenoprof_op(
return _hypercall2(int, xenoprof_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec(
+ unsigned long op, kexec_arg_t * arg)
+{
+ return _hypercall2(int, kexec_op, op, arg);
+}
+
+
#endif /* __HYPERCALL_H__ */
--- x/xen/arch/x86/Makefile
+++ x/xen/arch/x86/Makefile
@@ -39,6 +39,7 @@ obj-y += trampoline.o
obj-y += traps.o
obj-y += usercopy.o
obj-y += x86_emulate.o
+obj-y += machine_kexec.o
ifneq ($(pae),n)
obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o
--- x/xen/arch/x86/dom0_ops.c
+++ x/xen/arch/x86/dom0_ops.c
@@ -29,6 +29,9 @@
#include <asm/mtrr.h>
#include "cpu/mtrr/mtrr.h"
+extern unsigned int opt_kdump_megabytes;
+extern unsigned int opt_kdump_megabytes_base;
+
#define TRC_DOM0OP_ENTER_BASE 0x00020000
#define TRC_DOM0OP_LEAVE_BASE 0x00030000
--- /dev/null
+++ x/xen/arch/x86/machine_kexec.c
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * arch/x86/machine_kexec.c
+ *
+ * Created By: Horms
+ *
+ */
+
+#include <xen/types.h>
+#include <public/kexec.h>
+
+int machine_kexec_prepare(struct kexec_arg *arg)
+{
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kexec_arg *arg)
+{
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- x/xen/arch/x86/setup.c
+++ x/xen/arch/x86/setup.c
@@ -38,6 +38,11 @@ static unsigned int opt_xenheap_megabyte
integer_param("xenheap_megabytes", opt_xenheap_megabytes);
#endif
+unsigned int opt_kdump_megabytes = 0;
+integer_param("kdump_megabytes", opt_kdump_megabytes);
+unsigned int opt_kdump_megabytes_base = 0;
+integer_param("kdump_megabytes_base", opt_kdump_megabytes_base);
+
/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);
@@ -192,6 +197,20 @@ static void percpu_free_unused_areas(voi
__pa(__per_cpu_end));
}
+void __init move_memory(unsigned long dst,
+ unsigned long src_start, unsigned long src_end)
+{
+#if defined(CONFIG_X86_32)
+ memmove((void *)dst, /* use low mapping */
+ (void *)src_start, /* use low mapping */
+ src_end - src_start);
+#elif defined(CONFIG_X86_64)
+ memmove(__va(dst),
+ __va(src_start),
+ src_end - src_start);
+#endif
+}
+
void __init __start_xen(multiboot_info_t *mbi)
{
char __cmdline[] = "", *cmdline = __cmdline;
@@ -327,15 +346,8 @@ void __init __start_xen(multiboot_info_t
initial_images_start = xenheap_phys_end;
initial_images_end = initial_images_start + modules_length;
-#if defined(CONFIG_X86_32)
- memmove((void *)initial_images_start, /* use low mapping */
- (void *)mod[0].mod_start, /* use low mapping */
- mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-#elif defined(CONFIG_X86_64)
- memmove(__va(initial_images_start),
- __va(mod[0].mod_start),
- mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
-#endif
+ move_memory(initial_images_start,
+ mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
/* Initialise boot-time allocator with all RAM situated after modules. */
xenheap_phys_start = init_boot_allocator(__pa(&_end));
@@ -383,6 +395,51 @@ void __init __start_xen(multiboot_info_t
#endif
}
+ if (opt_kdump_megabytes) {
+ unsigned long kdump_start, kdump_size, k;
+
+ /* mark images pages as free for now */
+
+ init_boot_pages(initial_images_start, initial_images_end);
+
+ kdump_start = opt_kdump_megabytes_base << 20;
+ kdump_size = opt_kdump_megabytes << 20;
+
+ printk("Kdump: %luMB (%lukB) at 0x%lx\n",
+ kdump_size >> 20,
+ kdump_size >> 10,
+ kdump_start);
+
+ if ((kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK))
+ panic("Kdump parameters not page aligned\n");
+
+ kdump_start >>= PAGE_SHIFT;
+ kdump_size >>= PAGE_SHIFT;
+
+ /* allocate pages for Kdump memory area */
+
+ k = alloc_boot_pages_at(kdump_size, kdump_start);
+
+ if (k != kdump_start)
+ panic("Unable to reserve Kdump memory\n");
+
+ /* allocate pages for relocated initial images */
+
+ k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0;
+ k += (initial_images_end - initial_images_start) >> PAGE_SHIFT;
+
+ k = alloc_boot_pages(k, 1);
+
+ if (!k)
+ panic("Unable to allocate initial images memory\n");
+
+ move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end);
+
+ initial_images_end -= initial_images_start;
+ initial_images_start = k << PAGE_SHIFT;
+ initial_images_end += initial_images_start;
+ }
+
memguard_init();
printk("System RAM: %luMB (%lukB)\n",
--- x/xen/arch/x86/x86_32/Makefile
+++ x/xen/arch/x86/x86_32/Makefile
@@ -3,5 +3,6 @@ obj-y += entry.o
obj-y += mm.o
obj-y += seg_fixup.o
obj-y += traps.o
+obj-y += machine_kexec.o
obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
--- x/xen/arch/x86/x86_32/entry.S
+++ x/xen/arch/x86/x86_32/entry.S
@@ -648,6 +648,7 @@ ENTRY(hypercall_table)
.long do_xenoprof_op
.long do_event_channel_op
.long do_physdev_op
+ .long do_kexec
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -687,6 +688,7 @@ ENTRY(hypercall_args_table)
.byte 2 /* do_xenoprof_op */
.byte 2 /* do_event_channel_op */
.byte 2 /* do_physdev_op */
+ .byte 2 /* do_kexec */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
--- /dev/null
+++ x/xen/arch/x86/x86_32/machine_kexec.c
@@ -0,0 +1,205 @@
+/******************************************************************************
+ * arch/x86/x86_32/machine_kexec.c
+ *
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/domain_page.h>
+#include <xen/timer.h>
+#include <xen/sched.h>
+#include <xen/reboot.h>
+#include <xen/console.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <public/kexec.h>
+
+static void __machine_kexec(struct kexec_arg *arg);
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long reboot_code_buffer,
+ unsigned long start_address,
+ unsigned int has_pae);
+
+#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+
+#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define L2_ATTR (_PAGE_PRESENT)
+
+#ifndef CONFIG_X86_PAE
+
+static u32 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ unsigned long mfn;
+ u32 *pgtable_level2;
+
+ /* Find the current page table */
+ mfn = read_cr3() >> PAGE_SHIFT;
+ pgtable_level2 = map_domain_page(mfn);
+
+ /* Identity map the page table entry */
+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ write_cr3(mfn << PAGE_SHIFT);
+
+ unmap_domain_page(pgtable_level2);
+}
+
+#else
+static u64 pgtable_level1[L1_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+static u64 pgtable_level2[L2_PAGETABLE_ENTRIES] PAGE_ALIGNED;
+
+static void identity_map_page(unsigned long address)
+{
+ int mfn;
+ intpte_t *pgtable_level3;
+
+ /* Find the current page table */
+ mfn = read_cr3() >> PAGE_SHIFT;
+ pgtable_level3 = map_domain_page(mfn);
+
+ /* Identity map the page table entry */
+ pgtable_level1[l1_table_offset(address)] = address | L0_ATTR;
+ pgtable_level2[l2_table_offset(address)] = __pa(pgtable_level1) | L1_ATTR;
+ set_64bit(&pgtable_level3[l3_table_offset(address)],
+ __pa(pgtable_level2) | L2_ATTR);
+
+ /* Flush the tlb so the new mapping takes effect.
+ * Global tlb entries are not flushed but that is not an issue.
+ */
+ load_cr3(mfn << PAGE_SHIFT);
+
+ unmap_domain_page(pgtable_level3);
+}
+#endif
+
+static void kexec_load_segments(void)
+{
+#define __SSTR(X) #X
+#define SSTR(X) __SSTR(X)
+ __asm__ __volatile__ (
+ "\tljmp $"SSTR(__HYPERVISOR_CS)",$1f\n"
+ "\t1:\n"
+ "\tmovl $"SSTR(__HYPERVISOR_DS)",%%eax\n"
+ "\tmovl %%eax,%%ds\n"
+ "\tmovl %%eax,%%es\n"
+ "\tmovl %%eax,%%fs\n"
+ "\tmovl %%eax,%%gs\n"
+ "\tmovl %%eax,%%ss\n"
+ ::: "eax", "memory");
+#undef SSTR
+#undef __SSTR
+}
+
+#define kexec_load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+static void kexec_set_idt(void *newidt, __u16 limit)
+{
+ struct Xgt_desc_struct curidt;
+
+/* ia32 supports unaligned loads & stores */
+ curidt.size = limit;
+ curidt.address = (unsigned long)newidt;
+
+ kexec_load_idt(&curidt);
+
+};
+
+#define kexec_load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+static void kexec_set_gdt(void *newgdt, __u16 limit)
+{
+ struct Xgt_desc_struct curgdt;
+
+ /* ia32 supports unaligned loads & stores */
+ curgdt.size = limit;
+ curgdt.address = (unsigned long)newgdt;
+
+ kexec_load_gdt(&curgdt);
+};
+
+static void __machine_shutdown(void *data)
+{
+ struct kexec_arg *arg = (struct kexec_arg *)data;
+
+ printk("__machine_shutdown: cpu=%u\n", smp_processor_id());
+
+ watchdog_disable();
+ console_start_sync();
+
+ smp_send_stop();
+
+#ifdef CONFIG_X86_IO_APIC
+ disable_IO_APIC();
+#endif
+
+ __machine_kexec(arg);
+}
+
+void machine_shutdown(struct kexec_arg *arg)
+{
+ int reboot_cpu_id;
+ cpumask_t reboot_cpu;
+
+
+ reboot_cpu_id = 0;
+
+ if (!cpu_isset(reboot_cpu_id, cpu_online_map))
+ reboot_cpu_id = smp_processor_id();
+
+ if (reboot_cpu_id != smp_processor_id()) {
+ cpus_clear(reboot_cpu);
+ cpu_set(reboot_cpu_id, reboot_cpu);
+ on_selected_cpus(reboot_cpu, __machine_shutdown, arg, 1, 0);
+ for (;;)
+ ; /* nothing */
+ }
+ else
+ __machine_shutdown(arg);
+ BUG();
+}
+
+static void __machine_kexec(struct kexec_arg *arg)
+{
+ relocate_new_kernel_t rnk;
+
+ local_irq_disable();
+
+ identity_map_page(arg->u.kexec.reboot_code_buffer);
+
+ copy_from_user((void *)arg->u.kexec.reboot_code_buffer,
+ arg->u.kexec.relocate_new_kernel,
+ arg->u.kexec.relocate_new_kernel_size);
+
+ kexec_load_segments();
+ kexec_set_gdt(__va(0),0);
+ kexec_set_idt(__va(0),0);
+
+ rnk = (relocate_new_kernel_t) arg->u.kexec.reboot_code_buffer;
+ (*rnk)(arg->u.kexec.indirection_page, arg->u.kexec.reboot_code_buffer,
+ arg->u.kexec.start_address, cpu_has_pae);
+}
+
+void machine_kexec(struct kexec_arg *arg)
+{
+ machine_shutdown(arg);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- x/xen/arch/x86/x86_64/Makefile
+++ x/xen/arch/x86/x86_64/Makefile
@@ -1,3 +1,4 @@
obj-y += entry.o
obj-y += mm.o
obj-y += traps.o
+obj-y += machine_kexec.o
--- /dev/null
+++ x/xen/arch/x86/x86_64/machine_kexec.c
@@ -0,0 +1,25 @@
+/******************************************************************************
+ * arch/x86/x86_64/machine_kexec.c
+ *
+ * Created By: Horms
+ *
+ * Based heavily on arch/i386/machine_kexec.c from Linux 2.6.16
+ */
+
+#include <xen/types.h>
+#include <public/kexec.h>
+
+void machine_kexec(struct kexec_arg *arg)
+{
+ printk("machine_kexec: not implemented\n");
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- x/xen/common/Makefile
+++ x/xen/common/Makefile
@@ -7,6 +7,7 @@ obj-y += event_channel.o
obj-y += grant_table.o
obj-y += kernel.o
obj-y += keyhandler.o
+obj-y += kexec.o
obj-y += lib.o
obj-y += memory.o
obj-y += multicall.o
--- /dev/null
+++ x/xen/common/kexec.c
@@ -0,0 +1,73 @@
+/*
+ * Architecture independent kexec code for Xen
+ *
+ * At this stage, just a switch for the kexec hypercall into
+ * architecture dependent code.
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <public/kexec.h>
+
+extern int machine_kexec_prepare(struct kexec_arg *arg);
+extern void machine_kexec_cleanup(struct kexec_arg *arg);
+extern void machine_kexec(struct kexec_arg *arg);
+
+extern unsigned int opt_kdump_megabytes;
+extern unsigned int opt_kdump_megabytes_base;
+
+int do_kexec(unsigned long op,
+ XEN_GUEST_HANDLE(kexec_arg_t) uarg)
+{
+ struct kexec_arg arg;
+
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
+ if (op == KEXEC_CMD_reserve)
+ {
+ arg.u.reserve.size = opt_kdump_megabytes << 20;
+ arg.u.reserve.start = opt_kdump_megabytes_base << 20;
+ if ( unlikely(copy_to_guest(uarg, &arg, 1) != 0) )
+ {
+ printk("do_kexec: copy_to_guest failed");
+ return -EFAULT;
+ }
+ return 0;
+ }
+
+ if ( unlikely(copy_from_guest(&arg, uarg, 1) != 0) )
+ {
+ printk("do_kexec: __copy_from_guest failed");
+ return -EFAULT;
+ }
+
+ switch(op) {
+ case KEXEC_CMD_kexec:
+ machine_kexec(&arg);
+ return -EINVAL; /* Not Reached */
+ case KEXEC_CMD_kexec_prepare:
+ return machine_kexec_prepare(&arg);
+ case KEXEC_CMD_kexec_cleanup:
+ machine_kexec_cleanup(&arg);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
--- x/xen/common/page_alloc.c
+++ x/xen/common/page_alloc.c
@@ -212,24 +212,35 @@ void init_boot_pages(paddr_t ps, paddr_t
}
}
+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at)
+{
+ unsigned long i;
+
+ for ( i = 0; i < nr_pfns; i++ )
+ if ( allocated_in_map(pfn_at + i) )
+ break;
+
+ if ( i == nr_pfns )
+ {
+ map_alloc(pfn_at, nr_pfns);
+ return pfn_at;
+ }
+
+ return 0;
+}
+
unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align)
{
- unsigned long pg, i;
+ unsigned long pg, i = 0;
for ( pg = 0; (pg + nr_pfns) < max_page; pg += pfn_align )
{
- for ( i = 0; i < nr_pfns; i++ )
- if ( allocated_in_map(pg + i) )
- break;
-
- if ( i == nr_pfns )
- {
- map_alloc(pg, nr_pfns);
- return pg;
- }
+ i = alloc_boot_pages_at(nr_pfns, pg);
+ if (i != 0)
+ break;
}
- return 0;
+ return i;
}
--- x/xen/include/asm-x86/hypercall.h
+++ x/xen/include/asm-x86/hypercall.h
@@ -6,6 +6,8 @@
#define __ASM_X86_HYPERCALL_H__
#include <public/physdev.h>
+#include <xen/types.h>
+#include <public/kexec.h>
extern long
do_event_channel_op_compat(
@@ -87,6 +89,10 @@ extern long
arch_do_vcpu_op(
int cmd, struct vcpu *v, XEN_GUEST_HANDLE(void) arg);
+extern int
+do_kexec(
+ unsigned long op, XEN_GUEST_HANDLE(kexec_arg_t) uarg);
+
#ifdef __x86_64__
extern long
--- /dev/null
+++ x/xen/include/public/kexec.h
@@ -0,0 +1,45 @@
+/*
+ * kexec.h: Xen kexec public
+ *
+ * Created By: Horms <horms@xxxxxxxxxxxx>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+#include "xen.h"
+
+/*
+ * Scratch space for passing arguments to the kexec hypercall
+ */
+typedef struct kexec_arg {
+ union {
+ struct {
+ unsigned long data; /* Not sure what this should be yet */
+ } helper;
+ struct {
+ unsigned long indirection_page;
+ unsigned long reboot_code_buffer;
+ unsigned long start_address;
+ const char *relocate_new_kernel;
+ unsigned int relocate_new_kernel_size;
+ } kexec;
+ struct {
+ unsigned long size;
+ unsigned long start;
+ } reserve;
+ } u;
+} kexec_arg_t;
+DEFINE_XEN_GUEST_HANDLE(kexec_arg_t);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- x/xen/include/public/xen.h
+++ x/xen/include/public/xen.h
@@ -64,6 +64,7 @@
#define __HYPERVISOR_xenoprof_op 31
#define __HYPERVISOR_event_channel_op 32
#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_kexec_op 34
/* Architecture-specific hypercall definitions. */
#define __HYPERVISOR_arch_0 48
@@ -238,6 +239,14 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
#define VMASST_TYPE_writable_pagetables 2
#define MAX_VMASST_TYPE 2
+/*
+ * Operations for kexec.
+ */
+#define KEXEC_CMD_kexec 0
+#define KEXEC_CMD_kexec_prepare 1
+#define KEXEC_CMD_kexec_cleanup 2
+#define KEXEC_CMD_reserve 3
+
#ifndef __ASSEMBLY__
typedef uint16_t domid_t;
--- x/xen/include/xen/mm.h
+++ x/xen/include/xen/mm.h
@@ -40,6 +40,7 @@ struct page_info;
paddr_t init_boot_allocator(paddr_t bitmap_start);
void init_boot_pages(paddr_t ps, paddr_t pe);
unsigned long alloc_boot_pages(unsigned long nr_pfns, unsigned long pfn_align);
+unsigned long alloc_boot_pages_at(unsigned long nr_pfns, unsigned long pfn_at);
void end_boot_allocator(void);
/* Generic allocator. These functions are *not* interrupt-safe. */
--- /dev/null 2006-05-09 15:32:30.399072192 +0900
+++ x/patches/linux-2.6.16.13/kexec.patch 2006-05-09 18:03:46.000000000 +0900
@@ -0,0 +1,175 @@
+--- x/drivers/base/cpu.c
++++ x/drivers/base/cpu.c
+@@ -101,7 +101,11 @@ static ssize_t show_crash_notes(struct s
+ * boot up and this data does not change there after. Hence this
+ * operation should be safe. No locking required.
+ */
++#ifndef CONFIG_XEN
+ addr = __pa(per_cpu_ptr(crash_notes, cpunum));
++#else
++ addr = virt_to_machine(per_cpu_ptr(crash_notes, cpunum));
++#endif
+ rc = sprintf(buf, "%Lx\n", addr);
+ return rc;
+ }
+--- x/kernel/kexec.c
++++ x/kernel/kexec.c
+@@ -38,6 +38,20 @@ struct resource crashk_res = {
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+ };
+
++/* Kexec needs to know about the actual physical address.
++ * But in xen, a physical address is a pseudo-physical address. */
++#ifndef CONFIG_XEN
++#define kexec_page_to_pfn(page) page_to_pfn(page)
++#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
++#define kexec_virt_to_phys(addr) virt_to_phys(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(addr)
++#else
++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page))
++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn))
++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
++#endif
++
+ int kexec_should_crash(struct task_struct *p)
+ {
+ if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops)
+@@ -403,7 +417,7 @@ static struct page *kimage_alloc_normal_
+ pages = kimage_alloc_pages(GFP_KERNEL, order);
+ if (!pages)
+ break;
+- pfn = page_to_pfn(pages);
++ pfn = kexec_page_to_pfn(pages);
+ epfn = pfn + count;
+ addr = pfn << PAGE_SHIFT;
+ eaddr = epfn << PAGE_SHIFT;
+@@ -437,6 +451,7 @@ static struct page *kimage_alloc_normal_
+ return pages;
+ }
+
++#ifndef CONFIG_XEN
+ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
+ unsigned int order)
+ {
+@@ -490,7 +505,7 @@ static struct page *kimage_alloc_crash_c
+ }
+ /* If I don't overlap any segments I have found my hole! */
+ if (i == image->nr_segments) {
+- pages = pfn_to_page(hole_start >> PAGE_SHIFT);
++ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
+ break;
+ }
+ }
+@@ -517,6 +532,13 @@ struct page *kimage_alloc_control_pages(
+
+ return pages;
+ }
++#else /* CONFIG_XEN */
++struct page *kimage_alloc_control_pages(struct kimage *image,
++ unsigned int order)
++{
++ return kimage_alloc_normal_control_pages(image, order);
++}
++#endif
+
+ static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+ {
+@@ -532,7 +554,7 @@ static int kimage_add_entry(struct kimag
+ return -ENOMEM;
+
+ ind_page = page_address(page);
+- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
++ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
+ image->entry = ind_page;
+ image->last_entry = ind_page +
+ ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+@@ -593,13 +615,13 @@ static int kimage_terminate(struct kimag
+ #define for_each_kimage_entry(image, ptr, entry) \
+ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+ ptr = (entry & IND_INDIRECTION)? \
+- phys_to_virt((entry & PAGE_MASK)): ptr +1)
++ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
+
+ static void kimage_free_entry(kimage_entry_t entry)
+ {
+ struct page *page;
+
+- page = pfn_to_page(entry >> PAGE_SHIFT);
++ page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
+ kimage_free_pages(page);
+ }
+
+@@ -686,7 +708,7 @@ static struct page *kimage_alloc_page(st
+ * have a match.
+ */
+ list_for_each_entry(page, &image->dest_pages, lru) {
+- addr = page_to_pfn(page) << PAGE_SHIFT;
++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
+ if (addr == destination) {
+ list_del(&page->lru);
+ return page;
+@@ -701,12 +723,12 @@ static struct page *kimage_alloc_page(st
+ if (!page)
+ return NULL;
+ /* If the page cannot be used file it away */
+- if (page_to_pfn(page) >
++ if (kexec_page_to_pfn(page) >
+ (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+ list_add(&page->lru, &image->unuseable_pages);
+ continue;
+ }
+- addr = page_to_pfn(page) << PAGE_SHIFT;
++ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
+
+ /* If it is the destination page we want use it */
+ if (addr == destination)
+@@ -729,7 +751,7 @@ static struct page *kimage_alloc_page(st
+ struct page *old_page;
+
+ old_addr = *old & PAGE_MASK;
+- old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
++ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
+ copy_highpage(page, old_page);
+ *old = addr | (*old & ~PAGE_MASK);
+
+@@ -779,7 +801,7 @@ static int kimage_load_normal_segment(st
+ result = -ENOMEM;
+ goto out;
+ }
+- result = kimage_add_page(image, page_to_pfn(page)
++ result = kimage_add_page(image, kexec_page_to_pfn(page)
+ << PAGE_SHIFT);
+ if (result < 0)
+ goto out;
+@@ -811,6 +833,7 @@ out:
+ return result;
+ }
+
++#ifndef CONFIG_XEN
+ static int kimage_load_crash_segment(struct kimage *image,
+ struct kexec_segment *segment)
+ {
+@@ -833,7 +856,7 @@ static int kimage_load_crash_segment(str
+ char *ptr;
+ size_t uchunk, mchunk;
+
+- page = pfn_to_page(maddr >> PAGE_SHIFT);
++ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
+ if (page == 0) {
+ result = -ENOMEM;
+ goto out;
+@@ -881,6 +904,13 @@ static int kimage_load_segment(struct ki
+
+ return result;
+ }
++#else /* CONFIG_XEN */
++static int kimage_load_segment(struct kimage *image,
++ struct kexec_segment *segment)
++{
++ return kimage_load_normal_segment(image, segment);
++}
++#endif
+
+ /*
+ * Exec Kernel system call: for obvious reasons only root may call it.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|