[PATCH 03/04] Kexec / Kdump: x86_32 specific code
This patch contains the x86_32 implementation of Kexec / Kdump for Xen.
Signed-Off-By: Magnus Damm <magnus@xxxxxxxxxxxxx>
Signed-Off-By: Simon Horman <horms@xxxxxxxxxxxx>
---
Applies on top of xen-unstable-12621.
buildconfigs/linux-defconfig_xen_x86_32 | 2
linux-2.6-xen-sparse/arch/i386/Kconfig | 2
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 19
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 8
patches/linux-2.6.16.33/git-35...c9.patch | 401 +++++++
patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec...code-i386.patch | 169 ++++
patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch | 108 ++
patches/linux-2.6.16.33/series | 3
xen/arch/x86/x86_32/entry.S | 2
xen/include/asm-x86/x86_32/elf.h | 52 +
xen/include/asm-x86/x86_32/kexec.h | 28
11 files changed, 783 insertions(+), 11 deletions(-)
--- 0002/buildconfigs/linux-defconfig_xen_x86_32
+++ work/buildconfigs/linux-defconfig_xen_x86_32
@@ -183,6 +183,7 @@ CONFIG_MTRR=y
CONFIG_REGPARM=y
CONFIG_SECCOMP=y
CONFIG_HZ_100=y
+CONFIG_KEXEC=y
# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=100
@@ -1036,6 +1037,7 @@ CONFIG_DNOTIFY=y
#
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
+# CONFIG_PROC_VMCORE is not set
CONFIG_SYSFS=y
CONFIG_TMPFS=y
# CONFIG_HUGETLB_PAGE is not set
--- 0001/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ work/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -726,7 +726,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
- depends on EXPERIMENTAL && !X86_XEN
+ depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
--- 0001/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ work/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -69,6 +69,10 @@
#include "setup_arch_pre.h"
#include <bios_ebda.h>
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
/* Forward Declaration. */
void __init find_max_pfn(void);
@@ -943,6 +947,7 @@ static void __init parse_cmdline_early (
* after a kernel panic.
*/
else if (!memcmp(from, "crashkernel=", 12)) {
+#ifndef CONFIG_XEN
unsigned long size, base;
size = memparse(from+12, &from);
if (*from == '@') {
@@ -953,6 +958,10 @@ static void __init parse_cmdline_early (
crashk_res.start = base;
crashk_res.end = base + size - 1;
}
+#else
+ printk("Ignoring crashkernel command line, "
+ "parameter will be supplied by xen\n");
+#endif
}
#endif
#ifdef CONFIG_PROC_VMCORE
@@ -1322,10 +1331,14 @@ void __init setup_bootmem_allocator(void
}
#endif
#ifdef CONFIG_KEXEC
+#ifdef CONFIG_XEN
+ xen_machine_kexec_setup_resources();
+#else
if (crashk_res.start != crashk_res.end)
reserve_bootmem(crashk_res.start,
crashk_res.end - crashk_res.start + 1);
#endif
+#endif
if (!xen_feature(XENFEAT_auto_translated_physmap))
phys_to_machine_mapping =
@@ -1389,7 +1402,11 @@ legacy_init_iomem_resources(struct e820e
request_resource(res, data_resource);
#endif
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
+#ifdef CONFIG_XEN
+ xen_machine_kexec_register_resources(res);
+#endif
#endif
}
}
--- 0001/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ work/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
@@ -395,5 +395,13 @@ HYPERVISOR_xenoprof_op(
return _hypercall2(int, xenoprof_op, op, arg);
}
+static inline int
+HYPERVISOR_kexec_op(
+ unsigned long op, void *args)
+{
+ return _hypercall2(int, kexec_op, op, args);
+}
+
+
#endif /* __HYPERCALL_H__ */
--- /dev/null
+++
work/patches/linux-2.6.16.33/git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
@@ -0,0 +1,401 @@
+From: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Date: Tue, 26 Sep 2006 08:52:38 +0000 (+0200)
+Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+X-Git-Url:
http://www.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=3566561bfadffcb5dbc85d576be80c0dbf2cccc9
+
+[PATCH] i386: Avoid overwriting the current pgd (V4, i386)
+
+kexec: Avoid overwriting the current pgd (V4, i386)
+
+This patch upgrades the i386-specific kexec code to avoid overwriting the
+current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used
+to start a secondary kernel that dumps the memory of the previous kernel.
+
+The code introduces a new set of page tables. These tables are used to provide
+an executable identity mapping without overwriting the current pgd.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+Signed-off-by: Andi Kleen <ak@xxxxxxx>
+---
+
+--- a/arch/i386/kernel/machine_kexec.c
++++ b/arch/i386/kernel/machine_kexec.c
+@@ -21,70 +21,13 @@
+ #include <asm/system.h>
+
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+-
+-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define L2_ATTR (_PAGE_PRESENT)
+-
+-#define LEVEL0_SIZE (1UL << 12UL)
+-
+-#ifndef CONFIG_X86_PAE
+-#define LEVEL1_SIZE (1UL << 22UL)
+-static u32 pgtable_level1[1024] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index;
+- u32 *pgtable_level2;
+-
+- /* Find the current page table */
+- pgtable_level2 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = address / LEVEL1_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level2);
+-}
+-
+-#else
+-#define LEVEL1_SIZE (1UL << 21UL)
+-#define LEVEL2_SIZE (1UL << 30UL)
+-static u64 pgtable_level1[512] PAGE_ALIGNED;
+-static u64 pgtable_level2[512] PAGE_ALIGNED;
+-
+-static void identity_map_page(unsigned long address)
+-{
+- unsigned long level1_index, level2_index, level3_index;
+- u64 *pgtable_level3;
+-
+- /* Find the current page table */
+- pgtable_level3 = __va(read_cr3());
+-
+- /* Find the indexes of the physical address to identity map */
+- level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
+- level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
+- level3_index = address / LEVEL2_SIZE;
+-
+- /* Identity map the page table entry */
+- pgtable_level1[level1_index] = address | L0_ATTR;
+- pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
+- set_64bit(&pgtable_level3[level3_index],
+- __pa(pgtable_level2) | L2_ATTR);
+-
+- /* Flush the tlb so the new mapping takes effect.
+- * Global tlb entries are not flushed but that is not an issue.
+- */
+- load_cr3(pgtable_level3);
+-}
++static u32 kexec_pgd[1024] PAGE_ALIGNED;
++#ifdef CONFIG_X86_PAE
++static u32 kexec_pmd0[1024] PAGE_ALIGNED;
++static u32 kexec_pmd1[1024] PAGE_ALIGNED;
+ #endif
++static u32 kexec_pte0[1024] PAGE_ALIGNED;
++static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+ static void set_idt(void *newidt, __u16 limit)
+ {
+@@ -128,16 +71,6 @@ static void load_segments(void)
+ #undef __STR
+ }
+
+-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
+- unsigned long indirection_page,
+- unsigned long reboot_code_buffer,
+- unsigned long start_address,
+- unsigned int has_pae) ATTRIB_NORET;
+-
+-extern const unsigned char relocate_new_kernel[];
+-extern void relocate_new_kernel_end(void);
+-extern const unsigned int relocate_new_kernel_size;
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage
+ */
+ NORET_TYPE void machine_kexec(struct kimage *image)
+ {
+- unsigned long page_list;
+- unsigned long reboot_code_buffer;
+-
+- relocate_new_kernel_t rnk;
++ unsigned long page_list[PAGES_NR];
++ void *control_page;
+
+ /* Interrupts aren't acceptable while we reboot */
+ local_irq_disable();
+
+- /* Compute some offsets */
+- reboot_code_buffer = page_to_pfn(image->control_code_page)
+- << PAGE_SHIFT;
+- page_list = image->head;
+-
+- /* Set up an identity mapping for the reboot_code_buffer */
+- identity_map_page(reboot_code_buffer);
+-
+- /* copy it out */
+- memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+- relocate_new_kernel_size);
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ page_list[PA_CONTROL_PAGE] = __pa(control_page);
++ page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
++ page_list[PA_PGD] = __pa(kexec_pgd);
++ page_list[VA_PGD] = (unsigned long)kexec_pgd;
++#ifdef CONFIG_X86_PAE
++ page_list[PA_PMD_0] = __pa(kexec_pmd0);
++ page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
++ page_list[PA_PMD_1] = __pa(kexec_pmd1);
++ page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
++#endif
++ page_list[PA_PTE_0] = __pa(kexec_pte0);
++ page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
++ page_list[PA_PTE_1] = __pa(kexec_pte1);
++ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+ /* The segment registers are funny things, they have both a
+ * visible and an invisible part. Whenever the visible part is
+@@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kim
+ set_idt(phys_to_virt(0),0);
+
+ /* now call it */
+- rnk = (relocate_new_kernel_t) reboot_code_buffer;
+- (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
++ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
++ image->start, cpu_has_pae);
+ }
+
+ /* crashkernel=size@addr specifies the location to reserve for
+--- a/arch/i386/kernel/relocate_kernel.S
++++ b/arch/i386/kernel/relocate_kernel.S
+@@ -7,16 +7,138 @@
+ */
+
+ #include <linux/linkage.h>
++#include <asm/page.h>
++#include <asm/kexec.h>
++
++/*
++ * Must be relocatable PIC code callable as a C function
++ */
++
++#define PTR(x) (x << 2)
++#define PAGE_ALIGNED (1 << PAGE_SHIFT)
++#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
++#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
++
++ .text
++ .align PAGE_ALIGNED
++ .globl relocate_kernel
++relocate_kernel:
++ movl 8(%esp), %ebp /* list of pages */
++
++#ifdef CONFIG_X86_PAE
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_0)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xc0000000, %eax
++ shrl $27, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PMD_1)(%ebp), %edx
++ orl $PAE_PGD_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PMD_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x3fe00000, %eax
++ shrl $18, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x001ff000, %eax
++ shrl $9, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#else
++ /* map the control page at its virtual address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_0)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_0)(%ebp), %edi
++ movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ /* identity map the control page at its physical address */
++
++ movl PTR(VA_PGD)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0xffc00000, %eax
++ shrl $20, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_PTE_1)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++
++ movl PTR(VA_PTE_1)(%ebp), %edi
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
++ andl $0x003ff000, %eax
++ shrl $10, %eax
++ addl %edi, %eax
++
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
++ orl $PAGE_ATTR, %edx
++ movl %edx, (%eax)
++#endif
+
+- /*
+- * Must be relocatable PIC code callable as a C function, that once
+- * it starts can not use the previous processes stack.
+- */
+- .globl relocate_new_kernel
+ relocate_new_kernel:
+ /* read the arguments and say goodbye to the stack */
+ movl 4(%esp), %ebx /* page_list */
+- movl 8(%esp), %ebp /* reboot_code_buffer */
++ movl 8(%esp), %ebp /* list of pages */
+ movl 12(%esp), %edx /* start address */
+ movl 16(%esp), %ecx /* cpu_has_pae */
+
+@@ -24,11 +146,26 @@ relocate_new_kernel:
+ pushl $0
+ popfl
+
+- /* set a new stack at the bottom of our page... */
+- lea 4096(%ebp), %esp
++ /* get physical address of control page now */
++ /* this is impossible after page table switch */
++ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
++
++ /* switch to new set of page tables */
++ movl PTR(PA_PGD)(%ebp), %eax
++ movl %eax, %cr3
++
++ /* setup a new stack at the end of the physical control page */
++ lea 4096(%edi), %esp
+
+- /* store the parameters back on the stack */
+- pushl %edx /* store the start address */
++ /* jump to identity mapped page */
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel), %eax
++ pushl %eax
++ ret
++
++identity_mapped:
++ /* store the start address on the stack */
++ pushl %edx
+
+ /* Set cr0 to a known state:
+ * 31 0 == Paging disabled
+@@ -113,8 +250,3 @@ relocate_new_kernel:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
+-relocate_new_kernel_end:
+-
+- .globl relocate_new_kernel_size
+-relocate_new_kernel_size:
+- .long relocate_new_kernel_end - relocate_new_kernel
+--- a/include/asm-i386/kexec.h
++++ b/include/asm-i386/kexec.h
+@@ -1,6 +1,26 @@
+ #ifndef _I386_KEXEC_H
+ #define _I386_KEXEC_H
+
++#define PA_CONTROL_PAGE 0
++#define VA_CONTROL_PAGE 1
++#define PA_PGD 2
++#define VA_PGD 3
++#define PA_PTE_0 4
++#define VA_PTE_0 5
++#define PA_PTE_1 6
++#define VA_PTE_1 7
++#ifdef CONFIG_X86_PAE
++#define PA_PMD_0 8
++#define VA_PMD_0 9
++#define PA_PMD_1 10
++#define VA_PMD_1 11
++#define PAGES_NR 12
++#else
++#define PAGES_NR 8
++#endif
++
++#ifndef __ASSEMBLY__
++
+ #include <asm/fixmap.h>
+ #include <asm/ptrace.h>
+ #include <asm/string.h>
+@@ -72,5 +92,12 @@ static inline void crash_setup_regs(stru
+ newregs->eip = (unsigned long)current_text_addr();
+ }
+ }
++asmlinkage NORET_TYPE void
++relocate_kernel(unsigned long indirection_page,
++ unsigned long control_page,
++ unsigned long start_address,
++ unsigned int has_pae) ATTRIB_NORET;
++
++#endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
--- /dev/null
+++
work/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
@@ -0,0 +1,169 @@
+kexec: Move asm segment handling code to the assembly file (i386)
+
+This patch moves the idt, gdt, and segment handling code from machine_kexec.c
+to relocate_kernel.S. The main reason behind this move is to avoid code
+duplication in the Xen hypervisor. With this patch all code required to kexec
+is put on the control page.
+
+On top of that this patch also counts as a cleanup - I think it is much
+nicer to write assembly directly in assembly files than wrap inline assembly
+in C functions for no apparent reason.
+
+Signed-off-by: Magnus Damm <magnus@xxxxxxxxxxxxx>
+---
+
+ Applies to 2.6.19-rc1.
+
+ machine_kexec.c | 59 -----------------------------------------------------
+ relocate_kernel.S | 58 +++++++++++++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 53 insertions(+), 64 deletions(-)
+
+--- 0002/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c 2006-10-05 15:49:08.000000000
+0900
+@@ -29,48 +29,6 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+-static void set_idt(void *newidt, __u16 limit)
+-{
+- struct Xgt_desc_struct curidt;
+-
+- /* ia32 supports unaliged loads & stores */
+- curidt.size = limit;
+- curidt.address = (unsigned long)newidt;
+-
+- load_idt(&curidt);
+-};
+-
+-
+-static void set_gdt(void *newgdt, __u16 limit)
+-{
+- struct Xgt_desc_struct curgdt;
+-
+- /* ia32 supports unaligned loads & stores */
+- curgdt.size = limit;
+- curgdt.address = (unsigned long)newgdt;
+-
+- load_gdt(&curgdt);
+-};
+-
+-static void load_segments(void)
+-{
+-#define __STR(X) #X
+-#define STR(X) __STR(X)
+-
+- __asm__ __volatile__ (
+- "\tljmp $"STR(__KERNEL_CS)",$1f\n"
+- "\t1:\n"
+- "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
+- "\tmovl %%eax,%%ds\n"
+- "\tmovl %%eax,%%es\n"
+- "\tmovl %%eax,%%fs\n"
+- "\tmovl %%eax,%%gs\n"
+- "\tmovl %%eax,%%ss\n"
+- ::: "eax", "memory");
+-#undef STR
+-#undef __STR
+-}
+-
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+@@ -127,23 +85,6 @@ NORET_TYPE void machine_kexec(struct kim
+ page_list[PA_PTE_1] = __pa(kexec_pte1);
+ page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+
+- /* The segment registers are funny things, they have both a
+- * visible and an invisible part. Whenever the visible part is
+- * set to a specific selector, the invisible part is loaded
+- * with from a table in memory. At no other time is the
+- * descriptor table in memory accessed.
+- *
+- * I take advantage of this here by force loading the
+- * segments, before I zap the gdt with an invalid value.
+- */
+- load_segments();
+- /* The gdt & idt are now invalid.
+- * If you want to load them you must set up your own idt & gdt.
+- */
+- set_gdt(phys_to_virt(0),0);
+- set_idt(phys_to_virt(0),0);
+-
+- /* now call it */
+ relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+ image->start, cpu_has_pae);
+ }
+--- 0002/arch/i386/kernel/relocate_kernel.S
++++ work/arch/i386/kernel/relocate_kernel.S 2006-10-05 16:03:21.000000000
+0900
+@@ -154,14 +154,45 @@ relocate_new_kernel:
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, %cr3
+
++ /* setup idt */
++ movl %edi, %eax
++ addl $(idt_48 - relocate_kernel), %eax
++ lidtl (%eax)
++
++ /* setup gdt */
++ movl %edi, %eax
++ addl $(gdt - relocate_kernel), %eax
++ movl %edi, %esi
++ addl $((gdt_48 - relocate_kernel) + 2), %esi
++ movl %eax, (%esi)
++
++ movl %edi, %eax
++ addl $(gdt_48 - relocate_kernel), %eax
++ lgdtl (%eax)
++
++ /* setup data segment registers */
++ mov $(gdt_ds - gdt), %eax
++ mov %eax, %ds
++ mov %eax, %es
++ mov %eax, %fs
++ mov %eax, %gs
++ mov %eax, %ss
++
+ /* setup a new stack at the end of the physical control page */
+ lea 4096(%edi), %esp
+
+- /* jump to identity mapped page */
+- movl %edi, %eax
+- addl $(identity_mapped - relocate_kernel), %eax
+- pushl %eax
+- ret
++ /* load new code segment and jump to identity mapped page */
++ movl %edi, %esi
++ xorl %eax, %eax
++ pushl %eax
++ pushl %esi
++ pushl %eax
++ movl $(gdt_cs - gdt), %eax
++ pushl %eax
++ movl %edi, %eax
++ addl $(identity_mapped - relocate_kernel),%eax
++ pushl %eax
++ iretl
+
+ identity_mapped:
+ /* store the start address on the stack */
+@@ -250,3 +281,20 @@ identity_mapped:
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ ret
++
++ .align 16
++gdt:
++ .quad 0x0000000000000000 /* NULL descriptor */
++gdt_cs:
++ .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
++gdt_ds:
++ .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
++gdt_end:
++
++gdt_48:
++ .word gdt_end - gdt - 1 /* limit */
++ .long 0 /* base - filled in by code above */
++
++idt_48:
++ .word 0 /* limit */
++ .long 0 /* base */
--- /dev/null
+++ work/patches/linux-2.6.16.33/linux-2.6.19-rc1-kexec-xen-i386.patch
@@ -0,0 +1,108 @@
+--- 0001/arch/i386/kernel/crash.c
++++ work/arch/i386/kernel/crash.c
+@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
+ crash_save_this_cpu(regs, cpu);
+ }
+
++#ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+ static atomic_t waiting_for_crash_ipi;
+
+@@ -158,6 +159,7 @@ static void nmi_shootdown_cpus(void)
+ /* There are no cpus to shootdown */
+ }
+ #endif
++#endif /* CONFIG_XEN */
+
+ void machine_crash_shutdown(struct pt_regs *regs)
+ {
+@@ -174,10 +176,12 @@ void machine_crash_shutdown(struct pt_re
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = smp_processor_id();
++#ifndef CONFIG_XEN
+ nmi_shootdown_cpus();
+ lapic_shutdown();
+ #if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+ #endif
++#endif /* CONFIG_XEN */
+ crash_save_self(regs);
+ }
+--- 0007/arch/i386/kernel/machine_kexec.c
++++ work/arch/i386/kernel/machine_kexec.c
+@@ -19,6 +19,10 @@
+ #include <asm/desc.h>
+ #include <asm/system.h>
+
++#ifdef CONFIG_XEN
++#include <xen/interface/kexec.h>
++#endif
++
+ #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
+ static u32 kexec_pgd[1024] PAGE_ALIGNED;
+ #ifdef CONFIG_X86_PAE
+@@ -28,6 +32,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
+ static u32 kexec_pte0[1024] PAGE_ALIGNED;
+ static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
++#ifdef CONFIG_XEN
++
++#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
++
++#if PAGES_NR > KEXEC_XEN_NO_PAGES
++#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
++#endif
++
++#if PA_CONTROL_PAGE != 0
++#error PA_CONTROL_PAGE is non zero - Xen support will break
++#endif
++
++void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage
*image)
++{
++ void *control_page;
++
++ memset(xki->page_list, 0, sizeof(xki->page_list));
++
++ control_page = page_address(image->control_code_page);
++ memcpy(control_page, relocate_kernel, PAGE_SIZE);
++
++ xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
++ xki->page_list[PA_PGD] = __ma(kexec_pgd);
++#ifdef CONFIG_X86_PAE
++ xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
++ xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
++#endif
++ xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
++ xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
++
++}
++
++#endif /* CONFIG_XEN */
++
+ /*
+ * A architecture hook called to validate the
+ * proposed image and prepare the control pages
+--- 0006/include/asm-i386/kexec.h
++++ work/include/asm-i386/kexec.h
+@@ -98,6 +98,20 @@ relocate_kernel(unsigned long indirectio
+ unsigned long start_address,
+ unsigned int has_pae) ATTRIB_NORET;
+
++
++/* Under Xen we need to work with machine addresses. These macros give the
++ * machine address of a certain page to the generic kexec code instead of
++ * the pseudo physical address which would be given by the default macros.
++ */
++
++#ifdef CONFIG_XEN
++#define KEXEC_ARCH_HAS_PAGE_MACROS
++#define kexec_page_to_pfn(page) pfn_to_mfn(page_to_pfn(page))
++#define kexec_pfn_to_page(pfn) pfn_to_page(mfn_to_pfn(pfn))
++#define kexec_virt_to_phys(addr) virt_to_machine(addr)
++#define kexec_phys_to_virt(addr) phys_to_virt(machine_to_phys(addr))
++#endif
++
+ #endif /* __ASSEMBLY__ */
+
+ #endif /* _I386_KEXEC_H */
--- 0004/patches/linux-2.6.16.33/series
+++ work/patches/linux-2.6.16.33/series
@@ -1,6 +1,9 @@
kexec-generic.patch
git-2efe55a9cec8418f0e0cde3dc3787a42fddc4411.patch
git-2a8a3d5b65e86ec1dfef7d268c64a909eab94af7.patch
+git-3566561bfadffcb5dbc85d576be80c0dbf2cccc9.patch
+linux-2.6.19-rc1-kexec-move_segment_code-i386.patch
+linux-2.6.19-rc1-kexec-xen-i386.patch
blktap-aio-16_03_06.patch
device_bind.patch
fix-hz-suspend.patch
--- 0001/xen/arch/x86/x86_32/entry.S
+++ work/xen/arch/x86/x86_32/entry.S
@@ -659,6 +659,7 @@ ENTRY(hypercall_table)
.long do_hvm_op
.long do_sysctl /* 35 */
.long do_domctl
+ .long do_kexec_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
@@ -701,6 +702,7 @@ ENTRY(hypercall_args_table)
.byte 2 /* do_hvm_op */
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
+ .byte 2 /* do_kexec_op */
.rept NR_hypercalls-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
--- 0004/xen/include/asm-x86/x86_32/elf.h
+++ work/xen/include/asm-x86/x86_32/elf.h
@@ -1,16 +1,62 @@
#ifndef __X86_32_ELF_H__
#define __X86_32_ELF_H__
-#include <xen/lib.h> /* for printk() used in stub */
+#include <asm/processor.h>
typedef struct {
- unsigned long dummy;
+ unsigned long ebx;
+ unsigned long ecx;
+ unsigned long edx;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned long ebp;
+ unsigned long eax;
+ unsigned long ds;
+ unsigned long es;
+ unsigned long fs;
+ unsigned long gs;
+ unsigned long orig_eax;
+ unsigned long eip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long esp;
+ unsigned long ss;
} ELF_Gregset;
extern inline void elf_core_save_regs(ELF_Gregset *core_regs,
crash_xen_core_t *xen_core_regs)
{
- printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+ unsigned long tmp;
+
+ asm volatile("movl %%ebx,%0" : "=m"(core_regs->ebx));
+ asm volatile("movl %%ecx,%0" : "=m"(core_regs->ecx));
+ asm volatile("movl %%edx,%0" : "=m"(core_regs->edx));
+ asm volatile("movl %%esi,%0" : "=m"(core_regs->esi));
+ asm volatile("movl %%edi,%0" : "=m"(core_regs->edi));
+ asm volatile("movl %%ebp,%0" : "=m"(core_regs->ebp));
+ asm volatile("movl %%eax,%0" : "=m"(core_regs->eax));
+ asm volatile("movw %%ds, %%ax;" :"=a"(core_regs->ds));
+ asm volatile("movw %%es, %%ax;" :"=a"(core_regs->es));
+ asm volatile("movw %%fs, %%ax;" :"=a"(core_regs->fs));
+ asm volatile("movw %%gs, %%ax;" :"=a"(core_regs->gs));
+ /* orig_eax not filled in for now */
+ core_regs->eip = (unsigned long)current_text_addr();
+ asm volatile("movw %%cs, %%ax;" :"=a"(core_regs->cs));
+ asm volatile("pushfl; popl %0" :"=m"(core_regs->eflags));
+ asm volatile("movl %%esp,%0" : "=m"(core_regs->esp));
+ asm volatile("movw %%ss, %%ax;" :"=a"(core_regs->ss));
+
+ asm volatile("mov %%cr0, %0" : "=r" (tmp) : );
+ xen_core_regs->cr0 = tmp;
+
+ asm volatile("mov %%cr2, %0" : "=r" (tmp) : );
+ xen_core_regs->cr2 = tmp;
+
+ asm volatile("mov %%cr3, %0" : "=r" (tmp) : );
+ xen_core_regs->cr3 = tmp;
+
+ asm volatile("mov %%cr4, %0" : "=r" (tmp) : );
+ xen_core_regs->cr4 = tmp;
}
#endif /* __X86_32_ELF_H__ */
--- 0004/xen/include/asm-x86/x86_32/kexec.h
+++ work/xen/include/asm-x86/x86_32/kexec.h
@@ -1,17 +1,33 @@
-#ifndef __X86_32_KEXEC_H__
-#define __X86_32_KEXEC_H__
+/******************************************************************************
+ * kexec.h
+ *
+ * Based heavily on machine_kexec.c and kexec.h from Linux 2.6.19-rc1
+ *
+ */
+
+#ifndef __X86_KEXEC_X86_32_H__
+#define __X86_KEXEC_X86_32_H__
-#include <xen/lib.h> /* for printk() used in stub */
#include <xen/types.h>
-#include <public/xen.h>
#include <xen/kexec.h>
+#include <asm/fixmap.h>
+
+typedef asmlinkage void (*relocate_new_kernel_t)(
+ unsigned long indirection_page,
+ unsigned long page_list,
+ unsigned long start_address,
+ unsigned int has_pae);
static inline void machine_kexec(xen_kexec_image_t *image)
{
- printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
+ relocate_new_kernel_t rnk;
+
+ rnk = (relocate_new_kernel_t) image->page_list[1];
+ (*rnk)(image->indirection_page, (unsigned long)image->page_list,
+ image->start_address, (unsigned long)cpu_has_pae);
}
-#endif /* __X86_32_KEXEC_H__ */
+#endif /* __X86_KEXEC_X86_32_H__ */
/*
* Local variables:
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|