# HG changeset patch
# User Hollis Blanchard <hollisb@xxxxxxxxxx>
# Date 1173386392 21600
# Node ID dcec453681bc85f9f6f3aa49431ded9a63aa1c9b
# Parent 8f0b5295bb1bd66c9e5c86368845bdb055b3d86c
# Parent 38513d22d23420a90f94e7e0f70c564100e83851
[POWERPC][XEN] Merge with xen-unstable.hg.
Signed-off-by: Hollis Blanchard <hollisb@xxxxxxxxxx>
---
linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c | 389 -----
linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c | 774 ----------
linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c | 59
linux-2.6-xen-sparse/mm/Kconfig | 157 --
tools/ptsname/Makefile | 22
tools/ptsname/ptsname.c | 44
tools/ptsname/setup.py | 11
xen/arch/x86/mm/shadow/page-guest32.h | 100 -
Config.mk | 7
config/StdGNU.mk | 6
config/SunOS.mk | 6
docs/misc/dump-core-format.txt | 14
linux-2.6-xen-sparse/arch/i386/Kconfig | 6
linux-2.6-xen-sparse/arch/i386/Kconfig.cpu | 4
linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S | 59
linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 8
linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 3
linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c | 13
linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 4
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 58
linux-2.6-xen-sparse/arch/x86_64/Kconfig | 2
linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile | 2
linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S | 83 -
linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S | 25
linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c | 2
linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 27
linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 218 +-
linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c | 11
linux-2.6-xen-sparse/drivers/char/mem.c | 2
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 192 +-
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 2
linux-2.6-xen-sparse/drivers/xen/char/mem.c | 2
linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c | 55
linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c | 2
linux-2.6-xen-sparse/drivers/xen/netback/common.h | 15
linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 18
linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 21
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 4
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 64
linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c | 11
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 2
linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c | 2
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c | 54
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 17
linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h | 2
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h | 5
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h | 6
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 13
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h | 2
linux-2.6-xen-sparse/include/linux/page-flags.h | 6
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h | 2
patches/linux-2.6.18/blktap-aio-16_03_06.patch | 5
tools/Makefile | 9
tools/blktap/lib/Makefile | 4
tools/console/Makefile | 4
tools/firmware/rombios/rombios.c | 102 -
tools/guest-headers/Makefile | 2
tools/ioemu/Makefile | 8
tools/ioemu/hw/ide.c | 115 +
tools/ioemu/target-i386-dm/qemu-ifup | 3
tools/ioemu/vl.c | 8
tools/ioemu/vnc.c | 8
tools/libxc/xc_core.c | 26
tools/libxc/xc_core.h | 4
tools/libxc/xc_core_ia64.c | 4
tools/libxc/xc_core_ia64.h | 2
tools/libxc/xc_core_x86.h | 2
tools/libxc/xc_dom_core.c | 3
tools/libxc/xc_linux_restore.c | 24
tools/libxc/xc_linux_save.c | 16
tools/libxc/xc_ptrace_core.c | 24
tools/libxen/Makefile | 4
tools/pygrub/Makefile | 7
tools/python/Makefile | 3
tools/python/ptsname/ptsname.c | 44
tools/python/setup.py | 9
tools/python/xen/xend/XendBootloader.py | 3
tools/python/xen/xend/XendDomainInfo.py | 14
tools/python/xen/xend/XendNode.py | 41
tools/security/Makefile | 35
tools/vnet/libxutil/Makefile | 2
tools/xenfb/Makefile | 9
tools/xenfb/xenfb.c | 5
tools/xenstore/Makefile | 12
xen/Rules.mk | 8
xen/arch/x86/domain.c | 25
xen/arch/x86/domain_build.c | 4
xen/arch/x86/domctl.c | 7
xen/arch/x86/hvm/hvm.c | 3
xen/arch/x86/hvm/svm/emulate.c | 22
xen/arch/x86/hvm/svm/svm.c | 388 +++--
xen/arch/x86/hvm/svm/vmcb.c | 10
xen/arch/x86/mm.c | 5
xen/arch/x86/mm/Makefile | 1
xen/arch/x86/mm/hap/Makefile | 2
xen/arch/x86/mm/hap/hap.c | 708 +++++++++
xen/arch/x86/mm/hap/private.h | 112 +
xen/arch/x86/mm/hap/support.c | 334 ++++
xen/arch/x86/mm/page-guest32.h | 100 +
xen/arch/x86/mm/paging.c | 34
xen/arch/x86/mm/shadow/common.c | 21
xen/arch/x86/mm/shadow/private.h | 4
xen/arch/x86/mm/shadow/types.h | 2
xen/common/event_channel.c | 3
xen/common/page_alloc.c | 165 --
xen/common/xmalloc.c | 6
xen/drivers/acpi/numa.c | 9
xen/drivers/char/console.c | 2
xen/include/acm/acm_hooks.h | 4
xen/include/asm-x86/domain.h | 16
xen/include/asm-x86/hap.h | 122 +
xen/include/asm-x86/hvm/svm/emulate.h | 10
xen/include/asm-x86/hvm/svm/svm.h | 35
xen/include/asm-x86/hvm/svm/vmcb.h | 16
xen/include/public/arch-x86/xen.h | 1
xen/include/public/xen.h | 19
116 files changed, 2946 insertions(+), 2426 deletions(-)
diff -r 8f0b5295bb1b -r dcec453681bc Config.mk
--- a/Config.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/Config.mk Thu Mar 08 14:39:52 2007 -0600
@@ -73,9 +73,10 @@ ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_
ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_POLICY
# Optional components
-XENSTAT_XENTOP ?= y
-VTPM_TOOLS ?= n
+XENSTAT_XENTOP ?= y
+VTPM_TOOLS ?= n
LIBXENAPI_BINDINGS ?= n
-XENFB_TOOLS ?= n
+XENFB_TOOLS ?= n
+PYTHON_TOOLS ?= y
-include $(XEN_ROOT)/.config
diff -r 8f0b5295bb1b -r dcec453681bc config/StdGNU.mk
--- a/config/StdGNU.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/config/StdGNU.mk Thu Mar 08 14:39:52 2007 -0600
@@ -12,9 +12,9 @@ MSGFMT = msgfmt
MSGFMT = msgfmt
INSTALL = install
-INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
LIB64DIR = lib64
diff -r 8f0b5295bb1b -r dcec453681bc config/SunOS.mk
--- a/config/SunOS.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/config/SunOS.mk Thu Mar 08 14:39:52 2007 -0600
@@ -14,9 +14,9 @@ SHELL = bash
SHELL = bash
INSTALL = ginstall
-INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
LIB64DIR = lib/amd64
diff -r 8f0b5295bb1b -r dcec453681bc docs/misc/dump-core-format.txt
--- a/docs/misc/dump-core-format.txt Mon Mar 05 12:49:12 2007 -0600
+++ b/docs/misc/dump-core-format.txt Thu Mar 08 14:39:52 2007 -0600
@@ -26,11 +26,12 @@ Elf header
Elf header
----------
The elf header members are set as follows
+ e_ident[EI_CLASS] = ELFCLASS64 = 2
e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
e_type = ET_CORE = 4
-e_ident[EI_CLASS], e_ident[EI_DATA] and e_flags are set according
-to an architecture which a file is created. Other members are set as usual.
-
+ELFCLASS64 is always used independent of architecture.
+e_ident[EI_DATA] and e_flags are set according to the dumping system's
+architecture. Other members are set as usual.
Sections
--------
@@ -221,5 +222,10 @@ format_version descriptor
Format version history
----------------------
-The currently only (major, minor) = (0, 1) is used.
+Currently only (major, minor) = (0, 1) is used.
[When the format is changed, it would be described here.]
+
+(0, 1) update
+- EI_CLASS member of elf header was changed to ELFCLASS64 independent of
+ architecture. This is mainly for x86_32pae.
+ The format version isn't bumped because analysis tools can distinguish it.
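
[Editorial aside, not part of the patch: the convention documented above can be checked mechanically. A minimal C sketch of such a check; the default file name is a placeholder.]

#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	unsigned char ident[EI_NIDENT];
	FILE *f = fopen(argc > 1 ? argv[1] : "vmcore", "rb"); /* placeholder name */

	if (!f || fread(ident, 1, EI_NIDENT, f) != EI_NIDENT) {
		perror("read");
		return 1;
	}
	fclose(f);
	/* ELFCLASS64 == 2 and ELFOSABI_SYSV == 0, as documented above. */
	printf("EI_CLASS=%u EI_OSABI=%u\n", ident[EI_CLASS], ident[EI_OSABI]);
	return ident[EI_CLASS] != ELFCLASS64;
}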
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Mar 08 14:39:52 2007 -0600
@@ -255,7 +255,6 @@ config SCHED_SMT
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on X86_HT
- depends on !X86_XEN
help
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -311,11 +310,6 @@ config X86_VISWS_APIC
config X86_VISWS_APIC
bool
depends on X86_VISWS
- default y
-
-config X86_TSC
- bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ && !X86_XEN
default y
config X86_MCE
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Mar 08 14:39:52 2007 -0600
@@ -311,5 +311,5 @@ config X86_OOSTORE
config X86_TSC
bool
- depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
- default y
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
+ default y
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <asm/alternative.h>
-#include <asm/sections.h>
-
-static int no_replacement = 0;
-static int smp_alt_once = 0;
-static int debug_alternative = 0;
-
-static int __init noreplacement_setup(char *s)
-{
- no_replacement = 1;
- return 1;
-}
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-static int __init debug_alt(char *str)
-{
- debug_alternative = 1;
- return 1;
-}
-
-__setup("noreplacement", noreplacement_setup);
-__setup("smp-alt-boot", bootonly);
-__setup("debug-alternative", debug_alt);
-
-#define DPRINTK(fmt, args...) if (debug_alternative) \
- printk(KERN_DEBUG fmt, args)
-
-#ifdef GENERIC_NOP1
-/* Use inline assembly to define this because the nops are defined
- as inline assembly strings in the include files and we cannot
- get them easily into strings. */
-asm("\t.data\nintelnops: "
- GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
- GENERIC_NOP7 GENERIC_NOP8);
-extern unsigned char intelnops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef K8_NOP1
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
- NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef K7_NOP1
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-extern unsigned char k7nops[];
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-};
-#endif
-
-#ifdef CONFIG_X86_64
-
-extern char __vsyscall_0;
-static inline unsigned char** find_nop_table(void)
-{
- return k8_nops;
-}
-
-#else /* CONFIG_X86_64 */
-
-static struct nop {
- int cpuid;
- unsigned char **noptable;
-} noptypes[] = {
- { X86_FEATURE_K8, k8_nops },
- { X86_FEATURE_K7, k7_nops },
- { -1, NULL }
-};
-
-static unsigned char** find_nop_table(void)
-{
- unsigned char **noptable = intel_nops;
- int i;
-
- for (i = 0; noptypes[i].cpuid >= 0; i++) {
- if (boot_cpu_has(noptypes[i].cpuid)) {
- noptable = noptypes[i].noptable;
- break;
- }
- }
- return noptable;
-}
-
-#endif /* CONFIG_X86_64 */
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-/* Replace instructions with better alternatives for this CPU type.
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that asymmetric systems where
- APs have less capabilities than the boot processor are not handled.
- Tough. Make sure you disable such features by hand. */
-
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
-{
- unsigned char **noptable = find_nop_table();
- struct alt_instr *a;
- u8 *instr;
- int diff, i, k;
-
- DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
- for (a = start; a < end; a++) {
- BUG_ON(a->replacementlen > a->instrlen);
- if (!boot_cpu_has(a->cpuid))
- continue;
- instr = a->instr;
-#ifdef CONFIG_X86_64
- /* vsyscall code is not mapped yet. resolve it manually. */
- if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
- instr -= VSYSCALL_START - (unsigned long)&__vsyscall_0;
- DPRINTK("%s: vsyscall fixup: %p => %p\n",
- __FUNCTION__, a->instr, instr);
- }
-#endif
- memcpy(instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
- }
-}
-
-#ifdef CONFIG_SMP
-
-static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
-{
- struct alt_instr *a;
-
- DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
- for (a = start; a < end; a++) {
- memcpy(a->replacement + a->replacementlen,
- a->instr,
- a->instrlen);
- }
-}
-
-static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
-{
- struct alt_instr *a;
-
- for (a = start; a < end; a++) {
- memcpy(a->instr,
- a->replacement + a->replacementlen,
- a->instrlen);
- }
-}
-
-static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
-{
- u8 **ptr;
-
- for (ptr = start; ptr < end; ptr++) {
- if (*ptr < text)
- continue;
- if (*ptr > text_end)
- continue;
- **ptr = 0xf0; /* lock prefix */
- };
-}
-
-static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
-{
- unsigned char **noptable = find_nop_table();
- u8 **ptr;
-
- for (ptr = start; ptr < end; ptr++) {
- if (*ptr < text)
- continue;
- if (*ptr > text_end)
- continue;
- **ptr = noptable[1][0];
- };
-}
-
-struct smp_alt_module {
- /* what is this ??? */
- struct module *mod;
- char *name;
-
- /* ptrs to lock prefixes */
- u8 **locks;
- u8 **locks_end;
-
- /* .text segment, needed to avoid patching init code ;) */
- u8 *text;
- u8 *text_end;
-
- struct list_head next;
-};
-static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
-
-void alternatives_smp_module_add(struct module *mod, char *name,
- void *locks, void *locks_end,
- void *text, void *text_end)
-{
- struct smp_alt_module *smp;
- unsigned long flags;
-
- if (no_replacement)
- return;
-
- if (smp_alt_once) {
- if (boot_cpu_has(X86_FEATURE_UP))
- alternatives_smp_unlock(locks, locks_end,
- text, text_end);
- return;
- }
-
- smp = kzalloc(sizeof(*smp), GFP_KERNEL);
- if (NULL == smp)
- return; /* we'll run the (safe but slow) SMP code then ... */
-
- smp->mod = mod;
- smp->name = name;
- smp->locks = locks;
- smp->locks_end = locks_end;
- smp->text = text;
- smp->text_end = text_end;
- DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
- __FUNCTION__, smp->locks, smp->locks_end,
- smp->text, smp->text_end, smp->name);
-
- spin_lock_irqsave(&smp_alt, flags);
- list_add_tail(&smp->next, &smp_alt_modules);
- if (boot_cpu_has(X86_FEATURE_UP))
- alternatives_smp_unlock(smp->locks, smp->locks_end,
- smp->text, smp->text_end);
- spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-void alternatives_smp_module_del(struct module *mod)
-{
- struct smp_alt_module *item;
- unsigned long flags;
-
- if (no_replacement || smp_alt_once)
- return;
-
- spin_lock_irqsave(&smp_alt, flags);
- list_for_each_entry(item, &smp_alt_modules, next) {
- if (mod != item->mod)
- continue;
- list_del(&item->next);
- spin_unlock_irqrestore(&smp_alt, flags);
- DPRINTK("%s: %s\n", __FUNCTION__, item->name);
- kfree(item);
- return;
- }
- spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-void alternatives_smp_switch(int smp)
-{
- struct smp_alt_module *mod;
- unsigned long flags;
-
-#ifdef CONFIG_LOCKDEP
- /*
- * A not yet fixed binutils section handling bug prevents
- * alternatives-replacement from working reliably, so turn
- * it off:
- */
- printk("lockdep: not fixing up alternatives.\n");
- return;
-#endif
-
- if (no_replacement || smp_alt_once)
- return;
- BUG_ON(!smp && (num_online_cpus() > 1));
-
- spin_lock_irqsave(&smp_alt, flags);
- if (smp) {
- printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
- clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- alternatives_smp_apply(__smp_alt_instructions,
- __smp_alt_instructions_end);
- list_for_each_entry(mod, &smp_alt_modules, next)
- alternatives_smp_lock(mod->locks, mod->locks_end,
- mod->text, mod->text_end);
- } else {
- printk(KERN_INFO "SMP alternatives: switching to UP code\n");
- set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- apply_alternatives(__smp_alt_instructions,
- __smp_alt_instructions_end);
- list_for_each_entry(mod, &smp_alt_modules, next)
- alternatives_smp_unlock(mod->locks, mod->locks_end,
- mod->text, mod->text_end);
- }
- spin_unlock_irqrestore(&smp_alt, flags);
-}
-
-#endif
-
-void __init alternative_instructions(void)
-{
- if (no_replacement) {
- printk(KERN_INFO "(SMP-)alternatives turned off\n");
- free_init_pages("SMP alternatives",
- (unsigned long)__smp_alt_begin,
- (unsigned long)__smp_alt_end);
- return;
- }
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-
- /* switch to patch-once-at-boottime-only mode and free the
- * tables in case we know the number of CPUs will never ever
- * change */
-#ifdef CONFIG_HOTPLUG_CPU
- if (num_possible_cpus() < 2)
- smp_alt_once = 1;
-#else
- smp_alt_once = 1;
-#endif
-
-#ifdef CONFIG_SMP
- if (smp_alt_once) {
- if (1 == num_possible_cpus()) {
- printk(KERN_INFO "SMP alternatives: switching to UP code\n");
- set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
- apply_alternatives(__smp_alt_instructions,
- __smp_alt_instructions_end);
- alternatives_smp_unlock(__smp_locks, __smp_locks_end,
- _text, _etext);
- }
- free_init_pages("SMP alternatives",
- (unsigned long)__smp_alt_begin,
- (unsigned long)__smp_alt_end);
- } else {
- alternatives_smp_save(__smp_alt_instructions,
- __smp_alt_instructions_end);
- alternatives_smp_module_add(NULL, "core kernel",
- __smp_locks, __smp_locks_end,
- _text, _etext);
- alternatives_smp_switch(0);
- }
-#endif
-}
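
[Editorial aside, not part of the patch: the heart of the apply_alternatives() code removed above is the loop that pads a short replacement with nops. A standalone sketch of just that padding logic, using repeated single-byte 0x90 nops where the kernel instead picks multi-byte sequences from the per-CPU nop tables shown above.]

#include <stdio.h>
#include <string.h>

#define ASM_NOP_MAX 8

/* Pad instr[replacementlen..instrlen) with nops, at most ASM_NOP_MAX at a time. */
static void pad_with_nops(unsigned char *instr, int instrlen, int replacementlen)
{
	int i, k, diff;

	for (i = replacementlen, diff = instrlen - replacementlen;
	     diff > 0; diff -= k, i += k) {
		k = diff > ASM_NOP_MAX ? ASM_NOP_MAX : diff;
		memset(instr + i, 0x90, k);	/* 0x90 = one-byte x86 nop */
	}
}

int main(void)
{
	unsigned char text[12];
	int i;

	memset(text, 0xcc, sizeof(text));	/* stand-in for live kernel text */
	pad_with_nops(text, sizeof(text), 5);	/* 5-byte replacement in a 12-byte slot */
	for (i = 0; i < (int)sizeof(text); i++)
		printf("%02x ", text[i]);
	printf("\n");
	return 0;
}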
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,774 +0,0 @@
-/*
- * Routines to identify caches on Intel CPU.
- *
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
- * Ashok Raj <ashok.raj@xxxxxxxxx>: Work with CPU hotplug infrastructure.
- * Andi Kleen : CPUID4 emulation on AMD.
- */
-
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-
-#include <asm/processor.h>
-#include <asm/smp.h>
-
-#define LVL_1_INST 1
-#define LVL_1_DATA 2
-#define LVL_2 3
-#define LVL_3 4
-#define LVL_TRACE 5
-
-struct _cache_table
-{
- unsigned char descriptor;
- char cache_type;
- short size;
-};
-
-/* all the cache descriptor types we care about (no TLB or trace cache entries) */
-static struct _cache_table cache_table[] __cpuinitdata =
-{
- { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
- { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
- { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
- { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
- { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
- { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
- { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
- { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
- { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
- { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
- { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
- { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
- { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
- { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
- { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
- { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
- { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
- { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
- { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
- { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
- { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
- { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
- { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
- { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
- { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
- { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
- { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
- { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
- { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
- { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
- { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0x00, 0, 0}
-};
-
-
-enum _cache_type
-{
- CACHE_TYPE_NULL = 0,
- CACHE_TYPE_DATA = 1,
- CACHE_TYPE_INST = 2,
- CACHE_TYPE_UNIFIED = 3
-};
-
-union _cpuid4_leaf_eax {
- struct {
- enum _cache_type type:5;
- unsigned int level:3;
- unsigned int is_self_initializing:1;
- unsigned int is_fully_associative:1;
- unsigned int reserved:4;
- unsigned int num_threads_sharing:12;
- unsigned int num_cores_on_die:6;
- } split;
- u32 full;
-};
-
-union _cpuid4_leaf_ebx {
- struct {
- unsigned int coherency_line_size:12;
- unsigned int physical_line_partition:10;
- unsigned int ways_of_associativity:10;
- } split;
- u32 full;
-};
-
-union _cpuid4_leaf_ecx {
- struct {
- unsigned int number_of_sets:32;
- } split;
- u32 full;
-};
-
-struct _cpuid4_info {
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
- unsigned long size;
- cpumask_t shared_cpu_map;
-};
-
-unsigned short num_cache_leaves;
-
-/* AMD doesn't have CPUID4. Emulate it here to report the same
- information to the user. This makes some assumptions about the machine:
- No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
-
- In theory the TLBs could be reported as fake type (they are in "dummy").
- Maybe later */
-union l1_cache {
- struct {
- unsigned line_size : 8;
- unsigned lines_per_tag : 8;
- unsigned assoc : 8;
- unsigned size_in_kb : 8;
- };
- unsigned val;
-};
-
-union l2_cache {
- struct {
- unsigned line_size : 8;
- unsigned lines_per_tag : 4;
- unsigned assoc : 4;
- unsigned size_in_kb : 16;
- };
- unsigned val;
-};
-
-static const unsigned short assocs[] = {
- [1] = 1, [2] = 2, [4] = 4, [6] = 8,
- [8] = 16,
- [0xf] = 0xffff // ??
- };
-static const unsigned char levels[] = { 1, 1, 2 };
-static const unsigned char types[] = { 1, 2, 3 };
-
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
- union _cpuid4_leaf_ebx *ebx,
- union _cpuid4_leaf_ecx *ecx)
-{
- unsigned dummy;
- unsigned line_size, lines_per_tag, assoc, size_in_kb;
- union l1_cache l1i, l1d;
- union l2_cache l2;
-
- eax->full = 0;
- ebx->full = 0;
- ecx->full = 0;
-
- cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
- cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
-
- if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
- return;
-
- eax->split.is_self_initializing = 1;
- eax->split.type = types[leaf];
- eax->split.level = levels[leaf];
- eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-
- if (leaf <= 1) {
- union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
- assoc = l1->assoc;
- line_size = l1->line_size;
- lines_per_tag = l1->lines_per_tag;
- size_in_kb = l1->size_in_kb;
- } else {
- assoc = l2.assoc;
- line_size = l2.line_size;
- lines_per_tag = l2.lines_per_tag;
- /* cpu_data has errata corrections for K7 applied */
- size_in_kb = current_cpu_data.x86_cache_size;
- }
-
- if (assoc == 0xf)
- eax->split.is_fully_associative = 1;
- ebx->split.coherency_line_size = line_size - 1;
- ebx->split.ways_of_associativity = assocs[assoc] - 1;
- ebx->split.physical_line_partition = lines_per_tag - 1;
- ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
- (ebx->split.ways_of_associativity + 1) - 1;
-}
-
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
-{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
- unsigned edx;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
- if (eax.split.type == CACHE_TYPE_NULL)
- return -EIO; /* better error ? */
-
- this_leaf->eax = eax;
- this_leaf->ebx = ebx;
- this_leaf->ecx = ecx;
- this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
- return 0;
-}
-
-/* will only be called once; __init is safe here */
-static int __init find_num_cache_leaves(void)
-{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
- int i = -1;
-
- do {
- ++i;
- /* Do cpuid(4) loop to find out num_cache_leaves */
- cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- } while (cache_eax.split.type != CACHE_TYPE_NULL);
- return i;
-}
-
-unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
-{
- unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
- unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
- unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
- unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_X86_HT
- unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
-#endif
-
- if (c->cpuid_level > 3) {
- static int is_initialized;
-
- if (is_initialized == 0) {
- /* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves();
- is_initialized++;
- }
-
- /*
- * Whenever possible use cpuid(4), deterministic cache
- * parameters cpuid leaf to find the cache details
- */
- for (i = 0; i < num_cache_leaves; i++) {
- struct _cpuid4_info this_leaf;
-
- int retval;
-
- retval = cpuid4_cache_lookup(i, &this_leaf);
- if (retval >= 0) {
- switch(this_leaf.eax.split.level) {
- case 1:
- if (this_leaf.eax.split.type ==
- CACHE_TYPE_DATA)
- new_l1d = this_leaf.size/1024;
- else if (this_leaf.eax.split.type ==
- CACHE_TYPE_INST)
- new_l1i = this_leaf.size/1024;
- break;
- case 2:
- new_l2 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l2_id = c->apicid >> index_msb;
- break;
- case 3:
- new_l3 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l3_id = c->apicid >> index_msb;
- break;
- default:
- break;
- }
- }
- }
- }
- /*
- * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
- * trace cache
- */
- if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
- /* supports eax=2 call */
- int i, j, n;
- int regs[4];
- unsigned char *dp = (unsigned char *)regs;
- int only_trace = 0;
-
- if (num_cache_leaves != 0 && c->x86 == 15)
- only_trace = 1;
-
- /* Number of times to iterate */
- n = cpuid_eax(2) & 0xFF;
-
- for ( i = 0 ; i < n ; i++ ) {
- cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
- /* If bit 31 is set, this is an unknown format */
- for ( j = 0 ; j < 3 ; j++ ) {
- if ( regs[j] < 0 ) regs[j] = 0;
- }
-
- /* Byte 0 is level count, not a descriptor */
- for ( j = 1 ; j < 16 ; j++ ) {
- unsigned char des = dp[j];
- unsigned char k = 0;
-
- /* look up this descriptor in the table */
- while (cache_table[k].descriptor != 0)
- {
- if (cache_table[k].descriptor == des) {
- if (only_trace && cache_table[k].cache_type != LVL_TRACE)
- break;
- switch (cache_table[k].cache_type) {
- case LVL_1_INST:
- l1i += cache_table[k].size;
- break;
- case LVL_1_DATA:
- l1d += cache_table[k].size;
- break;
- case LVL_2:
- l2 += cache_table[k].size;
- break;
- case LVL_3:
- l3 += cache_table[k].size;
- break;
- case LVL_TRACE:
- trace += cache_table[k].size;
- break;
- }
-
- break;
- }
-
- k++;
- }
- }
- }
- }
-
- if (new_l1d)
- l1d = new_l1d;
-
- if (new_l1i)
- l1i = new_l1i;
-
- if (new_l2) {
- l2 = new_l2;
-#ifdef CONFIG_X86_HT
- cpu_llc_id[cpu] = l2_id;
-#endif
- }
-
- if (new_l3) {
- l3 = new_l3;
-#ifdef CONFIG_X86_HT
- cpu_llc_id[cpu] = l3_id;
-#endif
- }
-
- if (trace)
- printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if ( l1i )
- printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
-
- if (l1d)
- printk(", L1 D cache: %dK\n", l1d);
- else
- printk("\n");
-
- if (l2)
- printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
-
- if (l3)
- printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
-
- c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
-
- return l2;
-}
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static struct _cpuid4_info *cpuid4_info[NR_CPUS];
-#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
-
-#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- unsigned long num_threads_sharing;
- int index_msb, i;
- struct cpuinfo_x86 *c = cpu_data;
-
- this_leaf = CPUID4_INFO_IDX(cpu, index);
- num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
-
- if (num_threads_sharing == 1)
- cpu_set(cpu, this_leaf->shared_cpu_map);
- else {
- index_msb = get_count_order(num_threads_sharing);
-
- for_each_online_cpu(i) {
- if (c[i].apicid >> index_msb ==
- c[cpu].apicid >> index_msb) {
- cpu_set(i, this_leaf->shared_cpu_map);
- if (i != cpu && cpuid4_info[i]) {
- sibling_leaf = CPUID4_INFO_IDX(i, index);
- cpu_set(cpu, sibling_leaf->shared_cpu_map);
- }
- }
- }
- }
-}
-static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- int sibling;
-
- this_leaf = CPUID4_INFO_IDX(cpu, index);
- for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
- sibling_leaf = CPUID4_INFO_IDX(sibling, index);
- cpu_clear(cpu, sibling_leaf->shared_cpu_map);
- }
-}
-#else
-static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
-static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
- kfree(cpuid4_info[cpu]);
- cpuid4_info[cpu] = NULL;
-}
-
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
-{
- struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
- cpumask_t oldmask;
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- cpuid4_info[cpu] = kmalloc(
- sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(cpuid4_info[cpu] == NULL))
- return -ENOMEM;
- memset(cpuid4_info[cpu], 0,
- sizeof(struct _cpuid4_info) * num_cache_leaves);
-
- oldmask = current->cpus_allowed;
- retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (retval)
- goto out;
-
- /* Do cpuid and store the results */
- retval = 0;
- for (j = 0; j < num_cache_leaves; j++) {
- this_leaf = CPUID4_INFO_IDX(cpu, j);
- retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0))
- break;
- cache_shared_cpu_map_setup(cpu, j);
- }
- set_cpus_allowed(current, oldmask);
-
-out:
- if (retval)
- free_cache_attributes(cpu);
- return retval;
-}
-
-#ifdef CONFIG_SYSFS
-
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-
-extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
-
-/* pointer to kobject for cpuX/cache */
-static struct kobject * cache_kobject[NR_CPUS];
-
-struct _index_kobject {
- struct kobject kobj;
- unsigned int cpu;
- unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static struct _index_kobject *index_kobject[NR_CPUS];
-#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
-
-#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
-{ \
- return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, eax.split.level, 0);
-show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
-{
- return sprintf (buf, "%luK\n", this_leaf->size / 1024);
-}
-
-static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
-{
- char mask_str[NR_CPUS];
- cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
- return sprintf(buf, "%s\n", mask_str);
-}
-
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
- switch(this_leaf->eax.split.type) {
- case CACHE_TYPE_DATA:
- return sprintf(buf, "Data\n");
- break;
- case CACHE_TYPE_INST:
- return sprintf(buf, "Instruction\n");
- break;
- case CACHE_TYPE_UNIFIED:
- return sprintf(buf, "Unified\n");
- break;
- default:
- return sprintf(buf, "Unknown\n");
- break;
- }
-}
-
-struct _cache_attr {
- struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
- __ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-
-static struct attribute * default_attrs[] = {
- &type.attr,
- &level.attr,
- &coherency_line_size.attr,
- &physical_line_partition.attr,
- &ways_of_associativity.attr,
- &number_of_sets.attr,
- &size.attr,
- &shared_cpu_map.attr,
- NULL
-};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
-static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
-{
- struct _cache_attr *fattr = to_attr(attr);
- struct _index_kobject *this_leaf = to_object(kobj);
- ssize_t ret;
-
- ret = fattr->show ?
- fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
- 0;
- return ret;
-}
-
-static ssize_t store(struct kobject * kobj, struct attribute * attr,
- const char * buf, size_t count)
-{
- return 0;
-}
-
-static struct sysfs_ops sysfs_ops = {
- .show = show,
- .store = store,
-};
-
-static struct kobj_type ktype_cache = {
- .sysfs_ops = &sysfs_ops,
- .default_attrs = default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
- .sysfs_ops = &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
- kfree(cache_kobject[cpu]);
- kfree(index_kobject[cpu]);
- cache_kobject[cpu] = NULL;
- index_kobject[cpu] = NULL;
- free_cache_attributes(cpu);
-}
-
-static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- detect_cache_attributes(cpu);
- if (cpuid4_info[cpu] == NULL)
- return -ENOENT;
-
- /* Allocate all required memory */
- cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
- if (unlikely(cache_kobject[cpu] == NULL))
- goto err_out;
- memset(cache_kobject[cpu], 0, sizeof(struct kobject));
-
- index_kobject[cpu] = kmalloc(
- sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(index_kobject[cpu] == NULL))
- goto err_out;
- memset(index_kobject[cpu], 0,
- sizeof(struct _index_kobject) * num_cache_leaves);
-
- return 0;
-
-err_out:
- cpuid4_cache_sysfs_exit(cpu);
- return -ENOMEM;
-}
-
-/* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
-{
- unsigned int cpu = sys_dev->id;
- unsigned long i, j;
- struct _index_kobject *this_object;
- int retval = 0;
-
- retval = cpuid4_cache_sysfs_init(cpu);
- if (unlikely(retval < 0))
- return retval;
-
- cache_kobject[cpu]->parent = &sys_dev->kobj;
- kobject_set_name(cache_kobject[cpu], "%s", "cache");
- cache_kobject[cpu]->ktype = &ktype_percpu_entry;
- retval = kobject_register(cache_kobject[cpu]);
-
- for (i = 0; i < num_cache_leaves; i++) {
- this_object = INDEX_KOBJECT_PTR(cpu,i);
- this_object->cpu = cpu;
- this_object->index = i;
- this_object->kobj.parent = cache_kobject[cpu];
- kobject_set_name(&(this_object->kobj), "index%1lu", i);
- this_object->kobj.ktype = &ktype_cache;
- retval = kobject_register(&(this_object->kobj));
- if (unlikely(retval)) {
- for (j = 0; j < i; j++) {
- kobject_unregister(
- &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
- }
- kobject_unregister(cache_kobject[cpu]);
- cpuid4_cache_sysfs_exit(cpu);
- break;
- }
- }
- return retval;
-}
-
-static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
-{
- unsigned int cpu = sys_dev->id;
- unsigned long i;
-
- for (i = 0; i < num_cache_leaves; i++) {
- cache_remove_shared_cpu_map(cpu, i);
- kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- }
- kobject_unregister(cache_kobject[cpu]);
- cpuid4_cache_sysfs_exit(cpu);
- return;
-}
-
-static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
- struct sys_device *sys_dev;
-
- sys_dev = get_cpu_sysdev(cpu);
- switch (action) {
- case CPU_ONLINE:
- cache_add_dev(sys_dev);
- break;
- case CPU_DEAD:
- cache_remove_dev(sys_dev);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
-{
- .notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __cpuinit cache_sysfs_init(void)
-{
- int i;
-
- if (num_cache_leaves == 0)
- return 0;
-
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
- for_each_online_cpu(i) {
- cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
- (void *)(long)i);
- }
-
- return 0;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
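
[Editorial aside, not part of the patch: the removed cpuid4_cache_lookup() derives total cache size from four CPUID leaf-4 fields that are each stored as value-minus-one. A self-contained recomputation of that formula with made-up sample values.]

#include <stdio.h>

int main(void)
{
	/* Sample values only; the real code reads these via cpuid_count(4, ...). */
	unsigned number_of_sets = 64 - 1;		/* ecx */
	unsigned coherency_line_size = 64 - 1;		/* ebx bits 11:0 */
	unsigned physical_line_partition = 1 - 1;	/* ebx bits 21:12 */
	unsigned ways_of_associativity = 8 - 1;		/* ebx bits 31:22 */

	unsigned long size = (number_of_sets + 1UL) *
			     (coherency_line_size + 1) *
			     (physical_line_partition + 1) *
			     (ways_of_associativity + 1);

	printf("cache size = %luK\n", size / 1024);	/* 32K for these values */
	return 0;
}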
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Thu Mar 08 14:39:52 2007 -0600
@@ -368,6 +368,7 @@ sysexit_scrit: /**** START OF SYSEXIT CR
movl ESI(%esp), %esi
sysexit
14: __DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
push %esp
call evtchn_do_upcall
@@ -427,11 +428,13 @@ restore_nocheck:
restore_nocheck:
movl EFLAGS(%esp), %eax
testl $(VM_MASK|NMI_MASK), %eax
+ CFI_REMEMBER_STATE
jnz hypervisor_iret
shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
GET_VCPU_INFO
andb evtchn_upcall_mask(%esi),%al
andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
+ CFI_REMEMBER_STATE
jnz restore_all_enable_events # != 0 => enable event delivery
#endif
TRACE_IRQS_IRET
@@ -455,8 +458,8 @@ iret_exc:
.long 1b,iret_exc
.previous
+ CFI_RESTORE_STATE
#ifndef CONFIG_XEN
- CFI_RESTORE_STATE
ldt_ss:
larl OLDSS(%esp), %eax
jnz restore_nocheck
@@ -485,14 +488,36 @@ 1: iret
.align 4
.long 1b,iret_exc
.previous
- CFI_ENDPROC
#else
+ ALIGN
+restore_all_enable_events:
+ TRACE_IRQS_ON
+ __ENABLE_INTERRUPTS
+scrit: /**** START OF CRITICAL REGION ****/
+ __TEST_PENDING
+ jnz 14f # process more events if necessary...
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+1: iret
+.section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+.previous
+14: __DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
+ jmp 11f
+ecrit: /**** END OF CRITICAL REGION ****/
+
+ CFI_RESTORE_STATE
hypervisor_iret:
andl $~NMI_MASK, EFLAGS(%esp)
RESTORE_REGS
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
jmp hypercall_page + (__HYPERVISOR_iret * 32)
#endif
+ CFI_ENDPROC
# perform work that needs to be done immediately before resumption
ALIGN
@@ -736,7 +761,9 @@ error_code:
# critical region we know that the entire frame is present and correct
# so we can simply throw away the new one.
ENTRY(hypervisor_callback)
+ RING0_INT_FRAME
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
movl EIP(%esp),%eax
cmpl $scrit,%eax
@@ -749,26 +776,13 @@ ENTRY(hypervisor_callback)
ja 11f
addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
11: push %esp
+ CFI_ADJUST_CFA_OFFSET 4
call evtchn_do_upcall
add $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
jmp ret_from_intr
-
- ALIGN
-restore_all_enable_events:
- __ENABLE_INTERRUPTS
-scrit: /**** START OF CRITICAL REGION ****/
- __TEST_PENDING
- jnz 14f # process more events if necessary...
- RESTORE_REGS
- addl $4, %esp
-1: iret
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
-14: __DISABLE_INTERRUPTS
- jmp 11b
-ecrit: /**** END OF CRITICAL REGION ****/
+ CFI_ENDPROC
+
# [How we do the fixup]. We want to merge the current stack frame with the
# just-interrupted frame. How we do this depends on where in the critical
# region the interrupted handler was executing, and so how many saved
@@ -835,6 +849,7 @@ 4: mov 16(%esp),%gs
addl $16,%esp # EAX != 0 => Category 2 (Bad IRET)
jmp iret_exc
5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment)
+ RING0_INT_FRAME
pushl $0
SAVE_ALL
jmp ret_from_exception
@@ -860,6 +875,7 @@ 9: xorl %eax,%eax; \
.long 4b,9b; \
.previous
#endif
+ CFI_ENDPROC
ENTRY(coprocessor_error)
RING0_INT_FRAME
@@ -1187,8 +1203,11 @@ ENDPROC(arch_unwind_init_running)
#endif
ENTRY(fixup_4gb_segment)
+ RING0_EC_FRAME
pushl $do_fixup_4gb_segment
- jmp error_code
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
.section .rodata,"a"
.align 4
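
[Editorial aside, not part of the patch: the restore_all_enable_events path moved above implements an unmask-then-recheck protocol against the shared vcpu_info flags -- events are unmasked, pending events are re-tested, and if one slipped in while masked, delivery is re-disabled and the upcall handler runs. A plain-C sketch of that protocol; field names follow the Xen public interface, and the control flow is a simplification of the assembly, not a drop-in equivalent.]

#include <stdio.h>

struct vcpu_info_sketch {
	unsigned char evtchn_upcall_pending;
	unsigned char evtchn_upcall_mask;
};

static void do_upcall(struct vcpu_info_sketch *v)
{
	v->evtchn_upcall_pending = 0;
	printf("processed pending events\n");
}

static void enable_events_and_return(struct vcpu_info_sketch *v)
{
	v->evtchn_upcall_mask = 0;		/* __ENABLE_INTERRUPTS */
	if (v->evtchn_upcall_pending) {		/* __TEST_PENDING (scrit) */
		v->evtchn_upcall_mask = 1;	/* 14: __DISABLE_INTERRUPTS */
		do_upcall(v);			/* jmp 11f -> evtchn_do_upcall */
		v->evtchn_upcall_mask = 0;
	}
	/* ...iret back to the interrupted context... */
}

int main(void)
{
	struct vcpu_info_sketch v = { 1, 1 };	/* event arrived while masked */

	enable_events_and_return(&v);
	return 0;
}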
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Thu Mar 08 14:39:52 2007 -0600
@@ -9,10 +9,9 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
#include <xen/interface/xen.h>
#include <xen/interface/elfnote.h>
-
-#define _PAGE_PRESENT 0x1
/*
* References to members of the new_cpu_data structure.
@@ -65,14 +64,13 @@ ENTRY(startup_32)
pushl %eax # fake return address
jmp start_kernel
-L6:
- jmp L6 # main should never return here, but
- # just in case, we know what happens.
#define HYPERCALL_PAGE_OFFSET 0x1000
.org HYPERCALL_PAGE_OFFSET
ENTRY(hypercall_page)
+ CFI_STARTPROC
.skip 0x1000
+ CFI_ENDPROC
/*
* Real beginning of normal "text" segment
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -1847,6 +1847,9 @@ static __init int add_pcspkr(void)
struct platform_device *pd;
int ret;
+ if (!is_initial_xendomain())
+ return 0;
+
pd = platform_device_alloc("pcspkr", -1);
if (!pd)
return -ENOMEM;
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -1022,16 +1022,21 @@ void halt(void)
}
EXPORT_SYMBOL(halt);
-/* No locking required. We are only CPU running, and interrupts are off. */
+/* No locking required. Interrupts are disabled on all CPUs. */
void time_resume(void)
{
+ unsigned int cpu;
+
init_cpu_khz();
- get_time_values_from_xen(0);
+ for_each_online_cpu(cpu) {
+ get_time_values_from_xen(cpu);
+ per_cpu(processed_system_time, cpu) =
+ per_cpu(shadow_time, 0).system_timestamp;
+ init_missing_ticks_accounting(cpu);
+ }
processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
- per_cpu(processed_system_time, 0) = processed_system_time;
- init_missing_ticks_accounting(0);
update_wallclock();
}
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -374,8 +374,6 @@ void iounmap(volatile void __iomem *addr
}
EXPORT_SYMBOL(iounmap);
-#ifdef __i386__
-
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
unsigned long offset, last_addr;
@@ -443,5 +441,3 @@ void __init bt_iounmap(void *addr, unsig
--nrpages;
}
}
-
-#endif /* __i386__ */
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -573,64 +573,67 @@ void make_pages_writable(void *va, unsig
}
}
-static inline int pgd_walk_set_prot(struct page *page, pgprot_t flags)
+static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
{
unsigned long pfn = page_to_pfn(page);
-
- if (PageHighMem(page))
- return pgprot_val(flags) & _PAGE_RW
- ? test_and_clear_bit(PG_pinned, &page->flags)
- : !test_and_set_bit(PG_pinned, &page->flags);
-
- BUG_ON(HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte(pfn, flags), 0));
-
- return 0;
-}
-
-static int pgd_walk(pgd_t *pgd_base, pgprot_t flags)
+ int rc;
+
+ if (PageHighMem(page)) {
+ if (pgprot_val(flags) & _PAGE_RW)
+ clear_bit(PG_pinned, &page->flags);
+ else
+ set_bit(PG_pinned, &page->flags);
+ } else {
+ rc = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte(pfn, flags), 0);
+ if (rc)
+ BUG();
+ }
+}
+
+static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
{
pgd_t *pgd = pgd_base;
pud_t *pud;
pmd_t *pmd;
- int g, u, m, flush;
+ int g, u, m, rc;
if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
- for (g = 0, flush = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+ for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
if (pgd_none(*pgd))
continue;
pud = pud_offset(pgd, 0);
if (PTRS_PER_PUD > 1) /* not folded */
- flush |= pgd_walk_set_prot(virt_to_page(pud),flags);
+ pgd_walk_set_prot(virt_to_page(pud),flags);
for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
if (pud_none(*pud))
continue;
pmd = pmd_offset(pud, 0);
if (PTRS_PER_PMD > 1) /* not folded */
- flush |= pgd_walk_set_prot(virt_to_page(pmd),flags);
+ pgd_walk_set_prot(virt_to_page(pmd),flags);
for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
if (pmd_none(*pmd))
continue;
- flush |= pgd_walk_set_prot(pmd_page(*pmd),flags);
+ pgd_walk_set_prot(pmd_page(*pmd),flags);
}
}
}
- BUG_ON(HYPERVISOR_update_va_mapping(
+ rc = HYPERVISOR_update_va_mapping(
(unsigned long)pgd_base,
pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
- UVMF_TLB_FLUSH));
-
- return flush;
+ UVMF_TLB_FLUSH);
+ if (rc)
+ BUG();
}
static void __pgd_pin(pgd_t *pgd)
{
- if (pgd_walk(pgd, PAGE_KERNEL_RO))
- kmap_flush_unused();
+ pgd_walk(pgd, PAGE_KERNEL_RO);
+ kmap_flush_unused();
xen_pgd_pin(__pa(pgd));
set_bit(PG_pinned, &virt_to_page(pgd)->flags);
}
@@ -638,8 +641,7 @@ static void __pgd_unpin(pgd_t *pgd)
static void __pgd_unpin(pgd_t *pgd)
{
xen_pgd_unpin(__pa(pgd));
- if (pgd_walk(pgd, PAGE_KERNEL))
- kmap_flush_unused();
+ pgd_walk(pgd, PAGE_KERNEL);
clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
}
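
[Editorial aside, not part of the patch: the reworked pgd_walk() visits every pud, pmd, and pte page reachable from a pgd and applies one protection change to each. A toy, self-contained rendering of the same traversal order, with stand-in types and a print in place of pgd_walk_set_prot(); the real walk also handles folded levels and the final self-map update, omitted here.]

#include <stdio.h>

#define PTRS 4	/* toy fan-out; the kernel uses PTRS_PER_PUD etc. */

struct pmd { int present; };
struct pud { int present; struct pmd pmds[PTRS]; };
struct pgd { int present; struct pud puds[PTRS]; };

static void set_prot(const char *what, int g, int u, int m)
{
	printf("set prot on %s [g=%d u=%d m=%d]\n", what, g, u, m);
}

static void walk(struct pgd *base, int n)
{
	int g, u, m;

	for (g = 0; g < n; g++) {
		if (!base[g].present)
			continue;
		set_prot("pud table", g, -1, -1);		/* virt_to_page(pud) */
		for (u = 0; u < PTRS; u++) {
			if (!base[g].puds[u].present)
				continue;
			set_prot("pmd table", g, u, -1);	/* virt_to_page(pmd) */
			for (m = 0; m < PTRS; m++)
				if (base[g].puds[u].pmds[m].present)
					set_prot("pte page", g, u, m);	/* pmd_page(*pmd) */
		}
	}
}

int main(void)
{
	struct pgd t[1] = { 0 };

	t[0].present = 1;
	t[0].puds[2].present = 1;
	t[0].puds[2].pmds[1].present = 1;
	walk(t, 1);
	return 0;
}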
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Thu Mar 08 14:39:52 2007 -0600
@@ -450,7 +450,7 @@ config CALGARY_IOMMU
bool "IBM Calgary IOMMU support"
default y
select SWIOTLB
- depends on PCI && EXPERIMENTAL
+ depends on PCI && !X86_64_XEN && EXPERIMENTAL
help
Support for hardware IOMMUs in IBM's xSeries x366 and x460
systems. Needed to run systems with more than 3GB of memory
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -61,9 +61,7 @@ time-y += ../../i386/kernel/time-xen.
time-y += ../../i386/kernel/time-xen.o
pci-dma-y += ../../i386/kernel/pci-dma-xen.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
-intel_cacheinfo-y := ../../i386/kernel/cpu/intel_cacheinfo-xen.o
quirks-y := ../../i386/kernel/quirks-xen.o
-alternative-y := ../../i386/kernel/alternative-xen.o
n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Thu Mar 08 14:39:52 2007 -0600
@@ -116,19 +116,21 @@ NMI_MASK = 0x80000000
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
- .macro CFI_DEFAULT_STACK start=1
+ .macro CFI_DEFAULT_STACK start=1,adj=0
.if \start
CFI_STARTPROC simple
- CFI_DEF_CFA rsp,SS+8
+ CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
.else
- CFI_DEF_CFA_OFFSET SS+8
+ CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
.endif
+ .if \adj == 0
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
+ .endif
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
@@ -363,8 +365,8 @@ ENTRY(int_ret_from_sys_call)
CFI_REL_OFFSET r9,R9-ARGOFFSET
CFI_REL_OFFSET r10,R10-ARGOFFSET
CFI_REL_OFFSET r11,R11-ARGOFFSET
+ XEN_BLOCK_EVENTS(%rsi)
TRACE_IRQS_OFF
- XEN_BLOCK_EVENTS(%rsi)
testb $3,CS-ARGOFFSET(%rsp)
jnz 1f
/* Need to set the proper %ss (not NULL) for ring 3 iretq */
@@ -534,7 +536,7 @@ END(stub_rt_sigreturn)
*/
retint_check:
- CFI_DEFAULT_STACK
+ CFI_DEFAULT_STACK adj=1
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
@@ -838,6 +840,7 @@ ENTRY(error_entry)
CFI_REL_OFFSET r15,R15
#if 0
cmpl $__KERNEL_CS,CS(%rsp)
+ CFI_REMEMBER_STATE
je error_kernelspace
#endif
error_call_handler:
@@ -864,7 +867,7 @@ error_exit:
TRACE_IRQS_IRETQ
jmp retint_restore_args
-error_kernelspace:
+#if 0
/*
* We need to re-write the logic here because we don't do iretq to
* to return to user mode. It's still possible that we get trap/fault
@@ -872,7 +875,8 @@ error_kernelspace:
* for example).
*
*/
-#if 0
+ CFI_RESTORE_STATE
+error_kernelspace:
incl %ebx
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. The exception handlers after
@@ -888,11 +892,13 @@ error_kernelspace:
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
-#endif
+#endif
+ CFI_ENDPROC
END(error_entry)
ENTRY(hypervisor_callback)
zeroentry do_hypervisor_callback
+END(hypervisor_callback)
/*
* Copied from arch/xen/i386/kernel/entry.S
@@ -909,48 +915,66 @@ ENTRY(hypervisor_callback)
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
+ CFI_STARTPROC
# Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
# see the correct pointer to the pt_regs
movq %rdi, %rsp # we don't return, adjust the stack frame
-11: movq %gs:pda_irqstackptr,%rax
- incl %gs:pda_irqcount
- cmovzq %rax,%rsp
- pushq %rdi
+ CFI_ENDPROC
+ CFI_DEFAULT_STACK
+11: incl %gs:pda_irqcount
+ movq %rsp,%rbp
+ CFI_DEF_CFA_REGISTER rbp
+ cmovzq %gs:pda_irqstackptr,%rsp
+ pushq %rbp # backlink for old unwinder
call evtchn_do_upcall
popq %rsp
+ CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
jmp error_exit
+ CFI_ENDPROC
+END(do_hypervisor_callback)
#ifdef CONFIG_X86_LOCAL_APIC
KPROBE_ENTRY(nmi)
zeroentry do_nmi_callback
ENTRY(do_nmi_callback)
+ CFI_STARTPROC
addq $8, %rsp
+ CFI_ENDPROC
+ CFI_DEFAULT_STACK
call do_nmi
orl $NMI_MASK,EFLAGS(%rsp)
RESTORE_REST
XEN_BLOCK_EVENTS(%rsi)
+ TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
jmp retint_restore_args
+ CFI_ENDPROC
.previous .text
+END(nmi)
#endif
ALIGN
restore_all_enable_events:
+ CFI_DEFAULT_STACK adj=1
+ TRACE_IRQS_ON
XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
scrit: /**** START OF CRITICAL REGION ****/
XEN_TEST_PENDING(%rsi)
+ CFI_REMEMBER_STATE
jnz 14f # process more events if necessary...
XEN_PUT_VCPU_INFO(%rsi)
RESTORE_ARGS 0,8,0
HYPERVISOR_IRET 0
+ CFI_RESTORE_STATE
14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
XEN_PUT_VCPU_INFO(%rsi)
SAVE_REST
movq %rsp,%rdi # set the argument again
jmp 11b
+ CFI_ENDPROC
ecrit: /**** END OF CRITICAL REGION ****/
# At this point, unlike on x86-32, we don't do the fixup to simplify the
# code and the stack frame is more complex on x86-64.
@@ -970,8 +994,12 @@ ecrit: /**** END OF CRITICAL REGION ***
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we in category 1.
ENTRY(failsafe_callback)
+ _frame (RIP-0x30)
+ CFI_REL_OFFSET rcx, 0
+ CFI_REL_OFFSET r11, 8
movw %ds,%cx
cmpw %cx,0x10(%rsp)
+ CFI_REMEMBER_STATE
jne 1f
movw %es,%cx
cmpw %cx,0x18(%rsp)
@@ -984,17 +1012,26 @@ ENTRY(failsafe_callback)
jne 1f
/* All segments match their saved values => Category 2 (Bad IRET). */
movq (%rsp),%rcx
+ CFI_RESTORE rcx
movq 8(%rsp),%r11
+ CFI_RESTORE r11
addq $0x30,%rsp
+ CFI_ADJUST_CFA_OFFSET -0x30
movq $11,%rdi /* SIGSEGV */
jmp do_exit
+ CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp),%rcx
+ CFI_RESTORE rcx
movq 8(%rsp),%r11
+ CFI_RESTORE r11
addq $0x30,%rsp
+ CFI_ADJUST_CFA_OFFSET -0x30
pushq $0
+ CFI_ADJUST_CFA_OFFSET 8
SAVE_ALL
jmp error_exit
+ CFI_ENDPROC
#if 0
.section __ex_table,"a"
.align 8
@@ -1117,12 +1154,12 @@ END(device_not_available)
/* runs on exception stack */
KPROBE_ENTRY(debug)
- INTR_FRAME
-/* pushq $0
+/* INTR_FRAME
+ pushq $0
CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_debug
-/* paranoid_exit */
- CFI_ENDPROC
+/* paranoidexit
+ CFI_ENDPROC */
END(debug)
.previous .text
@@ -1144,12 +1181,12 @@ END(nmi)
#endif
KPROBE_ENTRY(int3)
- INTR_FRAME
-/* pushq $0
+/* INTR_FRAME
+ pushq $0
CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_int3
-/* jmp paranoid_exit1 */
- CFI_ENDPROC
+/* jmp paranoid_exit1
+ CFI_ENDPROC */
END(int3)
.previous .text
@@ -1193,9 +1230,11 @@ END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
- XCPT_FRAME
+/* XCPT_FRAME
+ paranoidentry do_stack_segment */
errorentry do_stack_segment
- CFI_ENDPROC
+/* jmp paranoid_exit1
+ CFI_ENDPROC */
END(stack_segment)
KPROBE_ENTRY(general_protection)
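
As an aside on the failsafe_callback comment above: the two categories are told apart purely by comparing each segment selector saved on the stack with the register's current contents. A hedged C model of that test (the struct and function names are invented for illustration, not kernel code):

	/* Category test modelled in C: any live selector differing from its
	 * saved copy means a segment reload faulted (category 1); if all
	 * match, the IRET itself faulted (category 2). */
	struct saved_segs { unsigned short ds, es, fs, gs; };

	static int segment_mismatch(const struct saved_segs *saved,
				    const struct saved_segs *live)
	{
		return saved->ds != live->ds || saved->es != live->es ||
		       saved->fs != live->fs || saved->gs != live->gs;
	}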
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S	Thu Mar 08 14:39:52 2007 -0600
@@ -22,10 +22,8 @@
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-
+#include <asm/dwarf2.h>
#include <xen/interface/elfnote.h>
-
-#define _PAGE_PRESENT 0x1
.section .bootstrap.text, "ax", @progbits
.code64
@@ -42,6 +40,7 @@ ENTRY(_start)
/* rsi is pointer to startup info structure.
pass it to C */
movq %rsi,%rdi
+ pushq $0 # fake return address
jmp x86_64_start_kernel
ENTRY(stext)
@@ -82,7 +81,25 @@ NEXT_PAGE(level2_kernel_pgt)
.fill 512,8,0
NEXT_PAGE(hypercall_page)
- .fill 512,8,0
+ CFI_STARTPROC
+ .rept 0x1000 / 0x20
+ .skip 1 /* push %rcx */
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rcx,0
+ .skip 2 /* push %r11 */
+ CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET r11,0
+ .skip 5 /* mov $#,%eax */
+ .skip 2 /* syscall */
+ .skip 2 /* pop %r11 */
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE r11
+ .skip 1 /* pop %rcx */
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE rcx
+ .align 0x20,0 /* ret */
+ .endr
+ CFI_ENDPROC
#undef NEXT_PAGE
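
The CFI block above annotates the hypervisor-populated hypercall page: `.rept 0x1000 / 0x20` emits one record per 32-byte stub, so a 4 KiB page holds 128 hypercall entries and entry N begins at byte N*32. A small self-contained sketch of that layout arithmetic (the constants mirror the assembly; the function name is illustrative):

	#include <stdio.h>

	#define HYPERCALL_PAGE_SIZE	0x1000	/* one 4 KiB page */
	#define HYPERCALL_STUB_SIZE	0x20	/* one stub per .align 0x20 */

	static unsigned long hypercall_stub_offset(unsigned int nr)
	{
		return (unsigned long)nr * HYPERCALL_STUB_SIZE;
	}

	int main(void)
	{
		printf("stubs per page: %d\n",
		       HYPERCALL_PAGE_SIZE / HYPERCALL_STUB_SIZE);	/* 128 */
		printf("hypercall 2 starts at offset %#lx\n",
		       hypercall_stub_offset(2));			/* 0x40 */
		return 0;
	}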
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c	Thu Mar 08 14:39:52 2007 -0600
@@ -47,11 +47,11 @@ EXPORT_SYMBOL(init_task);
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
-#endif
/* Copies of the original ist values from the tss are only accessed during
* debugging, no special alignment required.
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
+#endif
#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu Mar 08 14:39:52 2007 -0600
@@ -676,7 +676,8 @@ void __init setup_arch(char **cmdline_p)
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
- /* dmi_scan_machine(); */
+ if (is_initial_xendomain())
+ dmi_scan_machine();
#ifdef CONFIG_ACPI_NUMA
/*
@@ -691,6 +692,14 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn);
#endif
+#ifdef CONFIG_XEN
+ /*
+ * Reserve kernel, physmap, start info, initial page tables, and
+ * direct mapping.
+ */
+ reserve_bootmem_generic(__pa_symbol(&_text),
+				(table_end << PAGE_SHIFT) - __pa_symbol(&_text));
+#else
/* Reserve direct mapping */
reserve_bootmem_generic(table_start << PAGE_SHIFT,
(table_end - table_start) << PAGE_SHIFT);
@@ -699,12 +708,6 @@ void __init setup_arch(char **cmdline_p)
reserve_bootmem_generic(__pa_symbol(&_text),
__pa_symbol(&_end) - __pa_symbol(&_text));
-#ifdef CONFIG_XEN
- /* reserve physmap, start info and initial page tables */
- reserve_bootmem_generic(__pa_symbol(&_end),
- (table_start << PAGE_SHIFT) -
- __pa_symbol(&_end));
-#else
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
@@ -1625,13 +1628,6 @@ struct seq_operations cpuinfo_op = {
.show = show_cpuinfo,
};
-static int __init run_dmi_scan(void)
-{
- dmi_scan_machine();
- return 0;
-}
-core_initcall(run_dmi_scan);
-
#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
#include <linux/platform_device.h>
static __init int add_pcspkr(void)
@@ -1639,6 +1635,9 @@ static __init int add_pcspkr(void)
struct platform_device *pd;
int ret;
+ if (!is_initial_xendomain())
+ return 0;
+
pd = platform_device_alloc("pcspkr", -1);
if (!pd)
return -ENOMEM;
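
Both setup-xen.c hunks above apply the same rule: only the initial domain (dom0) owns the physical platform, so DMI scanning and the PC speaker device are probed there and skipped quietly in other domains. The shape of that gate, as a hedged stand-in (not the kernel API; is_initial_domain stands in for is_initial_xendomain()):

	static int is_initial_domain;

	static int probe_platform_device(void)
	{
		if (!is_initial_domain)
			return 0;	/* domU: nothing to probe, not an error */
		/* ... perform the real hardware probe here ... */
		return 0;
	}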
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c	Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-/* Exports for assembly files.
- All C exports should go in the respective C files. */
-
-#include <linux/module.h>
-#include <linux/smp.h>
-
-#include <asm/semaphore.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-EXPORT_SYMBOL(kernel_thread);
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(copy_user_generic);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
-#ifdef CONFIG_SMP
-extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
-extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
-EXPORT_SYMBOL(__write_lock_failed);
-EXPORT_SYMBOL(__read_lock_failed);
-#endif
-
-/* Export string functions. We normally rely on gcc builtin for most of these,
- but gcc sometimes decides not to inline them. */
-#undef memcpy
-#undef memset
-#undef memmove
-
-extern void * memset(void *,int,__kernel_size_t);
-extern void * memcpy(void *,const void *,__kernel_size_t);
-extern void * __memcpy(void *,const void *,__kernel_size_t);
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(__memcpy);
-
-EXPORT_SYMBOL(empty_zero_page);
-
-EXPORT_SYMBOL(load_gs_index);
-
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Thu Mar 08 14:39:52 2007 -0600
@@ -102,13 +102,10 @@ static void __meminit early_make_page_re
BUG();
}
-void make_page_readonly(void *va, unsigned int feature)
+static void __make_page_readonly(void *va)
{
pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
unsigned long addr = (unsigned long) va;
-
- if (xen_feature(feature))
- return;
pgd = pgd_offset_k(addr);
pud = pud_offset(pgd, addr);
@@ -120,16 +117,13 @@ void make_page_readonly(void *va, unsign
xen_l1_entry_update(ptep, pte); /* fallback */
if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
- make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
-}
-
-void make_page_writable(void *va, unsigned int feature)
+ __make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+static void __make_page_writable(void *va)
{
pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
unsigned long addr = (unsigned long) va;
-
- if (xen_feature(feature))
- return;
pgd = pgd_offset_k(addr);
pud = pud_offset(pgd, addr);
@@ -141,7 +135,19 @@ void make_page_writable(void *va, unsign
xen_l1_entry_update(ptep, pte); /* fallback */
if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
- make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
+ __make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+void make_page_readonly(void *va, unsigned int feature)
+{
+ if (!xen_feature(feature))
+ __make_page_readonly(va);
+}
+
+void make_page_writable(void *va, unsigned int feature)
+{
+ if (!xen_feature(feature))
+ __make_page_writable(va);
}
void make_pages_readonly(void *va, unsigned nr, unsigned int feature)
@@ -150,7 +156,7 @@ void make_pages_readonly(void *va, unsig
return;
while (nr-- != 0) {
- make_page_readonly(va, feature);
+ __make_page_readonly(va);
va = (void*)((unsigned long)va + PAGE_SIZE);
}
}
@@ -161,7 +167,7 @@ void make_pages_writable(void *va, unsig
return;
while (nr-- != 0) {
- make_page_writable(va, feature);
+ __make_page_writable(va);
va = (void*)((unsigned long)va + PAGE_SIZE);
}
}
@@ -208,7 +214,11 @@ static __init void *spp_getpage(void)
void *ptr;
if (after_bootmem)
ptr = (void *) get_zeroed_page(GFP_ATOMIC);
- else
+ else if (start_pfn < table_end) {
+ ptr = __va(start_pfn << PAGE_SHIFT);
+ start_pfn++;
+ memset(ptr, 0, PAGE_SIZE);
+ } else
ptr = alloc_bootmem_pages(PAGE_SIZE);
if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
@@ -299,36 +309,26 @@ static __init void set_pte_phys_ma(unsig
pmd = (pmd_t *) spp_getpage();
make_page_readonly(pmd, XENFEAT_writable_page_tables);
-
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
-
if (pmd != pmd_offset(pud, 0)) {
printk("PAGETABLE BUG #01! %p <-> %p\n", pmd,
pmd_offset(pud,0));
return;
}
}
pmd = pmd_offset(pud, vaddr);
-
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
make_page_readonly(pte, XENFEAT_writable_page_tables);
-
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
if (pte != pte_offset_kernel(pmd, 0)) {
printk("PAGETABLE BUG #02!\n");
return;
}
}
-
new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
+
pte = pte_offset_kernel(pmd, vaddr);
-
- /*
- * Note that the pte page is already RO, thus we want to use
- * xen_l1_entry_update(), not set_pte().
- */
- xen_l1_entry_update(pte,
- pfn_pte_ma(phys >> PAGE_SHIFT, prot));
+ set_pte(pte, new_pte);
/*
* It's enough to flush this one mapping.
@@ -404,6 +404,7 @@ static inline void __set_pte(pte_t *dst,
static inline int make_readonly(unsigned long paddr)
{
+ extern char __vsyscall_0;
int readonly = 0;
/* Make new page tables read-only. */
@@ -420,25 +421,45 @@ static inline int make_readonly(unsigned
/*
* No need for writable mapping of kernel image. This also ensures that
* page and descriptor tables embedded inside don't have writable
- * mappings.
+ * mappings. Exclude the vsyscall area here, allowing alternative
+ * instruction patching to work.
*/
- if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
+ if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))
+ && !(paddr >= __pa_symbol(&__vsyscall_0)
+ && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
readonly = 1;
return readonly;
}
+#ifndef CONFIG_XEN
/* Must run before zap_low_mappings */
__init void *early_ioremap(unsigned long addr, unsigned long size)
{
- return ioremap(addr, size);
+ unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
+
+ /* actually usually some more */
+ if (size >= LARGE_PAGE_SIZE) {
+ printk("SMBIOS area too long %lu\n", size);
+ return NULL;
+ }
+ set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+ map += LARGE_PAGE_SIZE;
+ set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+ __flush_tlb();
+ return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
}
/* To avoid virtual aliases later */
__init void early_iounmap(void *addr, unsigned long size)
{
- iounmap(addr);
-}
+	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
+ printk("early_iounmap: bad address %p\n", addr);
+ set_pmd(temp_mappings[0].pmd, __pmd(0));
+ set_pmd(temp_mappings[1].pmd, __pmd(0));
+ __flush_tlb();
+}
+#endif
static void __meminit
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
@@ -638,7 +659,7 @@ static void __init extend_init_mapping(u
static void __init find_early_table_space(unsigned long end)
{
- unsigned long puds, pmds, ptes, tables;
+ unsigned long puds, pmds, ptes, tables;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
@@ -656,6 +677,64 @@ static void __init find_early_table_spac
early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
end, table_start << PAGE_SHIFT,
(table_start << PAGE_SHIFT) + tables);
+}
+
+static void xen_finish_init_mapping(void)
+{
+ unsigned long i, start, end;
+
+ /* Re-vector virtual addresses pointing into the initial
+ mapping to the just-established permanent ones. */
+ xen_start_info = __va(__pa(xen_start_info));
+ xen_start_info->pt_base = (unsigned long)
+ __va(__pa(xen_start_info->pt_base));
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ phys_to_machine_mapping =
+ __va(__pa(xen_start_info->mfn_list));
+ xen_start_info->mfn_list = (unsigned long)
+ phys_to_machine_mapping;
+ }
+ if (xen_start_info->mod_start)
+ xen_start_info->mod_start = (unsigned long)
+ __va(__pa(xen_start_info->mod_start));
+
+ /* Destroy the Xen-created mappings beyond the kernel image as
+ * well as the temporary mappings created above. Prevents
+ * overlap with modules area (if init mapping is very big).
+ */
+ start = PAGE_ALIGN((unsigned long)_end);
+ end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
+ for (; start < end; start += PAGE_SIZE)
+ WARN_ON(HYPERVISOR_update_va_mapping(
+ start, __pte_ma(0), 0));
+
+ /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
+ table_end = ~0UL;
+
+ /*
+ * Prefetch pte's for the bt_ioremap() area. It gets used before the
+ * boot-time allocator is online, so allocate-on-demand would fail.
+ */
+ for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++)
+ __set_fixmap(i, 0, __pgprot(0));
+
+ /* Switch to the real shared_info page, and clear the dummy page. */
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+ /* Set up mapping of lowest 1MB of physical memory. */
+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
+ if (is_initial_xendomain())
+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+ else
+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
+ virt_to_mfn(empty_zero_page)
+ << PAGE_SHIFT,
+ PAGE_KERNEL_RO);
+
+ /* Disable the 'start_pfn' allocator. */
+ table_end = start_pfn;
}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -663,7 +742,7 @@ static void __init find_early_table_spac
physical memory. To access them they are temporarily mapped. */
void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
- unsigned long next;
+ unsigned long next;
Dprintk("init_memory_mapping\n");
@@ -702,31 +781,7 @@ void __meminit init_memory_mapping(unsig
if (!after_bootmem) {
BUG_ON(start_pfn != table_end);
-
- /* Re-vector virtual addresses pointing into the initial
- mapping to the just-established permanent ones. */
- xen_start_info = __va(__pa(xen_start_info));
- xen_start_info->pt_base = (unsigned long)
- __va(__pa(xen_start_info->pt_base));
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
- phys_to_machine_mapping =
- __va(__pa(xen_start_info->mfn_list));
- xen_start_info->mfn_list = (unsigned long)
- phys_to_machine_mapping;
- }
- if (xen_start_info->mod_start)
- xen_start_info->mod_start = (unsigned long)
- __va(__pa(xen_start_info->mod_start));
-
- /* Destroy the Xen-created mappings beyond the kernel image as
- * well as the temporary mappings created above. Prevents
- * overlap with modules area (if init mapping is very big).
- */
- start = PAGE_ALIGN((unsigned long)_end);
- end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
- for (; start < end; start += PAGE_SIZE)
- WARN_ON(HYPERVISOR_update_va_mapping(
- start, __pte_ma(0), 0));
+ xen_finish_init_mapping();
}
__flush_tlb_all();
@@ -805,7 +860,6 @@ void __init paging_init(void)
void __init paging_init(void)
{
unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
- int i;
memory_present(0, 0, end_pfn);
sparse_init();
@@ -813,22 +867,7 @@ void __init paging_init(void)
free_area_init_node(0, NODE_DATA(0), zones,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
- /* Switch to the real shared_info page, and clear the
- * dummy page. */
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
-
init_mm.context.pinned = 1;
-
- /* Setup mapping of lower 1st MB */
- for (i = 0; i < NR_FIX_ISAMAPS; i++)
- if (is_initial_xendomain())
- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
- else
- __set_fixmap(FIX_ISAMAP_BEGIN - i,
- virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
- PAGE_KERNEL_RO);
}
#endif
@@ -1028,11 +1067,6 @@ void __init mem_init(void)
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
-#ifdef __DO_LATER__
- /*
- * Some pages can be pinned, but some are not. Unpinning such pages
- * triggers BUG().
- */
unsigned long addr;
if (begin >= end)
@@ -1044,25 +1078,27 @@ void free_init_pages(char *what, unsigne
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)),
POISON_FREE_INITMEM, PAGE_SIZE);
- make_page_writable(
- __va(__pa(addr)), XENFEAT_writable_page_tables);
- /*
- * Make pages from __PAGE_OFFSET address as well
- */
- make_page_writable(
- (void *)addr, XENFEAT_writable_page_tables);
+ if (addr >= __START_KERNEL_map) {
+ /* make_readonly() reports all kernel addresses. */
+ __make_page_writable(__va(__pa(addr)));
+ if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
+ pgd_t *pgd = pgd_offset_k(addr);
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+
+			xen_l1_entry_update(pte, __pte(0)); /* fallback */
+ }
+ }
free_page(addr);
totalram_pages++;
}
-#endif
}
void free_initmem(void)
{
-#ifdef __DO_LATER__
memset(__initdata_begin, POISON_FREE_INITDATA,
__initdata_end - __initdata_begin);
-#endif
free_init_pages("unused kernel memory",
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
@@ -1125,7 +1161,7 @@ int kern_addr_valid(unsigned long addr)
if (pgd_none(*pgd))
return 0;
- pud = pud_offset_k(pgd, addr);
+ pud = pud_offset(pgd, addr);
if (pud_none(*pud))
return 0;
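
xen_finish_init_mapping() leans on the `__va(__pa(p))` idiom: a pointer valid only in the boot-time mapping is reduced to a physical address and re-expressed in the permanent direct map of the same memory. A toy user-space model, with invented base addresses standing in for the kernel's mapping offsets:

	#include <stdint.h>
	#include <stdio.h>

	/* Two illustrative linear mappings of the same physical range. */
	#define EARLY_MAP_BASE	0x40000000ULL
	#define DIRECT_MAP_BASE	0x80000000ULL

	static uint64_t fake_pa(uint64_t va) { return va - EARLY_MAP_BASE; }
	static uint64_t fake_va(uint64_t pa) { return pa + DIRECT_MAP_BASE; }

	int main(void)
	{
		uint64_t early_ptr = EARLY_MAP_BASE + 0x123000;

		/* The __va(__pa(p)) pattern: same page, new mapping. */
		printf("re-vectored: %#llx\n",
		       (unsigned long long)fake_va(fake_pa(early_ptr)));
		return 0;
	}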
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Thu Mar 08 14:39:52 2007 -0600
@@ -24,10 +24,13 @@ static inline void mm_walk_set_prot(void
{
struct page *page = virt_to_page(pt);
unsigned long pfn = page_to_pfn(page);
-
- BUG_ON(HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte(pfn, flags), 0));
+ int rc;
+
+ rc = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte(pfn, flags), 0);
+ if (rc)
+ BUG();
}
static void mm_walk(struct mm_struct *mm, pgprot_t flags)
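
The mm_walk_set_prot() hunk hoists the hypervisor call out of BUG_ON(): the side effect then stands on its own line with the failure check kept separate, a style that also stays safe if the assertion macro ever stops evaluating its argument. The same shape in plain C, with assert() standing in for BUG() (names are illustrative):

	#include <assert.h>

	static int update_mapping(void) { return 0; /* 0 == success */ }

	int main(void)
	{
		int rc = update_mapping();	/* the call always happens */
		assert(rc == 0);		/* the check is separate */
		return rc;
	}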
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/char/mem.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/mem.c Thu Mar 08 14:39:52 2007 -0600
@@ -787,7 +787,7 @@ static const struct file_operations mem_
.open = open_mem,
};
#else
-extern struct file_operations mem_fops;
+extern const struct file_operations mem_fops;
#endif
static const struct file_operations kmem_fops = {
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Thu Mar 08 14:39:52 2007 -0600
@@ -113,14 +113,13 @@ void __exit tpmif_exit(void);
static inline int
-tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len,
+tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
int isuserbuffer)
{
int copied = len;
- if (len > txb->size) {
+ if (len > txb->size)
copied = txb->size;
- }
if (isuserbuffer) {
if (copy_from_user(txb->data, src, copied))
return -EFAULT;
@@ -133,18 +132,20 @@ tx_buffer_copy(struct tx_buffer *txb, co
static inline struct tx_buffer *tx_buffer_alloc(void)
{
- struct tx_buffer *txb = kzalloc(sizeof (struct tx_buffer),
- GFP_KERNEL);
-
- if (txb) {
- txb->len = 0;
- txb->size = PAGE_SIZE;
- txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
- if (txb->data == NULL) {
- kfree(txb);
- txb = NULL;
- }
- }
+ struct tx_buffer *txb;
+
+ txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
+ if (!txb)
+ return NULL;
+
+ txb->len = 0;
+ txb->size = PAGE_SIZE;
+ txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+ if (txb->data == NULL) {
+ kfree(txb);
+ txb = NULL;
+ }
+
return txb;
}
@@ -160,37 +161,41 @@ static inline void tx_buffer_free(struct
/**************************************************************
Utility function for the tpm_private structure
**************************************************************/
-static inline void tpm_private_init(struct tpm_private *tp)
+static void tpm_private_init(struct tpm_private *tp)
{
spin_lock_init(&tp->tx_lock);
init_waitqueue_head(&tp->wait_q);
atomic_set(&tp->refcnt, 1);
}
-static inline void tpm_private_put(void)
-{
- if ( atomic_dec_and_test(&my_priv->refcnt)) {
- tpmif_free_tx_buffers(my_priv);
- kfree(my_priv);
- my_priv = NULL;
- }
+static void tpm_private_put(void)
+{
+ if (!atomic_dec_and_test(&my_priv->refcnt))
+ return;
+
+ tpmif_free_tx_buffers(my_priv);
+ kfree(my_priv);
+ my_priv = NULL;
}
static struct tpm_private *tpm_private_get(void)
{
int err;
- if (!my_priv) {
- my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
- if (my_priv) {
- tpm_private_init(my_priv);
- err = tpmif_allocate_tx_buffers(my_priv);
- if (err < 0) {
- tpm_private_put();
- }
- }
- } else {
+
+ if (my_priv) {
atomic_inc(&my_priv->refcnt);
- }
+ return my_priv;
+ }
+
+ my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
+ if (!my_priv)
+ return NULL;
+
+ tpm_private_init(my_priv);
+ err = tpmif_allocate_tx_buffers(my_priv);
+ if (err < 0)
+ tpm_private_put();
+
return my_priv;
}
@@ -379,10 +384,8 @@ static int tpmfront_probe(struct xenbus_
return -ENOMEM;
tp->chip = init_vtpm(&dev->dev, &tvd, tp);
-
- if (IS_ERR(tp->chip)) {
+ if (IS_ERR(tp->chip))
return PTR_ERR(tp->chip);
- }
err = xenbus_scanf(XBT_NIL, dev->nodename,
"handle", "%i", &handle);
@@ -401,6 +404,7 @@ static int tpmfront_probe(struct xenbus_
tpm_private_put();
return err;
}
+
return 0;
}
@@ -417,30 +421,34 @@ static int tpmfront_suspend(struct xenbu
{
struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
u32 ctr;
- /* lock, so no app can send */
+
+ /* Take the lock, preventing any application from sending. */
mutex_lock(&suspend_lock);
tp->is_suspended = 1;
- for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) {
+ for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
if ((ctr % 10) == 0)
printk("TPM-FE [INFO]: Waiting for outstanding "
"request.\n");
- /*
- * Wait for a request to be responded to.
- */
+ /* Wait for a request to be responded to. */
interruptible_sleep_on_timeout(&tp->wait_q, 100);
}
- xenbus_switch_state(dev, XenbusStateClosing);
-
- if (atomic_read(&tp->tx_busy)) {
- /*
- * A temporary work-around.
- */
- printk("TPM-FE [WARNING]: Resetting busy flag.");
- atomic_set(&tp->tx_busy, 0);
- }
-
- return 0;
+
+ return 0;
+}
+
+static int tpmfront_suspend_finish(struct tpm_private *tp)
+{
+ tp->is_suspended = 0;
+ /* Allow applications to send again. */
+ mutex_unlock(&suspend_lock);
+ return 0;
+}
+
+static int tpmfront_suspend_cancel(struct xenbus_device *dev)
+{
+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
+ return tpmfront_suspend_finish(tp);
}
static int tpmfront_resume(struct xenbus_device *dev)
@@ -484,6 +492,7 @@ static struct xenbus_driver tpmfront = {
.resume = tpmfront_resume,
.otherend_changed = backend_changed,
.suspend = tpmfront_suspend,
+ .suspend_cancel = tpmfront_suspend_cancel,
};
static void __init init_tpm_xenbus(void)
@@ -514,9 +523,8 @@ static void tpmif_free_tx_buffers(struct
{
unsigned int i;
- for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
+ for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
tx_buffer_free(tp->tx_buffers[i]);
- }
}
static void tpmif_rx_action(unsigned long priv)
@@ -536,9 +544,8 @@ static void tpmif_rx_action(unsigned lon
received = tx->size;
buffer = kmalloc(received, GFP_ATOMIC);
- if (NULL == buffer) {
+ if (!buffer)
goto exit;
- }
for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
struct tx_buffer *txb = tp->tx_buffers[i];
@@ -547,9 +554,8 @@ static void tpmif_rx_action(unsigned lon
tx = &tp->tx->ring[i].req;
tocopy = tx->size;
- if (tocopy > PAGE_SIZE) {
+ if (tocopy > PAGE_SIZE)
tocopy = PAGE_SIZE;
- }
memcpy(&buffer[offset], txb->data, tocopy);
@@ -607,12 +613,13 @@ static int tpm_xmit(struct tpm_private *
struct tx_buffer *txb = tp->tx_buffers[i];
int copied;
- if (NULL == txb) {
+ if (!txb) {
DPRINTK("txb (i=%d) is NULL. buffers initilized?\n"
"Not transmitting anything!\n", i);
spin_unlock_irq(&tp->tx_lock);
return -EFAULT;
}
+
copied = tx_buffer_copy(txb, &buf[offset], count,
isuserbuffer);
if (copied < 0) {
@@ -624,25 +631,26 @@ static int tpm_xmit(struct tpm_private *
offset += copied;
tx = &tp->tx->ring[i].req;
-
tx->addr = virt_to_machine(txb->data);
tx->size = txb->len;
-	DPRINTK("First 4 characters sent by TPM-FE are 0x%02x 0x%02x 0x%02x 0x%02x\n",
+ DPRINTK("First 4 characters sent by TPM-FE are "
+ "0x%02x 0x%02x 0x%02x 0x%02x\n",
txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
- /* get the granttable reference for this page */
+ /* Get the granttable reference for this page. */
tx->ref = gnttab_claim_grant_reference(&gref_head);
-
- if (-ENOSPC == tx->ref) {
+ if (tx->ref == -ENOSPC) {
spin_unlock_irq(&tp->tx_lock);
-		DPRINTK(" Grant table claim reference failed in func:%s line:%d file:%s\n", __FUNCTION__, __LINE__, __FILE__);
+ DPRINTK("Grant table claim reference failed in "
+ "func:%s line:%d file:%s\n",
+ __FUNCTION__, __LINE__, __FILE__);
return -ENOSPC;
}
- gnttab_grant_foreign_access_ref( tx->ref,
- tp->backend_id,
- virt_to_mfn(txb->data),
- 0 /*RW*/);
+ gnttab_grant_foreign_access_ref(tx->ref,
+ tp->backend_id,
+ virt_to_mfn(txb->data),
+ 0 /*RW*/);
wmb();
}
@@ -660,15 +668,10 @@ static int tpm_xmit(struct tpm_private *
static void tpmif_notify_upperlayer(struct tpm_private *tp)
{
- /*
- * Notify upper layer about the state of the connection
- * to the BE.
- */
- if (tp->is_connected) {
- vtpm_vd_status(tp->chip, TPM_VD_STATUS_CONNECTED);
- } else {
- vtpm_vd_status(tp->chip, TPM_VD_STATUS_DISCONNECTED);
- }
+ /* Notify upper layer about the state of the connection to the BE. */
+ vtpm_vd_status(tp->chip, (tp->is_connected
+ ? TPM_VD_STATUS_CONNECTED
+ : TPM_VD_STATUS_DISCONNECTED));
}
@@ -679,20 +682,16 @@ static void tpmif_set_connected_state(st
* should disconnect - assumption is that we will resume
* The mutex keeps apps from sending.
*/
- if (is_connected == 0 && tp->is_suspended == 1) {
+ if (is_connected == 0 && tp->is_suspended == 1)
return;
- }
/*
* Unlock the mutex if we are connected again
* after being suspended - now resuming.
* This also removes the suspend state.
*/
- if (is_connected == 1 && tp->is_suspended == 1) {
- tp->is_suspended = 0;
- /* unlock, so apps can resume sending */
- mutex_unlock(&suspend_lock);
- }
+ if (is_connected == 1 && tp->is_suspended == 1)
+ tpmfront_suspend_finish(tp);
if (is_connected != tp->is_connected) {
tp->is_connected = is_connected;
@@ -710,33 +709,24 @@ static void tpmif_set_connected_state(st
static int __init tpmif_init(void)
{
- long rc = 0;
struct tpm_private *tp;
if (is_initial_xendomain())
return -EPERM;
tp = tpm_private_get();
- if (!tp) {
- rc = -ENOMEM;
- goto failexit;
- }
+ if (!tp)
+ return -ENOMEM;
IPRINTK("Initialising the vTPM driver.\n");
- if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE,
- &gref_head ) < 0) {
- rc = -EFAULT;
- goto gnttab_alloc_failed;
+ if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
+ &gref_head) < 0) {
+ tpm_private_put();
+ return -EFAULT;
}
init_tpm_xenbus();
return 0;
-
-gnttab_alloc_failed:
- tpm_private_put();
-failexit:
-
- return (int)rc;
}
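
tpm_private_get()/tpm_private_put() implement a lazily allocated, reference-counted singleton: the first get allocates, later gets bump the count, and the final put frees and clears the pointer. A single-threaded sketch of the shape (the driver uses atomic_t; a plain int is enough to show the pattern):

	#include <stdlib.h>

	struct obj { int refcnt; };
	static struct obj *singleton;

	static struct obj *obj_get(void)
	{
		if (singleton) {
			singleton->refcnt++;	/* reuse existing instance */
			return singleton;
		}
		singleton = calloc(1, sizeof(*singleton));
		if (singleton)
			singleton->refcnt = 1;	/* first reference */
		return singleton;
	}

	static void obj_put(void)
	{
		if (!singleton || --singleton->refcnt)
			return;
		free(singleton);		/* last reference dropped */
		singleton = NULL;
	}

	int main(void) { return obj_get() ? (obj_put(), 0) : 1; }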
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu Mar 08 14:39:52 2007 -0600
@@ -335,7 +335,7 @@ static int blktap_ioctl(struct inode *in
unsigned int cmd, unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);
-static struct file_operations blktap_fops = {
+static const struct file_operations blktap_fops = {
.owner = THIS_MODULE,
.poll = blktap_poll,
.ioctl = blktap_ioctl,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/char/mem.c
--- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c	Thu Mar 08 14:39:52 2007 -0600
@@ -194,7 +194,7 @@ static int open_mem(struct inode * inode
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
-struct file_operations mem_fops = {
+const struct file_operations mem_fops = {
.llseek = memory_lseek,
.read = read_mem,
.write = write_mem,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Thu Mar 08 14:39:52 2007 -0600
@@ -59,27 +59,11 @@ EXPORT_SYMBOL(machine_halt);
EXPORT_SYMBOL(machine_halt);
EXPORT_SYMBOL(machine_power_off);
-/* Ensure we run on the idle task page tables so that we will
- switch page tables before running user space. This is needed
- on architectures with separate kernel and user page tables
- because the user page table pointer is not saved/restored. */
-static void switch_idle_mm(void)
-{
- struct mm_struct *mm = current->active_mm;
-
- if (mm == &init_mm)
- return;
-
- atomic_inc(&init_mm.mm_count);
- switch_mm(mm, &init_mm, current);
- current->active_mm = &init_mm;
- mmdrop(mm);
-}
-
static void pre_suspend(void)
{
HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
- clear_fixmap(FIX_SHARED_INFO);
+ HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+ __pte_ma(0), 0);
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -89,6 +73,7 @@ static void post_suspend(int suspend_can
static void post_suspend(int suspend_cancelled)
{
int i, j, k, fpp;
+ unsigned long shinfo_mfn;
extern unsigned long max_pfn;
extern unsigned long *pfn_to_mfn_frame_list_list;
extern unsigned long *pfn_to_mfn_frame_list[];
@@ -99,11 +84,14 @@ static void post_suspend(int suspend_can
xen_start_info->console.domU.mfn =
pfn_to_mfn(xen_start_info->console.domU.mfn);
} else {
+#ifdef CONFIG_SMP
cpu_initialized_map = cpumask_of_cpu(0);
- }
-
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-
+#endif
+ }
+
+ shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
+ HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+ pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), 0);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
memset(empty_zero_page, 0, PAGE_SIZE);
@@ -172,10 +160,25 @@ static int take_machine_down(void *p_fas
post_suspend(suspend_cancelled);
gnttab_resume();
- if (!suspend_cancelled)
+ if (!suspend_cancelled) {
irq_resume();
+#ifdef __x86_64__
+ /*
+ * Older versions of Xen do not save/restore the user %cr3.
+ * We do it here just in case, but there's no need if we are
+ * in fast-suspend mode as that implies a new enough Xen.
+ */
+ if (!fast_suspend) {
+ struct mmuext_op op;
+ op.cmd = MMUEXT_NEW_USER_BASEPTR;
+			op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(current->active_mm->pgd)) >> PAGE_SHIFT);
+ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+ BUG();
+ }
+#endif
+ }
time_resume();
- switch_idle_mm();
local_irq_enable();
if (fast_suspend && !suspend_cancelled) {
@@ -210,6 +213,10 @@ int __xen_suspend(int fast_suspend)
}
#endif
+ /* If we are definitely UP then 'slow mode' is actually faster. */
+ if (num_possible_cpus() == 1)
+ fast_suspend = 0;
+
if (fast_suspend) {
xenbus_suspend();
err = stop_machine_run(take_machine_down, &fast_suspend, 0);
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Thu Mar 08 14:39:52 2007 -0600
@@ -406,7 +406,7 @@ static int evtchn_release(struct inode *
return 0;
}
-static struct file_operations evtchn_fops = {
+static const struct file_operations evtchn_fops = {
.owner = THIS_MODULE,
.read = evtchn_read,
.write = evtchn_write,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Thu Mar 08 14:39:52 2007 -0600
@@ -99,8 +99,20 @@ typedef struct netif_st {
struct net_device *dev;
struct net_device_stats stats;
+ unsigned int carrier;
+
wait_queue_head_t waiting_to_free;
} netif_t;
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss; also the etherbridge
+ * can be rather lazy in activating its port).
+ */
+#define netback_carrier_on(netif) ((netif)->carrier = 1)
+#define netback_carrier_off(netif) ((netif)->carrier = 0)
+#define netback_carrier_ok(netif) ((netif)->carrier)
#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
@@ -120,7 +132,8 @@ int netif_map(netif_t *netif, unsigned l
void netif_xenbus_init(void);
-#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev))
+#define netif_schedulable(netif) \
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
void netif_schedule_work(netif_t *netif);
void netif_deschedule_work(netif_t *netif);
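
The private carrier flag introduced above is nothing more than a bit in the per-interface structure; the point is that the datapath can set, clear, and test it synchronously instead of going through netif_carrier_on()/netif_carrier_off() and their deferred effects. A minimal stand-in showing how the macros compose (all names invented):

	struct fake_netif { unsigned int carrier; int running; };

	#define fake_carrier_on(n)	((n)->carrier = 1)
	#define fake_carrier_off(n)	((n)->carrier = 0)
	#define fake_carrier_ok(n)	((n)->carrier)

	/* Mirrors netif_schedulable(): transmit only while the device is
	 * running and our own flag says the link is up. */
	static int fake_schedulable(const struct fake_netif *n)
	{
		return n->running && fake_carrier_ok(n);
	}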
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Thu Mar 08 14:39:52 2007 -0600
@@ -66,16 +66,19 @@ static int net_open(struct net_device *d
static int net_open(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (netback_carrier_ok(netif)) {
__netif_up(netif);
+ netif_start_queue(dev);
+ }
return 0;
}
static int net_close(struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
- if (netif_carrier_ok(dev))
+ if (netback_carrier_ok(netif))
__netif_down(netif);
+ netif_stop_queue(dev);
return 0;
}
@@ -138,8 +141,6 @@ netif_t *netif_alloc(domid_t domid, unsi
return ERR_PTR(-ENOMEM);
}
- netif_carrier_off(dev);
-
netif = netdev_priv(dev);
memset(netif, 0, sizeof(*netif));
netif->domid = domid;
@@ -147,6 +148,8 @@ netif_t *netif_alloc(domid_t domid, unsi
atomic_set(&netif->refcnt, 1);
init_waitqueue_head(&netif->waiting_to_free);
netif->dev = dev;
+
+ netback_carrier_off(netif);
netif->credit_bytes = netif->remaining_credit = ~0UL;
netif->credit_usec = 0UL;
@@ -285,7 +288,7 @@ int netif_map(netif_t *netif, unsigned l
netif_get(netif);
rtnl_lock();
- netif_carrier_on(netif->dev);
+ netback_carrier_on(netif);
if (netif_running(netif->dev))
__netif_up(netif);
rtnl_unlock();
@@ -302,9 +305,10 @@ err_rx:
void netif_disconnect(netif_t *netif)
{
- if (netif_carrier_ok(netif->dev)) {
+ if (netback_carrier_ok(netif)) {
rtnl_lock();
- netif_carrier_off(netif->dev);
+ netback_carrier_off(netif);
+ netif_carrier_off(netif->dev); /* discard queued packets */
if (netif_running(netif->dev))
__netif_down(netif);
rtnl_unlock();
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu Mar 08 14:39:52 2007 -0600
@@ -38,7 +38,10 @@
#include <xen/balloon.h>
#include <xen/interface/memory.h>
-/*#define NETBE_DEBUG_INTERRUPT*/
+/*define NETBE_DEBUG_INTERRUPT*/
+
+/* extra field used in struct page */
+#define netif_page_index(pg) (*(long *)&(pg)->mapping)
struct netbk_rx_meta {
skb_frag_t frag;
@@ -231,7 +234,7 @@ static void tx_queue_callback(unsigned l
static void tx_queue_callback(unsigned long data)
{
netif_t *netif = (netif_t *)data;
- if (netif_schedulable(netif->dev))
+ if (netif_schedulable(netif))
netif_wake_queue(netif->dev);
}
@@ -242,7 +245,7 @@ int netif_be_start_xmit(struct sk_buff *
BUG_ON(skb->dev != dev);
/* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
goto drop;
/*
@@ -352,7 +355,7 @@ static u16 netbk_gop_frag(netif_t *netif
copy_gop->flags = GNTCOPY_dest_gref;
if (PageForeign(page)) {
struct pending_tx_info *src_pend =
- &pending_tx_info[page->index];
+ &pending_tx_info[netif_page_index(page)];
copy_gop->source.domid = src_pend->netif->domid;
copy_gop->source.u.ref = src_pend->req.gref;
copy_gop->flags |= GNTCOPY_source_gref;
@@ -681,7 +684,7 @@ static void net_rx_action(unsigned long
}
if (netif_queue_stopped(netif->dev) &&
- netif_schedulable(netif->dev) &&
+ netif_schedulable(netif) &&
!netbk_queue_full(netif))
netif_wake_queue(netif->dev);
@@ -739,7 +742,7 @@ static void add_to_net_schedule_list_tai
spin_lock_irq(&net_schedule_list_lock);
if (!__on_net_schedule_list(netif) &&
- likely(netif_schedulable(netif->dev))) {
+ likely(netif_schedulable(netif))) {
list_add_tail(&netif->list, &net_schedule_list);
netif_get(netif);
}
@@ -1327,7 +1330,7 @@ static void netif_page_release(struct pa
/* Ready for next use. */
init_page_count(page);
- netif_idx_release(page->index);
+ netif_idx_release(netif_page_index(page));
}
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
@@ -1337,7 +1340,7 @@ irqreturn_t netif_be_int(int irq, void *
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
- if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
+ if (netif_schedulable(netif) && !netbk_queue_full(netif))
netif_wake_queue(netif->dev);
return IRQ_HANDLED;
@@ -1457,7 +1460,7 @@ static int __init netback_init(void)
for (i = 0; i < MAX_PENDING_REQS; i++) {
page = mmap_pages[i];
SetPageForeign(page, netif_page_release);
- page->index = i;
+ netif_page_index(page) = i;
}
pending_cons = 0;
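
netif_page_index() overlays a long on the mapping field of pages netback owns outright; the matching page-flags.h change later in this patch moves the PageForeign destructor from mapping to index, so the two users no longer collide. A user-space model of the overlay (it leans on the kernel's -fno-strict-aliasing build; the struct is invented):

	#include <stdio.h>

	struct fake_page { void *mapping; };

	/* Same trick as netif_page_index(): treat a pointer-sized field
	 * as integer storage while the page is privately owned. */
	#define fake_page_index(pg) (*(long *)&(pg)->mapping)

	int main(void)
	{
		struct fake_page pg = { 0 };

		fake_page_index(&pg) = 42;
		printf("%ld\n", fake_page_index(&pg));	/* prints 42 */
		return 0;
	}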
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu Mar 08 14:39:52 2007 -0600
@@ -338,9 +338,7 @@ static void connect(struct backend_info
xenbus_switch_state(dev, XenbusStateConnected);
- /* May not get a kick from the frontend, so start the tx_queue now. */
- if (!netbk_can_queue(be->netif->dev))
- netif_wake_queue(be->netif->dev);
+ netif_wake_queue(be->netif->dev);
}
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Mar 08 14:39:52 2007 -0600
@@ -154,6 +154,7 @@ struct netfront_info {
unsigned int irq;
unsigned int copying_receiver;
+ unsigned int carrier;
/* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
@@ -191,6 +192,15 @@ struct netfront_rx_info {
struct netif_rx_response rx;
struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};
+
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss).
+ */
+#define netfront_carrier_on(netif) ((netif)->carrier = 1)
+#define netfront_carrier_off(netif) ((netif)->carrier = 0)
+#define netfront_carrier_ok(netif) ((netif)->carrier)
/*
* Access macros for acquiring freeing slots in tx_skbs[].
@@ -590,6 +600,22 @@ static int send_fake_arp(struct net_devi
return dev_queue_xmit(skb);
}
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
+ (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
+}
+
+static inline void network_maybe_wake_tx(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+
+ if (unlikely(netif_queue_stopped(dev)) &&
+ netfront_tx_slot_available(np) &&
+ likely(netif_running(dev)))
+ netif_wake_queue(dev);
+}
+
static int network_open(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
@@ -597,7 +623,7 @@ static int network_open(struct net_devic
memset(&np->stats, 0, sizeof(np->stats));
spin_lock(&np->rx_lock);
- if (netif_carrier_ok(dev)) {
+ if (netfront_carrier_ok(np)) {
network_alloc_rx_buffers(dev);
np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
@@ -605,25 +631,9 @@ static int network_open(struct net_devic
}
spin_unlock(&np->rx_lock);
- netif_start_queue(dev);
+ network_maybe_wake_tx(dev);
return 0;
-}
-
-static inline int netfront_tx_slot_available(struct netfront_info *np)
-{
- return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
- (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
-}
-
-static inline void network_maybe_wake_tx(struct net_device *dev)
-{
- struct netfront_info *np = netdev_priv(dev);
-
- if (unlikely(netif_queue_stopped(dev)) &&
- netfront_tx_slot_available(np) &&
- likely(netif_running(dev)))
- netif_wake_queue(dev);
}
static void network_tx_buf_gc(struct net_device *dev)
@@ -633,7 +643,7 @@ static void network_tx_buf_gc(struct net
struct netfront_info *np = netdev_priv(dev);
struct sk_buff *skb;
- BUG_ON(!netif_carrier_ok(dev));
+ BUG_ON(!netfront_carrier_ok(np));
do {
prod = np->tx.sring->rsp_prod;
@@ -703,7 +713,7 @@ static void network_alloc_rx_buffers(str
int nr_flips;
netif_rx_request_t *req;
- if (unlikely(!netif_carrier_ok(dev)))
+ if (unlikely(!netfront_carrier_ok(np)))
return;
/*
@@ -934,7 +944,7 @@ static int network_start_xmit(struct sk_
spin_lock_irq(&np->tx_lock);
- if (unlikely(!netif_carrier_ok(dev) ||
+ if (unlikely(!netfront_carrier_ok(np) ||
(frags > 1 && !xennet_can_sg(dev)) ||
netif_needs_gso(dev, skb))) {
spin_unlock_irq(&np->tx_lock);
@@ -1024,7 +1034,7 @@ static irqreturn_t netif_int(int irq, vo
spin_lock_irqsave(&np->tx_lock, flags);
- if (likely(netif_carrier_ok(dev))) {
+ if (likely(netfront_carrier_ok(np))) {
network_tx_buf_gc(dev);
/* Under tx_lock: protects access to rx shared-ring indexes. */
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
@@ -1299,7 +1309,7 @@ static int netif_poll(struct net_device
spin_lock(&np->rx_lock);
- if (unlikely(!netif_carrier_ok(dev))) {
+ if (unlikely(!netfront_carrier_ok(np))) {
spin_unlock(&np->rx_lock);
return 0;
}
@@ -1317,7 +1327,7 @@ static int netif_poll(struct net_device
work_done = 0;
while ((i != rp) && (work_done < budget)) {
memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
- memset(extras, 0, sizeof(extras));
+ memset(extras, 0, sizeof(rinfo.extras));
err = xennet_get_responses(np, &rinfo, rp, &tmpq,
&pages_flipped);
@@ -1744,7 +1754,7 @@ static int network_connect(struct net_de
* domain a kick because we've probably just requeued some
* packets.
*/
- netif_carrier_on(dev);
+ netfront_carrier_on(np);
notify_remote_via_irq(np->irq);
network_tx_buf_gc(dev);
network_alloc_rx_buffers(dev);
@@ -1989,7 +1999,7 @@ static struct net_device * __devinit cre
np->netdev = netdev;
- netif_carrier_off(netdev);
+ netfront_carrier_off(np);
return netdev;
@@ -2023,7 +2033,7 @@ static void netif_disconnect_backend(str
/* Stop old i/f to prevent errors whilst we rebuild the state. */
spin_lock_irq(&info->tx_lock);
spin_lock(&info->rx_lock);
- netif_carrier_off(info->netdev);
+ netfront_carrier_off(info);
spin_unlock(&info->rx_lock);
spin_unlock_irq(&info->tx_lock);
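
netfront_tx_slot_available() bounds the in-flight request count: req_prod_pvt - rsp_cons is the number of ring entries not yet consumed, and the queue is only woken when a maximal packet (MAX_SKB_FRAGS fragments plus two extra slots) is guaranteed to fit. The arithmetic in isolation, with illustrative constants:

	#include <stdio.h>

	#define TX_MAX_TARGET	256	/* illustrative ring size */
	#define MAX_SKB_FRAGS	18	/* illustrative fragment limit */

	static int tx_slot_available(unsigned int req_prod_pvt,
				     unsigned int rsp_cons)
	{
		/* Unsigned subtraction also copes with index wrap-around. */
		return (req_prod_pvt - rsp_cons) <
		       (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
	}

	int main(void)
	{
		printf("%d\n", tx_slot_available(300, 100));	/* 1: room */
		printf("%d\n", tx_slot_available(340, 100));	/* 0: full */
		return 0;
	}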
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Thu Mar 08 14:39:52 2007 -0600
@@ -239,17 +239,12 @@ static void free_root_bus_devs(struct pc
{
struct pci_dev *dev;
- down_write(&pci_bus_sem);
while (!list_empty(&bus->devices)) {
- dev = container_of(bus->devices.next, struct pci_dev, bus_list);
- up_write(&pci_bus_sem);
-
+ dev = container_of(bus->devices.next, struct pci_dev,
+ bus_list);
dev_dbg(&dev->dev, "removing device\n");
pci_remove_bus_device(dev);
-
- down_write(&pci_bus_sem);
- }
- up_write(&pci_bus_sem);
+ }
}
void pcifront_free_roots(struct pcifront_device *pdev)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu Mar 08 14:39:52 2007 -0600
@@ -248,7 +248,7 @@ static int privcmd_enforce_singleshot_ma
}
#endif
-static struct file_operations privcmd_file_ops = {
+static const struct file_operations privcmd_file_ops = {
.ioctl = privcmd_ioctl,
.mmap = privcmd_mmap,
};
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Thu Mar 08 14:39:52 2007 -0600
@@ -629,7 +629,7 @@ static unsigned int vtpm_op_poll(struct
return flags;
}
-static struct file_operations vtpm_ops = {
+static const struct file_operations vtpm_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.open = vtpm_op_open,
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Thu Mar 08 14:39:52 2007 -0600
@@ -173,17 +173,22 @@ static ssize_t xenbus_dev_write(struct f
void *reply;
char *path, *token;
struct watch_adapter *watch, *tmp_watch;
- int err;
-
- if ((len + u->len) > sizeof(u->u.buffer))
- return -EINVAL;
-
- if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0)
- return -EFAULT;
+ int err, rc = len;
+
+ if ((len + u->len) > sizeof(u->u.buffer)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
+ rc = -EFAULT;
+ goto out;
+ }
u->len += len;
- if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
- return len;
+ if ((u->len < sizeof(u->u.msg)) ||
+ (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
+ return rc;
msg_type = u->u.msg.type;
@@ -201,14 +206,17 @@ static ssize_t xenbus_dev_write(struct f
case XS_SET_PERMS:
if (msg_type == XS_TRANSACTION_START) {
trans = kmalloc(sizeof(*trans), GFP_KERNEL);
- if (!trans)
- return -ENOMEM;
+ if (!trans) {
+ rc = -ENOMEM;
+ goto out;
+ }
}
reply = xenbus_dev_request_and_reply(&u->u.msg);
if (IS_ERR(reply)) {
kfree(trans);
- return PTR_ERR(reply);
+ rc = PTR_ERR(reply);
+ goto out;
}
if (msg_type == XS_TRANSACTION_START) {
@@ -231,8 +239,10 @@ static ssize_t xenbus_dev_write(struct f
case XS_UNWATCH:
path = u->u.buffer + sizeof(u->u.msg);
token = memchr(path, 0, u->u.msg.len);
- if (token == NULL)
- return -EILSEQ;
+ if (token == NULL) {
+ rc = -EILSEQ;
+ goto out;
+ }
token++;
if (msg_type == XS_WATCH) {
@@ -251,7 +261,8 @@ static ssize_t xenbus_dev_write(struct f
err = register_xenbus_watch(&watch->watch);
if (err) {
free_watch_adapter(watch);
- return err;
+ rc = err;
+ goto out;
}
list_add(&watch->list, &u->watches);
@@ -265,7 +276,6 @@ static ssize_t xenbus_dev_write(struct f
&u->watches, list) {
if (!strcmp(watch->token, token) &&
!strcmp(watch->watch.node, path))
- break;
{
unregister_xenbus_watch(&watch->watch);
list_del(&watch->list);
@@ -278,11 +288,13 @@ static ssize_t xenbus_dev_write(struct f
break;
default:
- return -EINVAL;
- }
-
+ rc = -EINVAL;
+ break;
+ }
+
+ out:
u->len = 0;
- return len;
+ return rc;
}
static int xenbus_dev_open(struct inode *inode, struct file *filp)
@@ -342,7 +354,7 @@ static unsigned int xenbus_dev_poll(stru
return 0;
}
-static struct file_operations xenbus_dev_file_ops = {
+static const struct file_operations xenbus_dev_file_ops = {
.read = xenbus_dev_read,
.write = xenbus_dev_write,
.open = xenbus_dev_open,
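
The xenbus_dev_write() rewrite is a single-exit refactor: every failure sets rc and jumps to one out: label, so the shared cleanup (resetting the buffered length once a whole message has been handled) cannot be skipped on any path. Its shape, reduced to a hedged sketch (buffer handling and error values simplified):

	#include <string.h>

	struct writebuf { char buf[64]; unsigned int len; };

	long buf_write(struct writebuf *u, const char *src, unsigned int len)
	{
		long rc = len;

		if (u->len + len > sizeof(u->buf)) {
			rc = -1;	/* would be -EINVAL */
			goto out;
		}
		memcpy(u->buf + u->len, src, len);
		u->len += len;
		if (u->len < sizeof(u->buf))
			return rc;	/* message incomplete: keep buffering */
		/* ... process the complete message, setting rc on error ... */
	out:
		u->len = 0;		/* one cleanup path, success or failure */
		return rc;
	}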
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Thu Mar 08 14:39:52 2007 -0600
@@ -20,6 +20,14 @@
#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
#ifdef __KERNEL__
+
+/*
+ * Need to repeat this here in order to not include pgtable.h (which in turn
+ * depends on definitions made here), but to be able to use the symbolic
+ * below. The preprocessor will warn if the two definitions aren't identical.
+ */
+#define _PAGE_PRESENT 0x001
+
#ifndef __ASSEMBLY__
#include <linux/string.h>
@@ -28,13 +36,6 @@
#include <asm/bug.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
-
-/*
- * Need to repeat this here in order to not include pgtable.h (which in turn
- * depends on definitions made here), but to be able to use the symbolic
- * below. The preprocessor will warn if the two definitions aren't identical.
- */
-#define _PAGE_PRESENT 0x001
#define arch_free_page(_page,_order) \
({ int foreign = PageForeign(_page); \
@@ -225,8 +226,6 @@ extern int page_is_ram(unsigned long pag
((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define __HAVE_ARCH_GATE_AREA 1
-
#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Thu Mar 08 14:39:52 2007 -0600
@@ -137,7 +137,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
})
#define write_cr4(x) \
- __asm__ __volatile__("movl %0,%%cr4": :"r" (x));
+ __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
/*
* Clear and set 'TS' bit respectively
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h	Thu Mar 08 14:39:52 2007 -0600
@@ -53,6 +53,11 @@ enum fixed_addresses {
#define NR_FIX_ISAMAPS 256
FIX_ISAMAP_END,
FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
+ __end_of_permanent_fixed_addresses,
+ /* temporary boot-time mappings, used before ioremap() is functional */
+#define NR_FIX_BTMAPS 16
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
__end_of_fixed_addresses
};
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h	Thu Mar 08 14:39:52 2007 -0600
@@ -150,8 +150,10 @@ static inline void __iomem * ioremap (un
return __ioremap(offset, size, 0);
}
-extern void *early_ioremap(unsigned long addr, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
+extern void *bt_ioremap(unsigned long addr, unsigned long size);
+extern void bt_iounmap(void *addr, unsigned long size);
+#define early_ioremap bt_ioremap
+#define early_iounmap bt_iounmap
/*
* This one maps high address device memory and turns off caching for that
area.
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Mar 08 14:39:52 2007 -0600
@@ -403,19 +403,6 @@ static inline int pmd_large(pmd_t pte) {
/* to find an entry in a page-table-directory. */
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
-static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
-{
- return pud + pud_index(address);
-}
-
-/* Find correct pud via the hidden fourth level page level: */
-
-/* This accesses the reference page table of the boot cpu.
- Other CPUs get synced lazily via the page fault handler. */
-static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address)
-{
- return pud_offset(pgd_offset_k(address), address);
-}
/* PMD - Level 2 access */
#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h	Thu Mar 08 14:39:52 2007 -0600
@@ -246,11 +246,13 @@ DECLARE_PER_CPU(struct tss_struct,init_t
extern struct cpuinfo_x86 boot_cpu_data;
+#ifndef CONFIG_X86_NO_TSS
/* Save the original ist values for checking stack pointers during debugging */
struct orig_ist {
unsigned long ist[7];
};
DECLARE_PER_CPU(struct orig_ist, orig_ist);
+#endif
#ifdef CONFIG_X86_VSMP
#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/linux/page-flags.h
--- a/linux-2.6-xen-sparse/include/linux/page-flags.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/linux/page-flags.h	Thu Mar 08 14:39:52 2007 -0600
@@ -252,14 +252,14 @@
#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
#define SetPageForeign(page, dtor) do { \
set_bit(PG_foreign, &(page)->flags); \
- (page)->mapping = (void *)dtor; \
+ (page)->index = (long)(dtor); \
} while (0)
#define ClearPageForeign(page) do { \
clear_bit(PG_foreign, &(page)->flags); \
- (page)->mapping = NULL; \
+ (page)->index = 0; \
} while (0)
#define PageForeignDestructor(page) \
- ( (void (*) (struct page *)) (page)->mapping )(page)
+ ( (void (*) (struct page *)) (page)->index )(page)
struct page; /* forward declaration */
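
With the destructor now parked in page->index, PageForeignDestructor() has to cast the stored integer back into a function pointer before calling it. A self-contained model (round-tripping a function pointer through long is implementation-defined in ISO C but holds on the platforms this tree targets; all names are invented):

	#include <stdio.h>

	struct fake_page { long index; };

	static void my_dtor(struct fake_page *pg)
	{
		printf("destructor ran for %p\n", (void *)pg);
	}

	#define set_foreign_dtor(pg, d)	((pg)->index = (long)(d))
	#define run_foreign_dtor(pg) \
		(((void (*)(struct fake_page *))(pg)->index)(pg))

	int main(void)
	{
		struct fake_page pg = { 0 };

		set_foreign_dtor(&pg, my_dtor);
		run_foreign_dtor(&pg);	/* calls my_dtor(&pg) */
		return 0;
	}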
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- a/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h	Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h	Thu Mar 08 14:39:52 2007 -0600
@@ -4,7 +4,7 @@
#include <linux/kernel.h>
#include <linux/cpumask.h>
-#if defined(CONFIG_X86)
+#if defined(CONFIG_X86) && defined(CONFIG_SMP)
extern cpumask_t cpu_initialized_map;
#define cpu_set_initialized(cpu) cpu_set(cpu, cpu_initialized_map)
#else
diff -r 8f0b5295bb1b -r dcec453681bc linux-2.6-xen-sparse/mm/Kconfig
--- a/linux-2.6-xen-sparse/mm/Kconfig Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-config SELECT_MEMORY_MODEL
- def_bool y
- depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
-
-choice
- prompt "Memory model"
- depends on SELECT_MEMORY_MODEL
- default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
- default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
- default FLATMEM_MANUAL
-
-config FLATMEM_MANUAL
- bool "Flat Memory"
-	depends on !(ARCH_DISCONTIGMEM_ENABLE || ARCH_SPARSEMEM_ENABLE) || ARCH_FLATMEM_ENABLE
- help
- This option allows you to change some of the ways that
- Linux manages its memory internally. Most users will
- only have one option here: FLATMEM. This is normal
- and a correct option.
-
- Some users of more advanced features like NUMA and
- memory hotplug may have different options here.
- DISCONTIGMEM is an more mature, better tested system,
- but is incompatible with memory hotplug and may suffer
- decreased performance over SPARSEMEM. If unsure between
- "Sparse Memory" and "Discontiguous Memory", choose
- "Discontiguous Memory".
-
- If unsure, choose this option (Flat Memory) over any other.
-
-config DISCONTIGMEM_MANUAL
- bool "Discontiguous Memory"
- depends on ARCH_DISCONTIGMEM_ENABLE
- help
- This option provides enhanced support for discontiguous
- memory systems, over FLATMEM. These systems have holes
- in their physical address spaces, and this option provides
- more efficient handling of these holes. However, the vast
- majority of hardware has quite flat address spaces, and
- can have degraded performance from extra overhead that
- this option imposes.
-
- Many NUMA configurations will have this as the only option.
-
- If unsure, choose "Flat Memory" over this option.
-
-config SPARSEMEM_MANUAL
- bool "Sparse Memory"
- depends on ARCH_SPARSEMEM_ENABLE
- help
- This will be the only option for some systems, including
- memory hotplug systems. This is normal.
-
- For many other systems, this will be an alternative to
- "Discontiguous Memory". This option provides some potential
- performance benefits, along with decreased code complexity,
- but it is newer, and more experimental.
-
- If unsure, choose "Discontiguous Memory" or "Flat Memory"
- over this option.
-
-endchoice
-
-config DISCONTIGMEM
- def_bool y
- depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
-
-config SPARSEMEM
- def_bool y
- depends on SPARSEMEM_MANUAL
-
-config FLATMEM
- def_bool y
- depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
-
-config FLAT_NODE_MEM_MAP
- def_bool y
- depends on !SPARSEMEM
-
-#
-# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
-# to represent different areas of memory. This variable allows
-# those dependencies to exist individually.
-#
-config NEED_MULTIPLE_NODES
- def_bool y
- depends on DISCONTIGMEM || NUMA
-
-config HAVE_MEMORY_PRESENT
- def_bool y
- depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
-
-#
-# SPARSEMEM_EXTREME (which is the default) does some bootmem
-# allocations when memory_present() is called. If this can not
-# be done on your architecture, select this option. However,
-# statically allocating the mem_section[] array can potentially
-# consume vast quantities of .bss, so be careful.
-#
-# This option will also potentially produce smaller runtime code
-# with gcc 3.4 and later.
-#
-config SPARSEMEM_STATIC
- def_bool n
-
-#
-# Architecture platforms which require a two level mem_section in SPARSEMEM
-# must select this option. This is usually for architecture platforms with
-# an extremely sparse physical address space.
-#
-config SPARSEMEM_EXTREME
- def_bool y
- depends on SPARSEMEM && !SPARSEMEM_STATIC
-
-# eventually, we can have this option just 'select SPARSEMEM'
-config MEMORY_HOTPLUG
- bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
- depends on (IA64 || X86 || PPC64)
-
-comment "Memory hotplug is currently incompatible with Software Suspend"
- depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
-
-# Heavily threaded applications may benefit from splitting the mm-wide
-# page_table_lock, so that faults on different parts of the user address
-# space can be handled with less contention: split it at this NR_CPUS.
-# Default to 4 for wider testing, though 8 might be more appropriate.
-# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
-# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
-# XEN on x86 architecture uses the mapping field on pagetable pages to store a
-# pointer to the destructor. This conflicts with pte_lock_deinit().
-#
-config SPLIT_PTLOCK_CPUS
- int
- default "4096" if ARM && !CPU_CACHE_VIPT
- default "4096" if PARISC && !PA20
- default "4096" if X86_XEN || X86_64_XEN
- default "4"
-
-#
-# support for page migration
-#
-config MIGRATION
- bool "Page migration"
- def_bool y
- depends on NUMA
- help
- Allows the migration of the physical location of pages of processes
- while the virtual addresses are not changed. This is useful for
- example on NUMA systems to put pages nearer to the processors accessing
- the page.
-
-config RESOURCES_64BIT
- bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT &&
EXPERIMENTAL)
- default 64BIT
- help
- This option allows memory and IO resources to be 64 bit.
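This private copy of mm/Kconfig existed chiefly to force SPLIT_PTLOCK_CPUS to 4096 on Xen; with the destructor moved from page->mapping to page->index above, the conflict it guarded against is gone and the stock file suffices. The clash, as best recalled from the upstream 2.6.18 headers (an assumption, not quoted from this patch):

    /* Recollection of the 2.6.18 split-ptlock helpers; pte_lock_deinit()
     * writes page->mapping, which would have clobbered a destructor
     * pointer stored there. */
    #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
    #define pte_lock_init(_page)  do { spin_lock_init(__pte_lockptr(_page)); } while (0)
    #define pte_lock_deinit(page) ((page)->mapping = NULL)
    #endif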
diff -r 8f0b5295bb1b -r dcec453681bc patches/linux-2.6.18/blktap-aio-16_03_06.patch
--- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch Mon Mar 05 12:49:12 2007 -0600
+++ b/patches/linux-2.6.18/blktap-aio-16_03_06.patch Thu Mar 08 14:39:52 2007 -0600
@@ -106,7 +106,7 @@ diff -pruN ../orig-linux-2.6.18/fs/aio.c
+ return pollflags;
+}
+
-+static struct file_operations aioq_fops = {
++static const struct file_operations aioq_fops = {
+ .release = aio_queue_fd_close,
+ .poll = aio_queue_fd_poll
+};
@@ -201,7 +201,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
int maxevents, long timeout);
static int eventpollfs_delete_dentry(struct dentry *dentry);
-static struct inode *ep_eventpoll_inode(void);
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops);
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops);
static int eventpollfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt);
@@ -221,7 +221,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
- struct eventpoll *ep)
+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, struct file_operations *fops)
++ struct eventpoll *ep, const struct file_operations *fops)
{
struct qstr this;
char name[32];
@@ -248,7 +248,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
-static struct inode *ep_eventpoll_inode(void)
-+static struct inode *ep_eventpoll_inode(struct file_operations *fops)
++static struct inode *ep_eventpoll_inode(const struct file_operations *fops)
{
int error = -ENOMEM;
struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
@@ -288,7 +288,7 @@ diff -pruN ../orig-linux-2.6.18/include/
+ */
+struct eventpoll;
+int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-+ struct eventpoll *ep, struct file_operations *fops);
++ struct eventpoll *ep, const struct file_operations *fops);
#else
static inline void eventpoll_init_file(struct file *file) {}
diff -r 8f0b5295bb1b -r dcec453681bc tools/Makefile
--- a/tools/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -24,9 +24,8 @@ SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
# These don't cross-compile
ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
-SUBDIRS-y += python
-SUBDIRS-y += pygrub
-SUBDIRS-y += ptsname
+SUBDIRS-$(PYTHON_TOOLS) += python
+SUBDIRS-$(PYTHON_TOOLS) += pygrub
endif
.PHONY: all
@@ -42,8 +41,8 @@ install: check
$(MAKE) -C $$subdir $@; \
done
$(MAKE) ioemuinstall
- $(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump
- $(INSTALL_DIR) -p $(DESTDIR)/var/log/xen
+ $(INSTALL_DIR) $(DESTDIR)/var/xen/dump
+ $(INSTALL_DIR) $(DESTDIR)/var/log/xen
.PHONY: clean
clean: check_clean
diff -r 8f0b5295bb1b -r dcec453681bc tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/blktap/lib/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -40,8 +40,8 @@ libblktap: libblktap.a
libblktap: libblktap.a
install: all
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
- $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DIR) $(DESTDIR)/usr/include
$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so.$(MAJOR)
ln -sf libblktap.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so
diff -r 8f0b5295bb1b -r dcec453681bc tools/console/Makefile
--- a/tools/console/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/console/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -30,7 +30,7 @@ xenconsole: $(patsubst %.c,%.o,$(wildcar
.PHONY: install
install: $(BIN)
- $(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR)
+ $(INSTALL_DIR) $(DESTDIR)/$(DAEMON_INSTALL_DIR)
$(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR)
- $(INSTALL_DIR) -p $(DESTDIR)/$(CLIENT_INSTALL_DIR)
+ $(INSTALL_DIR) $(DESTDIR)/$(CLIENT_INSTALL_DIR)
$(INSTALL_PROG) xenconsole $(DESTDIR)/$(CLIENT_INSTALL_DIR)
diff -r 8f0b5295bb1b -r dcec453681bc tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/firmware/rombios/rombios.c Thu Mar 08 14:39:52 2007 -0600
@@ -890,7 +890,7 @@ static void int15_function();
static void int15_function();
static void int16_function();
static void int17_function();
-static void int19_function();
+static void int18_function();
static void int1a_function();
static void int70_function();
static void int74_function();
@@ -1837,6 +1837,38 @@ keyboard_panic(status)
}
//--------------------------------------------------------------------------
+// machine_reset
+//--------------------------------------------------------------------------
+ void
+machine_reset()
+{
+ /* Frob the keyboard reset line to reset the processor */
+ outb(0x64, 0x60); /* Map the flags register at data port (0x60) */
+ outb(0x60, 0x14); /* Set the flags to system|disable */
+ outb(0x64, 0xfe); /* Pulse output 0 (system reset) low */
+ BX_PANIC("Couldn't reset the machine\n");
+}
+
+//--------------------------------------------------------------------------
+// clobber_entry_point
+// Because PV drivers in HVM guests detach some of the emulated devices,
+// it is not safe to do a soft reboot by just dropping to real mode and
+// jumping at ffff:0000 -- the boot drives might have disappeared!
+// This rather foul function overwrites(!) the BIOS entry point
+// to point at machine-reset, which will cause the Xen tools to
+// rebuild the whole machine from scratch.
+//--------------------------------------------------------------------------
+ void
+clobber_entry_point()
+{
+ /* The instruction at the entry point is one byte (0xea) for the
+ * jump opcode, then two bytes of address, then two of segment.
+ * Overwrite the address bytes.*/
+ write_word(0xffff, 0x0001, machine_reset);
+}
+
+
+//--------------------------------------------------------------------------
// shutdown_status_panic
// called when the shutdown status is not implemented, displays the status
//--------------------------------------------------------------------------
@@ -7626,7 +7658,7 @@ int17_function(regs, ds, iret_addr)
}
void
-int19_function(seq_nr)
+int18_function(seq_nr)
Bit16u seq_nr;
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
@@ -7702,8 +7734,8 @@ ASM_START
push cx
push dx
- mov dl, _int19_function.bootdrv + 2[bp]
- mov ax, _int19_function.bootseg + 2[bp]
+ mov dl, _int18_function.bootdrv + 2[bp]
+ mov ax, _int18_function.bootseg + 2[bp]
mov es, ax ;; segment
mov bx, #0x0000 ;; offset
mov ah, #0x02 ;; function 2, read diskette sector
@@ -7714,7 +7746,7 @@ ASM_START
int #0x13 ;; read sector
jnc int19_load_done
mov ax, #0x0001
- mov _int19_function.status + 2[bp], ax
+ mov _int18_function.status + 2[bp], ax
int19_load_done:
pop dx
@@ -7789,13 +7821,13 @@ ASM_START
;; Build an iret stack frame that will take us to the boot vector.
;; iret pops ip, then cs, then flags, so push them in the opposite order.
pushf
- mov ax, _int19_function.bootseg + 0[bp]
+ mov ax, _int18_function.bootseg + 0[bp]
push ax
- mov ax, _int19_function.bootip + 0[bp]
+ mov ax, _int18_function.bootip + 0[bp]
push ax
;; Set the magic number in ax and the boot drive in dl.
mov ax, #0xaa55
- mov dl, _int19_function.bootdrv + 0[bp]
+ mov dl, _int18_function.bootdrv + 0[bp]
;; Zero some of the other registers.
xor bx, bx
mov ds, bx
@@ -8272,6 +8304,8 @@ int18_handler: ;; Boot Failure recovery:
mov ss, ax
;; Get the boot sequence number out of the IPL memory
+ ;; The first time we do this it will have been set to -1 so
+ ;; we will start from device 0.
mov bx, #IPL_SEG
mov ds, bx ;; Set segment
mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number
@@ -8279,43 +8313,33 @@ int18_handler: ;; Boot Failure recovery:
mov IPL_SEQUENCE_OFFSET, bx ;; Write it back
mov ds, ax ;; and reset the segment to zero.
- ;; Carry on in the INT 19h handler, using the new sequence number
+ ;; Call the C code for the next boot device
push bx
-
- jmp int19_next_boot
+ call _int18_function
+
+ ;; Boot failed: invoke the boot recovery function...
+ int #0x18
;----------
;- INT19h -
;----------
int19_relocated: ;; Boot function, relocated
-
- ;; int19 was beginning to be really complex, so now it
- ;; just calls a C function that does the work
-
- push bp
- mov bp, sp
-
- ;; Reset SS and SP
+ ;;
+ ;; *** Warning: INT 19h resets the whole machine ***
+ ;;
+ ;; Because PV drivers in HVM guests detach some of the emulated devices,
+ ;; it is not safe to do a soft reboot by just dropping to real mode and
+ ;; invoking INT 19h -- the boot drives might have disappeared!
+ ;; If the user asks for a soft reboot, the only thing we can do is
+ ;; reset the whole machine. When it comes back up, the normal BIOS
+ ;; boot sequence will start, which is more or less the required behaviour.
+ ;;
+ ;; Reset SP and SS
mov ax, #0xfffe
mov sp, ax
xor ax, ax
mov ss, ax
-
- ;; Start from the first boot device (0, in AX)
- mov bx, #IPL_SEG
- mov ds, bx ;; Set segment to write to the IPL memory
- mov IPL_SEQUENCE_OFFSET, ax ;; Save the sequence number
- mov ds, ax ;; and reset the segment.
-
- push ax
-
-int19_next_boot:
-
- ;; Call the C code for the next boot device
- call _int19_function
-
- ;; Boot failed: invoke the boot recovery function
- int #0x18
+ call _machine_reset
;----------
;- INT1Ch -
@@ -9609,6 +9633,8 @@ normal_post:
call _log_bios_start
+ call _clobber_entry_point
+
;; set all interrupts to default handler
mov bx, #0x0000 ;; offset index
mov cx, #0x0100 ;; counter (256 interrupts)
@@ -9857,8 +9883,10 @@ post_default_ints:
call _tcpa_calling_int19h /* specs: 8.2.3 step 1 */
call _tcpa_add_event_separators /* specs: 8.2.3 step 2 */
#endif
- int #0x19
- //JMP_EP(0x0064) ; INT 19h location
+
+ ;; Start the boot sequence. See the comments in int19_relocated
+ ;; for why we use INT 18h instead of INT 19h here.
+ int #0x18
#if BX_TCGBIOS
call _tcpa_returned_int19h /* specs: 8.2.3 step 3/7 */
diff -r 8f0b5295bb1b -r dcec453681bc tools/guest-headers/Makefile
--- a/tools/guest-headers/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/guest-headers/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -13,7 +13,7 @@ check:
install-Linux:
mkdir -p $(DESTDIR)/usr/include/xen/linux
- install -m0644 $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux
+ $(INSTALL_DATA) $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux
install-SunOS:
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/Makefile
--- a/tools/ioemu/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -65,10 +65,10 @@ common de-ch es fo fr-ca hu
install-doc: $(DOCS)
mkdir -p "$(DESTDIR)$(docdir)"
- $(INSTALL) -m 644 qemu-doc.html qemu-tech.html "$(DESTDIR)$(docdir)"
+ $(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(docdir)"
ifndef CONFIG_WIN32
mkdir -p "$(DESTDIR)$(mandir)/man1"
- $(INSTALL) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
+ $(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
endif
install: all $(if $(BUILD_DOCS),install-doc)
@@ -77,12 +77,12 @@ install: all $(if $(BUILD_DOCS),install-
# mkdir -p "$(DESTDIR)$(datadir)"
# for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
# video.x openbios-sparc32 linux_boot.bin; do \
-# $(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
+# $(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
# done
ifndef CONFIG_WIN32
mkdir -p "$(DESTDIR)$(datadir)/keymaps"
for x in $(KEYMAPS); do \
- $(INSTALL) -m 644 $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
+ $(INSTALL_DATA) $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
done
endif
for d in $(TARGET_DIRS); do \
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/hw/ide.c
--- a/tools/ioemu/hw/ide.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/hw/ide.c Thu Mar 08 14:39:52 2007 -0600
@@ -2602,6 +2602,120 @@ void pci_cmd646_ide_init(PCIBus *bus, Bl
#endif /* DMA_MULTI_THREAD */
}
+static void pci_ide_save(QEMUFile* f, void *opaque)
+{
+ PCIIDEState *d = opaque;
+ int i;
+
+ for(i = 0; i < 2; i++) {
+ BMDMAState *bm = &d->bmdma[i];
+ qemu_put_8s(f, &bm->cmd);
+ qemu_put_8s(f, &bm->status);
+ qemu_put_be32s(f, &bm->addr);
+ /* XXX: if a transfer is pending, we do not save it yet */
+ }
+
+ /* per IDE interface data */
+ for(i = 0; i < 2; i++) {
+ IDEState *s = &d->ide_if[i * 2];
+ uint8_t drive1_selected;
+ qemu_put_8s(f, &s->cmd);
+ drive1_selected = (s->cur_drive != s);
+ qemu_put_8s(f, &drive1_selected);
+ }
+
+ /* per IDE drive data */
+ for(i = 0; i < 4; i++) {
+ IDEState *s = &d->ide_if[i];
+ qemu_put_be32s(f, &s->mult_sectors);
+ qemu_put_be32s(f, &s->identify_set);
+ if (s->identify_set) {
+ qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512);
+ }
+ qemu_put_8s(f, &s->write_cache);
+ qemu_put_8s(f, &s->feature);
+ qemu_put_8s(f, &s->error);
+ qemu_put_be32s(f, &s->nsector);
+ qemu_put_8s(f, &s->sector);
+ qemu_put_8s(f, &s->lcyl);
+ qemu_put_8s(f, &s->hcyl);
+ qemu_put_8s(f, &s->hob_feature);
+ qemu_put_8s(f, &s->hob_nsector);
+ qemu_put_8s(f, &s->hob_sector);
+ qemu_put_8s(f, &s->hob_lcyl);
+ qemu_put_8s(f, &s->hob_hcyl);
+ qemu_put_8s(f, &s->select);
+ qemu_put_8s(f, &s->status);
+ qemu_put_8s(f, &s->lba48);
+
+ qemu_put_8s(f, &s->sense_key);
+ qemu_put_8s(f, &s->asc);
+ /* XXX: if a transfer is pending, we do not save it yet */
+ }
+}
+
+static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
+{
+ PCIIDEState *d = opaque;
+ int ret, i;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ for(i = 0; i < 2; i++) {
+ BMDMAState *bm = &d->bmdma[i];
+ qemu_get_8s(f, &bm->cmd);
+ qemu_get_8s(f, &bm->status);
+ qemu_get_be32s(f, &bm->addr);
+ /* XXX: if a transfer is pending, we do not save it yet */
+ }
+
+ /* per IDE interface data */
+ for(i = 0; i < 2; i++) {
+ IDEState *s = &d->ide_if[i * 2];
+ uint8_t drive1_selected;
+ qemu_get_8s(f, &s->cmd);
+ qemu_get_8s(f, &drive1_selected);
+ s->cur_drive = &d->ide_if[i * 2 + (drive1_selected != 0)];
+ }
+
+ /* per IDE drive data */
+ for(i = 0; i < 4; i++) {
+ IDEState *s = &d->ide_if[i];
+ qemu_get_be32s(f, &s->mult_sectors);
+ qemu_get_be32s(f, &s->identify_set);
+ if (s->identify_set) {
+ qemu_get_buffer(f, (uint8_t *)s->identify_data, 512);
+ }
+ qemu_get_8s(f, &s->write_cache);
+ qemu_get_8s(f, &s->feature);
+ qemu_get_8s(f, &s->error);
+ qemu_get_be32s(f, &s->nsector);
+ qemu_get_8s(f, &s->sector);
+ qemu_get_8s(f, &s->lcyl);
+ qemu_get_8s(f, &s->hcyl);
+ qemu_get_8s(f, &s->hob_feature);
+ qemu_get_8s(f, &s->hob_nsector);
+ qemu_get_8s(f, &s->hob_sector);
+ qemu_get_8s(f, &s->hob_lcyl);
+ qemu_get_8s(f, &s->hob_hcyl);
+ qemu_get_8s(f, &s->select);
+ qemu_get_8s(f, &s->status);
+ qemu_get_8s(f, &s->lba48);
+
+ qemu_get_8s(f, &s->sense_key);
+ qemu_get_8s(f, &s->asc);
+ /* XXX: if a transfer is pending, we do not save it yet */
+ if (s->status & (DRQ_STAT|BUSY_STAT)) {
+ /* Tell the guest that its transfer has gone away */
+ ide_abort_command(s);
+ ide_set_irq(s);
+ }
+ }
+ return 0;
+}
+
+
/* hd_table must contain 4 block drivers */
/* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn)
@@ -2643,6 +2757,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
buffered_pio_init();
register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d);
+ register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
#ifdef DMA_MULTI_THREAD
dma_create_thread();
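pci_ide_save() and pci_ide_load() follow ioemu's savevm contract: the load callback must consume exactly the byte stream the save callback produced, in the same order, and reject version numbers it does not understand. A minimal hedged sketch for a hypothetical one-register device (MyDev and the mydev_* names are illustrative, not from the tree):

    /* Assumes ioemu's vl.h for QEMUFile, qemu_{put,get}_8s and register_savevm. */
    #include <errno.h>
    #include "vl.h"

    typedef struct { uint8_t reg; } MyDev;

    static void mydev_save(QEMUFile *f, void *opaque)
    {
        MyDev *d = opaque;
        qemu_put_8s(f, &d->reg);
    }

    static int mydev_load(QEMUFile *f, void *opaque, int version_id)
    {
        MyDev *d = opaque;
        if (version_id != 1)
            return -EINVAL;          /* refuse stream formats we don't know */
        qemu_get_8s(f, &d->reg);     /* exact mirror of mydev_save */
        return 0;
    }

    static void mydev_register(MyDev *d)
    {
        register_savevm("mydev", 0, 1, mydev_save, mydev_load, d);
    }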
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/target-i386-dm/qemu-ifup
--- a/tools/ioemu/target-i386-dm/qemu-ifup Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/target-i386-dm/qemu-ifup Thu Mar 08 14:39:52 2007 -0600
@@ -3,8 +3,7 @@
#. /etc/rc.d/init.d/functions
#ulimit -c unlimited
-echo -c 'config qemu network with xen bridge for '
-echo $*
+echo 'config qemu network with xen bridge for ' $*
ifconfig $1 0.0.0.0 up
brctl addif $2 $1
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/vl.c Thu Mar 08 14:39:52 2007 -0600
@@ -3250,6 +3250,14 @@ static int net_tap_init(VLANState *vlan,
pid = fork();
if (pid >= 0) {
if (pid == 0) {
+ int open_max = sysconf(_SC_OPEN_MAX), i;
+ for (i = 0; i < open_max; i++)
+ if (i != STDIN_FILENO &&
+ i != STDOUT_FILENO &&
+ i != STDERR_FILENO &&
+ i != fd)
+ close(i);
+
parg = args;
*parg++ = (char *)setup_script;
*parg++ = ifname;
diff -r 8f0b5295bb1b -r dcec453681bc tools/ioemu/vnc.c
--- a/tools/ioemu/vnc.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/ioemu/vnc.c Thu Mar 08 14:39:52 2007 -0600
@@ -1445,7 +1445,7 @@ int vnc_display_init(DisplayState *ds, i
int vnc_start_viewer(int port)
{
- int pid;
+ int pid, i, open_max;
char s[16];
sprintf(s, ":%d", port);
@@ -1456,6 +1456,12 @@ int vnc_start_viewer(int port)
exit(1);
case 0: /* child */
+ open_max = sysconf(_SC_OPEN_MAX);
+ for (i = 0; i < open_max; i++)
+ if (i != STDIN_FILENO &&
+ i != STDOUT_FILENO &&
+ i != STDERR_FILENO)
+ close(i);
execlp("vncviewer", "vncviewer", s, NULL);
fprintf(stderr, "vncviewer execlp failed\n");
exit(1);
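This hunk and the net_tap_init() one above apply the same fork/exec hygiene: the child closes every descriptor it inherited, except the standard streams and any fd the helper genuinely needs, before exec'ing the external program, so vncviewer or the ifup script cannot pin qemu's tap device, monitor, or disk images open. The pattern as a standalone hedged sketch:

    #include <unistd.h>

    /* Call between fork() and exec*(); pass keep_fd = -1 to keep only the
     * standard streams. */
    static void close_inherited_fds(int keep_fd)
    {
        int i, open_max = sysconf(_SC_OPEN_MAX);
        for (i = 0; i < open_max; i++)
            if (i != STDIN_FILENO && i != STDOUT_FILENO &&
                i != STDERR_FILENO && i != keep_fd)
                close(i);
    }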
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core.c Thu Mar 08 14:39:52 2007 -0600
@@ -153,7 +153,7 @@ struct xc_core_section_headers {
uint16_t num;
uint16_t num_max;
- Elf_Shdr *shdrs;
+ Elf64_Shdr *shdrs;
};
#define SHDR_INIT 16
#define SHDR_INC 4
@@ -184,14 +184,14 @@ xc_core_shdr_free(struct xc_core_section
free(sheaders);
}
-Elf_Shdr*
+Elf64_Shdr*
xc_core_shdr_get(struct xc_core_section_headers *sheaders)
{
- Elf_Shdr *shdr;
+ Elf64_Shdr *shdr;
if ( sheaders->num == sheaders->num_max )
{
- Elf_Shdr *shdrs;
+ Elf64_Shdr *shdrs;
if ( sheaders->num_max + SHDR_INC < sheaders->num_max )
{
errno = E2BIG;
@@ -212,7 +212,7 @@ xc_core_shdr_get(struct xc_core_section_
}
int
-xc_core_shdr_set(Elf_Shdr *shdr,
+xc_core_shdr_set(Elf64_Shdr *shdr,
struct xc_core_strtab *strtab,
const char *name, uint32_t type,
uint64_t offset, uint64_t size,
@@ -317,15 +317,15 @@ xc_domain_dumpcore_via_callback(int xc_h
uint64_t *pfn_array = NULL;
- Elf_Ehdr ehdr;
- unsigned long filesz;
- unsigned long offset;
- unsigned long fixup;
+ Elf64_Ehdr ehdr;
+ uint64_t filesz;
+ uint64_t offset;
+ uint64_t fixup;
struct xc_core_strtab *strtab = NULL;
uint16_t strtab_idx;
struct xc_core_section_headers *sheaders = NULL;
- Elf_Shdr *shdr;
+ Elf64_Shdr *shdr;
/* elf notes */
struct elfnote elfnote;
@@ -460,7 +460,7 @@ xc_domain_dumpcore_via_callback(int xc_h
ehdr.e_ident[EI_MAG1] = ELFMAG1;
ehdr.e_ident[EI_MAG2] = ELFMAG2;
ehdr.e_ident[EI_MAG3] = ELFMAG3;
- ehdr.e_ident[EI_CLASS] = ELFCLASS;
+ ehdr.e_ident[EI_CLASS] = ELFCLASS64;
ehdr.e_ident[EI_DATA] = ELF_ARCH_DATA;
ehdr.e_ident[EI_VERSION] = EV_CURRENT;
ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
@@ -474,9 +474,9 @@ xc_domain_dumpcore_via_callback(int xc_h
ehdr.e_shoff = sizeof(ehdr);
ehdr.e_flags = ELF_CORE_EFLAGS;
ehdr.e_ehsize = sizeof(ehdr);
- ehdr.e_phentsize = sizeof(Elf_Phdr);
+ ehdr.e_phentsize = sizeof(Elf64_Phdr);
ehdr.e_phnum = 0;
- ehdr.e_shentsize = sizeof(Elf_Shdr);
+ ehdr.e_shentsize = sizeof(Elf64_Shdr);
/* ehdr.e_shnum and ehdr.e_shstrndx aren't known here yet. fill it later*/
/* create section header */
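Pinning the dump format to Elf64_* types means every architecture emits the same ELFCLASS64 container, so a reader never has to guess the word size from the writer. A standalone hedged sketch of the fixed header setup (illustrative only; the real code above also fills in arch-specific fields):

    #include <elf.h>
    #include <string.h>

    static void init_core_ehdr(Elf64_Ehdr *ehdr)
    {
        memset(ehdr, 0, sizeof(*ehdr));
        ehdr->e_ident[EI_MAG0]    = ELFMAG0;
        ehdr->e_ident[EI_MAG1]    = ELFMAG1;
        ehdr->e_ident[EI_MAG2]    = ELFMAG2;
        ehdr->e_ident[EI_MAG3]    = ELFMAG3;
        ehdr->e_ident[EI_CLASS]   = ELFCLASS64;  /* fixed, whatever the arch */
        ehdr->e_ident[EI_VERSION] = EV_CURRENT;
        ehdr->e_type      = ET_CORE;
        ehdr->e_ehsize    = sizeof(*ehdr);
        ehdr->e_phentsize = sizeof(Elf64_Phdr);
        ehdr->e_shentsize = sizeof(Elf64_Shdr);
    }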
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core.h
--- a/tools/libxc/xc_core.h Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core.h Thu Mar 08 14:39:52 2007 -0600
@@ -116,10 +116,10 @@ struct xc_core_strtab;
struct xc_core_strtab;
struct xc_core_section_headers;
-Elf_Shdr*
+Elf64_Shdr*
xc_core_shdr_get(struct xc_core_section_headers *sheaders);
int
-xc_core_shdr_set(Elf_Shdr *shdr,
+xc_core_shdr_set(Elf64_Shdr *shdr,
struct xc_core_strtab *strtab,
const char *name, uint32_t type,
uint64_t offset, uint64_t size,
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_ia64.c
--- a/tools/libxc/xc_core_ia64.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_ia64.c Thu Mar 08 14:39:52 2007 -0600
@@ -266,10 +266,10 @@ xc_core_arch_context_get_shdr(struct xc_
xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt,
struct xc_core_section_headers *sheaders,
struct xc_core_strtab *strtab,
- unsigned long *filesz, unsigned long offset)
+ uint64_t *filesz, uint64_t offset)
{
int sts = -1;
- Elf_Shdr *shdr;
+ Elf64_Shdr *shdr;
/* mmapped priv regs */
shdr = xc_core_shdr_get(sheaders);
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_ia64.h
--- a/tools/libxc/xc_core_ia64.h Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_ia64.h Thu Mar 08 14:39:52 2007 -0600
@@ -42,7 +42,7 @@ xc_core_arch_context_get_shdr(struct xc_
xc_core_arch_context_get_shdr(struct xc_core_arch_context* arch_ctxt,
struct xc_core_section_headers *sheaders,
struct xc_core_strtab *strtab,
- unsigned long *filesz, unsigned long offset);
+ uint64_t *filesz, uint64_t offset);
int
xc_core_arch_context_dump(struct xc_core_arch_context* arch_ctxt,
void* args, dumpcore_rtn_t dump_rtn);
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_core_x86.h
--- a/tools/libxc/xc_core_x86.h Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_core_x86.h Thu Mar 08 14:39:52 2007 -0600
@@ -45,7 +45,7 @@ xc_core_arch_context_get_shdr(struct xc_
xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt,
struct xc_core_section_headers *sheaders,
struct xc_core_strtab *strtab,
- unsigned long *filesz, unsigned long offset)
+ uint64_t *filesz, uint64_t offset)
{
*filesz = 0;
return 0;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_dom_core.c
--- a/tools/libxc/xc_dom_core.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_dom_core.c Thu Mar 08 14:39:52 2007 -0600
@@ -721,9 +721,6 @@ int xc_dom_build_image(struct xc_dom_ima
}
page_size = XC_DOM_PAGE_SIZE(dom);
- /* 4MB align virtual base address */
- dom->parms.virt_base &= ~(((uint64_t)1<<22)-1);
-
/* load kernel */
if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
dom->kernel_seg.vstart,
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_linux_restore.c Thu Mar 08 14:39:52 2007 -0600
@@ -19,7 +19,7 @@ static unsigned long max_mfn;
/* virtual starting address of the hypervisor */
static unsigned long hvirt_start;
-/* #levels of page tables used by the currrent guest */
+/* #levels of page tables used by the current guest */
static unsigned int pt_levels;
/* total number of pages used by the current guest */
@@ -857,6 +857,28 @@ int xc_linux_restore(int xc_handle, int
ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
+ /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+ if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+ {
+ pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
+
+ if (pfn >= max_pfn) {
+ ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
+ pfn, max_pfn, pfn_type[pfn]);
+ goto out;
+ }
+
+ if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+ ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
+ ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+ pfn, max_pfn, pfn_type[pfn],
+ (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
+ goto out;
+ }
+
+ ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
+ }
+
domctl.cmd = XEN_DOMCTL_setvcpucontext;
domctl.domain = (domid_t)dom;
domctl.u.vcpucontext.vcpu = i;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_linux_save.c Thu Mar 08 14:39:52 2007 -0600
@@ -34,7 +34,7 @@ static unsigned long max_mfn;
/* virtual starting address of the hypervisor */
static unsigned long hvirt_start;
-/* #levels of page tables used by the currrent guest */
+/* #levels of page tables used by the current guest */
static unsigned int pt_levels;
/* total number of pages used by the current guest */
@@ -491,7 +491,7 @@ static int canonicalize_pagetable(unsign
** reserved hypervisor mappings. This depends on the current
** page table type as well as the number of paging levels.
*/
- xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
+ xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
if (pt_levels == 2 && type == XEN_DOMCTL_PFINFO_L2TAB)
xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
@@ -1279,6 +1279,18 @@ int xc_linux_save(int xc_handle, int io_
ctxt.ctrlreg[3] =
xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
+ /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
+ if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
+ {
+ if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) {
+ ERROR("PT base is not in range of pseudophys map");
+ goto out;
+ }
+ /* Least-significant bit means 'valid PFN'. */
+ ctxt.ctrlreg[1] = 1 |
+ xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
+ }
+
if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
ERROR("Error when writing to state file (1) (errno %d)", errno);
goto out;
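On 64-bit x86 a PV guest keeps separate kernel and user pagetables, and the otherwise-unused ctrlreg[1] slot carries the user one across save/restore; bit 0 flags the value as a canonicalised PFN rather than a raw machine frame. A hedged, self-contained sketch of the convention (the real converters are xen_pfn_to_cr3()/xen_cr3_to_pfn(); the frame<<12 layout is assumed here for illustration):

    #include <stdint.h>

    static inline uint64_t user_pt_pack(uint64_t pfn)
    {
        return 1 | (pfn << 12);      /* bit 0: 'valid canonical PFN' */
    }

    static inline uint64_t user_pt_unpack(uint64_t cr1)
    {
        return (cr1 & ~1ULL) >> 12;  /* strip the flag, recover the frame */
    }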
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxc/xc_ptrace_core.c Thu Mar 08 14:39:52 2007 -0600
@@ -192,7 +192,7 @@ struct elf_core
struct elf_core
{
int domfd;
- Elf_Ehdr ehdr;
+ Elf64_Ehdr ehdr;
char* shdr;
@@ -241,6 +241,8 @@ elf_core_init(struct elf_core* ecore, in
/* check elf header */
if (!IS_ELF(ecore->ehdr) || ecore->ehdr.e_type != ET_CORE)
+ goto out;
+ if (ecore->ehdr.e_ident[EI_CLASS] != ELFCLASS64)
goto out;
/* check elf header more: EI_DATA, EI_VERSION, e_machine... */
@@ -294,7 +296,7 @@ elf_core_search_note(struct elf_core* ec
}
static int
-elf_core_alloc_read_sec(struct elf_core* ecore, const Elf_Shdr* shdr,
+elf_core_alloc_read_sec(struct elf_core* ecore, const Elf64_Shdr* shdr,
char** buf)
{
int ret;
@@ -309,19 +311,19 @@ elf_core_alloc_read_sec(struct elf_core*
return ret;
}
-static Elf_Shdr*
+static Elf64_Shdr*
elf_core_shdr_by_index(struct elf_core* ecore, uint16_t index)
{
if (index >= ecore->ehdr.e_shnum)
return NULL;
- return (Elf_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
+ return (Elf64_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
}
static int
elf_core_alloc_read_sec_by_index(struct elf_core* ecore, uint16_t index,
char** buf, uint64_t* size)
{
- Elf_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
+ Elf64_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
if (shdr == NULL)
return -1;
if (size != NULL)
@@ -329,14 +331,14 @@ elf_core_alloc_read_sec_by_index(struct
return elf_core_alloc_read_sec(ecore, shdr, buf);
}
-static Elf_Shdr*
+static Elf64_Shdr*
elf_core_shdr_by_name(struct elf_core* ecore, const char* name)
{
const char* s;
for (s = ecore->shdr;
s < ecore->shdr + ecore->ehdr.e_shentsize * ecore->ehdr.e_shnum;
s += ecore->ehdr.e_shentsize) {
- Elf_Shdr* shdr = (Elf_Shdr*)s;
+ Elf64_Shdr* shdr = (Elf64_Shdr*)s;
if (strncmp(ecore->shstrtab + shdr->sh_name, name, strlen(name)) == 0)
return shdr;
@@ -348,7 +350,7 @@ static int
static int
elf_core_read_sec_by_name(struct elf_core* ecore, const char* name, char* buf)
{
- Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
+ Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
return pread_exact(ecore->domfd, buf, shdr->sh_size, shdr->sh_offset);
}
@@ -357,7 +359,7 @@ elf_core_alloc_read_sec_by_name(struct e
elf_core_alloc_read_sec_by_name(struct elf_core* ecore, const char* name,
char** buf, uint64_t* size)
{
- Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
+ Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
if (shdr == NULL)
return -1;
if (size != NULL)
@@ -508,8 +510,8 @@ xc_waitdomain_core_elf(
struct xen_dumpcore_elfnote_xen_version *xen_version;
struct xen_dumpcore_elfnote_format_version *format_version;
- Elf_Shdr* table_shdr;
- Elf_Shdr* pages_shdr;
+ Elf64_Shdr* table_shdr;
+ Elf64_Shdr* pages_shdr;
if (elf_core_init(&ecore, domfd) < 0)
goto out;
diff -r 8f0b5295bb1b -r dcec453681bc tools/libxen/Makefile
--- a/tools/libxen/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/libxen/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -57,8 +57,8 @@ test/test_hvm_bindings: test/test_hvm_bi
.PHONY: install
install: all
- $(INSTALL_DIR) -p $(DESTDIR)/usr/include/xen/api
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/api
+ $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_PROG) libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
ln -sf libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so.$(MAJOR)
ln -sf libxenapi.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/Makefile
--- a/tools/ptsname/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-
-XEN_ROOT = ../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-.PHONY: all
-all: build
-.PHONY: build
-build:
- CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
-
-.PHONY: install
-ifndef XEN_PYTHON_NATIVE_INSTALL
-install: all
- CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--home="$(DESTDIR)/usr" --prefix=""
-else
-install: all
- CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--root="$(DESTDIR)"
-endif
-
-.PHONY: clean
-clean:
- rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/ptsname.c
--- a/tools/ptsname/ptsname.c Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-/******************************************************************************
- * ptsname.c
- *
- * A python extension to expose the POSIX ptsname() function.
- *
- * Copyright (C) 2007 XenSource Ltd
- */
-
-#include <Python.h>
-#include <stdlib.h>
-
-/* Needed for Python versions earlier than 2.3. */
-#ifndef PyMODINIT_FUNC
-#define PyMODINIT_FUNC DL_EXPORT(void)
-#endif
-
-static PyObject *do_ptsname(PyObject *self, PyObject *args)
-{
- int fd;
- char *path;
-
- if (!PyArg_ParseTuple(args, "i", &fd))
- return NULL;
-
- path = ptsname(fd);
-
- if (!path)
- {
- PyErr_SetFromErrno(PyExc_IOError);
- return NULL;
- }
-
- return PyString_FromString(path);
-}
-
-static PyMethodDef ptsname_methods[] = {
- { "ptsname", do_ptsname, METH_VARARGS },
- { NULL }
-};
-
-PyMODINIT_FUNC initptsname(void)
-{
- Py_InitModule("ptsname", ptsname_methods);
-}
diff -r 8f0b5295bb1b -r dcec453681bc tools/ptsname/setup.py
--- a/tools/ptsname/setup.py Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-from distutils.core import setup, Extension
-
-extra_compile_args = [ "-fno-strict-aliasing", "-Werror" ]
-
-setup(name = 'ptsname',
- version = '1.0',
- description = 'POSIX ptsname() function',
- author = 'Tim Deegan',
- author_email = 'Tim.Deegan@xxxxxxxxxxxxx',
- license = 'GPL',
- ext_modules = [ Extension("ptsname", [ "ptsname.c" ]) ])
diff -r 8f0b5295bb1b -r dcec453681bc tools/pygrub/Makefile
--- a/tools/pygrub/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/pygrub/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -10,13 +10,14 @@ build:
.PHONY: install
ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()")
install: all
- CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--home="$(DESTDIR)/usr" --prefix=""
- $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
+ CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--home="$(DESTDIR)/usr" --prefix="" --install-lib="$(DESTDIR)$(LIBPATH)/python"
+ $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
else
install: all
CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--root="$(DESTDIR)"
- $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
+ $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
endif
.PHONY: clean
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/Makefile
--- a/tools/python/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -18,8 +18,9 @@ build:
.PHONY: install
ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()")
install: install-messages
- CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--home="$(DESTDIR)/usr" --prefix="" --force
+ CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--home="$(DESTDIR)/usr" --prefix="" --force
--install-lib="$(DESTDIR)$(LIBPATH)/python"
else
install: install-messages
CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install
--root="$(DESTDIR)" --force
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/ptsname/ptsname.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/ptsname/ptsname.c Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * ptsname.c
+ *
+ * A python extension to expose the POSIX ptsname() function.
+ *
+ * Copyright (C) 2007 XenSource Ltd
+ */
+
+#include <Python.h>
+#include <stdlib.h>
+
+/* Needed for Python versions earlier than 2.3. */
+#ifndef PyMODINIT_FUNC
+#define PyMODINIT_FUNC DL_EXPORT(void)
+#endif
+
+static PyObject *do_ptsname(PyObject *self, PyObject *args)
+{
+ int fd;
+ char *path;
+
+ if (!PyArg_ParseTuple(args, "i", &fd))
+ return NULL;
+
+ path = ptsname(fd);
+
+ if (!path)
+ {
+ PyErr_SetFromErrno(PyExc_IOError);
+ return NULL;
+ }
+
+ return PyString_FromString(path);
+}
+
+static PyMethodDef ptsname_methods[] = {
+ { "ptsname", do_ptsname, METH_VARARGS },
+ { NULL }
+};
+
+PyMODINIT_FUNC initptsname(void)
+{
+ Py_InitModule("ptsname", ptsname_methods);
+}
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/setup.py
--- a/tools/python/setup.py Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/setup.py Thu Mar 08 14:39:52 2007 -0600
@@ -44,7 +44,14 @@ acm = Extension("acm",
libraries = libraries,
sources = [ "xen/lowlevel/acm/acm.c" ])
-modules = [ xc, xs, acm ]
+ptsname = Extension("ptsname",
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs + [ "ptsname" ],
+ library_dirs = library_dirs,
+ libraries = libraries,
+ sources = [ "ptsname/ptsname.c" ])
+
+modules = [ xc, xs, acm, ptsname ]
if os.uname()[0] == 'SunOS':
modules.append(scf)
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendBootloader.py
--- a/tools/python/xen/xend/XendBootloader.py Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendBootloader.py Thu Mar 08 14:39:52 2007 -0600
@@ -21,7 +21,8 @@ from XendLogging import log
from XendLogging import log
from XendError import VmError
-import pty, ptsname, termios, fcntl
+import pty, termios, fcntl
+from xen.lowlevel import ptsname
def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
ramdisk = '', kernel_args = ''):
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Mar 08 14:39:52 2007 -0600
@@ -781,7 +781,6 @@ class XendDomainInfo:
'name': self.info['name_label'],
'console/limit': str(xoptions.get_console_limit() * 1024),
'memory/target': str(self.info['memory_static_min'] * 1024),
- 'control/platform-feature-multiprocessor-suspend': str(1)
}
def f(n, v):
@@ -795,6 +794,9 @@ class XendDomainInfo:
f('console/ring-ref', self.console_mfn)
f('store/port', self.store_port)
f('store/ring-ref', self.store_mfn)
+
+ if arch.type == "x86":
+ f('control/platform-feature-multiprocessor-suspend', True)
# elfnotes
for n, v in self.info.get_notes().iteritems():
@@ -1503,7 +1505,7 @@ class XendDomainInfo:
self.info['start_time'] = time.time()
self._stateSet(DOM_STATE_RUNNING)
- except RuntimeError, exn:
+ except (RuntimeError, VmError), exn:
log.exception("XendDomainInfo.initDomain: exception occurred")
self.image.cleanupBootloading()
raise VmError(str(exn))
@@ -2090,26 +2092,26 @@ class XendDomainInfo:
return self.info.get('tools_version', {})
def get_on_shutdown(self):
- after_shutdown = self.info.get('action_after_shutdown')
+ after_shutdown = self.info.get('actions_after_shutdown')
if not after_shutdown or after_shutdown not in XEN_API_ON_NORMAL_EXIT:
return XEN_API_ON_NORMAL_EXIT[-1]
return after_shutdown
def get_on_reboot(self):
- after_reboot = self.info.get('action_after_reboot')
+ after_reboot = self.info.get('actions_after_reboot')
if not after_reboot or after_reboot not in XEN_API_ON_NORMAL_EXIT:
return XEN_API_ON_NORMAL_EXIT[-1]
return after_reboot
def get_on_suspend(self):
# TODO: not supported
- after_suspend = self.info.get('action_after_suspend')
+ after_suspend = self.info.get('actions_after_suspend')
if not after_suspend or after_suspend not in XEN_API_ON_NORMAL_EXIT:
return XEN_API_ON_NORMAL_EXIT[-1]
return after_suspend
def get_on_crash(self):
- after_crash = self.info.get('action_after_crash')
+ after_crash = self.info.get('actions_after_crash')
if not after_crash or after_crash not in XEN_API_ON_CRASH_BEHAVIOUR:
return XEN_API_ON_CRASH_BEHAVIOUR[0]
return after_crash
diff -r 8f0b5295bb1b -r dcec453681bc tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/python/xen/xend/XendNode.py Thu Mar 08 14:39:52 2007 -0600
@@ -22,7 +22,7 @@ import xen.lowlevel.xc
from xen.util import Brctl
-from xen.xend import uuid
+from xen.xend import uuid, arch
from xen.xend.XendError import *
from xen.xend.XendOptions import instance as xendoptions
from xen.xend.XendQCoWStorageRepo import XendQCoWStorageRepo
@@ -97,17 +97,38 @@ class XendNode:
for u in self.cpus.keys():
log.error(self.cpus[u])
number = self.cpus[u]['number']
+ # We can run off the end of the cpuinfo list if domain0 does not
+ # have #vcpus == #pcpus. In that case we just replicate one that's
+ # in the hash table.
+ if not cpuinfo.has_key(number):
+ number = cpuinfo.keys()[0]
log.error(number)
log.error(cpuinfo)
- self.cpus[u].update(
- { 'host' : self.uuid,
- 'features' : cpu_features,
- 'speed' : int(float(cpuinfo[number]['cpu MHz'])),
- 'vendor' : cpuinfo[number]['vendor_id'],
- 'modelname': cpuinfo[number]['model name'],
- 'stepping' : cpuinfo[number]['stepping'],
- 'flags' : cpuinfo[number]['flags'],
- })
+ if arch.type == "x86":
+ self.cpus[u].update(
+ { 'host' : self.uuid,
+ 'features' : cpu_features,
+ 'speed' : int(float(cpuinfo[number]['cpu MHz'])),
+ 'vendor' : cpuinfo[number]['vendor_id'],
+ 'modelname': cpuinfo[number]['model name'],
+ 'stepping' : cpuinfo[number]['stepping'],
+ 'flags' : cpuinfo[number]['flags'],
+ })
+ elif arch.type == "ia64":
+ self.cpus[u].update(
+ { 'host' : self.uuid,
+ 'features' : cpu_features,
+ 'speed' : int(float(cpuinfo[number]['cpu MHz'])),
+ 'vendor' : cpuinfo[number]['vendor'],
+ 'modelname': cpuinfo[number]['family'],
+ 'stepping' : cpuinfo[number]['model'],
+ 'flags' : cpuinfo[number]['features'],
+ })
+ else:
+ self.cpus[u].update(
+ { 'host' : self.uuid,
+ 'features' : cpu_features,
+ })
self.pifs = {}
self.pif_metrics = {}
diff -r 8f0b5295bb1b -r dcec453681bc tools/security/Makefile
--- a/tools/security/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/security/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -54,26 +54,29 @@ all: build
all: build
.PHONY: install
+ifndef XEN_PYTHON_NATIVE_INSTALL
+install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()")
+endif
install: all $(ACM_CONFIG_FILE)
- $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
- $(INSTALL_PROG) -p $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
- $(INSTALL_PROG) -p $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_CONFIG_DIR)
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)
- $(INSTALL_DATA) -p policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example
+ $(INSTALL_DIR) $(DESTDIR)/usr/sbin
+ $(INSTALL_PROG) $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
+ $(INSTALL_PROG) $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_CONFIG_DIR)
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)
+ $(INSTALL_DATA) policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example
for i in $(ACM_EXAMPLES); do \
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
- $(INSTALL_DATA) -p policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
+ $(INSTALL_DATA) policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
done
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SCRIPT_DIR)
- $(INSTALL_PROG) -p $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
- $(INSTALL_DATA) -p $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
- $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_CGIDIR)
- $(INSTALL_PROG) -p $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_SCRIPT_DIR)
+ $(INSTALL_PROG) $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
+ $(INSTALL_DATA) $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
+ $(INSTALL_PROG) $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
ifndef XEN_PYTHON_NATIVE_INSTALL
- python python/setup.py install --home="$(DESTDIR)/usr"
+ python python/setup.py install --home="$(DESTDIR)/usr" --install-lib="$(DESTDIR)$(LIBPATH)/python"
else
python python/setup.py install --root="$(DESTDIR)"
endif
diff -r 8f0b5295bb1b -r dcec453681bc tools/vnet/libxutil/Makefile
--- a/tools/vnet/libxutil/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/vnet/libxutil/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -71,7 +71,7 @@ check-for-zlib:
.PHONY: install
install: build
- [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/$(LIBDIR)
ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR)
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenfb/Makefile
--- a/tools/xenfb/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenfb/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -1,12 +1,9 @@ XEN_ROOT=../..
XEN_ROOT=../..
include $(XEN_ROOT)/tools/Rules.mk
-CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
+CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
+CFLAGS += -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
LDFLAGS += -L$(XEN_LIBXC) -L$(XEN_XENSTORE)
-
-INSTALL = install
-INSTALL_PROG = $(INSTALL) -m0755
-INSTALL_DIR = $(INSTALL) -d -m0755
.PHONY: all
all: build
@@ -16,7 +13,7 @@ build:
$(MAKE) vncfb sdlfb
install: all
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)/xen/bin
+ $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
$(INSTALL_PROG) vncfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-vncfb
$(INSTALL_PROG) sdlfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-sdlfb
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenfb/xenfb.c
--- a/tools/xenfb/xenfb.c Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenfb/xenfb.c Thu Mar 08 14:39:52 2007 -0600
@@ -245,11 +245,10 @@ static int xenfb_wait_for_state(struct x
unsigned state, dummy;
char **vec;
+ awaited |= 1 << XenbusStateUnknown;
+
for (;;) {
state = xenfb_read_state(xsh, dir);
- if (state < 0)
- return -1;
-
if ((1 << state) & awaited)
return state;
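Two fixes in one: the old `state < 0` test could never fire (state is unsigned), and XenbusStateUnknown is now always part of the awaited set, so a backend node that vanishes (and therefore reads back as Unknown) ends the wait instead of spinning forever. The pattern as a hedged standalone sketch, assuming the usual xen/io/xenbus.h numbering where XenbusStateUnknown is 0:

    /* Each XenbusState is a small integer, so a set of states fits in one
     * word of bits. */
    static int wait_for_state(unsigned (*read_state)(void), unsigned awaited)
    {
        awaited |= 1u << 0;              /* XenbusStateUnknown: peer is gone */
        for (;;) {
            unsigned state = read_state();
            if ((1u << state) & awaited)
                return (int)state;
            /* otherwise block on the xenstore watch and re-read */
        }
    }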
diff -r 8f0b5295bb1b -r dcec453681bc tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/tools/xenstore/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -168,16 +168,16 @@ tarball: clean
.PHONY: install
install: all
- $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
- $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
- $(INSTALL_DIR) -p $(DESTDIR)/usr/bin
- $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
- $(INSTALL_DIR) -p $(DESTDIR)/usr/include
+ $(INSTALL_DIR) $(DESTDIR)/var/run/xenstored
+ $(INSTALL_DIR) $(DESTDIR)/var/lib/xenstored
+ $(INSTALL_DIR) $(DESTDIR)/usr/bin
+ $(INSTALL_DIR) $(DESTDIR)/usr/sbin
+ $(INSTALL_DIR) $(DESTDIR)/usr/include
$(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
$(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin
$(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin
$(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin
- $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
+ $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so.$(MAJOR)
ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so
diff -r 8f0b5295bb1b -r dcec453681bc xen/Rules.mk
--- a/xen/Rules.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/Rules.mk Thu Mar 08 14:39:52 2007 -0600
@@ -41,8 +41,8 @@ include $(BASEDIR)/arch/$(TARGET_ARCH)/R
include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
# Do not depend on auto-generated header files.
-HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS))
-HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS))
+AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
+HDRS := $(filter-out %/asm-offsets.h,$(AHDRS))
# Note that link order matters!
ALL_OBJS-y += $(BASEDIR)/common/built_in.o
@@ -110,12 +110,12 @@ _clean_%/: FORCE
%.o: %.c $(HDRS) Makefile
$(CC) $(CFLAGS) -c $< -o $@
-%.o: %.S $(HDRS) Makefile
+%.o: %.S $(AHDRS) Makefile
$(CC) $(AFLAGS) -c $< -o $@
%.i: %.c $(HDRS) Makefile
$(CPP) $(CFLAGS) $< -o $@
# -std=gnu{89,99} gets confused by # as an end-of-line comment marker
-%.s: %.S $(HDRS) Makefile
+%.s: %.S $(AHDRS) Makefile
$(CPP) $(AFLAGS) $< -o $@
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domain.c Thu Mar 08 14:39:52 2007 -0600
@@ -641,6 +641,31 @@ int arch_set_info_guest(
}
v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
+
+#ifdef __x86_64__
+ if ( c.nat->ctrlreg[1] )
+ {
+ cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
+
+ if ( !mfn_valid(cr3_pfn) ||
+ (paging_mode_refcounts(d)
+ ? !get_page(mfn_to_page(cr3_pfn), d)
+ : !get_page_and_type(mfn_to_page(cr3_pfn), d,
+ PGT_base_page_table)) )
+ {
+ cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_null();
+ if ( paging_mode_refcounts(d) )
+ put_page(mfn_to_page(cr3_pfn));
+ else
+ put_page_and_type(mfn_to_page(cr3_pfn));
+ destroy_gdt(v);
+ return -EINVAL;
+ }
+
+ v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
+ }
+#endif
}
#ifdef CONFIG_COMPAT
else
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domain_build.c Thu Mar 08 14:39:52 2007 -0600
@@ -374,9 +374,6 @@ int construct_dom0(struct domain *d,
if ( parms.f_required[0] /* Huh? -- kraxel */ )
panic("Domain 0 requires an unsupported hypervisor feature.\n");
- /* Align load address to 4MB boundary. */
- v_start = parms.virt_base & ~((1UL<<22)-1);
-
/*
* Why do we need this? The number of page-table frames depends on the
* size of the bootstrap address space. But the size of the address space
@@ -384,6 +381,7 @@ int construct_dom0(struct domain *d,
* read-only). We have a pair of simultaneous equations in two unknowns,
* which we solve by exhaustive search.
*/
+ v_start = parms.virt_base;
vkern_start = parms.virt_kstart;
vkern_end = parms.virt_kend;
vinitrd_start = round_pgup(vkern_end);
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/domctl.c Thu Mar 08 14:39:52 2007 -0600
@@ -470,8 +470,15 @@ void arch_get_info_guest(struct vcpu *v,
c(user_regs.eflags |= v->arch.iopl << 12);
if ( !IS_COMPAT(v->domain) )
+ {
c.nat->ctrlreg[3] = xen_pfn_to_cr3(
pagetable_get_pfn(v->arch.guest_table));
+#ifdef __x86_64__
+ if ( !pagetable_is_null(v->arch.guest_table_user) )
+ c.nat->ctrlreg[1] = xen_pfn_to_cr3(
+ pagetable_get_pfn(v->arch.guest_table_user));
+#endif
+ }
#ifdef CONFIG_COMPAT
else
{
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Thu Mar 08 14:39:52 2007 -0600
@@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain
spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
- rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
+ /* paging support will be determined inside paging.c */
+ rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
if ( rc != 0 )
return rc;
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/emulate.c Thu Mar 08 14:39:52 2007 -0600
@@ -24,8 +24,10 @@
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
+
extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
@@ -133,13 +135,15 @@ static inline unsigned long DECODE_GPR_V
#define sib operand [1]
-unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb,
- struct cpu_user_regs *regs, const u8 prefix, int inst_len,
- const u8 *operand, u8 *size)
+unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs,
+ const u8 prefix, int inst_len,
+ const u8 *operand, u8 *size)
{
unsigned long effective_addr = (unsigned long) -1;
u8 length, modrm_mod, modrm_rm;
u32 disp = 0;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
HVM_DBG_LOG(DBG_LEVEL_1, "get_effective_addr_modrm64(): prefix = %x, "
"length = %d, operand[0,1] = %x %x.\n", prefix, *size, operand [0],
@@ -198,7 +202,7 @@ unsigned long get_effective_addr_modrm64
#if __x86_64__
/* 64-bit mode */
- if (vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA))
+ if (vmcb->cs.attr.fields.l && svm_long_mode_enabled(v))
return vmcb->rip + inst_len + *size + disp;
#endif
return disp;
@@ -310,7 +314,7 @@ unsigned int decode_src_reg(u8 prefix, u
}
-unsigned long svm_rip2pointer(struct vmcb_struct *vmcb)
+unsigned long svm_rip2pointer(struct vcpu *v)
{
/*
* The following is subtle. Intuitively this code would be something like:
@@ -322,8 +326,9 @@ unsigned long svm_rip2pointer(struct vmc
* %cs is updated, but fortunately, base contains the valid base address
* no matter what kind of addressing is used.
*/
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned long p = vmcb->cs.base + vmcb->rip;
- if (!(vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA))
+ if (!(vmcb->cs.attr.fields.l && svm_long_mode_enabled(v)))
return (u32)p; /* mask to 32 bits */
/* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */
return p;
@@ -410,10 +415,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
* The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
* to enough bytes to satisfy the instruction including prefix bytes.
*/
-int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+int __get_instruction_length_from_list(struct vcpu *v,
enum instruction_index *list, unsigned int list_count,
u8 *guest_eip_buf, enum instruction_index *match)
{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned int inst_len = 0;
unsigned int i;
unsigned int j;
@@ -429,7 +435,7 @@ int __get_instruction_length_from_list(s
}
else
{
- inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), MAX_INST_LEN);
+ inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN);
buf = buffer;
}
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Thu Mar 08 14:39:52 2007 -0600
@@ -49,6 +49,7 @@
#include <public/sched.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/trace.h>
+#include <asm/hap.h>
#define SVM_EXTRA_DEBUG
@@ -75,6 +76,10 @@ static void *root_vmcb[NR_CPUS] __read_m
/* physical address of above for host VMSAVE/VMLOAD */
u64 root_vmcb_pa[NR_CPUS] __read_mostly;
+
+/* hardware assisted paging bits */
+extern int opt_hap_enabled;
+extern int hap_capable_system;
static inline void svm_inject_exception(struct vcpu *v, int trap,
int ev, int error_code)
@@ -148,31 +153,6 @@ static void svm_store_cpu_guest_regs(
}
}
-static int svm_paging_enabled(struct vcpu *v)
-{
- unsigned long cr0;
-
- cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-
- return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
-}
-
-static int svm_pae_enabled(struct vcpu *v)
-{
- unsigned long cr4;
-
- if(!svm_paging_enabled(v))
- return 0;
-
- cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
-
- return (cr4 & X86_CR4_PAE);
-}
-
-static int svm_long_mode_enabled(struct vcpu *v)
-{
- return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
-}
static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
{
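The three open-coded predicates are deleted here; judging from the new include of asm/hvm/svm/svm.h in emulate.c and the call sites below (svm_long_mode_enabled(v), svm_lme_is_set(v), svm_cr4_pae_is_set(v)), they move there as shared helpers, presumably in roughly this shape (reconstructed from the call sites, not quoted from the patch):

    /* Presumed shape of the relocated helpers -- an assumption, not patch text. */
    static inline int svm_long_mode_enabled(struct vcpu *v)
    {
        return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
    }

    static inline int svm_lme_is_set(struct vcpu *v)
    {
        return test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
    }

    static inline int svm_cr4_pae_is_set(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_cr4 & X86_CR4_PAE;
    }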
@@ -183,8 +163,7 @@ static inline int long_mode_do_msr_read(
switch ((u32)regs->ecx)
{
case MSR_EFER:
- msr_content = vmcb->efer;
- msr_content &= ~EFER_SVME;
+ msr_content = v->arch.hvm_svm.cpu_shadow_efer;
break;
#ifdef __x86_64__
@@ -255,30 +234,54 @@ static inline int long_mode_do_msr_write
goto gp_fault;
}
+ /*
+ * update the VMCB's EFER with the intended value along with
+ * that crucial EFER.SVME bit =)
+ */
+ vmcb->efer = msr_content | EFER_SVME;
+
#ifdef __x86_64__
- /* LME: 0 -> 1 */
- if ( msr_content & EFER_LME &&
- !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+
+ /*
+ * Check for EFER.LME transitions from 0->1 or 1->0. Do the
+ * sanity checks and then make sure that both EFER.LME and
+ * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
+ * until the guest also sets CR0.PG, since even if the guest has
+ * paging "disabled", the vmcb's CR0 always has PG set.)
+ */
+ if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
{
+ /* EFER.LME transition from 0 to 1 */
+
if ( svm_paging_enabled(v) ||
- !test_bit(SVM_CPU_STATE_PAE_ENABLED,
- &v->arch.hvm_svm.cpu_state) )
+ !svm_cr4_pae_is_set(v) )
{
gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
"in paging mode or PAE bit is not set\n");
goto gp_fault;
}
- set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
- }
-
- /* We have already recorded that we want LME, so it will be set
- * next time CR0 gets updated. So we clear that bit and continue.
- */
- if ((msr_content ^ vmcb->efer) & EFER_LME)
- msr_content &= ~EFER_LME;
- /* No update for LME/LMA since it have no effect */
-#endif
- vmcb->efer = msr_content | EFER_SVME;
+
+ vmcb->efer &= ~(EFER_LME | EFER_LMA);
+ }
+ else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
+ {
+ /* EFER.LME transition from 1 to 0 */
+
+ if ( svm_paging_enabled(v) )
+ {
+ gdprintk(XENLOG_WARNING,
+ "Trying to clear EFER.LME while paging enabled\n");
+ goto gp_fault;
+ }
+
+ vmcb->efer &= ~(EFER_LME | EFER_LMA);
+ }
+
+#endif /* __x86_64__ */
+
+ /* update the guest EFER's shadow with the intended value */
+ v->arch.hvm_svm.cpu_shadow_efer = msr_content;
+
break;
#ifdef __x86_64__
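
A minimal sketch of the EFER.LME transition checks above, as a pure predicate;
paging_enabled and cr4_pae stand in for svm_paging_enabled() and
svm_cr4_pae_is_set(), and EFER_LME is bit 8 of EFER.

    #include <assert.h>
    #include <stdint.h>

    #define EFER_LME (1u << 8)   /* long mode enable */

    /* Returns 0 if the EFER write is allowed, -1 if it should #GP. */
    static int check_efer_lme(uint32_t old_efer, uint32_t new_efer,
                              int paging_enabled, int cr4_pae)
    {
        int was_lme = !!(old_efer & EFER_LME);
        int now_lme = !!(new_efer & EFER_LME);

        if (!was_lme && now_lme && (paging_enabled || !cr4_pae))
            return -1;   /* 0->1: paging must be off and CR4.PAE set */
        if (was_lme && !now_lme && paging_enabled)
            return -1;   /* 1->0: not while paging is enabled */
        return 0;
    }

    int main(void)
    {
        assert(check_efer_lme(0, EFER_LME, 0, 1) == 0);   /* legal 0->1 */
        assert(check_efer_lme(0, EFER_LME, 1, 1) == -1);  /* paging still on */
        assert(check_efer_lme(EFER_LME, 0, 1, 1) == -1);  /* clear w/ paging */
        return 0;
    }
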
@@ -468,22 +471,25 @@ int svm_vmcb_restore(struct vcpu *v, str
c->cr4);
#endif
- if (!svm_paging_enabled(v)) {
+ if ( !svm_paging_enabled(v) )
+ {
printk("%s: paging not enabled.", __func__);
goto skip_cr3;
}
- if (c->cr3 == v->arch.hvm_svm.cpu_cr3) {
+ if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 )
+ {
/*
* This is simple TLB flush, implying the guest has
* removed some translation or changed page attributes.
* We simply invalidate the shadow.
*/
mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+ if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
goto bad_cr3;
- }
- } else {
+ }
+ else
+ {
/*
* If different, make a shadow. Check if the PDBR is valid
* first.
@@ -491,9 +497,9 @@ int svm_vmcb_restore(struct vcpu *v, str
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
/* current!=vcpu as not called by arch_vmx_do_launch */
mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+ if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
goto bad_cr3;
- }
+
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
v->arch.guest_table = pagetable_from_pfn(mfn);
if (old_base_mfn)
@@ -631,7 +637,7 @@ static int svm_guest_x86_mode(struct vcp
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
+ if ( svm_long_mode_enabled(v) && vmcb->cs.attr.fields.l )
return 8;
if ( svm_realmode(v) )
@@ -681,7 +687,7 @@ static unsigned long svm_get_segment_bas
int long_mode = 0;
#ifdef __x86_64__
- long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA);
+ long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
#endif
switch ( seg )
{
@@ -905,6 +911,10 @@ static void arch_svm_do_launch(struct vc
{
svm_do_launch(v);
+ if ( paging_mode_hap(v->domain) ) {
+ v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+ }
+
if ( v->vcpu_id != 0 )
{
cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
@@ -1011,6 +1021,21 @@ static struct hvm_function_table svm_fun
.event_injection_faulted = svm_event_injection_faulted
};
+void svm_npt_detect(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /* check CPUID for nested paging support */
+ cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
+ if ( edx & 0x01 ) { /* nested paging */
+ hap_capable_system = 1;
+ }
+ else if ( opt_hap_enabled ) {
+ printk(" nested paging is not supported by this CPU.\n");
+ hap_capable_system = 0; /* no nested paging, we disable flag. */
+ }
+}
+
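
The CPUID probe in svm_npt_detect() can be reproduced from user space; a
stand-alone sketch assuming GCC's <cpuid.h> (leaf 0x8000000A, EDX bit 0 is
AMD's nested-paging feature flag):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx)) {
            puts("CPUID leaf 0x8000000A not available");
            return 1;
        }
        printf("nested paging (NPT): %s\n", (edx & 1) ? "yes" : "no");
        return 0;
    }
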
int start_svm(void)
{
u32 eax, ecx, edx;
@@ -1041,6 +1066,8 @@ int start_svm(void)
wrmsr(MSR_EFER, eax, edx);
printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
+ svm_npt_detect();
+
/* Initialize the HSA for this core */
phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
phys_hsa_lo = (u32) phys_hsa;
@@ -1077,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v)
}
}
+static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+ if (mmio_space(gpa)) {
+ handle_mmio(gpa);
+ return 1;
+ }
+
+ /* We should not reach here. Otherwise, the P2M table is not correct. */
+ return 0;
+}
+
+
static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
{
HVM_DBG_LOG(DBG_LEVEL_VMMU,
@@ -1114,7 +1153,7 @@ static void svm_do_general_protection_fa
printk("Huh? We got a GP Fault with an invalid IDTR!\n");
svm_dump_vmcb(__func__, vmcb);
svm_dump_regs(__func__, regs);
- svm_dump_inst(svm_rip2pointer(vmcb));
+ svm_dump_inst(svm_rip2pointer(v));
domain_crash(v->domain);
return;
}
@@ -1209,7 +1248,7 @@ static void svm_vmexit_do_cpuid(struct v
HVMTRACE_3D(CPUID, v, input,
((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
- inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
+ inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
ASSERT(inst_len > 0);
__update_guest_eip(vmcb, inst_len);
}
@@ -1312,15 +1351,16 @@ static void svm_dr_access(struct vcpu *v
}
-static void svm_get_prefix_info(
- struct vmcb_struct *vmcb,
- unsigned int dir, svm_segment_register_t **seg, unsigned int *asize)
-{
+static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
+ svm_segment_register_t **seg,
+ unsigned int *asize)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned char inst[MAX_INST_LEN];
int i;
memset(inst, 0, MAX_INST_LEN);
- if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst))
+ if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
!= MAX_INST_LEN)
{
gdprintk(XENLOG_ERR, "get guest instruction failed\n");
@@ -1400,7 +1440,7 @@ static inline int svm_get_io_address(
#ifdef __x86_64__
/* If we're in long mode, we shouldn't check the segment presence & limit */
- long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA;
+ long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
#endif
/* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
@@ -1419,7 +1459,7 @@ static inline int svm_get_io_address(
isize --;
if (isize > 1)
- svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
+ svm_get_prefix_info(v, info.fields.type, &seg, &asize);
if (info.fields.type == IOREQ_WRITE)
{
@@ -1702,6 +1742,52 @@ static void svm_io_instruction(struct vc
}
}
+static int npt_set_cr0(unsigned long value)
+{
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ ASSERT(vmcb);
+
+ /* ET is reserved and should always be 1. */
+ value |= X86_CR0_ET;
+
+ /* Check whether the guest is about to turn on long mode.
+ * If it is, set EFER.LME and EFER.LMA. Update the shadow EFER.LMA
+ * bit too, so svm_long_mode_enabled() will work.
+ */
+ if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
+ (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
+ vmcb->efer |= EFER_LMA | EFER_LME;
+ }
+
+ /* Whenever CR0.PG is cleared under long mode, LMA will be cleared
+ * immediately. We emulate this process for svm_long_mode_enabled().
+ */
+ if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
+ {
+ if ( svm_long_mode_enabled(v) )
+ {
+ v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
+ }
+ }
+
+ vmcb->cr0 = value | X86_CR0_WP;
+ v->arch.hvm_svm.cpu_shadow_cr0 = value;
+
+ /* TS cleared? Then initialise FPU now. */
+ if ( !(value & X86_CR0_TS) ) {
+ setup_fpu(v);
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+ }
+
+ paging_update_paging_modes(v);
+
+ return 1;
+}
+
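
A sketch of the long-mode activation rule npt_set_cr0() implements; the lme
and cr4_pae parameters are stand-ins for svm_lme_is_set() and the vmcb's
CR4.PAE bit:

    #include <assert.h>

    #define X86_CR0_PE 0x00000001ul
    #define X86_CR0_PG 0x80000000ul

    /* 1 iff EFER.LMA should be set after this CR0 write. */
    static int lma_after_cr0_write(unsigned long cr0, int lme, int cr4_pae)
    {
        return !!(cr0 & X86_CR0_PG) && lme && cr4_pae && !!(cr0 & X86_CR0_PE);
    }

    int main(void)
    {
        assert(lma_after_cr0_write(X86_CR0_PE | X86_CR0_PG, 1, 1) == 1);
        assert(lma_after_cr0_write(X86_CR0_PE, 1, 1) == 0);  /* PG clear */
        return 0;
    }
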
static int svm_set_cr0(unsigned long value)
{
struct vcpu *v = current;
@@ -1727,7 +1813,8 @@ static int svm_set_cr0(unsigned long val
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
- if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled)
+ if ( ((value & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
+ && !paging_enabled )
{
/* The guest CR3 must be pointing to the guest physical. */
mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
@@ -1740,18 +1827,16 @@ static int svm_set_cr0(unsigned long val
}
#if defined(__x86_64__)
- if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)
- && !test_bit(SVM_CPU_STATE_PAE_ENABLED,
- &v->arch.hvm_svm.cpu_state))
+ if ( svm_lme_is_set(v) && !svm_cr4_pae_is_set(v) )
{
HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
svm_inject_exception(v, TRAP_gp_fault, 1, 0);
}
- if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+ if ( svm_lme_is_set(v) )
{
HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
- set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+ v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
vmcb->efer |= EFER_LMA | EFER_LME;
}
#endif /* __x86_64__ */
@@ -1790,14 +1875,93 @@ static int svm_set_cr0(unsigned long val
{
if ( svm_long_mode_enabled(v) )
{
- vmcb->efer &= ~EFER_LMA;
- clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
+ vmcb->efer &= ~(EFER_LME | EFER_LMA);
+ v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
}
/* we should take care of this kind of situation */
paging_update_paging_modes(v);
}
return 1;
+}
+
+//
+// nested paging functions
+//
+
+static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+ unsigned long value;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ ASSERT(vmcb);
+
+ value = get_reg(gpreg, regs, vmcb);
+
+ switch (cr) {
+ case 0:
+ return npt_set_cr0(value);
+
+ case 3:
+ vmcb->cr3 = value;
+ v->arch.hvm_svm.cpu_cr3 = value;
+ break;
+
+ case 4: /* CR4 */
+ vmcb->cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ paging_update_paging_modes(v);
+ break;
+
+ case 8:
+ vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
+ vmcb->vintr.fields.tpr = value & 0x0F;
+ break;
+
+ default:
+ gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
+ domain_crash(v->domain);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+ unsigned long value = 0;
+ struct vcpu *v = current;
+ struct vmcb_struct *vmcb;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ vmcb = v->arch.hvm_svm.vmcb;
+ ASSERT(vmcb);
+
+ switch(cr) {
+ case 0:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
+ break;
+ case 2:
+ value = vmcb->cr2;
+ break;
+ case 3:
+ value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
+ break;
+ case 4:
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
+ break;
+ case 8:
+ value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
+ value = (value & 0xF0) >> 4;
+ break;
+ default:
+ domain_crash(v->domain);
+ return;
+ }
+
+ set_reg(gp, value, regs, vmcb);
}
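
The CR8 <-> APIC TPR mapping used by npt_mov_to_cr()/npt_mov_from_cr(), in
isolation (CR8 carries the priority class, i.e. TPR bits 7:4); a minimal
sketch:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t cr8_to_tpr(uint64_t cr8) { return (uint32_t)((cr8 & 0x0F) << 4); }
    static uint64_t tpr_to_cr8(uint32_t tpr) { return (tpr & 0xF0) >> 4; }

    int main(void)
    {
        assert(cr8_to_tpr(0x9) == 0x90);
        assert(tpr_to_cr8(0x90) == 0x9);
        return 0;
    }
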
/*
@@ -1848,12 +2012,6 @@ static void mov_from_cr(int cr, int gp,
set_reg(gp, value, regs, vmcb);
HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
-}
-
-
-static inline int svm_pgbit_test(struct vcpu *v)
-{
- return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
}
@@ -1933,7 +2091,6 @@ static int mov_to_cr(int gpreg, int cr,
old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
{
- set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
if ( svm_pgbit_test(v) )
{
/* The guest is a 32-bit PAE guest. */
@@ -1962,15 +2119,13 @@ static int mov_to_cr(int gpreg, int cr,
v->arch.hvm_svm.cpu_cr3, mfn);
#endif
}
- }
- else if (value & X86_CR4_PAE) {
- set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
- } else {
- if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
- &v->arch.hvm_svm.cpu_state)) {
+ }
+ else if ( !(value & X86_CR4_PAE) )
+ {
+ if ( svm_long_mode_enabled(v) )
+ {
svm_inject_exception(v, TRAP_gp_fault, 1, 0);
}
- clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
}
v->arch.hvm_svm.cpu_shadow_cr4 = value;
@@ -2024,7 +2179,7 @@ static int svm_cr_access(struct vcpu *v,
ASSERT(vmcb);
- inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
+ inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
/* get index to first actual instruction byte - as we will need to know
where the prefix lives later on */
@@ -2033,12 +2188,12 @@ static int svm_cr_access(struct vcpu *v,
if ( type == TYPE_MOV_TO_CR )
{
inst_len = __get_instruction_length_from_list(
- vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
+ v, list_a, ARR_SIZE(list_a), &buffer[index], &match);
}
else /* type == TYPE_MOV_FROM_CR */
{
inst_len = __get_instruction_length_from_list(
- vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
+ v, list_b, ARR_SIZE(list_b), &buffer[index], &match);
}
ASSERT(inst_len > 0);
@@ -2055,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v,
{
case INSTR_MOV2CR:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- result = mov_to_cr(gpreg, cr, regs);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_mov_to_cr(gpreg, cr, regs);
+ else
+ result = mov_to_cr(gpreg, cr, regs);
break;
case INSTR_MOVCR2:
gpreg = decode_src_reg(prefix, buffer[index+2]);
- mov_from_cr(cr, gpreg, regs);
+ if ( paging_mode_hap(v->domain) )
+ npt_mov_from_cr(cr, gpreg, regs);
+ else
+ mov_from_cr(cr, gpreg, regs);
break;
case INSTR_CLTS:
@@ -2073,7 +2234,7 @@ static int svm_cr_access(struct vcpu *v,
case INSTR_LMSW:
if (svm_dbg_on)
- svm_dump_inst(svm_rip2pointer(vmcb));
+ svm_dump_inst(svm_rip2pointer(v));
gpreg = decode_src_reg(prefix, buffer[index+2]);
value = get_reg(gpreg, regs, vmcb) & 0xF;
@@ -2087,12 +2248,15 @@ static int svm_cr_access(struct vcpu *v,
if (svm_dbg_on)
printk("CR0-LMSW CR0 - New value=%lx\n", value);
- result = svm_set_cr0(value);
+ if ( paging_mode_hap(v->domain) )
+ result = npt_set_cr0(value);
+ else
+ result = svm_set_cr0(value);
break;
case INSTR_SMSW:
if (svm_dbg_on)
- svm_dump_inst(svm_rip2pointer(vmcb));
+ svm_dump_inst(svm_rip2pointer(v));
value = v->arch.hvm_svm.cpu_shadow_cr0;
gpreg = decode_src_reg(prefix, buffer[index+2]);
set_reg(gpreg, value, regs, vmcb);
@@ -2168,7 +2332,7 @@ static inline void svm_do_msr_access(
HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
- inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
+ inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
}
else
{
@@ -2200,7 +2364,7 @@ static inline void svm_do_msr_access(
break;
}
- inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
+ inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
}
__update_guest_eip(vmcb, inst_len);
@@ -2223,8 +2387,9 @@ static inline void svm_vmexit_do_hlt(str
}
-static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
-{
+static void svm_vmexit_do_invd(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
int inst_len;
/* Invalidate the cache - we can't really do that safely - maybe we should
@@ -2237,7 +2402,7 @@ static void svm_vmexit_do_invd(struct vm
*/
printk("INVD instruction intercepted - ignored\n");
- inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
+ inst_len = __get_instruction_length(v, INSTR_INVD, NULL);
__update_guest_eip(vmcb, inst_len);
}
@@ -2289,7 +2454,7 @@ void svm_handle_invlpg(const short invlp
* Unknown how many bytes the invlpg instruction will take. Use the
* maximum instruction length here
*/
- if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
+ if (inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length)
{
gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
domain_crash(v->domain);
@@ -2298,7 +2463,7 @@ void svm_handle_invlpg(const short invlp
if (invlpga)
{
- inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
+ inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
ASSERT(inst_len > 0);
__update_guest_eip(vmcb, inst_len);
@@ -2312,7 +2477,7 @@ void svm_handle_invlpg(const short invlp
{
/* What about multiple prefix codes? */
prefix = (is_prefix(opcode[0])?opcode[0]:0);
- inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
+ inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
ASSERT(inst_len > 0);
inst_len--;
@@ -2323,7 +2488,7 @@ void svm_handle_invlpg(const short invlp
* displacement to get effective address and length in bytes. Assume
* the system in either 32- or 64-bit mode.
*/
- g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
+ g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
&opcode[inst_len], &length);
inst_len += length;
@@ -2369,7 +2534,11 @@ static int svm_do_vmmcall_reset_to_realm
vmcb->cr4 = SVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;
- clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ }
/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -2445,7 +2614,7 @@ static int svm_do_vmmcall(struct vcpu *v
ASSERT(vmcb);
ASSERT(regs);
- inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
+ inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
ASSERT(inst_len > 0);
HVMTRACE_1D(VMMCALL, v, regs->eax);
@@ -2855,7 +3024,7 @@ asmlinkage void svm_vmexit_handler(struc
svm_dump_vmcb(__func__, vmcb);
svm_dump_regs(__func__, regs);
- svm_dump_inst(svm_rip2pointer(vmcb));
+ svm_dump_inst(svm_rip2pointer(v));
}
#if defined(__i386__)
@@ -2957,7 +3126,7 @@ asmlinkage void svm_vmexit_handler(struc
/* Debug info to hopefully help debug WHY the guest double-faulted. */
svm_dump_vmcb(__func__, vmcb);
svm_dump_regs(__func__, regs);
- svm_dump_inst(svm_rip2pointer(vmcb));
+ svm_dump_inst(svm_rip2pointer(v));
svm_inject_exception(v, TRAP_double_fault, 1, 0);
break;
@@ -2967,7 +3136,7 @@ asmlinkage void svm_vmexit_handler(struc
break;
case VMEXIT_INVD:
- svm_vmexit_do_invd(vmcb);
+ svm_vmexit_do_invd(v);
break;
case VMEXIT_GDTR_WRITE:
@@ -3053,6 +3222,15 @@ asmlinkage void svm_vmexit_handler(struc
case VMEXIT_SHUTDOWN:
hvm_triple_fault();
break;
+
+ case VMEXIT_NPF:
+ {
+ regs->error_code = vmcb->exitinfo1;
+ if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) {
+ domain_crash(v->domain);
+ }
+ break;
+ }
default:
exit_and_crash:
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Mar 08 14:39:52 2007 -0600
@@ -200,6 +200,13 @@ static int construct_vmcb(struct vcpu *v
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+
+ if ( paging_mode_hap(v->domain) ) {
+ vmcb->cr0 = arch_svm->cpu_shadow_cr0;
+ vmcb->np_enable = 1; /* enable nested paging */
+ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
+ vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
+ }
return 0;
}
@@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str
printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n",
(unsigned long long) vmcb->kerngsbase,
(unsigned long long) vmcb->g_pat);
-
+ printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
+
/* print out all the selectors */
svm_dump_sel("CS", &vmcb->cs);
svm_dump_sel("DS", &vmcb->ds);
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm.c Thu Mar 08 14:39:52 2007 -0600
@@ -424,7 +424,10 @@ void invalidate_shadow_ldt(struct vcpu *
}
/* Dispose of the (now possibly invalid) mappings from the TLB. */
- queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
+ if ( v == current )
+ queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
+ else
+ flush_tlb_mask(v->domain->domain_dirty_cpumask);
}
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -1,4 +1,5 @@ subdir-y += shadow
subdir-y += shadow
+subdir-y += hap
obj-y += paging.o
obj-y += p2m.o
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,2 @@
+obj-y += hap.o
+obj-y += support.o
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/hap.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * arch/x86/mm/hap/hap.c
+ *
+ * hardware assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Parts of this code are Copyright (c) 2007 by XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shared.h>
+#include <asm/hap.h>
+#include <asm/paging.h>
+#include <asm/domain.h>
+
+#include "private.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+
+/************************************************/
+/* HAP SUPPORT FUNCTIONS */
+/************************************************/
+mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+{
+ struct page_info *sp = NULL;
+ void *p;
+
+ ASSERT(hap_locked_by_me(d));
+
+ sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+
+ /* Now safe to clear the page for reuse */
+ p = hap_map_domain_page(page_to_mfn(sp));
+ ASSERT(p != NULL);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return page_to_mfn(sp);
+}
+
+void hap_free(struct domain *d, mfn_t smfn)
+{
+ struct page_info *sp = mfn_to_page(smfn);
+
+ ASSERT(hap_locked_by_me(d));
+
+ d->arch.paging.hap.free_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+}
+
+static int hap_alloc_p2m_pages(struct domain *d)
+{
+ struct page_info *pg;
+
+ ASSERT(hap_locked_by_me(d));
+
+ pg = mfn_to_page(hap_alloc(d, 0));
+ d->arch.paging.hap.p2m_pages += 1;
+ d->arch.paging.hap.total_pages -= 1;
+
+ page_set_owner(pg, d);
+ pg->count_info = 1;
+ list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
+
+ return 1;
+}
+
+struct page_info * hap_alloc_p2m_page(struct domain *d)
+{
+ struct list_head *entry;
+ struct page_info *pg;
+ mfn_t mfn;
+ void *p;
+
+ hap_lock(d);
+
+ if ( list_empty(&d->arch.paging.hap.p2m_freelist) &&
+ !hap_alloc_p2m_pages(d) ) {
+ hap_unlock(d);
+ return NULL;
+ }
+ entry = d->arch.paging.hap.p2m_freelist.next;
+ list_del(entry);
+
+ hap_unlock(d);
+
+ pg = list_entry(entry, struct page_info, list);
+ mfn = page_to_mfn(pg);
+ p = hap_map_domain_page(mfn);
+ clear_page(p);
+ hap_unmap_domain_page(p);
+
+ return pg;
+}
+
+void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+{
+ ASSERT(page_get_owner(pg) == d);
+ /* Should have just the one ref we gave it in alloc_p2m_page() */
+ if ( (pg->count_info & PGC_count_mask) != 1 ) {
+ HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ pg->count_info, pg->u.inuse.type_info);
+ }
+ /* Free should not decrement domain's total allocation, since
+ * these pages were allocated without an owner. */
+ page_set_owner(pg, NULL);
+ free_domheap_pages(pg, 0);
+ d->arch.paging.hap.p2m_pages--;
+}
+
+/* Return the size of the pool, rounded up to the nearest MB */
+static unsigned int
+hap_get_allocation(struct domain *d)
+{
+ unsigned int pg = d->arch.paging.hap.total_pages;
+
+ HERE_I_AM;
+ return ((pg >> (20 - PAGE_SHIFT))
+ + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
+
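
The megabyte round-up hap_get_allocation() performs, shown in isolation and
assuming 4K pages (PAGE_SHIFT == 12):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned int pages_to_mb(unsigned int pg)
    {
        /* whole MBs, plus one if any partial MB remains */
        return (pg >> (20 - PAGE_SHIFT))
             + ((pg & ((1u << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
    }

    int main(void)
    {
        printf("%u\n", pages_to_mb(256));  /* 256 x 4K = exactly 1MB -> 1 */
        printf("%u\n", pages_to_mb(257));  /* partial MB rounds up   -> 2 */
        return 0;
    }
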
+/* Set the pool of pages to the required number of pages.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int
+hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
+{
+ struct page_info *sp;
+
+ ASSERT(hap_locked_by_me(d));
+
+ while ( d->arch.paging.hap.total_pages != pages ) {
+ if ( d->arch.paging.hap.total_pages < pages ) {
+ /* Need to allocate more memory from domheap */
+ sp = alloc_domheap_pages(NULL, 0, 0);
+ if ( sp == NULL ) {
+ HAP_PRINTK("failed to allocate hap pages.\n");
+ return -ENOMEM;
+ }
+ d->arch.paging.hap.free_pages += 1;
+ d->arch.paging.hap.total_pages += 1;
+ list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
+ }
+ else if ( d->arch.paging.hap.total_pages > pages ) {
+ /* Need to return memory to domheap */
+ ASSERT(!list_empty(&d->arch.paging.hap.freelists));
+ sp = list_entry(d->arch.paging.hap.freelists.next,
+ struct page_info, list);
+ list_del(&sp->list);
+ d->arch.paging.hap.free_pages -= 1;
+ d->arch.paging.hap.total_pages -= 1;
+ free_domheap_pages(sp, 0);
+ }
+
+ /* Check to see if we need to yield and try again */
+ if ( preempted && hypercall_preempt_check() ) {
+ *preempted = 1;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+#if CONFIG_PAGING_LEVELS == 4
+void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+ struct domain *d = v->domain;
+ l4_pgentry_t *sl4e;
+
+ sl4e = hap_map_domain_page(sl4mfn);
+ ASSERT(sl4e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+ ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
+ __PAGE_HYPERVISOR);
+
+ sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
+ l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl4e);
+}
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+
+#if CONFIG_PAGING_LEVELS == 3
+void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+
+ int i;
+
+ sl2e = hap_map_domain_page(sl2hmfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+ &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+ for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ l2e_empty();
+
+ if ( paging_mode_translate(d) )
+ {
+ /* Install the domain-specific p2m table */
+ l3_pgentry_t *p2m;
+ ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+ p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+ for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+ {
+ sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
+ (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
+ __PAGE_HYPERVISOR)
+ : l2e_empty();
+ }
+ hap_unmap_domain_page(p2m);
+ }
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+ struct domain *d = v->domain;
+ l2_pgentry_t *sl2e;
+ int i;
+
+ sl2e = hap_map_domain_page(sl2mfn);
+ ASSERT(sl2e != NULL);
+
+ /* Copy the common Xen mappings from the idle domain */
+ memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+ /* Install the per-domain mappings for this domain */
+ for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+ sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_pfn(
+ mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
+ __PAGE_HYPERVISOR);
+
+
+ sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR);
+
+ /* install domain-specific P2M table */
+ sl2e[l2_table_offset(RO_MPT_VIRT_START)] =
+ l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
+ __PAGE_HYPERVISOR);
+
+ hap_unmap_domain_page(sl2e);
+}
+#endif
+
+mfn_t hap_make_monitor_table(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+
+#if CONFIG_PAGING_LEVELS == 4
+ {
+ mfn_t m4mfn;
+ m4mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+ return m4mfn;
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ {
+ mfn_t m3mfn, m2mfn;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ int i;
+
+ m3mfn = hap_alloc(d, 0);
+
+ /* Install a monitor l2 table in slot 3 of the l3 table.
+ * This is used for all Xen entries, including linear maps
+ */
+ m2mfn = hap_alloc(d, 0);
+ l3e = hap_map_domain_page(m3mfn);
+ l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+ hap_install_xen_entries_in_l2h(v, m2mfn);
+ /* Install the monitor's own linear map */
+ l2e = hap_map_domain_page(m2mfn);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+ (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
+ ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
+ : l2e_empty();
+ hap_unmap_domain_page(l2e);
+ hap_unmap_domain_page(l3e);
+
+ HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+ return m3mfn;
+ }
+#else
+ {
+ mfn_t m2mfn;
+
+ m2mfn = hap_alloc(d, 0);
+ hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+
+ return m2mfn;
+ }
+#endif
+}
+
+void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
+{
+ struct domain *d = v->domain;
+
+#if CONFIG_PAGING_LEVELS == 4
+ /* Need to destroy the l3 monitor page in slot 0 too */
+ {
+ mfn_t m3mfn;
+ l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
+ ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+ m3mfn = _mfn(l4e_get_pfn(l4e[0]));
+ hap_free(d, m3mfn);
+ hap_unmap_domain_page(l4e);
+ }
+#elif CONFIG_PAGING_LEVELS == 3
+ /* Need to destroy the l2 monitor page in slot 4 too */
+ {
+ l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
+ ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+ hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
+ hap_unmap_domain_page(l3e);
+ }
+#endif
+
+ /* Put the memory back in the pool */
+ hap_free(d, mmfn);
+}
+
+/************************************************/
+/* HAP DOMAIN LEVEL FUNCTIONS */
+/************************************************/
+void hap_domain_init(struct domain *d)
+{
+ hap_lock_init(d);
+ INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
+ INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
+}
+
+/* return 0 for success, -errno for failure */
+int hap_enable(struct domain *d, u32 mode)
+{
+ unsigned int old_pages;
+ int rv = 0;
+
+ HERE_I_AM;
+
+ domain_pause(d);
+ /* error check */
+ if ( (d == current->domain) ) {
+ rv = -EINVAL;
+ goto out;
+ }
+
+ old_pages = d->arch.paging.hap.total_pages;
+ if ( old_pages == 0 ) {
+ unsigned int r;
+ hap_lock(d);
+ r = hap_set_allocation(d, 256, NULL);
+ hap_unlock(d);
+ if ( r != 0 ) {
+ hap_set_allocation(d, 0, NULL);
+ rv = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* allocate P2m table */
+ if ( mode & PG_translate ) {
+ rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
+ if ( rv != 0 )
+ goto out;
+ }
+
+ d->arch.paging.mode = mode | PG_HAP_enable;
+
+ out:
+ domain_unpause(d);
+ return rv;
+}
+
+void hap_final_teardown(struct domain *d)
+{
+ HERE_I_AM;
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+ hap_teardown(d);
+
+ p2m_teardown(d);
+}
+
+void hap_teardown(struct domain *d)
+{
+ struct vcpu *v;
+ mfn_t mfn;
+ HERE_I_AM;
+
+ ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+ ASSERT(d != current->domain);
+
+ if ( !hap_locked_by_me(d) )
+ hap_lock(d); /* Keep various asserts happy */
+
+ if ( paging_mode_enabled(d) ) {
+ /* release the monitor table held by each vcpu */
+ for_each_vcpu(d, v) {
+ if ( v->arch.paging.mode && paging_mode_external(d) ) {
+ mfn = pagetable_get_mfn(v->arch.monitor_table);
+ if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
+ hap_destroy_monitor_table(v, mfn);
+ v->arch.monitor_table = pagetable_null();
+ }
+ }
+ }
+
+ if ( d->arch.paging.hap.total_pages != 0 ) {
+ HAP_PRINTK("teardown of domain %u starts."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->domain_id,
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ hap_set_allocation(d, 0, NULL);
+ HAP_PRINTK("teardown done."
+ " pages total = %u, free = %u, p2m=%u\n",
+ d->arch.paging.hap.total_pages,
+ d->arch.paging.hap.free_pages,
+ d->arch.paging.hap.p2m_pages);
+ ASSERT(d->arch.paging.hap.total_pages == 0);
+ }
+
+ d->arch.paging.mode &= ~PG_log_dirty;
+
+ hap_unlock(d);
+}
+
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl)
+{
+ int rc, preempted = 0;
+
+ HERE_I_AM;
+
+ if ( unlikely(d == current->domain) ) {
+ gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
+ return -EINVAL;
+ }
+
+ switch ( sc->op ) {
+ case XEN_DOMCTL_SHADOW_OP_OFF:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+ case XEN_DOMCTL_SHADOW_OP_CLEAN:
+ case XEN_DOMCTL_SHADOW_OP_PEEK:
+ case XEN_DOMCTL_SHADOW_OP_ENABLE:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ domain_crash(d);
+ return -EINVAL;
+ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+ hap_lock(d);
+ rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
+ hap_unlock(d);
+ if ( preempted )
+ /* Not finished. Set up to re-run the call. */
+ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
+ u_domctl);
+ else
+ /* Finished. Return the new allocation */
+ sc->mb = hap_get_allocation(d);
+ return rc;
+ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+ sc->mb = hap_get_allocation(d);
+ return 0;
+ default:
+ HAP_ERROR("Bad hap domctl op %u\n", sc->op);
+ return -EINVAL;
+ }
+}
+
+void hap_vcpu_init(struct vcpu *v)
+{
+ v->arch.paging.mode = &hap_paging_real_mode;
+}
+/************************************************/
+/* HAP PAGING MODE FUNCTIONS */
+/************************************************/
+/* In theory, HAP should not intercept guest page faults. This function can
+ * be recycled to handle host/nested page faults, if needed.
+ */
+int hap_page_fault(struct vcpu *v, unsigned long va,
+ struct cpu_user_regs *regs)
+{
+ HERE_I_AM;
+ domain_crash(v->domain);
+ return 0;
+}
+
+/* Called when the guest issues an invlpg request.
+ * Return 1 if the CPU needs to issue a page invalidation; return 0 if it
+ * does not.
+ */
+int hap_invlpg(struct vcpu *v, unsigned long va)
+{
+ HERE_I_AM;
+ return 0;
+}
+
+void hap_update_cr3(struct vcpu *v, int do_locking)
+{
+ struct domain *d = v->domain;
+ mfn_t gmfn;
+
+ HERE_I_AM;
+ /* Don't do anything on an uninitialised vcpu */
+ if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) {
+ ASSERT(v->arch.cr3 == 0);
+ return;
+ }
+
+ if ( do_locking )
+ hap_lock(v->domain);
+
+ ASSERT(hap_locked_by_me(v->domain));
+ ASSERT(v->arch.paging.mode);
+
+ gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+ make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+ hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table));
+
+ HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n",
+ d->domain_id, v->vcpu_id,
+ (unsigned long)pagetable_get_pfn(v->arch.guest_table),
+ (unsigned long)pagetable_get_pfn(v->arch.monitor_table));
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+
+ if ( do_locking )
+ hap_unlock(v->domain);
+}
+
+void hap_update_paging_modes(struct vcpu *v)
+{
+ struct domain *d;
+
+ HERE_I_AM;
+
+ d = v->domain;
+ hap_lock(d);
+
+ /* Update the guest paging mode. We rely on hvm functions to detect the
+ * guest's paging mode, so make sure the shadow registers (CR0, CR4, EFER)
+ * reflect the guest's state correctly.
+ */
+ if ( hvm_paging_enabled(v) ) {
+ if ( hvm_long_mode_enabled(v) )
+ v->arch.paging.mode = &hap_paging_long_mode;
+ else if ( hvm_pae_enabled(v) )
+ v->arch.paging.mode = &hap_paging_pae_mode;
+ else
+ v->arch.paging.mode = &hap_paging_protected_mode;
+ }
+ else {
+ v->arch.paging.mode = &hap_paging_real_mode;
+ }
+
+ v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);
+
+ /* use p2m map */
+ v->arch.guest_table =
+ pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+
+ if ( pagetable_is_null(v->arch.monitor_table) ) {
+ mfn_t mmfn = hap_make_monitor_table(v);
+ v->arch.monitor_table = pagetable_from_mfn(mmfn);
+ make_cr3(v, mfn_x(mmfn));
+ }
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ hap_unlock(d);
+}
+
+void
+hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
+ l1_pgentry_t new, unsigned int level)
+{
+ hap_lock(v->domain);
+ safe_write_pte(p, new);
+ hap_unlock(v->domain);
+}
+
+/* Entry points into this mode of the hap code. */
+struct paging_mode hap_paging_real_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_real_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 1
+};
+
+struct paging_mode hap_paging_protected_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_protected_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 2
+};
+
+struct paging_mode hap_paging_pae_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_pae_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 3
+};
+
+struct paging_mode hap_paging_long_mode = {
+ .page_fault = hap_page_fault,
+ .invlpg = hap_invlpg,
+ .gva_to_gfn = hap_gva_to_gfn_long_mode,
+ .update_cr3 = hap_update_cr3,
+ .update_paging_modes = hap_update_paging_modes,
+ .write_p2m_entry = hap_write_p2m_entry,
+ .guest_levels = 4
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
+
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/private.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/private.h Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,112 @@
+/*
+ * arch/x86/mm/hap/private.h
+ *
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __HAP_PRIVATE_H__
+#define __HAP_PRIVATE_H__
+
+#include <asm/flushtlb.h>
+#include <asm/hvm/support.h>
+
+/********************************************/
+/* GUEST TRANSLATION FUNCS */
+/********************************************/
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
+/********************************************/
+/* MISC DEFINITIONS */
+/********************************************/
+
+/* PT_SHIFT describes the amount by which a virtual address is shifted right
+ * to right justify the portion to be used for indexing into a page
+ * table, given the guest memory model (i.e. number of levels) and the level
+ * of the page table being accessed. The idea is from Virtual Iron's code.
+ */
+static const int PT_SHIFT[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 12, 22, 0, 0}, /* 2 */
+ { 0, 12, 21, 30, 0}, /* 3 */
+ { 0, 12, 21, 30, 39} /* 4 */
+ };
+
+/* PT_ENTRIES describes the number of entries in a page table, given the
+ * memory model (i.e. number of levels) and the level of the page table
+ * being considered. This idea is from Virtual Iron's shadow code. */
+static const int PT_ENTRIES[][5] =
+ { /* ------ level ------ nr_levels */
+ /* 1 2 3 4 */
+ { 0, 0, 0, 0, 0}, /* 0 not used */
+ { 0, 0, 0, 0, 0}, /* 1 not used */
+ { 0, 1024, 1024, 0, 0}, /* 2 */
+ { 0, 512, 512, 4, 0}, /* 3 */
+ { 0, 512, 512, 512, 512} /* 4 */
+ };
+
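
How the two tables are consumed: the walker shifts the guest VA right by
PT_SHIFT[mode][lev] and masks with PT_ENTRIES[mode][lev]-1 to get the index
for that level. A stand-alone sketch for the two-level case:

    #include <stdio.h>

    static const int PT_SHIFT[][5]   = { {0}, {0}, {0, 12, 22, 0, 0} };
    static const int PT_ENTRIES[][5] = { {0}, {0}, {0, 1024, 1024, 0, 0} };

    int main(void)
    {
        unsigned long gva = 0x12345678;
        int mode = 2;                     /* two-level guest */

        for (int lev = mode; lev >= 1; lev--) {
            int index = (gva >> PT_SHIFT[mode][lev])
                        & (PT_ENTRIES[mode][lev] - 1);
            printf("level %d index = %d\n", lev, index);
        }
        return 0;
    }
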
+/********************************************/
+/* PAGING DEFINITION FOR GUEST */
+/********************************************/
+#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
+#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
+#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
+#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
+#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
+#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
+
+/* long mode physical address mask */
+#define PHYSICAL_ADDR_BITS_LM 52
+#define PHYSICAL_ADDR_MASK_LM ((1UL << PHYSICAL_ADDR_BITS_LM)-1)
+#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM)
+#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM)
+
+#define PAGE_NX_BIT (1ULL << 63)
+/************************************************/
+/* PAGETABLE RELATED VARIABLES */
+/************************************************/
+#if CONFIG_PAGING_LEVELS == 2
+#define HAP_L1_PAGETABLE_ENTRIES 1024
+#define HAP_L2_PAGETABLE_ENTRIES 1024
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 22
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 4
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+#define HAP_L1_PAGETABLE_ENTRIES 512
+#define HAP_L2_PAGETABLE_ENTRIES 512
+#define HAP_L3_PAGETABLE_ENTRIES 512
+#define HAP_L4_PAGETABLE_ENTRIES 512
+#define HAP_L1_PAGETABLE_SHIFT 12
+#define HAP_L2_PAGETABLE_SHIFT 21
+#define HAP_L3_PAGETABLE_SHIFT 30
+#define HAP_L4_PAGETABLE_SHIFT 39
+#endif
+
+#endif /* __HAP_PRIVATE_H__ */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/hap/support.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/hap/support.c Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,334 @@
+/*
+ * arch/x86/mm/hap/support.c
+ *
+ * guest page table walker
+ * Copyright (c) 2007, AMD Corporation (Wei Huang)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/sched.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/domain.h>
+#include <asm/shadow.h>
+#include <asm/hap.h>
+
+#include "private.h"
+#include "../page-guest32.h"
+
+/*******************************************/
+/* Platform Specific Functions */
+/*******************************************/
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for real mode guest.
+ */
+unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
+{
+ HERE_I_AM;
+ return ((paddr_t)gva >> PAGE_SHIFT);
+}
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for protected guest.
+ */
+unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
+{
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 2; /* two-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
+ l1_pgentry_32_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ HAP_PRINTK("l2 page table entry is %ulx at index = %d\n",
+ l2e[index].l2, index);
+ if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */
+ printk("guest physical memory size is too large!\n");
+ domain_crash(v->domain);
+ }
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) +
+ (gva & ~PHYSICAL_PAGE_4M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, return from here */
+ }
+ else {
+ gpfn = l2e_get_pfn( l2e[index] );
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ HAP_PRINTK("l1 page table entry is %ulx at index = %d\n",
+ l1e[index].l1, index);
+ if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( !success ) /* error happened, jump out */
+ break;
+ }
+
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success ) /* error happened */
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+}
+
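
The PSE branch above composes the guest physical address from the PDE's 4M
frame and the low 22 bits of the VA; the same arithmetic in isolation, with
illustrative addresses:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_4M_MASK (~((1ULL << 22) - 1))

    int main(void)
    {
        uint64_t pde_addr = 0x00C00000;  /* 4M-aligned frame from the PDE */
        uint64_t gva      = 0x0041F123;  /* guest VA inside that superpage */
        uint64_t gpa = (pde_addr & PAGE_4M_MASK) + (gva & ~PAGE_4M_MASK);

        printf("gpa = %#llx\n", (unsigned long long)gpa);  /* 0xc1f123 */
        return 0;
    }
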
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for PAE mode guest.
+ */
+unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS >= 3
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 3; /* three-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l1_pgentry_t *l1e;
+ l2_pgentry_t *l2e;
+ l3_pgentry_t *l3e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ index += ( ((gcr3 >> 5 ) & 127 ) * 4 );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) +
+ (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (3) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+
+
+/* Translate guest virtual address to guest physical address. Specifically
+ * for long mode guest.
+ */
+unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
+{
+#if CONFIG_PAGING_LEVELS == 4
+ unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
+ int mode = 4; /* four-level guest */
+ int lev, index;
+ paddr_t gpa = 0;
+ unsigned long gpfn, mfn;
+ int success = 1;
+ l4_pgentry_t *l4e;
+ l3_pgentry_t *l3e;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ HERE_I_AM;
+
+ gpfn = (gcr3 >> PAGE_SHIFT);
+ for ( lev = mode; lev >= 1; lev-- ) {
+ mfn = get_mfn_from_gpfn( gpfn );
+ if ( mfn == INVALID_MFN ) {
+ HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
+ lev);
+ success = 0;
+ break;
+ }
+ index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
+
+ if ( lev == 4 ) {
+ l4e = map_domain_page( mfn );
+ if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l4e_get_pfn( l4e[index] );
+ unmap_domain_page(l4e);
+ }
+
+ if ( lev == 3 ) {
+ l3e = map_domain_page( mfn );
+ if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l3e_get_pfn( l3e[index] );
+ unmap_domain_page(l3e);
+ }
+
+ if ( lev == 2 ) {
+ l2e = map_domain_page( mfn );
+ if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
+ success = 0;
+ }
+
+ if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
+ HAP_PRINTK("guest page table is PSE\n");
+ gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM)
+ + (gva & ~PHYSICAL_PAGE_2M_MASK);
+ unmap_domain_page(l2e);
+ break; /* last level page table, jump out from here */
+ }
+ else {
+ gpfn = l2e_get_pfn(l2e[index]);
+ }
+ unmap_domain_page(l2e);
+ }
+
+ if ( lev == 1 ) {
+ l1e = map_domain_page( mfn );
+ if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
+ HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
+ success = 0;
+ }
+ gpfn = l1e_get_pfn( l1e[index] );
+ gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) +
+ (gva & ~PHYSICAL_PAGE_4K_MASK);
+ unmap_domain_page(l1e);
+ }
+
+ if ( success != 1 ) /* error happened, jump out */
+ break;
+ }
+
+ gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */
+ HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+
+ if ( !success )
+ return INVALID_GFN;
+ else
+ return ((paddr_t)gpa >> PAGE_SHIFT);
+#else
+ HERE_I_AM;
+ printk("guest paging level (4) is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
+#endif
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/page-guest32.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/page-guest32.h Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,100 @@
+
+#ifndef __X86_PAGE_GUEST_H__
+#define __X86_PAGE_GUEST_H__
+
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
+#define PAGETABLE_ORDER_32 10
+#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
+#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32
+
+
+#define L1_PAGETABLE_SHIFT_32 12
+#define L2_PAGETABLE_SHIFT_32 22
+
+/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
+
+#ifndef __ASSEMBLY__
+
+typedef u32 intpte_32_t;
+
+typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
+typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
+typedef l2_pgentry_t root_pgentry_32_t;
+#endif
+
+#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
+#define put_pte_flags_32(x) ((intpte_32_t)(x))
+
+/* Get pte access flags (unsigned int). */
+#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))
+#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2))
+
+#define l1e_get_paddr_32(x) \
+ ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
+#define l2e_get_paddr_32(x) \
+ ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
+
+/* Construct an empty pte. */
+#define l1e_empty_32() ((l1_pgentry_32_t) { 0 })
+#define l2e_empty_32() ((l2_pgentry_32_t) { 0 })
+
+/* Construct a pte from a pfn and access flags. */
+#define l1e_from_pfn_32(pfn, flags) \
+ ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+#define l2e_from_pfn_32(pfn, flags) \
+ ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
+
+/* Construct a pte from a physical address and access flags. */
+#ifndef __ASSEMBLY__
+static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
+{
+ ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
+ return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
+}
+#endif /* !__ASSEMBLY__ */
+
+
+/* Construct a pte from a page pointer and access flags. */
+#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
+#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
+
+/* Add extra flags to an existing pte. */
+#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags))
+#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags))
+
+/* Remove flags from an existing pte. */
+#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
+#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
+
+/* Check if a pte's page mapping or significant access flags have changed. */
+#define l1e_has_changed_32(x,y,flags) \
+ ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+#define l2e_has_changed_32(x,y,flags) \
+ ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
+
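
What l1e_has_changed_32()/l2e_has_changed_32() test, spelled out as a plain
function; PADDR_MASK & PAGE_MASK is approximated here by a 32-bit page mask:

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_MASK_32 0xFFFFF000u

    /* 1 iff the frame address or any tracked flag bit differs */
    static int pte_has_changed(uint32_t x, uint32_t y, uint32_t flags)
    {
        return !!((x ^ y) & (PAGE_MASK_32 | flags));
    }

    int main(void)
    {
        assert(pte_has_changed(0x1000 | 1, 0x2000 | 1, 1));   /* new frame */
        assert(!pte_has_changed(0x1000 | 3, 0x1000 | 1, 1));  /* untracked bit */
        return 0;
    }
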
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset_32(a) \
+ (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
+#define l2_table_offset_32(a) \
+ (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
+
+#endif /* __X86_PAGE_GUEST_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
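
These accessors parallel Xen's native l1e_*/l2e_* macros, with a _32 suffix, for walking 2-level (32-bit non-PAE) guest tables from any hypervisor build. As a quick orientation, a minimal usage sketch (illustrative only, not part of the changeset; assumes the usual Xen definitions of _PAGE_PRESENT and _PAGE_RW):

static void example_guest32_pte(unsigned long pfn)
{
    /* Build a present, writable L1 entry from a guest frame number. */
    l1_pgentry_32_t l1e = l1e_from_pfn_32(pfn, _PAGE_PRESENT | _PAGE_RW);

    u32 flags = l1e_get_flags_32(l1e);   /* low 12 bits of the entry */
    paddr_t pa = l1e_get_paddr_32(l1e);  /* pfn << PAGE_SHIFT */

    l1e_remove_flags_32(l1e, _PAGE_RW);  /* clear RW in place */
    (void)flags; (void)pa;
}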
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/paging.c Thu Mar 08 14:39:52 2007 -0600
@@ -24,10 +24,12 @@
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
+#include <asm/hap.h>
/* Xen command-line option to enable hardware-assisted paging */
int opt_hap_enabled = 0;
boolean_param("hap", opt_hap_enabled);
+int hap_capable_system = 0;
/* Printouts */
#define PAGING_PRINTK(_f, _a...) \
@@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d
{
p2m_init(d);
shadow_domain_init(d);
+
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_domain_init(d);
}
/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
- shadow_vcpu_init(v);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
+ hap_vcpu_init(v);
+ else
+ shadow_vcpu_init(v);
}
@@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_
XEN_GUEST_HANDLE(void) u_domctl)
{
/* Here, dispatch domctl to the appropriate paging code */
- return shadow_domctl(d, sc, u_domctl);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_domctl(d, sc, u_domctl);
+ else
+ return shadow_domctl(d, sc, u_domctl);
}
/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_teardown(d);
+ else
+ shadow_teardown(d);
}
/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
- shadow_teardown(d);
- /* Call other modes' final teardown code here */
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ hap_final_teardown(d);
+ else
+ shadow_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode. Call once at domain
* creation. */
int paging_enable(struct domain *d, u32 mode)
{
- if ( mode & PG_SH_enable )
- return shadow_enable(d, mode);
+ if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
+ return hap_enable(d, mode | PG_HAP_enable);
else
- /* No other modes supported yet */
- return -EINVAL;
+ return shadow_enable(d, mode | PG_SH_enable);
}
/* Print paging-assistance info to the console */
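
The predicate opt_hap_enabled && hap_capable_system && is_hvm_domain(d) is now repeated at every dispatch point above. For clarity, a sketch of how the dispatch reads if the condition is factored into a helper (hap_active() is hypothetical, not part of this changeset):

static inline int hap_active(struct domain *d)
{
    return opt_hap_enabled && hap_capable_system && is_hvm_domain(d);
}

/* e.g. paging_teardown() would then read: */
void paging_teardown(struct domain *d)
{
    if ( hap_active(d) )
        hap_teardown(d);
    else
        shadow_teardown(d);
}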
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/common.c Thu Mar 08 14:39:52 2007 -0600
@@ -2912,7 +2912,16 @@ void sh_mark_dirty(struct domain *d, mfn
* can be called from __hvm_copy during emulation).
* If the lock isn't held, take it for the duration of the call. */
do_locking = !shadow_locked_by_me(d);
- if ( do_locking ) shadow_lock(d);
+ if ( do_locking )
+ {
+ shadow_lock(d);
+ /* Check the mode again with the lock held */
+ if ( unlikely(!shadow_mode_log_dirty(d)) )
+ {
+ shadow_unlock(d);
+ return;
+ }
+ }
ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
@@ -2968,8 +2977,16 @@ int shadow_domctl(struct domain *d,
if ( unlikely(d == current->domain) )
{
- gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n");
+ gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n",
+ d->domain_id);
return -EINVAL;
+ }
+
+ if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
+ {
+ gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n",
+ d->domain_id);
+ return 0;
}
switch ( sc->op )
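
The sh_mark_dirty() hunk above is the usual check/lock/re-check pattern: the log-dirty test performed before taking the shadow lock can be invalidated by a racing mode change, so it must be repeated once the lock is held. In outline (a simplified sketch, not the real function body):

static void mark_dirty_outline(struct domain *d)
{
    int do_locking;

    if ( !shadow_mode_log_dirty(d) )        /* cheap unlocked fast path */
        return;

    do_locking = !shadow_locked_by_me(d);
    if ( do_locking )
    {
        shadow_lock(d);
        /* Log-dirty mode may have been torn down while we waited. */
        if ( unlikely(!shadow_mode_log_dirty(d)) )
        {
            shadow_unlock(d);
            return;
        }
    }

    /* ... dirty_bitmap is guaranteed valid while the lock is held ... */

    if ( do_locking )
        shadow_unlock(d);
}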
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/page-guest32.h
--- a/xen/arch/x86/mm/shadow/page-guest32.h Mon Mar 05 12:49:12 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-
-#ifndef __X86_PAGE_GUEST_H__
-#define __X86_PAGE_GUEST_H__
-
-#ifndef __ASSEMBLY__
-# include <asm/types.h>
-#endif
-
-#define PAGETABLE_ORDER_32 10
-#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
-#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32)
-#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32
-
-
-#define L1_PAGETABLE_SHIFT_32 12
-#define L2_PAGETABLE_SHIFT_32 22
-
-/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
-
-#ifndef __ASSEMBLY__
-
-typedef u32 intpte_32_t;
-
-typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
-typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
-typedef l2_pgentry_t root_pgentry_32_t;
-#endif
-
-#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
-#define put_pte_flags_32(x) ((intpte_32_t)(x))
-
-/* Get pte access flags (unsigned int). */
-#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1))
-#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2))
-
-#define l1e_get_paddr_32(x) \
- ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
-#define l2e_get_paddr_32(x) \
- ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
-
-/* Construct an empty pte. */
-#define l1e_empty_32() ((l1_pgentry_32_t) { 0 })
-#define l2e_empty_32() ((l2_pgentry_32_t) { 0 })
-
-/* Construct a pte from a pfn and access flags. */
-#define l1e_from_pfn_32(pfn, flags) \
-    ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-#define l2e_from_pfn_32(pfn, flags) \
-    ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
-
-/* Construct a pte from a physical address and access flags. */
-#ifndef __ASSEMBLY__
-static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
- ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
- return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
-{
- ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
- return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
-}
-#endif /* !__ASSEMBLY__ */
-
-
-/* Construct a pte from a page pointer and access flags. */
-#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
-#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
-
-/* Add extra flags to an existing pte. */
-#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags))
-#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags))
-
-/* Remove flags from an existing pte. */
-#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
-#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
-
-/* Check if a pte's page mapping or significant access flags have changed. */
-#define l1e_has_changed_32(x,y,flags) \
-    ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-#define l2e_has_changed_32(x,y,flags) \
-    ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
-
-/* Given a virtual address, get an entry offset into a page table. */
-#define l1_table_offset_32(a) \
- (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
-#define l2_table_offset_32(a) \
- (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
-
-#endif /* __X86_PAGE_GUEST_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/private.h Thu Mar 08 14:39:52 2007 -0600
@@ -539,7 +539,7 @@ static inline int sh_get_ref(struct vcpu
/* We remember the first shadow entry that points to each shadow. */
if ( entry_pa != 0
- && sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->type)
&& sp->up == 0 )
sp->up = entry_pa;
@@ -559,7 +559,7 @@ static inline void sh_put_ref(struct vcp
/* If this is the entry in the up-pointer, remove it */
if ( entry_pa != 0
- && sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->type)
&& sp->up == entry_pa )
sp->up = 0;
diff -r 8f0b5295bb1b -r dcec453681bc xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h Thu Mar 08 14:39:52 2007 -0600
@@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr
#if GUEST_PAGING_LEVELS == 2
-#include "page-guest32.h"
+#include "../page-guest32.h"
#define GUEST_L1_PAGETABLE_ENTRIES 1024
#define GUEST_L2_PAGETABLE_ENTRIES 1024
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/event_channel.c
--- a/xen/common/event_channel.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/event_channel.c Thu Mar 08 14:39:52 2007 -0600
@@ -560,6 +560,9 @@ void send_guest_global_virq(struct domai
ASSERT(virq_is_global(virq));
+ if ( unlikely(d == NULL) )
+ return;
+
v = d->vcpu[0];
if ( unlikely(v == NULL) )
return;
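
The NULL guard looks odd in isolation; it pairs with the console change later in this patch, which raises VIRQ_CON_RING on every console write. A sketch of the call chain, as comments only:

/* xen/drivers/char/console.c, __putstr(), as changed below:
 *
 *     send_guest_global_virq(dom0, VIRQ_CON_RING);
 *
 * The earliest boot messages are printed before dom0 has been
 * constructed, so the domain pointer can legitimately be NULL here. */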
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/page_alloc.c Thu Mar 08 14:39:52 2007 -0600
@@ -49,7 +49,7 @@ string_param("badpage", opt_badpage);
* Bit width of the DMA heap.
*/
static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
-static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
+static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
static void parse_dma_bits(char *s)
{
unsigned int v = simple_strtol(s, NULL, 0);
@@ -339,11 +339,13 @@ static void init_heap_block(heap_by_zone
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
- unsigned int zone_lo, unsigned zone_hi,
+ unsigned int zone_lo, unsigned int zone_hi,
unsigned int cpu, unsigned int order)
{
- unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
- unsigned int zone, request = (1UL << order);
+ unsigned int i, j, zone;
+ unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
+ unsigned long request = 1UL << order;
+ cpumask_t extra_cpus_mask, mask;
struct page_info *pg;
ASSERT(node >= 0);
@@ -356,25 +358,24 @@ static struct page_info *alloc_heap_page
spin_lock(&heap_lock);
- /* start with requested node, but exhaust all node memory
- * in requested zone before failing, only calc new node
- * value if we fail to find memory in target node, this avoids
- * needless computation on fast-path */
+ /*
+ * Start with requested node, but exhaust all node memory in requested
+ * zone before failing, only calc new node value if we fail to find memory
+ * in target node, this avoids needless computation on fast-path.
+ */
for ( i = 0; i < num_nodes; i++ )
{
- for ( zone = zone_hi; zone >= zone_lo; --zone )
- {
- /* check if target node can support the allocation */
- if ( avail[node] && (avail[node][zone] >= request) )
- {
- /* Find smallest order which can satisfy the request. */
- for ( j = order; j <= MAX_ORDER; j++ )
- {
- if ( !list_empty(&heap(node, zone, j)) )
- goto found;
- }
- }
- }
+ zone = zone_hi;
+ do {
+ /* Check if target node can support the allocation. */
+ if ( !avail[node] || (avail[node][zone] < request) )
+ continue;
+
+ /* Find smallest order which can satisfy the request. */
+ for ( j = order; j <= MAX_ORDER; j++ )
+ if ( !list_empty(&heap(node, zone, j)) )
+ goto found;
+ } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
/* Pick next node, wrapping around if needed. */
if ( ++node == num_nodes )
@@ -403,6 +404,29 @@ static struct page_info *alloc_heap_page
spin_unlock(&heap_lock);
+ cpus_clear(mask);
+
+ for ( i = 0; i < (1 << order); i++ )
+ {
+ /* Reference count must continuously be zero for free pages. */
+ BUG_ON(pg[i].count_info != 0);
+
+ /* Add in any extra CPUs that need flushing because of this page. */
+ cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
+ tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+ cpus_or(mask, mask, extra_cpus_mask);
+
+ /* Initialise fields which have other uses for free pages. */
+ pg[i].u.inuse.type_info = 0;
+ page_set_owner(&pg[i], NULL);
+ }
+
+ if ( unlikely(!cpus_empty(mask)) )
+ {
+ perfc_incrc(need_flush_tlb_flush);
+ flush_tlb_mask(mask);
+ }
+
return pg;
}
@@ -411,13 +435,28 @@ static void free_heap_pages(
unsigned int zone, struct page_info *pg, unsigned int order)
{
unsigned long mask;
- unsigned int node = phys_to_nid(page_to_maddr(pg));
+ unsigned int i, node = phys_to_nid(page_to_maddr(pg));
+ struct domain *d;
ASSERT(zone < NR_ZONES);
ASSERT(order <= MAX_ORDER);
ASSERT(node >= 0);
ASSERT(node < num_online_nodes());
+ for ( i = 0; i < (1 << order); i++ )
+ {
+ BUG_ON(pg[i].count_info != 0);
+ if ( (d = page_get_owner(&pg[i])) != NULL )
+ {
+ pg[i].tlbflush_timestamp = tlbflush_current_time();
+ pg[i].u.free.cpumask = d->domain_dirty_cpumask;
+ }
+ else
+ {
+ cpus_clear(pg[i].u.free.cpumask);
+ }
+ }
+
spin_lock(&heap_lock);
map_free(page_to_mfn(pg), 1 << order);
@@ -426,7 +465,7 @@ static void free_heap_pages(
/* Merge chunks as far as possible. */
while ( order < MAX_ORDER )
{
- mask = 1 << order;
+ mask = 1UL << order;
if ( (page_to_mfn(pg) & mask) )
{
@@ -554,7 +593,7 @@ void end_boot_allocator(void)
/*
* Scrub all unallocated pages in all heap zones. This function is more
* convoluted than appears necessary because we do not want to continuously
- * hold the lock or disable interrupts while scrubbing very large memory areas.
+ * hold the lock while scrubbing very large memory areas.
*/
void scrub_heap_pages(void)
{
@@ -575,7 +614,7 @@ void scrub_heap_pages(void)
if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
printk(".");
- spin_lock_irq(&heap_lock);
+ spin_lock(&heap_lock);
/* Re-check page status with lock held. */
if ( !allocated_in_map(mfn) )
@@ -595,7 +634,7 @@ void scrub_heap_pages(void)
}
}
- spin_unlock_irq(&heap_lock);
+ spin_unlock(&heap_lock);
}
printk("done.\n");
@@ -609,8 +648,6 @@ void scrub_heap_pages(void)
void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
- unsigned long flags;
-
ps = round_pgup(ps);
pe = round_pgdown(pe);
if ( pe <= ps )
@@ -625,33 +662,21 @@ void init_xenheap_pages(paddr_t ps, padd
if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
pe -= PAGE_SIZE;
- local_irq_save(flags);
init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
- local_irq_restore(flags);
}
void *alloc_xenheap_pages(unsigned int order)
{
- unsigned long flags;
struct page_info *pg;
- int i;
-
- local_irq_save(flags);
+
+ ASSERT(!in_irq());
+
pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
- local_irq_restore(flags);
-
if ( unlikely(pg == NULL) )
goto no_memory;
memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
-
- for ( i = 0; i < (1 << order); i++ )
- {
- pg[i].count_info = 0;
- pg[i].u.inuse._domain = 0;
- pg[i].u.inuse.type_info = 0;
- }
return page_to_virt(pg);
@@ -663,16 +688,14 @@ void *alloc_xenheap_pages(unsigned int o
void free_xenheap_pages(void *v, unsigned int order)
{
- unsigned long flags;
+ ASSERT(!in_irq());
if ( v == NULL )
return;
- memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
-
- local_irq_save(flags);
+ memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
+
free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
- local_irq_restore(flags);
}
@@ -762,8 +785,6 @@ struct page_info *__alloc_domheap_pages(
unsigned int memflags)
{
struct page_info *pg = NULL;
- cpumask_t mask;
- unsigned long i;
unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
ASSERT(!in_irq());
@@ -792,38 +813,10 @@ struct page_info *__alloc_domheap_pages(
return NULL;
}
- if ( pg == NULL )
- if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1,
- zone_hi,
- cpu, order)) == NULL )
- return NULL;
-
- mask = pg->u.free.cpumask;
- tlbflush_filter(mask, pg->tlbflush_timestamp);
-
- pg->count_info = 0;
- pg->u.inuse._domain = 0;
- pg->u.inuse.type_info = 0;
-
- for ( i = 1; i < (1 << order); i++ )
- {
- /* Add in any extra CPUs that need flushing because of this page. */
- cpumask_t extra_cpus_mask;
- cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
- tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
- cpus_or(mask, mask, extra_cpus_mask);
-
- pg[i].count_info = 0;
- pg[i].u.inuse._domain = 0;
- pg[i].u.inuse.type_info = 0;
- page_set_owner(&pg[i], NULL);
- }
-
- if ( unlikely(!cpus_empty(mask)) )
- {
- perfc_incrc(need_flush_tlb_flush);
- flush_tlb_mask(mask);
- }
+ if ( (pg == NULL) &&
+ ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
+ cpu, order)) == NULL) )
+ return NULL;
if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
{
@@ -867,10 +860,7 @@ void free_domheap_pages(struct page_info
for ( i = 0; i < (1 << order); i++ )
{
- shadow_drop_references(d, &pg[i]);
- ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
- pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpumask = d->domain_dirty_cpumask;
+ BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
list_del(&pg[i].list);
}
@@ -892,6 +882,7 @@ void free_domheap_pages(struct page_info
*/
for ( i = 0; i < (1 << order); i++ )
{
+ page_set_owner(&pg[i], NULL);
spin_lock(&page_scrub_lock);
list_add(&pg[i].list, &page_scrub_list);
scrub_pages++;
@@ -902,8 +893,6 @@ void free_domheap_pages(struct page_info
else
{
/* Freeing anonymous domain-heap pages. */
- for ( i = 0; i < (1 << order); i++ )
- cpus_clear(pg[i].u.free.cpumask);
free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
drop_dom_ref = 0;
}
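
Two of the changes above deserve a note. First, interrupt-disabling around the heap lock is replaced by ASSERT(!in_irq()); the allocator now simply forbids use in IRQ context. Second, the zone walk becomes a do/while because the zone counter is unsigned; a stand-alone sketch of the hazard (a hypothetical copy, not the committed code):

static void walk_zones_high_to_low(unsigned int zone_hi, unsigned int zone_lo)
{
    unsigned int zone = zone_hi;

    /* A naive descending loop,
     *     for ( zone = zone_hi; zone >= zone_lo; --zone )
     * never terminates when zone_lo == 0: an unsigned value is always
     * >= 0, and decrementing past zero wraps to UINT_MAX.  The
     * post-decrement test below visits zone_hi..zone_lo inclusive and
     * exits before the wrapped value is ever used as an index. */
    do {
        /* ... examine this zone ... */
    } while ( zone-- > zone_lo );
}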
diff -r 8f0b5295bb1b -r dcec453681bc xen/common/xmalloc.c
--- a/xen/common/xmalloc.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/common/xmalloc.c Thu Mar 08 14:39:52 2007 -0600
@@ -33,6 +33,8 @@
#include <xen/timer.h>
#include <xen/cache.h>
#include <xen/prefetch.h>
+#include <xen/irq.h>
+#include <xen/smp.h>
/*
* XMALLOC_DEBUG:
@@ -175,6 +177,8 @@ void *_xmalloc(size_t size, size_t align
struct xmalloc_hdr *i;
unsigned long flags;
+ ASSERT(!in_irq());
+
/* We currently always return cacheline aligned. */
BUG_ON(align > SMP_CACHE_BYTES);
@@ -212,6 +216,8 @@ void xfree(void *p)
{
unsigned long flags;
struct xmalloc_hdr *i, *tmp, *hdr;
+
+ ASSERT(!in_irq());
if ( p == NULL )
return;
diff -r 8f0b5295bb1b -r dcec453681bc xen/drivers/acpi/numa.c
--- a/xen/drivers/acpi/numa.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/drivers/acpi/numa.c Thu Mar 08 14:39:52 2007 -0600
@@ -22,10 +22,6 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
*/
-#if 0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#endif
#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
@@ -34,7 +30,6 @@
#include <xen/numa.h>
#include <acpi/acpi_bus.h>
#include <acpi/acmacros.h>
-#include <asm/page.h> /* __va() */
#define ACPI_NUMA 0x80000000
#define _COMPONENT ACPI_NUMA
@@ -106,7 +101,7 @@ static int __init acpi_parse_slit(unsign
if (!phys_addr || !size)
return -EINVAL;
- slit = (struct acpi_table_slit *)__va(phys_addr);
+ slit = (struct acpi_table_slit *)__acpi_map_table(phys_addr, size);
/* downcast just for %llu vs %lu for i386/ia64 */
localities = (u32) slit->localities;
@@ -159,7 +154,7 @@ static int __init acpi_parse_srat(unsign
if (!phys_addr || !size)
return -EINVAL;
- srat = (struct acpi_table_srat *)__va(phys_addr);
+ srat = (struct acpi_table_srat *)__acpi_map_table(phys_addr, size);
return 0;
}
diff -r 8f0b5295bb1b -r dcec453681bc xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/drivers/char/console.c Thu Mar 08 14:39:52 2007 -0600
@@ -399,6 +399,8 @@ static void __putstr(const char *str)
vga_putchar(c);
putchar_console_ring(c);
}
+
+ send_guest_global_virq(dom0, VIRQ_CON_RING);
}
static int printk_prefix_check(char *p, char **pp)
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/acm/acm_hooks.h
--- a/xen/include/acm/acm_hooks.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/acm/acm_hooks.h Thu Mar 08 14:39:52 2007 -0600
@@ -247,12 +247,12 @@ static inline int acm_pre_domctl(struct
if (*ssid == NULL) {
printk("%s: Warning. Destroying domain without ssid
pointer.\n",
__func__);
- domain_rcu_lock(d);
+ rcu_unlock_domain(d);
return -EACCES;
}
d->ssid = NULL; /* make sure it's not used any more */
/* no policy-specific hook */
- domain_rcu_lock(d);
+ rcu_unlock_domain(d);
ret = 0;
}
break;
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/domain.h Thu Mar 08 14:39:52 2007 -0600
@@ -104,6 +104,21 @@ struct shadow_vcpu {
};
/************************************************/
+/* hardware assisted paging */
+/************************************************/
+struct hap_domain {
+ spinlock_t lock;
+ int locker;
+ const char *locker_function;
+
+ struct list_head freelists;
+ struct list_head p2m_freelist;
+ unsigned int total_pages; /* number of pages allocated */
+ unsigned int free_pages; /* number of pages on freelists */
+ unsigned int p2m_pages; /* number of pages allocated to p2m */
+};
+
+/************************************************/
/* p2m handling */
/************************************************/
@@ -135,6 +150,7 @@ struct paging_domain {
struct shadow_domain shadow;
/* Other paging assistance code will have structs here */
+ struct hap_domain hap;
};
struct paging_vcpu {
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hap.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hap.h Thu Mar 08 14:39:52 2007 -0600
@@ -0,0 +1,122 @@
+/******************************************************************************
+ * include/asm-x86/hap.h
+ *
+ * hardware-assisted paging
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_HAP_H
+#define _XEN_HAP_H
+
+#define HERE_I_AM \
+ debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
+#define HAP_PRINTK(_f, _a...) \
+ debugtrace_printk("hap: %s(): " _f, __func__, ##_a)
+#define HAP_ERROR(_f, _a...) \
+ printk("hap error: %s(): " _f, __func__, ##_a)
+
+/************************************************/
+/* hap domain page mapping */
+/************************************************/
+static inline void *
+hap_map_domain_page(mfn_t mfn)
+{
+ return map_domain_page(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page(void *p)
+{
+ unmap_domain_page(p);
+}
+
+static inline void *
+hap_map_domain_page_global(mfn_t mfn)
+{
+ return map_domain_page_global(mfn_x(mfn));
+}
+
+static inline void
+hap_unmap_domain_page_global(void *p)
+{
+ unmap_domain_page_global(p);
+}
+
+/************************************************/
+/* locking for hap code */
+/************************************************/
+#define hap_lock_init(_d) \
+ do { \
+ spin_lock_init(&(_d)->arch.paging.hap.lock); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ } while (0)
+
+#define hap_locked_by_me(_d) \
+ (current->processor == (_d)->arch.paging.hap.locker)
+
+#define hap_lock(_d) \
+ do { \
+ if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
+ { \
+ printk("Error: hap lock held by %s\n", \
+ (_d)->arch.paging.hap.locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_d)->arch.paging.hap.lock); \
+ ASSERT((_d)->arch.paging.hap.locker == -1); \
+ (_d)->arch.paging.hap.locker = current->processor; \
+ (_d)->arch.paging.hap.locker_function = __func__; \
+ } while (0)
+
+#define hap_unlock(_d) \
+ do { \
+ ASSERT((_d)->arch.paging.hap.locker == current->processor); \
+ (_d)->arch.paging.hap.locker = -1; \
+ (_d)->arch.paging.hap.locker_function = "nobody"; \
+ spin_unlock(&(_d)->arch.paging.hap.lock); \
+ } while (0)
+
+/************************************************/
+/* hap domain level functions */
+/************************************************/
+void hap_domain_init(struct domain *d);
+int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
+ XEN_GUEST_HANDLE(void) u_domctl);
+int hap_enable(struct domain *d, u32 mode);
+void hap_final_teardown(struct domain *d);
+void hap_teardown(struct domain *d);
+void hap_vcpu_init(struct vcpu *v);
+
+extern struct paging_mode hap_paging_real_mode;
+extern struct paging_mode hap_paging_protected_mode;
+extern struct paging_mode hap_paging_pae_mode;
+extern struct paging_mode hap_paging_long_mode;
+#endif /* _XEN_HAP_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
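
The lock macros follow the existing shadow-lock discipline: the owning processor and function name are recorded so that recursive acquisition fails loudly instead of deadlocking. Intended usage, as a sketch (hap_do_something() is hypothetical):

static void hap_do_something(struct domain *d)
{
    hap_lock(d);                  /* BUG()s if this CPU already holds it */
    ASSERT(hap_locked_by_me(d));  /* locker is now current->processor */

    /* ... operate on d->arch.paging.hap ... */

    hap_unlock(d);                /* resets locker to -1 / "nobody" */
}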
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h Thu Mar 08 14:39:52 2007 -0600
@@ -76,7 +76,7 @@ enum instruction_index {
};
-extern unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb,
+extern unsigned long get_effective_addr_modrm64(
struct cpu_user_regs *regs, const u8 prefix, int inst_len,
const u8 *operand, u8 *size);
extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb,
@@ -85,17 +85,17 @@ extern OPERATING_MODE get_operating_mode
extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
-extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
-extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+extern unsigned long svm_rip2pointer(struct vcpu *v);
+extern int __get_instruction_length_from_list(struct vcpu *v,
enum instruction_index *list, unsigned int list_count,
u8 *guest_eip_buf, enum instruction_index *match);
-static inline int __get_instruction_length(struct vmcb_struct *vmcb,
+static inline int __get_instruction_length(struct vcpu *v,
enum instruction_index instr, u8 *guest_eip_buf)
{
return __get_instruction_length_from_list(
- vmcb, &instr, 1, guest_eip_buf, NULL);
+ v, &instr, 1, guest_eip_buf, NULL);
}
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/svm.h Thu Mar 08 14:39:52 2007 -0600
@@ -34,6 +34,41 @@ extern void arch_svm_do_resume(struct vc
extern u64 root_vmcb_pa[NR_CPUS];
+static inline int svm_long_mode_enabled(struct vcpu *v)
+{
+ u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
+ return guest_efer & EFER_LMA;
+}
+
+static inline int svm_lme_is_set(struct vcpu *v)
+{
+ u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
+ return guest_efer & EFER_LME;
+}
+
+static inline int svm_cr4_pae_is_set(struct vcpu *v)
+{
+ unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ return guest_cr4 & X86_CR4_PAE;
+}
+
+static inline int svm_paging_enabled(struct vcpu *v)
+{
+ unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+ return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG);
+}
+
+static inline int svm_pae_enabled(struct vcpu *v)
+{
+ unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE);
+}
+
+static inline int svm_pgbit_test(struct vcpu *v)
+{
+ return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
+}
+
#define SVM_REG_EAX (0)
#define SVM_REG_ECX (1)
#define SVM_REG_EDX (2)
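
These predicates read the guest's shadowed control registers directly, replacing the SVM_CPU_STATE_* bit-flags removed from vmcb.h below. A sketch of how they might combine to classify the guest paging mode (hypothetical helper, not part of the changeset):

static int guest_paging_levels(struct vcpu *v)
{
    if ( !svm_paging_enabled(v) )
        return 0;                   /* no paging: real/protected mode */
    if ( svm_long_mode_enabled(v) )
        return 4;                   /* EFER.LMA set: 4-level long mode */
    if ( svm_pae_enabled(v) )
        return 3;                   /* CR4.PAE set: 3-level PAE */
    return 2;                       /* classic 2-level 32-bit paging */
}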
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Thu Mar 08 14:39:52 2007 -0600
@@ -302,14 +302,6 @@ enum VMEXIT_EXITCODE
VMEXIT_NPF = 1024, /* nested paging fault */
VMEXIT_INVALID = -1
};
-
-enum {
- SVM_CPU_STATE_PG_ENABLED=0,
- SVM_CPU_STATE_PAE_ENABLED,
- SVM_CPU_STATE_LME_ENABLED,
- SVM_CPU_STATE_LMA_ENABLED,
- SVM_CPU_STATE_ASSIST_ENABLED,
-};
/* Definitions of segment state are borrowed by the generic HVM code. */
typedef segment_attributes_t svm_segment_attributes_t;
@@ -457,12 +449,12 @@ struct arch_svm_struct {
int saved_irq_vector;
u32 launch_core;
- unsigned long flags; /* VMCB flags */
- unsigned long cpu_shadow_cr0; /* Guest value for CR0 */
- unsigned long cpu_shadow_cr4; /* Guest value for CR4 */
+ unsigned long flags; /* VMCB flags */
+ unsigned long cpu_shadow_cr0; /* Guest value for CR0 */
+ unsigned long cpu_shadow_cr4; /* Guest value for CR4 */
+ unsigned long cpu_shadow_efer; /* Guest value for EFER */
unsigned long cpu_cr2;
unsigned long cpu_cr3;
- unsigned long cpu_state;
};
struct vmcb_struct *alloc_vmcb(void);
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/public/arch-x86/xen.h Thu Mar 08 14:39:52 2007 -0600
@@ -132,6 +132,7 @@ struct vcpu_guest_context {
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
+ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
#ifdef __i386__
diff -r 8f0b5295bb1b -r dcec453681bc xen/include/public/xen.h
--- a/xen/include/public/xen.h Mon Mar 05 12:49:12 2007 -0600
+++ b/xen/include/public/xen.h Thu Mar 08 14:39:52 2007 -0600
@@ -131,6 +131,7 @@
#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16
@@ -473,26 +474,24 @@ typedef struct shared_info shared_info_t
#endif
/*
- * Start-of-day memory layout for the initial domain (DOM0):
+ * Start-of-day memory layout:
* 1. The domain is started within contiguous virtual-memory region.
- * 2. The contiguous region begins and ends on an aligned 4MB boundary.
- * 3. The region start corresponds to the load address of the OS image.
- * If the load address is not 4MB aligned then the address is rounded down.
- * 4. This the order of bootstrap elements in the initial virtual region:
+ * 2. The contiguous region ends on an aligned 4MB boundary.
+ * 3. This is the order of bootstrap elements in the initial virtual region:
* a. relocated kernel image
* b. initial ram disk [mod_start, mod_len]
* c. list of allocated page frames [mfn_list, nr_pages]
* d. start_info_t structure [register ESI (x86)]
* e. bootstrap page tables [pt_base, CR3 (x86)]
* f. bootstrap stack [register ESP (x86)]
- * 5. Bootstrap elements are packed together, but each is 4kB-aligned.
- * 6. The initial ram disk may be omitted.
- * 7. The list of page frames forms a contiguous 'pseudo-physical' memory
+ * 4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ * 5. The initial ram disk may be omitted.
+ * 6. The list of page frames forms a contiguous 'pseudo-physical' memory
* layout for the domain. In particular, the bootstrap virtual-memory
* region is a 1:1 mapping to the first section of the pseudo-physical map.
- * 8. All bootstrap elements are mapped read-writable for the guest OS. The
+ * 7. All bootstrap elements are mapped read-writable for the guest OS. The
* only exception is the bootstrap page table, which is mapped read-only.
- * 9. There is guaranteed to be at least 512kB padding after the final
+ * 8. There is guaranteed to be at least 512kB padding after the final
* bootstrap element. If necessary, the bootstrap virtual region is
* extended by an extra 4MB to ensure this.
*/
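
To make rules 4 and 8 concrete, a small sketch of how a loader might place elements and pad the region (all names hypothetical; the interface itself only guarantees the layout described above):

#define ALIGN_4K(a)  (((a) + 0xfffUL) & ~0xfffUL)
#define ALIGN_4M(a)  (((a) + 0x3fffffUL) & ~0x3fffffUL)

/* Rule 4: elements are packed, but each starts on a 4kB boundary. */
static unsigned long place_element(unsigned long cursor, unsigned long size)
{
    return ALIGN_4K(cursor) + size;
}

/* Rule 8: at least 512kB of padding after the final element, extending
 * the (4MB-aligned) region by another 4MB if necessary. */
static unsigned long region_end(unsigned long last_element_end)
{
    unsigned long end = ALIGN_4M(last_element_end);
    if ( (end - last_element_end) < (512UL << 10) )
        end += (4UL << 20);
    return end;
}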