# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 6c43118bdba8d4316ddfffbea7a53f0b2f128ab3
# Parent d940ec92958d62d3c03a92cecb2aa13fb9be5d0d
# Parent 0807931dfa54a79664b8ff4ec942036dfcbdcd19
merge
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xen0_ia64 Fri Feb 24 22:41:08 2006
@@ -113,10 +113,10 @@
# CONFIG_IA64_PAGE_SIZE_64KB is not set
CONFIG_PGTABLE_3=y
# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
CONFIG_IA64_L1_CACHE_SHIFT=7
CONFIG_IA64_CYCLONE=y
CONFIG_IOSAPIC=y
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_32 Fri Feb 24 22:41:08 2006
@@ -175,10 +175,10 @@
CONFIG_MTRR=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xen0_x86_64 Fri Feb 24 22:41:08 2006
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.16-rc3-xen0
-# Thu Feb 16 22:55:30 2006
+# Mon Feb 20 11:37:43 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -126,10 +126,10 @@
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
@@ -160,6 +160,11 @@
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_ACPI_CONTAINER is not set
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
#
# Bus options (PCI etc.)
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_ia64
--- a/buildconfigs/linux-defconfig_xenU_ia64 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xenU_ia64 Fri Feb 24 22:41:08 2006
@@ -110,10 +110,10 @@
# CONFIG_IA64_PAGE_SIZE_64KB is not set
CONFIG_PGTABLE_3=y
# CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
CONFIG_IA64_L1_CACHE_SHIFT=7
# CONFIG_IA64_CYCLONE is not set
CONFIG_IOSAPIC=y
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_x86_32
--- a/buildconfigs/linux-defconfig_xenU_x86_32 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_32 Fri Feb 24 22:41:08 2006
@@ -174,10 +174,10 @@
CONFIG_SPLIT_PTLOCK_CPUS=4096
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xenU_x86_64
--- a/buildconfigs/linux-defconfig_xenU_x86_64 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xenU_x86_64 Fri Feb 24 22:41:08 2006
@@ -131,10 +131,10 @@
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Feb 24 22:41:08 2006
@@ -180,10 +180,10 @@
CONFIG_MTRR=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_HOTPLUG_CPU=y
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64 Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Feb 24 22:41:08 2006
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.16-rc3-xen0
-# Thu Feb 16 22:56:25 2006
+# Linux kernel version: 2.6.16-rc3-xen
+# Mon Feb 20 11:37:11 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -132,10 +132,10 @@
# CONFIG_CRASH_DUMP is not set
CONFIG_PHYSICAL_START=0x100000
CONFIG_SECCOMP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
@@ -168,6 +168,11 @@
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
CONFIG_ACPI_CONTAINER=m
+
+#
+# CPU Frequency scaling
+#
+# CONFIG_CPU_FREQ is not set
#
# Bus options (PCI etc.)
diff -r d940ec92958d -r 6c43118bdba8 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Fri Feb 24 21:03:07 2006
+++ b/buildconfigs/mk.linux-2.6-xen Fri Feb 24 22:41:08 2006
@@ -2,8 +2,8 @@
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.16-rc3
-LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc3.bz2
+LINUX_VER = 2.6.16-rc4
+LINUX_SRCS = linux-2.6.15.tar.bz2 patch-2.6.16-rc4.bz2
LINUX_PDIR = linux-$(LINUX_VER)
EXTRAVERSION ?= xen
@@ -34,7 +34,7 @@
touch $(@D)/.hgskip
touch $@
-pristine-linux-%.16-rc3/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs
+pristine-linux-%.16-rc4/.valid-pristine: pristine-$(LINUX_PDIR)/.valid-srcs
touch $@ # update timestamp to avoid rebuild
$(LINUX_DIR)/include/linux/autoconf.h: ref-$(OS)-$(LINUX_VER)/.valid-ref
diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h Fri Feb 24 21:03:07 2006
+++ b/extras/mini-os/include/hypervisor.h Fri Feb 24 22:41:08 2006
@@ -3,10 +3,10 @@
*
* Hypervisor handling.
*
- * TODO - x86_64 broken!
*
* Copyright (c) 2002, K A Fraser
* Copyright (c) 2005, Grzegorz Milos
+ * Updates: Aravindh Puthiyaparambil <aravindh.puthiyaparambil@xxxxxxxxxx>
*/
#ifndef _HYPERVISOR_H_
@@ -15,6 +15,13 @@
#include <types.h>
#include <xen/xen.h>
#include <xen/dom0_ops.h>
+#if defined(__i386__)
+#include <hypercall-x86_32.h>
+#elif defined(__x86_64__)
+#include <hypercall-x86_64.h>
+#else
+#error "Unsupported architecture"
+#endif
/*
* a placeholder for the start of day information passed up from the hypervisor
@@ -27,503 +34,10 @@
extern union start_info_union start_info_union;
#define start_info (start_info_union.start_info)
-
/* hypervisor.c */
//void do_hypervisor_callback(struct pt_regs *regs);
void mask_evtchn(u32 port);
void unmask_evtchn(u32 port);
void clear_evtchn(u32 port);
-/*
- * Assembler stubs for hyper-calls.
- */
-#if defined(__i386__)
-/* Taken from Linux */
-
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
-
-#include <xen/sched.h>
-
-#define _hypercall0(type, name) \
-({ \
- long __res; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res) \
- : "0" (__HYPERVISOR_##name) \
- : "memory" ); \
- (type)__res; \
-})
-
-#define _hypercall1(type, name, a1) \
-({ \
- long __res, __ign1; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res), "=b" (__ign1) \
- : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \
- : "memory" ); \
- (type)__res; \
-})
-
-#define _hypercall2(type, name, a1, a2) \
-({ \
- long __res, __ign1, __ign2; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
- : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
- "2" ((long)(a2)) \
- : "memory" ); \
- (type)__res; \
-})
-
-#define _hypercall3(type, name, a1, a2, a3) \
-({ \
- long __res, __ign1, __ign2, __ign3; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
- "=d" (__ign3) \
- : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
- "2" ((long)(a2)), "3" ((long)(a3)) \
- : "memory" ); \
- (type)__res; \
-})
-
-#define _hypercall4(type, name, a1, a2, a3, a4) \
-({ \
- long __res, __ign1, __ign2, __ign3, __ign4; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
- "=d" (__ign3), "=S" (__ign4) \
- : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
- "2" ((long)(a2)), "3" ((long)(a3)), \
- "4" ((long)(a4)) \
- : "memory" ); \
- (type)__res; \
-})
-
-#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
-({ \
- long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \
- asm volatile ( \
- TRAP_INSTR \
- : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
- "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
- : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
- "2" ((long)(a2)), "3" ((long)(a3)), \
- "4" ((long)(a4)), "5" ((long)(a5)) \
- : "memory" ); \
- (type)__res; \
-})
-
-static inline int
-HYPERVISOR_set_trap_table(
- trap_info_t *table)
-{
- return _hypercall1(int, set_trap_table, table);
-}
-
-static inline int
-HYPERVISOR_mmu_update(
- mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
- return _hypercall4(int, mmu_update, req, count, success_count, domid);
-}
-
-static inline int
-HYPERVISOR_mmuext_op(
- struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
- return _hypercall4(int, mmuext_op, op, count, success_count, domid);
-}
-
-static inline int
-HYPERVISOR_set_gdt(
- unsigned long *frame_list, int entries)
-{
- return _hypercall2(int, set_gdt, frame_list, entries);
-}
-
-static inline int
-HYPERVISOR_stack_switch(
- unsigned long ss, unsigned long esp)
-{
- return _hypercall2(int, stack_switch, ss, esp);
-}
-
-static inline int
-HYPERVISOR_set_callbacks(
- unsigned long event_selector, unsigned long event_address,
- unsigned long failsafe_selector, unsigned long failsafe_address)
-{
- return _hypercall4(int, set_callbacks,
- event_selector, event_address,
- failsafe_selector, failsafe_address);
-}
-
-static inline int
-HYPERVISOR_fpu_taskswitch(
- int set)
-{
- return _hypercall1(int, fpu_taskswitch, set);
-}
-
-static inline int
-HYPERVISOR_sched_op(
- int cmd, unsigned long arg)
-{
- return _hypercall2(int, sched_op, cmd, arg);
-}
-
-static inline long
-HYPERVISOR_set_timer_op(
- u64 timeout)
-{
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
-}
-
-static inline int
-HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- return _hypercall1(int, dom0_op, dom0_op);
-}
-
-static inline int
-HYPERVISOR_set_debugreg(
- int reg, unsigned long value)
-{
- return _hypercall2(int, set_debugreg, reg, value);
-}
-
-static inline unsigned long
-HYPERVISOR_get_debugreg(
- int reg)
-{
- return _hypercall1(unsigned long, get_debugreg, reg);
-}
-
-static inline int
-HYPERVISOR_update_descriptor(
- u64 ma, u64 desc)
-{
- return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
-}
-
-static inline int
-HYPERVISOR_memory_op(
- unsigned int cmd, void *arg)
-{
- return _hypercall2(int, memory_op, cmd, arg);
-}
-
-static inline int
-HYPERVISOR_multicall(
- void *call_list, int nr_calls)
-{
- return _hypercall2(int, multicall, call_list, nr_calls);
-}
-
-static inline int
-HYPERVISOR_update_va_mapping(
- unsigned long va, pte_t new_val, unsigned long flags)
-{
- unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
- pte_hi = new_val.pte_high;
-#endif
- return _hypercall4(int, update_va_mapping, va,
- new_val.pte_low, pte_hi, flags);
-}
-
-static inline int
-HYPERVISOR_event_channel_op(
- void *op)
-{
- return _hypercall1(int, event_channel_op, op);
-}
-
-static inline int
-HYPERVISOR_xen_version(
- int cmd, void *arg)
-{
- return _hypercall2(int, xen_version, cmd, arg);
-}
-
-static inline int
-HYPERVISOR_console_io(
- int cmd, int count, char *str)
-{
- return _hypercall3(int, console_io, cmd, count, str);
-}
-
-static inline int
-HYPERVISOR_physdev_op(
- void *physdev_op)
-{
- return _hypercall1(int, physdev_op, physdev_op);
-}
-
-static inline int
-HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- return _hypercall3(int, grant_table_op, cmd, uop, count);
-}
-
-static inline int
-HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
-{
- unsigned long pte_hi = 0;
-#ifdef CONFIG_X86_PAE
- pte_hi = new_val.pte_high;
-#endif
- return _hypercall5(int, update_va_mapping_otherdomain, va,
- new_val.pte_low, pte_hi, flags, domid);
-}
-
-static inline int
-HYPERVISOR_vm_assist(
- unsigned int cmd, unsigned int type)
-{
- return _hypercall2(int, vm_assist, cmd, type);
-}
-
-static inline int
-HYPERVISOR_vcpu_op(
- int cmd, int vcpuid, void *extra_args)
-{
- return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
-}
-
-static inline int
-HYPERVISOR_suspend(
- unsigned long srec)
-{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
-}
-
-#endif /* __HYPERCALL_H__ */
-#elif defined(__x86_64__)
-
-#define __syscall_clobber "r11","rcx","memory"
-
-/*
- * Assembler stubs for hyper-calls.
- */
-static inline int
-HYPERVISOR_set_trap_table(
- trap_info_t *table)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmu_update(
- mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5, %%r10;" TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count),
- "d" (success_count), "g" ((unsigned long)domid)
- : __syscall_clobber, "r10" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_mmuext_op(
- struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5, %%r10;" TRAP_INSTR
- : "=a" (ret)
- : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count),
- "d" (success_count), "g" ((unsigned long)domid)
- : __syscall_clobber, "r10" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_gdt(
- unsigned long *frame_list, int entries)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries)
- : __syscall_clobber );
-
-
- return ret;
-}
-static inline int
-HYPERVISOR_stack_switch(
- unsigned long ss, unsigned long esp)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_set_callbacks(
- unsigned long event_address, unsigned long failsafe_address,
- unsigned long syscall_address)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address),
- "S" (failsafe_address), "d" (syscall_address)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_fpu_taskswitch(
- int set)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
- "D" ((unsigned long) set) : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_yield(
- void)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_block(
- void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_shutdown(
- void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
- "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)))
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_reboot(
- void)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
- "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)))
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
- unsigned long srec)
-{
- int ret;
-
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
- "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))),
- "S" (srec)
- : __syscall_clobber );
-
- return ret;
-}
-
-/*
- * We can have the timeout value in a single argument for the hypercall, but
- * that will break the common code.
- */
-static inline long
-HYPERVISOR_set_timer_op(
- u64 timeout)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_timer_op),
- "D" (timeout)
- : __syscall_clobber );
-
- return ret;
-}
-#endif
-
#endif /* __HYPERVISOR_H__ */
diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/x86_32.S
--- a/extras/mini-os/x86_32.S Fri Feb 24 21:03:07 2006
+++ b/extras/mini-os/x86_32.S Fri Feb 24 22:41:08 2006
@@ -1,16 +1,16 @@
#include <os.h>
#include <xen/arch-x86_32.h>
-
.section __xen_guest
.ascii "GUEST_OS=Mini-OS"
.ascii ",XEN_VER=xen-3.0"
+ .ascii ",HYPERCALL_PAGE=0x2"
.ascii ",LOADER=generic"
.ascii ",PT_MODE_WRITABLE"
.byte 0
.text
-.globl _start, shared_info
+.globl _start, shared_info, hypercall_page
_start:
cld
@@ -26,7 +26,9 @@
.org 0x1000
shared_info:
.org 0x2000
-
+
+hypercall_page:
+ .org 0x3000
ES = 0x20
ORIG_EAX = 0x24
diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/x86_64.S
--- a/extras/mini-os/x86_64.S Fri Feb 24 21:03:07 2006
+++ b/extras/mini-os/x86_64.S Fri Feb 24 22:41:08 2006
@@ -1,11 +1,16 @@
#include <os.h>
.section __xen_guest
- .asciz "XEN_VER=3.0,LOADER=generic,PT_MODE_WRITABLE"
+ .ascii "GUEST_OS=Mini-OS"
+ .ascii ",XEN_VER=xen-3.0"
+ .ascii ",HYPERCALL_PAGE=0x2"
+ .ascii ",LOADER=generic"
+ .ascii ",PT_MODE_WRITABLE"
+ .byte 0
.text
#define ENTRY(X) .globl X ; X :
-.globl _start, shared_info
+.globl _start, shared_info, hypercall_page
#define SAVE_ALL \
cld; \
@@ -56,6 +61,9 @@
.org 0x1000
shared_info:
.org 0x2000
+
+hypercall_page:
+ .org 0x3000
ENTRY(hypervisor_callback)
popq %rcx
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/Kconfig
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Fri Feb 24 22:41:08 2006
@@ -780,7 +780,7 @@
config DOUBLEFAULT
default y
bool "Enable doublefault exception handler" if EMBEDDED
- depends on !XEN
+ depends on !X86_NO_TSS
help
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
@@ -1176,6 +1176,21 @@
depends on X86_SMP || (X86_VOYAGER && SMP)
default y
+config X86_NO_TSS
+ bool
+ depends on X86_XEN
+ default y
+
+config X86_SYSENTER
+ bool
+ depends on !X86_NO_TSS
+ default y
+
+config X86_NO_IDT
+ bool
+ depends on X86_XEN
+ default y
+
config KTIME_SCALAR
bool
default y
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile Fri Feb 24 22:41:08 2006
@@ -49,11 +49,13 @@
vsyscall_note := vsyscall-note.o
endif
+VSYSCALL_TYPES-y := int80
+VSYSCALL_TYPES-$(CONFIG_X86_SYSENTER) += sysenter
# vsyscall.o contains the vsyscall DSO images as __initdata.
# We must build both images before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
-targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
+$(obj)/vsyscall.o: $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.so)
+targets += $(foreach F,$(VSYSCALL_TYPES-y),vsyscall-$F.o vsyscall-$F.so)
targets += $(vsyscall_note) vsyscall.lds
# The DSO images are built using a special linker script.
@@ -81,7 +83,8 @@
SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
- $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
+ $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.o) \
+ $(obj)/$(vsyscall_note) FORCE
$(call if_changed,syscall)
ifdef CONFIG_XEN
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/apic-xen.c Fri Feb 24 22:41:08 2006
@@ -85,13 +85,6 @@
return 0xff;
}
-#ifdef CONFIG_XEN
-void switch_APIC_timer_to_ipi(void *cpumask) { }
-EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
-void switch_ipi_to_APIC_timer(void *cpumask) { }
-EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
-#endif
-
#ifndef CONFIG_XEN
#ifndef CONFIG_SMP
static void up_apic_timer_interrupt_call(struct pt_regs *regs)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c Fri Feb 24 22:41:08 2006
@@ -583,7 +583,6 @@
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
BUG();
- lgdt_finish();
}
/*
@@ -595,7 +594,9 @@
void __cpuinit cpu_init(void)
{
int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct * t = &per_cpu(init_tss, cpu);
+#endif
struct thread_struct *thread = &current->thread;
if (cpu_test_and_set(cpu, cpu_initialized)) {
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Fri Feb 24 22:41:08 2006
@@ -223,6 +223,7 @@
jmp need_resched
#endif
+#ifdef CONFIG_X86_SYSENTER
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -270,6 +271,7 @@
xorl %ebp,%ebp
sti
sysexit
+#endif /* CONFIG_X86_SYSENTER */
# system call handler stub
@@ -662,6 +664,7 @@
call math_state_restore
jmp ret_from_exception
+#ifdef CONFIG_X86_SYSENTER
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
* that sets up the real kernel stack. Check here, since we can't
@@ -683,12 +686,15 @@
pushfl; \
pushl $__KERNEL_CS; \
pushl $sysenter_past_esp
+#endif /* CONFIG_X86_SYSENTER */
KPROBE_ENTRY(debug)
+#ifdef CONFIG_X86_SYSENTER
cmpl $sysenter_entry,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
+#endif /* CONFIG_X86_SYSENTER */
pushl $-1 # mark this as an int
SAVE_ALL
xorl %edx,%edx # error code 0
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Fri Feb 24 22:41:08 2006
@@ -1,23 +1,11 @@
-#include <linux/config.h>
-
-.section __xen_guest
- .ascii "GUEST_OS=linux,GUEST_VER=2.6"
- .ascii ",XEN_VER=xen-3.0"
- .ascii ",VIRT_BASE=0xC0000000"
- .ascii ",HYPERCALL_PAGE=0x104" /* __pa(hypercall_page) >> 12 */
-#ifdef CONFIG_X86_PAE
- .ascii ",PAE=yes"
-#else
- .ascii ",PAE=no"
-#endif
- .ascii ",LOADER=generic"
- .byte 0
.text
+#include <linux/config.h>
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
+#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <xen/interface/arch-x86_32.h>
@@ -37,15 +25,10 @@
ENTRY(startup_32)
movl %esi,xen_start_info
-
-#if 0
-ENTRY(startup_32_smp)
-#endif /* CONFIG_SMP */
-
cld
/* Set up the stack pointer */
- lss stack_start,%esp
+ movl $(init_thread_union+THREAD_SIZE),%esp
checkCPUtype:
@@ -69,66 +52,54 @@
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
+ movb $1,X86_HARD_MATH
+
xorl %eax,%eax # Clear FS/GS and LDT
movl %eax,%fs
movl %eax,%gs
cld # gcc2 wants the direction flag cleared at all times
-#if 0
- movb ready, %cl
- movb $1, ready
- cmpb $0,%cl
- je 1f # the first CPU calls start_kernel
- # all other CPUs call initialize_secondary
- call initialize_secondary
- jmp L6
-1:
-#endif /* CONFIG_SMP */
call start_kernel
L6:
jmp L6 # main should never return here, but
# just in case, we know what happens.
-ENTRY(lgdt_finish)
- movl $(__KERNEL_DS),%eax # reload all the segment registers
- movw %ax,%ss # after changing gdt.
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+.skip 0x1000
- movl $(__USER_DS),%eax # DS/ES contains default USER segment
- movw %ax,%ds
- movw %ax,%es
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
- popl %eax # reload CS by intersegment return
- pushl $(__KERNEL_CS)
- pushl %eax
- lret
+/*
+ * BSS section
+ */
+.section ".bss.page_aligned","w"
+ENTRY(empty_zero_page)
+ .fill 4096,1,0
-ENTRY(stack_start)
- .long init_thread_union+THREAD_SIZE
- .long __BOOT_DS
-
-ready: .byte 0
-
-.globl idt_descr
-.globl cpu_gdt_descr
+/*
+ * This starts the data section.
+ */
+.data
ALIGN
- .word 0 # 32-bit align idt_desc.address
-idt_descr:
- .word IDT_ENTRIES*8-1 # idt contains 256 entries
- .long idt_table
-
-# boot GDT descriptor (later on used by CPU#0):
.word 0 # 32 bit align gdt_desc.address
+ .globl cpu_gdt_descr
cpu_gdt_descr:
.word GDT_SIZE
.long cpu_gdt_table
.fill NR_CPUS-1,8,0 # space for the other GDT descriptors
-.org 0x1000
-ENTRY(empty_zero_page)
-
-.org 0x2000
+/*
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
+ */
+ .align PAGE_SIZE_asm
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
@@ -143,17 +114,10 @@
.quad 0x0000000000000000 /* 0x53 reserved */
.quad 0x0000000000000000 /* 0x5b reserved */
-#ifdef CONFIG_X86_PAE
- .quad 0x00cfbb00000067ff /* 0x60 kernel 4GB code at 0x00000000 */
- .quad 0x00cfb300000067ff /* 0x68 kernel 4GB data at 0x00000000 */
- .quad 0x00cffb00000067ff /* 0x73 user 4GB code at 0x00000000 */
- .quad 0x00cff300000067ff /* 0x7b user 4GB data at 0x00000000 */
-#else
- .quad 0x00cfbb000000c3ff /* 0x60 kernel 4GB code at 0x00000000 */
- .quad 0x00cfb3000000c3ff /* 0x68 kernel 4GB data at 0x00000000 */
- .quad 0x00cffb000000c3ff /* 0x73 user 4GB code at 0x00000000 */
- .quad 0x00cff3000000c3ff /* 0x7b user 4GB data at 0x00000000 */
-#endif
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* 0x80 TSS descriptor */
.quad 0x0000000000000000 /* 0x88 LDT descriptor */
@@ -187,15 +151,36 @@
/* Be sure this is zeroed to avoid false validations in Xen */
.fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0
-.org 0x3000
-ENTRY(default_ldt)
-.org 0x4000
-ENTRY(hypercall_page)
+/*
+ * __xen_guest information
+ */
+.macro utoa value
+ .if (\value) < 0 || (\value) >= 0x10
+ utoa (((\value)>>4)&0x0fffffff)
+ .endif
+ .if ((\value) & 0xf) < 10
+ .byte '0' + ((\value) & 0xf)
+ .else
+ .byte 'A' + ((\value) & 0xf) - 10
+ .endif
+.endm
-.org 0x5000
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
+.section __xen_guest
+ .ascii "GUEST_OS=linux,GUEST_VER=2.6"
+ .ascii ",XEN_VER=xen-3.0"
+ .ascii ",VIRT_BASE=0x"
+ utoa __PAGE_OFFSET
+ .ascii ",HYPERCALL_PAGE=0x"
+ utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
+ .ascii ",FEATURES=writable_page_tables"
+ .ascii "|writable_descriptor_tables"
+ .ascii "|auto_translated_physmap"
+ .ascii "|supervisor_mode_kernel"
+#ifdef CONFIG_X86_PAE
+ .ascii ",PAE=yes"
+#else
+ .ascii ",PAE=no"
+#endif
+ .ascii ",LOADER=generic"
+ .byte 0
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c Fri Feb 24 22:41:08 2006
@@ -41,9 +41,11 @@
EXPORT_SYMBOL(init_task);
+#ifndef CONFIG_X86_NO_TSS
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's.
*/
DEFINE_PER_CPU(struct tss_struct, init_tss)
____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Fri Feb 24 22:41:08 2006
@@ -272,7 +272,7 @@
regs.xes = __USER_DS;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
- regs.xcs = __KERNEL_CS;
+ regs.xcs = GET_KERNEL_CS();
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
/* Ok, create the new process.. */
@@ -518,7 +518,9 @@
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+#endif
physdev_op_t iopl_op, iobmp_op;
multicall_entry_t _mcl[8], *mcl = _mcl;
@@ -543,10 +545,9 @@
* Reload esp0.
* This is load_esp0(tss, next) with a multicall.
*/
- tss->esp0 = next->esp0;
mcl->op = __HYPERVISOR_stack_switch;
- mcl->args[0] = tss->ss0;
- mcl->args[1] = tss->esp0;
+ mcl->args[0] = __KERNEL_DS;
+ mcl->args[1] = next->esp0;
mcl++;
/*
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Feb 24 22:41:08 2006
@@ -94,9 +94,9 @@
#endif
/* cpu data as detected by the assembly code in head.S */
-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
+struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
-struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
+struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/traps-xen.c Fri Feb 24 22:41:08 2006
@@ -58,15 +58,20 @@
asmlinkage int system_call(void);
+struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 } };
+
/* Do we ignore FPU interrupts ? */
char ignore_fpu_irq = 0;
+#ifndef CONFIG_X86_NO_IDT
/*
* The IDT has to be page-aligned to simplify the Pentium
* F0 0F bug workaround.. We have a special link segment
* for this.
*/
struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) =
{ {0, 0}, };
+#endif
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -496,20 +501,6 @@
fastcall void __kprobes do_general_protection(struct pt_regs * regs,
long error_code)
{
- /*
- * If we trapped on an LDT access then ensure that the default_ldt is
- * loaded, if nothing else. We load default_ldt lazily because LDT
- * switching costs time and many applications don't need it.
- */
- if (unlikely((error_code & 6) == 4)) {
- unsigned long ldt;
- __asm__ __volatile__ ("sldt %0" : "=r" (ldt));
- if (ldt == 0) {
- xen_set_ldt((unsigned long)&default_ldt[0], 5);
- return;
- }
- }
-
current->thread.error_code = error_code;
current->thread.trap_no = 13;
@@ -1079,13 +1070,6 @@
}
/*
- * default LDT is a single-entry callgate to lcall7 for iBCS
- * and a callgate to lcall27 for Solaris/x86 binaries
- */
- make_lowmem_page_readonly(
- &default_ldt[0], XENFEAT_writable_descriptor_tables);
-
- /*
* Should be a barrier for any external CPU state.
*/
cpu_init();
@@ -1094,12 +1078,6 @@
void smp_trap_init(trap_info_t *trap_ctxt)
{
trap_info_t *t = trap_table;
- int i;
-
- for (i = 0; i < 256; i++) {
- trap_ctxt[i].vector = i;
- trap_ctxt[i].cs = FLAT_KERNEL_CS;
- }
for (t = trap_table; t->address; t++) {
trap_ctxt[t->vector].flags = t->flags;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Feb 24 22:41:08 2006
@@ -85,7 +85,7 @@
return eip + (seg << 4);
/* By far the most common cases. */
- if (likely(seg == __USER_CS || seg == __KERNEL_CS))
+ if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
return eip;
/* Check the segment exists, is within the current LDT/GDT size,
@@ -396,7 +396,7 @@
switch (error_code & 3) {
default: /* 3: write, present */
#ifdef TEST_VERIFY_AREA
- if (regs->cs == KERNEL_CS)
+ if (regs->cs == GET_KERNEL_CS())
printk("WP fault at %08lx\n", regs->eip);
#endif
/* fall through */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Feb 24 22:41:08 2006
@@ -324,10 +324,13 @@
.domid = DOMID_SELF
};
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- BUG_ON(order >= 1);
+ /*
+ * Currently an auto-translated guest will not perform I/O, nor will
+ * it require PAE page directories below 4GB. Therefore any calls to
+ * this function are redundant and can be ignored.
+ */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
- }
scrub_pages(vstart, 1 << order);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Feb 24 22:41:08 2006
@@ -759,7 +759,7 @@
#endif
0,
pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+ pgd_dtor);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Fri Feb 24 22:41:08 2006
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -184,6 +185,10 @@
__flush_tlb_one(vaddr);
}
+static int nr_fixmaps = 0;
+unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
+EXPORT_SYMBOL(__FIXADDR_TOP);
+
void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -194,7 +199,6 @@
}
switch (idx) {
case FIX_WP_TEST:
- case FIX_VSYSCALL:
#ifdef CONFIG_X86_F00F_BUG
case FIX_F00F_IDT:
#endif
@@ -204,6 +208,13 @@
set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
break;
}
+ nr_fixmaps++;
+}
+
+void set_fixaddr_top(unsigned long top)
+{
+ BUG_ON(nr_fixmaps > 0);
+ __FIXADDR_TOP = top - PAGE_SIZE;
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -289,10 +300,11 @@
unsigned long flags;
if (PTRS_PER_PMD > 1) {
- /* Ensure pgd resides below 4GB. */
- int rc = xen_create_contiguous_region(
- (unsigned long)pgd, 0, 32);
- BUG_ON(rc);
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+ int rc = xen_create_contiguous_region(
+ (unsigned long)pgd, 0, 32);
+ BUG_ON(rc);
+ }
if (HAVE_SHARED_KERNEL_PMD)
memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
@@ -308,16 +320,20 @@
}
}
-/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- pgd_test_and_unpin(pgd);
+ if (PTRS_PER_PMD > 1) {
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+ xen_destroy_contiguous_region((unsigned long)pgd, 0);
+ } else {
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ pgd_test_and_unpin(pgd);
+ }
}
pgd_t *pgd_alloc(struct mm_struct *mm)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S Fri Feb 24 22:41:08 2006
@@ -569,7 +569,9 @@
.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3: br.cond.sptk .work_pending_syscall_end
+.ret3:
+(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
+ br.cond.sptk .work_pending_syscall_end
strace_error:
ld8 r3=[r2] // load pt_regs.r8
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c Fri Feb 24 22:41:08 2006
@@ -446,6 +446,7 @@
if (early_console_setup(*cmdline_p) == 0)
mark_bsp_online();
+ parse_early_param();
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
@@ -704,6 +705,9 @@
setup_per_cpu_areas (void)
{
/* start_kernel() requires this... */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+ prefill_possible_map();
+#endif
}
/*
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/Kconfig
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Fri Feb 24 22:41:08 2006
@@ -124,6 +124,16 @@
select SWIOTLB
help
This option will compile a kernel compatible with Xen hypervisor
+
+config X86_NO_TSS
+ bool
+ depends on X86_64_XEN
+ default y
+
+config X86_NO_IDT
+ bool
+ depends on X86_64_XEN
+ default y
#
# Define implied options from the CPU selection here
@@ -529,9 +539,7 @@
source "drivers/acpi/Kconfig"
-if !X86_64_XEN
source "arch/x86_64/kernel/cpufreq/Kconfig"
-endif
endmenu
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/syscall32-xen.c Fri Feb 24 22:41:08 2006
@@ -119,7 +119,7 @@
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
- checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)(__KERNEL_CS | 3));
+ checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/apic-xen.c Fri Feb 24 22:41:08 2006
@@ -57,13 +57,6 @@
if (!disable_apic)
ack_APIC_irq();
}
-
-#ifdef CONFIG_XEN
-void switch_APIC_timer_to_ipi(void *cpumask) { }
-EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
-void switch_ipi_to_APIC_timer(void *cpumask) { }
-EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
-#endif
int setup_profiling_timer(unsigned int multiplier)
{
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Fri Feb 24 22:41:08 2006
@@ -61,6 +61,8 @@
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
+
+NMI_MASK = 0x80000000
/*
* C code is not supposed to know about undefined top of stack. Every time
@@ -143,6 +145,18 @@
* #define VGCF_IN_SYSCALL (1<<8)
*/
.macro HYPERVISOR_IRET flag
+ testb $3,1*8(%rsp)
+ jnz 1f
+ testl $NMI_MASK,2*8(%rsp)
+ jnz 1f
+
+ /* Direct iret to kernel space. Correct CS and SS. */
+ orb $3,1*8(%rsp)
+ orb $3,4*8(%rsp)
+ iretq
+
+1: /* Slow iret via hypervisor. */
+ andl $~NMI_MASK, 16(%rsp)
pushq $\flag
jmp hypercall_page + (__HYPERVISOR_iret * 32)
.endm
@@ -510,13 +524,7 @@
jnz restore_all_enable_events # != 0 => reenable event delivery
XEN_PUT_VCPU_INFO(%rsi)
- RESTORE_ARGS 0,8,0
- testb $3,8(%rsp) # check CS
- jnz user_mode
-kernel_mode:
- orb $3,1*8(%rsp)
- iretq
-user_mode:
+ RESTORE_ARGS 0,8,0
HYPERVISOR_IRET 0
/* edi: workmask, edx: work */
@@ -811,6 +819,7 @@
ENTRY(do_nmi_callback)
addq $8, %rsp
call do_nmi
+ orl $NMI_MASK,EFLAGS(%rsp)
RESTORE_REST
XEN_BLOCK_EVENTS(%rsi)
GET_THREAD_INFO(%rcx)
@@ -826,11 +835,6 @@
jnz 14f # process more events if necessary...
XEN_PUT_VCPU_INFO(%rsi)
RESTORE_ARGS 0,8,0
- testb $3,8(%rsp) # check CS
- jnz crit_user_mode
- orb $3,1*8(%rsp)
- iretq
-crit_user_mode:
HYPERVISOR_IRET 0
14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Fri Feb 24 22:41:08 2006
@@ -14,15 +14,6 @@
#include <linux/linkage.h>
-
-.section __xen_guest
- .ascii "GUEST_OS=linux,GUEST_VER=2.6"
- .ascii ",XEN_VER=xen-3.0"
- .ascii ",VIRT_BASE=0xffffffff80000000"
- .ascii ",HYPERCALL_PAGE=0x10a" /* __pa(hypercall_page) >> 12 */
- .ascii ",LOADER=generic"
- .byte 0
-
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/desc.h>
@@ -30,64 +21,21 @@
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-
-/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
- * because we need identity-mapped pages on setup so define __START_KERNEL to
- * 0x100000 for this stage
- *
- */
.text
.code64
.globl startup_64
startup_64:
ENTRY(_start)
- movq %rsi,xen_start_info(%rip)
-
-#ifdef CONFIG_SMP
-ENTRY(startup_64_smp)
-#endif /* CONFIG_SMP */
-
- cld
-
- movq init_rsp(%rip),%rsp
+ movq $(init_thread_union+THREAD_SIZE-8),%rsp
/* zero EFLAGS after setting rsp */
pushq $0
popfq
- movq initial_code(%rip),%rax
- jmp *%rax
- /* SMP bootup changes these two */
- .globl initial_code
-initial_code:
- .quad x86_64_start_kernel
- .globl init_rsp
-init_rsp:
- .quad init_thread_union+THREAD_SIZE-8
-
-ENTRY(early_idt_handler)
- xorl %eax,%eax
- movq 8(%rsp),%rsi # get rip
- movq (%rsp),%rdx
- leaq early_idt_msg(%rip),%rdi
-1: hlt # generate #GP
- jmp 1b
-
-early_idt_msg:
- .asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
-
-#if 0
-ENTRY(lgdt_finish)
- movl $(__USER_DS),%eax # DS/ES contains default USER segment
- movw %ax,%ds
- movw %ax,%es
- movl $(__KERNEL_DS),%eax
- movw %ax,%ss # after changing gdt.
- popq %rax # get the retrun address
- pushq $(__KERNEL_CS)
- pushq %rax
- lretq
-#endif
+ /* rsi is pointer to startup info structure.
+ pass it to C */
+ movq %rsi,%rdi
+ jmp x86_64_start_kernel
ENTRY(stext)
ENTRY(_stext)
@@ -113,39 +61,9 @@
NEXT_PAGE(init_level4_user_pgt)
.fill 512,8,0
- /*
- * In Xen the following pre-initialized pgt entries are re-initialized.
- */
NEXT_PAGE(level3_kernel_pgt)
- .fill 510,8,0
- /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad phys_level2_kernel_pgt | 0x007
- .fill 1,8,0
+ .fill 512,8,0
-NEXT_PAGE(level2_ident_pgt)
- /* 40MB for bootup. */
- i = 0
- .rept 20
- .quad i << 21 | 0x083
- i = i + 1
- .endr
- /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
- .globl temp_boot_pmds
-temp_boot_pmds:
- .fill 492,8,0
-
-NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- i = 0
- .rept 20
- .quad i << 21 | 0x183
- i = i + 1
- .endr
- /* Module mapping starts here */
- .fill 492,8,0
-
/*
* This is used for vsyscall area mapping as we have a different
* level4 page table for user.
@@ -153,78 +71,16 @@
NEXT_PAGE(level3_user_pgt)
.fill 512,8,0
-NEXT_PAGE(cpu_gdt_table)
-/* The TLS descriptors are currently at a different place compared to i386.
- Hopefully nobody expects them at a fixed place (Wine?) */
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0 /* unused */
- .quad 0x00affa000000ffff /* __KERNEL_CS */
- .quad 0x00cff2000000ffff /* __KERNEL_DS */
- .quad 0x00cffa000000ffff /* __USER32_CS */
- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
- .quad 0x00affa000000ffff /* __USER_CS */
- .quad 0x00cffa000000ffff /* __KERNEL32_CS */
- .quad 0,0 /* TSS */
- .quad 0,0 /* LDT */
- .quad 0,0,0 /* three TLS descriptors */
- .quad 0 /* unused */
-gdt_end:
-#if 0
- /* asm/segment.h:GDT_ENTRIES must match this */
- /* This should be a multiple of the cache line size */
- /* GDTs of other CPUs are now dynamically allocated */
-
- /* zero the remaining page */
- .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
-#endif
+NEXT_PAGE(level2_kernel_pgt)
+ .fill 512,8,0
NEXT_PAGE(empty_zero_page)
-
-NEXT_PAGE(level3_physmem_pgt)
- .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
+ .skip PAGE_SIZE
NEXT_PAGE(hypercall_page)
-.if (phys_hypercall_page - 0x10a000)
- /* cause compiler error if the hypercall_page is at a
- * different address than expected. */
- .quad __adjust_hypercall_page_in_header
-.endif
.fill 512,8,0
#undef NEXT_PAGE
-
- .data
-
-#ifndef CONFIG_XEN
-#ifdef CONFIG_ACPI_SLEEP
- .align PAGE_SIZE
-ENTRY(wakeup_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-#endif
-
-#ifndef CONFIG_HOTPLUG_CPU
- __INITDATA
-#endif
- /*
- * This default setting generates an ident mapping at address 0x100000
- * and a mapping for the kernel that precisely maps virtual address
- * 0xffffffff80000000 to physical address 0x000000. (always using
- * 2Mbyte large pages provided by PAE mode)
- */
- .align PAGE_SIZE
-ENTRY(boot_level4_pgt)
- .quad phys_level3_ident_pgt | 0x007
- .fill 255,8,0
- .quad phys_level3_physmem_pgt | 0x007
- .fill 254,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad phys_level3_kernel_pgt | 0x007
-#endif
.data
@@ -246,11 +102,55 @@
* Also sysret mandates a special GDT layout
*/
-.align PAGE_SIZE
+ .section .data.page_aligned, "aw"
+ .align PAGE_SIZE
-ENTRY(idt_table)
- .rept 256
- .quad 0
- .quad 0
- .endr
+/* The TLS descriptors are currently at a different place compared to i386.
+ Hopefully nobody expects them at a fixed place (Wine?) */
+ENTRY(cpu_gdt_table)
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0 /* unused */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x00cffa000000ffff /* __USER32_CS */
+ .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
+ .quad 0x00affa000000ffff /* __USER_CS */
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
+ .quad 0,0 /* TSS */
+ .quad 0,0 /* LDT */
+ .quad 0,0,0 /* three TLS descriptors */
+ .quad 0 /* unused */
+gdt_end:
+ /* asm/segment.h:GDT_ENTRIES must match this */
+ /* This should be a multiple of the cache line size */
+ /* GDTs of other CPUs are now dynamically allocated */
+
+ /* zero the remaining page */
+ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+
+/*
+ * __xen_guest information
+ */
+.macro utoh value
+ .if (\value) < 0 || (\value) >= 0x10
+ utoh (((\value)>>4)&0x0fffffffffffffff)
+ .endif
+ .if ((\value) & 0xf) < 10
+ .byte '0' + ((\value) & 0xf)
+ .else
+ .byte 'A' + ((\value) & 0xf) - 10
+ .endif
+.endm
+
+.section __xen_guest
+ .ascii "GUEST_OS=linux,GUEST_VER=2.6"
+ .ascii ",XEN_VER=xen-3.0"
+ .ascii ",VIRT_BASE=0x"; utoh __START_KERNEL_map
+ .ascii ",HYPERCALL_PAGE=0x"; utoh (phys_hypercall_page >> PAGE_SHIFT)
+ .ascii ",FEATURES=writable_page_tables"
+ .ascii "|writable_descriptor_tables"
+ .ascii "|auto_translated_physmap"
+ .ascii "|supervisor_mode_kernel"
+ .ascii ",LOADER=generic"
+ .byte 0
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Feb 24 22:41:08 2006
@@ -47,7 +47,7 @@
static void __init copy_bootdata(char *real_mode_data)
{
-#if 0
+#ifndef CONFIG_XEN
int new_data;
char * command_line;
@@ -63,7 +63,6 @@
}
command_line = (char *) ((u64)(new_data));
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
- printk("Bootdata ok (command line is %s)\n", saved_command_line);
#else
int max_cmdline;
@@ -71,8 +70,8 @@
max_cmdline = COMMAND_LINE_SIZE;
memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
saved_command_line[max_cmdline-1] = '\0';
+#endif
printk("Bootdata ok (command line is %s)\n", saved_command_line);
-#endif
}
static void __init setup_boot_cpu_data(void)
@@ -98,6 +97,7 @@
char *s;
int i;
+ xen_start_info = (struct start_info *)real_mode_data;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
phys_to_machine_mapping =
(unsigned long *)xen_start_info->mfn_list;
@@ -105,9 +105,9 @@
xen_start_info->nr_pt_frames;
}
+#if 0
for (i = 0; i < 256; i++)
set_intr_gate(i, early_idt_handler);
-#if 0
asm volatile("lidt %0" :: "m" (idt_descr));
#endif
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/io_apic-xen.c Fri Feb 24 22:41:08 2006
@@ -30,6 +30,9 @@
#include <linux/mc146818rtc.h>
#include <linux/acpi.h>
#include <linux/sysdev.h>
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
#include <asm/io.h>
#include <asm/smp.h>
@@ -309,6 +312,8 @@
Can be overwritten with "apic"
And another hack to disable the IOMMU on VIA chipsets.
+
+ ... and others. Really should move this somewhere else.
Kludge-O-Rama. */
void __init check_ioapic(void)
@@ -358,6 +363,17 @@
#ifndef CONFIG_XEN
if (apic_runs_main_timer != 0)
break;
+#ifdef CONFIG_ACPI
+ /* Don't do this for laptops right
+ right now because their timer
+ doesn't necessarily tick in C2/3 */
+ if (acpi_fadt.revision >= 3 &&
+ (acpi_fadt.plvl2_lat + acpi_fadt.plvl3_lat) < 1100) {
+ printk(KERN_INFO
+"ATI board detected, but seems to be a laptop. Timer might be shakey, sorry\n");
+ break;
+ }
+#endif
printk(KERN_INFO
"ATI board detected. Using APIC/PM timer.\n");
apic_runs_main_timer = 1;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/mpparse-xen.c Fri Feb 24 22:41:08 2006
@@ -295,9 +295,9 @@
memcpy(str,mpc->mpc_productid,12);
str[12]=0;
- printk(KERN_INFO "Product ID: %s ",str);
-
- printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic);
+ printk("Product ID: %s ",str);
+
+ printk("APIC at: 0x%X\n",mpc->mpc_lapic);
/* save the local APIC address, it might be non-default */
if (!acpi_lapic)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Fri Feb 24 22:41:08 2006
@@ -289,16 +289,28 @@
kprobe_flush_task(me);
if (me->thread.io_bitmap_ptr) {
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+#endif
+#ifdef CONFIG_XEN
+ static physdev_op_t iobmp_op = {
+ .cmd = PHYSDEVOP_SET_IOBITMAP
+ };
+#endif
kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
/*
* Careful, clear this in the TSS too:
*/
+#ifndef CONFIG_X86_NO_TSS
memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
+ put_cpu();
+#endif
+#ifdef CONFIG_XEN
+ HYPERVISOR_physdev_op(&iobmp_op);
+#endif
t->io_bitmap_max = 0;
- put_cpu();
}
}
@@ -463,7 +475,9 @@
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+#endif
physdev_op_t iopl_op, iobmp_op;
multicall_entry_t _mcl[8], *mcl = _mcl;
@@ -482,10 +496,9 @@
/*
* Reload esp0, LDT and the page table pointer:
*/
- tss->rsp0 = next->rsp0;
mcl->op = __HYPERVISOR_stack_switch;
mcl->args[0] = __KERNEL_DS;
- mcl->args[1] = tss->rsp0;
+ mcl->args[1] = next->rsp0;
mcl++;
/*
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup64-xen.c Fri Feb 24 22:41:08 2006
@@ -40,7 +40,9 @@
struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
+#ifndef CONFIG_X86_NO_IDT
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+#endif
char boot_cpu_stack[IRQSTACKSIZE]
__attribute__((section(".bss.page_aligned")));
@@ -155,13 +157,7 @@
void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
{
-#ifdef CONFIG_SMP
- int cpu = stack_smp_processor_id();
-#else
- int cpu = smp_processor_id();
-#endif
-
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+ asm volatile("lgdt %0" :: "m" (*gdt_descr));
asm volatile("lidt %0" :: "m" (idt_descr));
}
#endif
@@ -203,8 +199,10 @@
pda->irqstackptr += IRQSTACKSIZE-64;
}
+#ifndef CONFIG_X86_NO_TSS
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
__attribute__((section(".bss.page_aligned")));
+#endif
/* May not be marked __init: used by software suspend */
void syscall_init(void)
@@ -246,18 +244,23 @@
void __cpuinit cpu_init (void)
{
int cpu = stack_smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct *t = &per_cpu(init_tss, cpu);
unsigned long v;
char *estacks = NULL;
+ unsigned i;
+#endif
struct task_struct *me;
- int i;
/* CPU 0 is initialised in head64.c */
if (cpu != 0) {
pda_init(cpu);
zap_low_mappings(cpu);
- } else
+ }
+#ifndef CONFIG_X86_NO_TSS
+ else
estacks = boot_exception_stacks;
+#endif
me = current;
@@ -278,12 +281,7 @@
#endif
cpu_gdt_descr[cpu].size = GDT_SIZE;
-#ifndef CONFIG_XEN
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
-#else
cpu_gdt_init(&cpu_gdt_descr[cpu]);
-#endif
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
@@ -294,6 +292,7 @@
check_efer();
+#ifndef CONFIG_X86_NO_TSS
/*
* set up and load the per-CPU TSS
*/
@@ -330,6 +329,7 @@
*/
for (i = 0; i <= IO_BITMAP_LONGS; i++)
t->io_bitmap[i] = ~0UL;
+#endif
atomic_inc(&init_mm.mm_count);
me->active_mm = &init_mm;
@@ -337,8 +337,10 @@
BUG();
enter_lazy_tlb(&init_mm, me);
+#ifndef CONFIG_X86_NO_TSS
+ set_tss_desc(cpu, t);
+#endif
#ifndef CONFIG_XEN
- set_tss_desc(cpu, t);
load_TR_desc();
#endif
load_LDT(&init_mm.context);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c Fri Feb 24 22:41:08 2006
@@ -47,7 +47,9 @@
#include <asm/proto.h>
#include <asm/nmi.h>
+#ifndef CONFIG_X86_NO_IDT
extern struct gate_struct idt_table[256];
+#endif
asmlinkage void divide_error(void);
asmlinkage void debug(void);
@@ -134,6 +136,7 @@
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, const char **idp)
{
+#ifndef CONFIG_X86_NO_TSS
static char ids[][8] = {
[DEBUG_STACK - 1] = "#DB",
[NMI_STACK - 1] = "NMI",
@@ -185,6 +188,7 @@
}
#endif
}
+#endif
return NULL;
}
@@ -948,28 +952,28 @@
* specify <dpl>|4 in the second field.
*/
static trap_info_t trap_table[] = {
- { 0, 0|4, (__KERNEL_CS|0x3), (unsigned long)divide_error },
- { 1, 0|4, (__KERNEL_CS|0x3), (unsigned long)debug },
- { 3, 3|4, (__KERNEL_CS|0x3), (unsigned long)int3 },
- { 4, 3|4, (__KERNEL_CS|0x3), (unsigned long)overflow },
- { 5, 0|4, (__KERNEL_CS|0x3), (unsigned long)bounds },
- { 6, 0|4, (__KERNEL_CS|0x3), (unsigned long)invalid_op },
- { 7, 0|4, (__KERNEL_CS|0x3), (unsigned long)device_not_available },
- { 9, 0|4, (__KERNEL_CS|0x3), (unsigned long)coprocessor_segment_overrun},
- { 10, 0|4, (__KERNEL_CS|0x3), (unsigned long)invalid_TSS },
- { 11, 0|4, (__KERNEL_CS|0x3), (unsigned long)segment_not_present },
- { 12, 0|4, (__KERNEL_CS|0x3), (unsigned long)stack_segment },
- { 13, 0|4, (__KERNEL_CS|0x3), (unsigned long)general_protection },
- { 14, 0|4, (__KERNEL_CS|0x3), (unsigned long)page_fault },
- { 15, 0|4, (__KERNEL_CS|0x3), (unsigned long)spurious_interrupt_bug },
- { 16, 0|4, (__KERNEL_CS|0x3), (unsigned long)coprocessor_error },
- { 17, 0|4, (__KERNEL_CS|0x3), (unsigned long)alignment_check },
+ { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error },
+ { 1, 0|4, __KERNEL_CS, (unsigned long)debug },
+ { 3, 3|4, __KERNEL_CS, (unsigned long)int3 },
+ { 4, 3|4, __KERNEL_CS, (unsigned long)overflow },
+ { 5, 0|4, __KERNEL_CS, (unsigned long)bounds },
+ { 6, 0|4, __KERNEL_CS, (unsigned long)invalid_op },
+ { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available },
+ { 9, 0|4, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun},
+ { 10, 0|4, __KERNEL_CS, (unsigned long)invalid_TSS },
+ { 11, 0|4, __KERNEL_CS, (unsigned long)segment_not_present },
+ { 12, 0|4, __KERNEL_CS, (unsigned long)stack_segment },
+ { 13, 0|4, __KERNEL_CS, (unsigned long)general_protection },
+ { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault },
+ { 15, 0|4, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
+ { 16, 0|4, __KERNEL_CS, (unsigned long)coprocessor_error },
+ { 17, 0|4, __KERNEL_CS, (unsigned long)alignment_check },
#ifdef CONFIG_X86_MCE
- { 18, 0|4, (__KERNEL_CS|0x3), (unsigned long)machine_check },
-#endif
- { 19, 0|4, (__KERNEL_CS|0x3), (unsigned long)simd_coprocessor_error },
+ { 18, 0|4, __KERNEL_CS, (unsigned long)machine_check },
+#endif
+ { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
#ifdef CONFIG_IA32_EMULATION
- { IA32_SYSCALL_VECTOR, 3|4, (__KERNEL_CS|0x3), (unsigned long)ia32_syscall},
+ { IA32_SYSCALL_VECTOR, 3|4, __KERNEL_CS, (unsigned long)ia32_syscall},
#endif
{ 0, 0, 0, 0 }
};
@@ -984,10 +988,6 @@
printk("HYPERVISOR_set_trap_table faild: error %d\n",
ret);
-#ifdef CONFIG_IA32_EMULATION
- set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
-#endif
-
/*
* Should be a barrier for any external CPU state.
*/
@@ -997,12 +997,6 @@
void smp_trap_init(trap_info_t *trap_ctxt)
{
trap_info_t *t = trap_table;
- int i;
-
- for (i = 0; i < 256; i++) {
- trap_ctxt[i].vector = i;
- trap_ctxt[i].cs = FLAT_KERNEL_CS;
- }
for (t = trap_table; t->address; t++) {
trap_ctxt[t->vector].flags = t->flags;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Feb 24 22:41:08 2006
@@ -152,10 +152,7 @@
pmd_t *pmd;
pte_t *pte;
- asm("movq %%cr3,%0" : "=r" (pgd));
- pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+ pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd += pgd_index(address);
if (bad_address(pgd)) goto bad;
printk("PGD %lx ", pgd_val(*pgd));
@@ -261,9 +258,7 @@
/* On Xen the line below does not always work. Needs investigating! */
/*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
- asm("movq %%cr3,%0" : "=r" (pgd));
- pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+ pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd += pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Fri Feb 24 22:41:08 2006
@@ -53,7 +53,7 @@
down(&chip->buffer_mutex);
atomic_set(&chip->data_pending, 0);
- memset(chip->data_buffer, 0, chip->vendor->buffersize);
+ memset(chip->data_buffer, 0, get_chip_buffersize(chip));
up(&chip->buffer_mutex);
}
@@ -352,7 +352,7 @@
spin_unlock(&driver_lock);
- chip->data_buffer = kmalloc(chip->vendor->buffersize * sizeof(u8), GFP_KERNEL);
+ chip->data_buffer = kmalloc(get_chip_buffersize(chip) * sizeof(u8), GFP_KERNEL);
if (chip->data_buffer == NULL) {
chip->num_opens--;
put_device(chip->dev);
@@ -400,8 +400,8 @@
down(&chip->buffer_mutex);
- if (in_size > chip->vendor->buffersize)
- in_size = chip->vendor->buffersize;
+ if (in_size > get_chip_buffersize(chip))
+ in_size = get_chip_buffersize(chip);
if (copy_from_user
(chip->data_buffer, (void __user *) buf, in_size)) {
@@ -411,7 +411,7 @@
/* atomic tpm command send and result receive */
out_size = tpm_transmit(chip, chip->data_buffer,
- chip->vendor->buffersize);
+ get_chip_buffersize(chip));
atomic_set(&chip->data_pending, out_size);
atomic_set(&chip->data_position, 0);
@@ -432,8 +432,6 @@
int ret_size;
int pos, pending = 0;
- del_singleshot_timer_sync(&chip->user_read_timer);
- flush_scheduled_work();
ret_size = atomic_read(&chip->data_pending);
if (ret_size > 0) { /* relay data */
if (size < ret_size)
@@ -457,6 +455,7 @@
if ( ret_size <= 0 || pending == 0 ) {
atomic_set( &chip->data_pending, 0 );
del_singleshot_timer_sync(&chip->user_read_timer);
+ flush_scheduled_work();
}
return ret_size;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Fri Feb 24 22:41:08 2006
@@ -101,6 +101,11 @@
outb(value & 0xFF, base+1);
}
+static inline u32 get_chip_buffersize(struct tpm_chip *chip)
+{
+ return chip->vendor->buffersize;
+}
+
extern int tpm_register_hardware(struct device *,
struct tpm_vendor_specific *);
extern int tpm_open(struct inode *, struct file *);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri Feb 24 22:41:08 2006
@@ -25,6 +25,7 @@
#include <xen/tpmfe.h>
#include <linux/device.h>
#include <linux/interrupt.h>
+#include <linux/platform_device.h>
#include "tpm.h"
/* read status bits */
@@ -455,9 +456,7 @@
.buffersize = 64 * 1024,
};
-static struct device tpm_device = {
- .bus_id = "vtpm",
-};
+static struct platform_device *pdev;
static struct tpmfe_device tpmfe = {
.receive = tpm_recv,
@@ -477,23 +476,22 @@
* driver
*/
if ((rc = tpm_fe_register_receiver(&tpmfe)) < 0) {
- return rc;
+ goto err_exit;
}
/*
* Register our device with the system.
*/
- if ((rc = device_register(&tpm_device)) < 0) {
- tpm_fe_unregister_receiver();
- return rc;
+ pdev = platform_device_register_simple("tpm_vtpm", -1, NULL, 0);
+ if (IS_ERR(pdev)) {
+ rc = PTR_ERR(pdev);
+ goto err_unreg_fe;
}
tpm_xen.buffersize = tpmfe.max_tx_size;
- if ((rc = tpm_register_hardware(&tpm_device, &tpm_xen)) < 0) {
- device_unregister(&tpm_device);
- tpm_fe_unregister_receiver();
- return rc;
+ if ((rc = tpm_register_hardware(&pdev->dev, &tpm_xen)) < 0) {
+ goto err_unreg_pdev;
}
dataex.current_request = NULL;
@@ -508,13 +506,25 @@
disconnect_time = jiffies;
return 0;
+
+
+err_unreg_pdev:
+ platform_device_unregister(pdev);
+err_unreg_fe:
+ tpm_fe_unregister_receiver();
+
+err_exit:
+ return rc;
}
static void __exit cleanup_xen(void)
{
- tpm_remove_hardware(&tpm_device);
- device_unregister(&tpm_device);
- tpm_fe_unregister_receiver();
+ struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
+ if (chip) {
+ tpm_remove_hardware(chip->dev);
+ platform_device_unregister(pdev);
+ tpm_fe_unregister_receiver();
+ }
}
module_init(init_xen);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/char/tty_io.c
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c Fri Feb 24 22:41:08 2006
@@ -1843,7 +1843,6 @@
tty_closing = tty->count <= 1;
o_tty_closing = o_tty &&
(o_tty->count <= (pty_master ? 1 : 0));
- up(&tty_sem);
do_sleep = 0;
if (tty_closing) {
@@ -1871,6 +1870,7 @@
printk(KERN_WARNING "release_dev: %s: read/write wait queue "
"active!\n", tty_name(tty, buf));
+ up(&tty_sem);
schedule();
}
@@ -1879,8 +1879,6 @@
* both sides, and we've completed the last operation that could
* block, so it's safe to proceed with closing.
*/
-
- down(&tty_sem);
if (pty_master) {
if (--o_tty->count < 0) {
printk(KERN_WARNING "release_dev: bad pty slave count "
@@ -1894,7 +1892,6 @@
tty->count, tty_name(tty, buf));
tty->count = 0;
}
- up(&tty_sem);
/*
* We've decremented tty->count, so we need to remove this file
@@ -1938,6 +1935,8 @@
} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
}
+
+ up(&tty_sem);
/* check whether both sides are closing ... */
if (!tty_closing || (o_tty && !o_tty_closing))
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/serial/Kconfig
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig Fri Feb 24 22:41:08 2006
@@ -903,8 +903,8 @@
something like this to connect more than two modems to your Linux
box, for instance in order to become a dial-in server. This driver
supports PCI boards only.
- If you have a card like this, say Y here and read the file
- <file:Documentation/jsm.txt>.
+
+ If you have a card like this, say Y here, otherwise say N.
To compile this driver as a module, choose M here: the
module will be called jsm.
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Feb 24 22:41:08 2006
@@ -726,8 +726,7 @@
int j;
/* Stage 1: Make a safe copy of the shadow state. */
- copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
- BUG_ON(copy == NULL);
+ copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL);
memcpy(copy, info->shadow, sizeof(info->shadow));
/* Stage 2: Set up free list. */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Fri Feb 24 22:41:08 2006
@@ -222,25 +222,22 @@
}
int
-gnttab_grant_foreign_transfer(domid_t domid)
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
int ref;
if (unlikely((ref = get_free_entry()) == -1))
return -ENOSPC;
-
- shared[ref].frame = 0;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_accept_transfer;
+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
return ref;
}
void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
-{
- shared[ref].frame = 0;
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
+{
+ shared[ref].frame = pfn;
shared[ref].domid = domid;
wmb();
shared[ref].flags = GTF_accept_transfer;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Fri Feb 24 22:41:08 2006
@@ -188,7 +188,7 @@
ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
#else /* __x86_64__ */
- ctxt.user_regs.cs = __KERNEL_CS | 3;
+ ctxt.user_regs.cs = __KERNEL_CS;
ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
ctxt.kernel_ss = __KERNEL_DS;
@@ -237,7 +237,6 @@
#ifdef __x86_64__
cpu_pda(cpu)->pcurrent = idle;
cpu_pda(cpu)->cpunumber = cpu;
- per_cpu(init_tss,cpu).rsp0 = idle->thread.rsp;
clear_ti_thread_flag(idle->thread_info, TIF_FORK);
#endif
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Fri Feb 24 22:41:08 2006
@@ -137,10 +137,13 @@
sprintf(dev_name, "vif0.%d", i);
dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
+ if (!dev1)
+ return err;
+
sprintf(dev_name, "veth%d", i);
dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
- if ((dev1 == NULL) || (dev2 == NULL))
- goto fail;
+ if (!dev2)
+ goto fail_netdev2;
loopback_construct(dev1, dev2);
loopback_construct(dev2, dev1);
@@ -169,8 +172,9 @@
return 0;
fail:
- kfree(dev1);
- kfree(dev2);
+ free_netdev(dev2);
+ fail_netdev2:
+ free_netdev(dev1);
return err;
}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Feb 24 22:41:08 2006
@@ -587,25 +587,23 @@
BUG_ON((signed short)ref < 0);
np->grant_rx_ref[id] = ref;
gnttab_grant_foreign_transfer_ref(ref,
- np->xbdev->otherend_id);
+ np->xbdev->otherend_id,
+ __pa(skb->head) >> PAGE_SHIFT);
RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
rx_pfn_array[i] = virt_to_mfn(skb->head);
- /* Remove this page from map before passing back to Xen. */
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
- INVALID_P2M_ENTRY);
-
- MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
- __pte(0), 0);
- }
-
- /* After all PTEs have been zapped we blow away stale TLB entries. */
- rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
- /* Give away a batch of pages. */
- rx_mcl[i].op = __HYPERVISOR_memory_op;
- rx_mcl[i].args[0] = XENMEM_decrease_reservation;
- rx_mcl[i].args[1] = (unsigned long)&reservation;
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remove this page before passing back to Xen. */
+ set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
+ INVALID_P2M_ENTRY);
+ MULTI_update_va_mapping(rx_mcl+i,
+ (unsigned long)skb->head,
+ __pte(0), 0);
+ }
+ }
+
+ /* Tell the ballon driver what is going on. */
+ balloon_update_driver_allowance(i);
reservation.extent_start = rx_pfn_array;
reservation.nr_extents = i;
@@ -613,15 +611,27 @@
reservation.address_bits = 0;
reservation.domid = DOMID_SELF;
- /* Tell the ballon driver what is going on. */
- balloon_update_driver_allowance(i);
-
- /* Zap PTEs and give away pages in one big multicall. */
- (void)HYPERVISOR_multicall(rx_mcl, i+1);
-
- /* Check return status of HYPERVISOR_memory_op(). */
- if (unlikely(rx_mcl[i].result != i))
- panic("Unable to reduce memory reservation\n");
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* After all PTEs have been zapped, flush the TLB. */
+ rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+ UVMF_TLB_FLUSH|UVMF_ALL;
+
+ /* Give away a batch of pages. */
+ rx_mcl[i].op = __HYPERVISOR_memory_op;
+ rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_memory_op(). */
+ if (unlikely(rx_mcl[i].result != i))
+ panic("Unable to reduce memory reservation\n");
+ } else {
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation) != i)
+ panic("Unable to reduce memory reservation\n");
+ }
/* Above is a suitable barrier to ensure backend will see requests. */
np->rx.req_prod_pvt = req_prod + i;
@@ -802,17 +812,19 @@
np->stats.rx_packets++;
np->stats.rx_bytes += rx->status;
- /* Remap the page. */
- MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
- pfn_pte_ma(mfn, PAGE_KERNEL), 0);
- mcl++;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+ pfn_pte_ma(mfn, PAGE_KERNEL),
+ 0);
+ mcl++;
mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
| MMU_MACHPHYS_UPDATE;
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
- set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, mfn);
+ set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
+ mfn);
}
__skb_queue_tail(&rxq, skb);
@@ -1003,7 +1015,8 @@
if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET)
continue;
gnttab_grant_foreign_transfer_ref(
- np->grant_rx_ref[i], np->xbdev->otherend_id);
+ np->grant_rx_ref[i], np->xbdev->otherend_id,
+ __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT);
RING_GET_REQUEST(&np->rx, requeue_idx)->gref =
np->grant_rx_ref[i];
RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Fri Feb 24 22:41:08 2006
@@ -13,9 +13,6 @@
#include "common.h"
#include <xen/balloon.h>
-
-#define TPMIF_HASHSZ (2 << 5)
-#define TPMIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(TPMIF_HASHSZ-1))
static kmem_cache_t *tpmif_cachep;
int num_frontends = 0;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Feb 24 22:41:08 2006
@@ -65,8 +65,6 @@
int isuserbuffer,
u32 left);
-
-#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE
#define MIN(x,y) (x) < (y) ? (x) : (y)
@@ -973,8 +971,6 @@
static void tpm_tx_action(unsigned long unused);
static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0);
-#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE
-
static struct list_head tpm_schedule_list;
static spinlock_t tpm_schedule_list_lock;
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h Fri Feb 24 22:41:08 2006
@@ -61,6 +61,7 @@
"rorl $16,%1" \
: "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
+#ifndef CONFIG_X86_NO_TSS
static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
{
_set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
@@ -68,6 +69,7 @@
}
#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
+#endif
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Fri Feb 24 22:41:08 2006
@@ -20,7 +20,7 @@
* Leave one empty page between vmalloc'ed areas and
* the start of the fixmap.
*/
-#define __FIXADDR_TOP (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE)
+extern unsigned long __FIXADDR_TOP;
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
@@ -53,7 +53,6 @@
*/
enum fixed_addresses {
FIX_HOLE,
- FIX_VSYSCALL,
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
@@ -99,8 +98,10 @@
__end_of_fixed_addresses
};
-extern void __set_fixmap(
- enum fixed_addresses idx, maddr_t phys, pgprot_t flags);
+extern void __set_fixmap(enum fixed_addresses idx,
+ maddr_t phys, pgprot_t flags);
+
+extern void set_fixaddr_top(unsigned long top);
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
@@ -122,14 +123,6 @@
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-/*
- * This is the range that is readable by user mode, and things
- * acting like user mode such as get_user_pages.
- */
-#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
-
extern void __this_fixmap_does_not_exist(void);
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Fri Feb 24 22:41:08 2006
@@ -60,9 +60,6 @@
/* arch/xen/i386/kernel/hypervisor.c */
void do_hypervisor_callback(struct pt_regs *regs);
-
-/* arch/xen/i386/kernel/head.S */
-void lgdt_finish(void);
/* arch/xen/i386/mm/hypervisor.c */
/*
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Feb 24 22:41:08 2006
@@ -294,7 +294,7 @@
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM (HYPERVISOR_VIRT_START-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
@@ -317,6 +317,8 @@
#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
+#define __HAVE_ARCH_GATE_AREA 1
+
#endif /* __KERNEL__ */
#include <asm-generic/page.h>
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h Fri Feb 24 22:41:08 2006
@@ -91,8 +91,10 @@
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
+#ifndef CONFIG_X86_NO_TSS
extern struct tss_struct doublefault_tss;
DECLARE_PER_CPU(struct tss_struct, init_tss);
+#endif
#ifdef CONFIG_SMP
extern struct cpuinfo_x86 cpu_data[];
@@ -343,7 +345,9 @@
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#ifndef CONFIG_X86_NO_TSS
#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#endif
#define INVALID_IO_BITMAP_OFFSET 0x8000
#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
@@ -401,6 +405,7 @@
struct thread_struct;
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct {
unsigned short back_link,__blh;
unsigned long esp0;
@@ -446,6 +451,7 @@
*/
unsigned long stack[64];
} __attribute__((packed));
+#endif
#define ARCH_MIN_TASKALIGN 16
@@ -482,6 +488,7 @@
.io_bitmap_ptr = NULL, \
}
+#ifndef CONFIG_X86_NO_TSS
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
@@ -496,16 +503,23 @@
.io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
}
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
{
tss->esp0 = thread->esp0;
+#ifdef CONFIG_X86_SYSENTER
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
if (unlikely(tss->ss1 != thread->sysenter_cs)) {
tss->ss1 = thread->sysenter_cs;
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
- HYPERVISOR_stack_switch(tss->ss0, tss->esp0);
-}
+#endif
+}
+#define load_esp0(tss, thread) \
+ __load_esp0(tss, thread)
+#else
+#define load_esp0(tss, thread) \
+ HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)
+#endif
#define start_thread(regs, new_eip, new_esp) do { \
__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Fri Feb 24 22:41:08 2006
@@ -60,10 +60,12 @@
#define GDT_ENTRY_KERNEL_BASE 12
#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8 + 1)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8 + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
@@ -85,9 +87,11 @@
/* Simple and small GDT entries for booting only */
-#define __BOOT_CS FLAT_KERNEL_CS
+#define GDT_ENTRY_BOOT_CS 2
+#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
-#define __BOOT_DS FLAT_KERNEL_DS
+#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
+#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
/* The PnP BIOS entries in the GDT */
#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Feb 24 22:41:08 2006
@@ -25,6 +25,7 @@
clear_bit(X86_FEATURE_SEP, c->x86_capability);
if (!(xen_start_info->flags & SIF_PRIVILEGED))
clear_bit(X86_FEATURE_MTRR, c->x86_capability);
+ c->hlt_works_ok = 0;
}
extern void hypervisor_callback(void);
@@ -33,6 +34,8 @@
static void __init machine_specific_arch_setup(void)
{
+ struct xen_platform_parameters pp;
+
HYPERVISOR_set_callbacks(
__KERNEL_CS, (unsigned long)hypervisor_callback,
__KERNEL_CS, (unsigned long)failsafe_callback);
@@ -40,4 +43,8 @@
HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
machine_specific_modify_cpu_capabilities(&boot_cpu_data);
+
+ if (HYPERVISOR_xen_version(XENVER_platform_parameters,
+ &pp) == 0)
+ set_fixaddr_top(pp.virt_start - PAGE_SIZE);
}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/desc.h Fri Feb 24 22:41:08 2006
@@ -90,7 +90,9 @@
* something other than this.
*/
extern struct desc_struct default_ldt[];
+#ifndef CONFIG_X86_NO_IDT
extern struct gate_struct idt_table[];
+#endif
extern struct desc_ptr cpu_gdt_descr[];
/* the cpu gdt accessor */
@@ -113,6 +115,7 @@
memcpy(adr, &s, 16);
}
+#ifndef CONFIG_X86_NO_IDT
static inline void set_intr_gate(int nr, void *func)
{
BUG_ON((unsigned)nr > 0xFF);
@@ -135,6 +138,7 @@
{
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
}
+#endif
static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
unsigned size)
@@ -152,6 +156,7 @@
memcpy(ptr, &d, 16);
}
+#ifndef CONFIG_X86_NO_TSS
static inline void set_tss_desc(unsigned cpu, void *addr)
{
/*
@@ -165,6 +170,7 @@
(unsigned long)addr, DESC_TSS,
IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
}
+#endif
static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/floppy.h Fri Feb 24 22:41:08 2006
@@ -14,7 +14,6 @@
#include <linux/vmalloc.h>
-
/*
* The DMA channel used by the floppy controller cannot access data at
* addresses >= 16MB
@@ -25,8 +24,6 @@
*/
#define _CROSS_64KB(a,s,vdma) \
(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
-
-#include <linux/vmalloc.h>
/* XEN: Hit DMA paths on the head. This trick from asm-m68k/floppy.h. */
#include <asm/dma.h>
@@ -43,8 +40,12 @@
#define fd_disable_irq() disable_irq(FLOPPY_IRQ)
#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL)
#define fd_get_dma_residue() vdma_get_dma_residue(FLOPPY_DMA)
-#define fd_dma_mem_alloc(size) vdma_mem_alloc(size)
-#define fd_dma_mem_free(addr, size) vdma_mem_free(addr, size)
+/*
+ * Do not use vmalloc/vfree: floppy_release_irq_and_dma() gets called from
+ * softirq context via motor_off_callback. A generic bug we happen to trigger.
+ */
+#define fd_dma_mem_alloc(size) __get_free_pages(GFP_KERNEL, get_order(size))
+#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
#define fd_dma_setup(addr, size, mode, io) vdma_dma_setup(addr, size, mode, io)
static int virtual_dma_count;
@@ -137,7 +138,7 @@
"floppy", NULL);
}
-
+#if 0
static unsigned long vdma_mem_alloc(unsigned long size)
{
return (unsigned long) vmalloc(size);
@@ -148,6 +149,7 @@
{
vfree((void *)addr);
}
+#endif
static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
{
@@ -168,7 +170,7 @@
{
use_virtual_dma = 1;
can_use_virtual_dma = 1;
- return 0x340;
+ return 0x3f0;
}
/*
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Fri Feb 24 22:41:08 2006
@@ -196,7 +196,9 @@
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+#ifndef CONFIG_X86_NO_TSS
#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#endif
#define INVALID_IO_BITMAP_OFFSET 0x8000
struct i387_fxsave_struct {
@@ -217,6 +219,7 @@
struct i387_fxsave_struct fxsave;
};
+#ifndef CONFIG_X86_NO_TSS
struct tss_struct {
u32 reserved1;
u64 rsp0;
@@ -240,8 +243,10 @@
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
} __attribute__((packed)) ____cacheline_aligned;
+DECLARE_PER_CPU(struct tss_struct,init_tss);
+#endif
+
extern struct cpuinfo_x86 boot_cpu_data;
-DECLARE_PER_CPU(struct tss_struct,init_tss);
#ifdef CONFIG_X86_VSMP
#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
@@ -283,9 +288,11 @@
.rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
}
+#ifndef CONFIG_X86_NO_TSS
#define INIT_TSS { \
.rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \
}
+#endif
#define INIT_MMAP \
{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/system.h Fri Feb 24 22:41:08 2006
@@ -182,7 +182,7 @@
#define read_cr3() ({ \
unsigned long __dummy; \
asm("movq %%cr3,%0" : "=r" (__dummy)); \
- return machine_to_phys(__dummy); \
+ machine_to_phys(__dummy); \
})
static inline unsigned long read_cr4(void)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/linux/mm.h Fri Feb 24 22:41:08 2006
@@ -1064,5 +1064,7 @@
void drop_pagecache(void);
void drop_slab(void);
+extern int randomize_va_space;
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Fri Feb 24 22:41:08 2006
@@ -71,7 +71,7 @@
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page);
-int gnttab_grant_foreign_transfer(domid_t domid);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
@@ -98,7 +98,8 @@
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);
-void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid);
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ unsigned long pfn);
#ifdef __ia64__
#define gnttab_map_vaddr(map) __va(map.dev_bus_addr)
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/mm/memory.c Fri Feb 24 22:41:08 2006
@@ -81,6 +81,16 @@
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmalloc_earlyreserve);
+
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+ randomize_va_space = 0;
+ return 0;
+}
+__setup("norandmaps", disable_randmaps);
+
/*
* If a p?d_bad entry is found while walking page tables, report
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/mm/page_alloc.c
--- a/linux-2.6-xen-sparse/mm/page_alloc.c Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c Fri Feb 24 22:41:08 2006
@@ -56,6 +56,7 @@
int percpu_pagelist_fraction;
static void fastcall free_hot_cold_page(struct page *page, int cold);
+static void __free_pages_ok(struct page *page, unsigned int order);
/*
* results with 256, 32 in the lowmem_reserve sysctl:
@@ -169,20 +170,23 @@
* All pages have PG_compound set. All pages have their ->private pointing at
* the head page (even the head page has this).
*
- * The first tail page's ->mapping, if non-zero, holds the address of the
- * compound page's put_page() function.
- *
- * The order of the allocation is stored in the first tail page's ->index
- * This is only for debug at present. This usage means that zero-order pages
- * may not be compound.
- */
+ * The first tail page's ->lru.next holds the address of the compound page's
+ * put_page() function. Its ->lru.prev holds the order of allocation.
+ * This usage means that zero-order pages may not be compound.
+ */
+
+static void free_compound_page(struct page *page)
+{
+ __free_pages_ok(page, (unsigned long)page[1].lru.prev);
+}
+
static void prep_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
- page[1].mapping = NULL;
- page[1].index = order;
+ page[1].lru.next = (void *)free_compound_page; /* set dtor */
+ page[1].lru.prev = (void *)order;
for (i = 0; i < nr_pages; i++) {
struct page *p = page + i;
@@ -196,7 +200,7 @@
int i;
int nr_pages = 1 << order;
- if (unlikely(page[1].index != order))
+ if (unlikely((unsigned long)page[1].lru.prev != order))
bad_page(page);
for (i = 0; i < nr_pages; i++) {
@@ -1539,28 +1543,28 @@
*/
static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
{
- int i, n, val;
+ int n, val;
int min_val = INT_MAX;
int best_node = -1;
- for_each_online_node(i) {
+ /* Use the local node if we haven't already */
+ if (!node_isset(node, *used_node_mask)) {
+ node_set(node, *used_node_mask);
+ return node;
+ }
+
+ for_each_online_node(n) {
cpumask_t tmp;
-
- /* Start from local node */
- n = (node+i) % num_online_nodes();
/* Don't want a node to appear more than once */
if (node_isset(n, *used_node_mask))
continue;
- /* Use the local node if we haven't already */
- if (!node_isset(node, *used_node_mask)) {
- best_node = node;
- break;
- }
-
/* Use the distance array to find the distance */
val = node_distance(node, n);
+
+ /* Penalize nodes under us ("prefer the next node") */
+ val += (n < node);
/* Give preference to headless and unused nodes */
tmp = node_to_cpumask(n);
diff -r d940ec92958d -r 6c43118bdba8 tools/examples/vif-common.sh
--- a/tools/examples/vif-common.sh Fri Feb 24 21:03:07 2006
+++ b/tools/examples/vif-common.sh Fri Feb 24 22:41:08 2006
@@ -125,7 +125,7 @@
#
function ip_of()
{
- ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed 's,/.*,,'
+ ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed 's,/.*,,' | head -1
}
diff -r d940ec92958d -r 6c43118bdba8 tools/examples/vtpm
--- a/tools/examples/vtpm Fri Feb 24 21:03:07 2006
+++ b/tools/examples/vtpm Fri Feb 24 22:41:08 2006
@@ -3,6 +3,7 @@
dir=$(dirname "$0")
. "$dir/vtpm-common.sh"
+vtpm_fatal_error=0
case "$command" in
add)
@@ -19,5 +20,8 @@
;;
esac
-log debug "Successful vTPM operation '$command'."
-success
+if [ $vtpm_fatal_error -eq 0 ]; then
+ log debug "Successful vTPM operation '$command'."
+ success
+fi
+
diff -r d940ec92958d -r 6c43118bdba8 tools/examples/vtpm-common.sh
--- a/tools/examples/vtpm-common.sh Fri Feb 24 21:03:07 2006
+++ b/tools/examples/vtpm-common.sh Fri Feb 24 22:41:08 2006
@@ -173,6 +173,7 @@
local vmname=$1
local inst=$2
local res
+
res=`cat $VTPMDB | \
gawk -vvmname=$vmname \
-vinst=$inst \
@@ -238,6 +239,9 @@
local res
set +e
get_create_reason
+
+ claim_lock vtpmdb
+
find_instance $domname
res=$?
if [ $res -eq 0 ]; then
@@ -262,6 +266,9 @@
vtpm_create $instance
fi
fi
+
+ release_lock vtpmdb
+
if [ "$REASON" == "create" ]; then
vtpm_reset $instance
elif [ "$REASON" == "resume" ]; then
@@ -292,3 +299,5 @@
fi
set -e
}
+
+
diff -r d940ec92958d -r 6c43118bdba8 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm Fri Feb 24 21:03:07 2006
+++ b/tools/examples/xmexample.hvm Fri Feb 24 22:41:08 2006
@@ -29,6 +29,9 @@
#-----------------------------------------------------------------------------
# the number of cpus guest platform has, default=1
#vcpus=1
+
+# enable/disable HVM guest PAE, default=0 (disabled)
+#pae=0
# enable/disable HVM guest ACPI, default=0 (disabled)
#acpi=0
diff -r d940ec92958d -r 6c43118bdba8 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c Fri Feb 24 21:03:07 2006
+++ b/tools/ioemu/target-i386-dm/helper2.c Fri Feb 24 22:41:08 2006
@@ -125,9 +125,8 @@
//the evtchn fd for polling
int evtchn_fd = -1;
-//the evtchn port for polling the notification,
-//should be inputed as bochs's parameter
-evtchn_port_t ioreq_remote_port, ioreq_local_port;
+//which vcpu we are serving
+int send_vcpu = 0;
//some functions to handle the io req packet
void sp_info()
@@ -135,52 +134,62 @@
ioreq_t *req;
int i;
- term_printf("event port: %d\n", shared_page->sp_global.eport);
for ( i = 0; i < vcpus; i++ ) {
req = &(shared_page->vcpu_iodata[i].vp_ioreq);
- term_printf("vcpu %d:\n", i);
+ term_printf("vcpu %d: event port %d\n",
+ i, shared_page->vcpu_iodata[i].vp_eport);
term_printf(" req state: %x, pvalid: %x, addr: %llx, "
"data: %llx, count: %llx, size: %llx\n",
req->state, req->pdata_valid, req->addr,
req->u.data, req->count, req->size);
+ term_printf(" IO totally occurred on this vcpu: %llx\n",
+ req->io_count);
}
}
//get the ioreq packets from share mem
-ioreq_t* __cpu_get_ioreq(void)
+static ioreq_t* __cpu_get_ioreq(int vcpu)
{
ioreq_t *req;
- req = &(shared_page->vcpu_iodata[0].vp_ioreq);
- if (req->state == STATE_IOREQ_READY) {
- req->state = STATE_IOREQ_INPROCESS;
- } else {
- fprintf(logfile, "False I/O request ... in-service already: "
- "%x, pvalid: %x, port: %llx, "
- "data: %llx, count: %llx, size: %llx\n",
- req->state, req->pdata_valid, req->addr,
- req->u.data, req->count, req->size);
- req = NULL;
- }
-
- return req;
+ req = &(shared_page->vcpu_iodata[vcpu].vp_ioreq);
+
+ if ( req->state == STATE_IOREQ_READY )
+ return req;
+
+ fprintf(logfile, "False I/O request ... in-service already: "
+ "%x, pvalid: %x, port: %llx, "
+ "data: %llx, count: %llx, size: %llx\n",
+ req->state, req->pdata_valid, req->addr,
+ req->u.data, req->count, req->size);
+ return NULL;
}
//use poll to get the port notification
//ioreq_vec--out,the
//retval--the number of ioreq packet
-ioreq_t* cpu_get_ioreq(void)
-{
- int rc;
+static ioreq_t* cpu_get_ioreq(void)
+{
+ int i, rc;
evtchn_port_t port;
rc = read(evtchn_fd, &port, sizeof(port));
- if ((rc == sizeof(port)) && (port == ioreq_local_port)) {
+ if ( rc == sizeof(port) ) {
+ for ( i = 0; i < vcpus; i++ )
+ if ( shared_page->vcpu_iodata[i].dm_eport == port )
+ break;
+
+ if ( i == vcpus ) {
+ fprintf(logfile, "Fatal error while trying to get io event!\n");
+ exit(1);
+ }
+
// unmask the wanted port again
- write(evtchn_fd, &ioreq_local_port, sizeof(port));
+ write(evtchn_fd, &port, sizeof(port));
//get the io packet from shared memory
- return __cpu_get_ioreq();
+ send_vcpu = i;
+ return __cpu_get_ioreq(i);
}
//read error or read nothing
@@ -361,6 +370,8 @@
ioreq_t *req = cpu_get_ioreq();
if (req) {
+ req->state = STATE_IOREQ_INPROCESS;
+
if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
if (req->size != 4)
req->u.data &= (1UL << (8 * req->size))-1;
@@ -465,7 +476,7 @@
struct ioctl_evtchn_notify notify;
env->send_event = 0;
- notify.port = ioreq_local_port;
+ notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
(void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
}
}
@@ -488,7 +499,7 @@
{
CPUX86State *env;
struct ioctl_evtchn_bind_interdomain bind;
- int rc;
+ int i, rc;
cpu_exec_init();
qemu_register_reset(qemu_hvm_reset, NULL);
@@ -509,14 +520,17 @@
return NULL;
}
+ /* FIXME: how about if we overflow the page here? */
bind.remote_domain = domid;
- bind.remote_port = ioreq_remote_port;
- rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
- if (rc == -1) {
- fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
- return NULL;
- }
- ioreq_local_port = rc;
+ for ( i = 0; i < vcpus; i++ ) {
+ bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
+ rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+ if ( rc == -1 ) {
+ fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
+ return NULL;
+ }
+ shared_page->vcpu_iodata[i].dm_eport = rc;
+ }
return env;
}
diff -r d940ec92958d -r 6c43118bdba8 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Fri Feb 24 21:03:07 2006
+++ b/tools/ioemu/vl.c Fri Feb 24 22:41:08 2006
@@ -2337,7 +2337,6 @@
QEMU_OPTION_S,
QEMU_OPTION_s,
- QEMU_OPTION_p,
QEMU_OPTION_d,
QEMU_OPTION_l,
QEMU_OPTION_hdachs,
@@ -2414,7 +2413,6 @@
{ "S", 0, QEMU_OPTION_S },
{ "s", 0, QEMU_OPTION_s },
- { "p", HAS_ARG, QEMU_OPTION_p },
{ "d", HAS_ARG, QEMU_OPTION_d },
{ "l", HAS_ARG, QEMU_OPTION_l },
{ "hdachs", HAS_ARG, QEMU_OPTION_hdachs },
@@ -2936,13 +2934,6 @@
{
domid = atoi(optarg);
fprintf(logfile, "domid: %d\n", domid);
- }
- break;
- case QEMU_OPTION_p:
- {
- extern evtchn_port_t ioreq_remote_port;
- ioreq_remote_port = atoi(optarg);
- fprintf(logfile, "eport: %d\n", ioreq_remote_port);
}
break;
case QEMU_OPTION_l:
diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Fri Feb 24 21:03:07 2006
+++ b/tools/libxc/xc_hvm_build.c Fri Feb 24 22:41:08 2006
@@ -20,7 +20,7 @@
#define L3_PROT (_PAGE_PRESENT)
#endif
-#define E820MAX 128
+#define E820MAX 128
#define E820_RAM 1
#define E820_RESERVED 2
@@ -137,7 +137,7 @@
*/
static int set_hvm_info(int xc_handle, uint32_t dom,
unsigned long *pfn_list, unsigned int vcpus,
- unsigned int acpi, unsigned int apic)
+ unsigned int pae, unsigned int acpi, unsigned int apic)
{
char *va_map;
struct hvm_info_table *va_hvm;
@@ -149,7 +149,7 @@
PAGE_SIZE,
PROT_READ|PROT_WRITE,
pfn_list[HVM_INFO_PFN]);
-
+
if ( va_map == NULL )
return -1;
@@ -159,6 +159,7 @@
va_hvm->length = sizeof(struct hvm_info_table);
va_hvm->acpi_enabled = acpi;
va_hvm->apic_enabled = apic;
+ va_hvm->pae_enabled = pae;
va_hvm->nr_vcpus = vcpus;
set_hvm_info_checksum(va_hvm);
@@ -174,9 +175,9 @@
unsigned long nr_pages,
vcpu_guest_context_t *ctxt,
unsigned long shared_info_frame,
- unsigned int control_evtchn,
unsigned int vcpus,
- unsigned int acpi,
+ unsigned int pae,
+ unsigned int acpi,
unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
@@ -190,11 +191,7 @@
xc_mmu_t *mmu = NULL;
int rc;
- unsigned long nr_pt_pages;
-
struct domain_setup_info dsi;
- unsigned long vpt_start;
- unsigned long vpt_end;
unsigned long v_end;
unsigned long shared_page_frame = 0;
@@ -214,20 +211,10 @@
/* memsize is in megabytes */
v_end = (unsigned long)memsize << 20;
-#ifdef __i386__
- nr_pt_pages = 1 + ((memsize + 3) >> 2);
-#else
- nr_pt_pages = 5 + ((memsize + 1) >> 1);
-#endif
- vpt_start = v_end;
- vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
-
printf("VIRTUAL MEMORY ARRANGEMENT:\n"
" Loaded HVM loader: %08lx->%08lx\n"
- " Page tables: %08lx->%08lx\n"
" TOTAL: %08lx->%08lx\n",
dsi.v_kernstart, dsi.v_kernend,
- vpt_start, vpt_end,
dsi.v_start, v_end);
printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
@@ -265,7 +252,7 @@
goto error_out;
}
- if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
+ if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) ) {
fprintf(stderr, "Couldn't set hvm info for HVM guest.\n");
goto error_out;
}
@@ -296,7 +283,19 @@
shared_page_frame)) == 0 )
goto error_out;
memset(sp, 0, PAGE_SIZE);
- sp->sp_global.eport = control_evtchn;
+
+ /* FIXME: how about if we overflow the page here? */
+ for ( i = 0; i < vcpus; i++ ) {
+ unsigned int vp_eport;
+
+ vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
+ if ( vp_eport < 0 ) {
+ fprintf(stderr, "Couldn't get unbound port from VMX guest.\n");
+ goto error_out;
+ }
+ sp->vcpu_iodata[i].vp_eport = vp_eport;
+ }
+
munmap(sp, PAGE_SIZE);
*store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
@@ -343,9 +342,9 @@
uint32_t domid,
int memsize,
const char *image_name,
- unsigned int control_evtchn,
unsigned int vcpus,
- unsigned int acpi,
+ unsigned int pae,
+ unsigned int acpi,
unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
@@ -366,8 +365,8 @@
if ( !strstr(xen_caps, "hvm") )
{
- PERROR("CPU doesn't support HVM extensions or "
- "the extensions are not enabled");
+ PERROR("CPU doesn't support HVM extensions or "
+ "the extensions are not enabled");
goto error_out;
}
@@ -399,8 +398,8 @@
ctxt->flags = VGCF_HVM_GUEST;
if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
- ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
- vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
+ ctxt, op.u.getdomaininfo.shared_info_frame,
+ vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0)
{
ERROR("Error constructing guest OS");
goto error_out;
diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c Fri Feb 24 21:03:07 2006
+++ b/tools/libxc/xc_ia64_stubs.c Fri Feb 24 22:41:08 2006
@@ -16,7 +16,7 @@
#undef __IA64_UL
#define __IA64_UL(x) ((unsigned long)(x))
#undef __ASSEMBLY__
-
+
unsigned long xc_ia64_fpsr_default(void)
{
return FPSR_DEFAULT;
@@ -569,12 +569,14 @@
static int setup_guest( int xc_handle,
uint32_t dom, unsigned long memsize,
char *image, unsigned long image_size,
- unsigned int control_evtchn,
+ uint32_t vcpus,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
unsigned long page_array[2];
shared_iopage_t *sp;
+ int i;
+
// FIXME: initialize pfn list for a temp hack
if (xc_ia64_get_pfn_list(xc_handle, dom, NULL, -1, -1) == -1) {
PERROR("Could not allocate continuous memory");
@@ -612,7 +614,18 @@
page_array[0])) == 0)
goto error_out;
memset(sp, 0, PAGE_SIZE);
- sp->sp_global.eport = control_evtchn;
+
+ for (i = 0; i < vcpus; i++) {
+ uint32_t vp_eport;
+
+ vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
+ if (vp_eport < 0) {
+ fprintf(stderr, "Couldn't get unbound port from VMX guest.\n");
+ goto error_out;
+ }
+ sp->vcpu_iodata[i].vp_eport = vp_eport;
+ }
+
munmap(sp, PAGE_SIZE);
return 0;
@@ -625,10 +638,10 @@
uint32_t domid,
int memsize,
const char *image_name,
- unsigned int control_evtchn,
- unsigned int lapic,
unsigned int vcpus,
+ unsigned int pae,
unsigned int acpi,
+ unsigned int apic,
unsigned int store_evtchn,
unsigned long *store_mfn)
{
@@ -667,8 +680,8 @@
memset(ctxt, 0, sizeof(*ctxt));
- if ( setup_guest(xc_handle, domid, (unsigned long)memsize, image, image_size,
- control_evtchn, store_evtchn, store_mfn ) < 0 ){
+ if ( setup_guest(xc_handle, domid, (unsigned long)memsize, image,
+ image_size, vcpus, store_evtchn, store_mfn ) < 0 ){
ERROR("Error constructing guest OS");
goto error_out;
}
diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Fri Feb 24 21:03:07 2006
+++ b/tools/libxc/xc_load_bin.c Fri Feb 24 22:41:08 2006
@@ -231,6 +231,7 @@
dsi->v_kernstart = dsi->v_start;
dsi->v_kernend = dsi->v_end;
dsi->v_kernentry = image_info->entry_addr;
+ dsi->xen_guest_string = "";
return 0;
}
diff -r d940ec92958d -r 6c43118bdba8 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Fri Feb 24 21:03:07 2006
+++ b/tools/libxc/xenguest.h Fri Feb 24 22:41:08 2006
@@ -40,7 +40,7 @@
int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
unsigned long nr_pfns, unsigned int store_evtchn,
unsigned long *store_mfn, unsigned int console_evtchn,
- unsigned long *console_mfn);
+ unsigned long *console_mfn);
int xc_linux_build(int xc_handle,
uint32_t domid,
@@ -57,8 +57,8 @@
uint32_t domid,
int memsize,
const char *image_name,
- unsigned int control_evtchn,
unsigned int vcpus,
+ unsigned int pae,
unsigned int acpi,
unsigned int apic,
unsigned int store_evtchn,
diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Fri Feb 24 21:03:07 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri Feb 24 22:41:08 2006
@@ -363,23 +363,24 @@
{
uint32_t dom;
char *image;
- int control_evtchn, store_evtchn;
+ int store_evtchn;
int memsize;
int vcpus = 1;
+ int pae = 0;
int acpi = 0;
int apic = 0;
unsigned long store_mfn = 0;
- static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
- "memsize", "image", "vcpus", "acpi", "apic",
+ static char *kwd_list[] = { "dom", "store_evtchn",
+ "memsize", "image", "vcpus", "pae", "acpi", "apic",
NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
- &dom, &control_evtchn, &store_evtchn,
- &memsize, &image, &vcpus, &acpi, &apic) )
- return NULL;
-
- if ( xc_hvm_build(self->xc_handle, dom, memsize, image, control_evtchn,
- vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiisiiii", kwd_list,
+ &dom, &store_evtchn, &memsize,
+ &image, &vcpus, &pae, &acpi, &apic) )
+ return NULL;
+
+ if ( xc_hvm_build(self->xc_handle, dom, memsize, image,
+ vcpus, pae, acpi, apic, store_evtchn, &store_mfn) != 0 )
return PyErr_SetFromErrno(xc_error);
return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Fri Feb 24 21:03:07 2006
+++ b/tools/python/xen/xend/image.py Fri Feb 24 22:41:08 2006
@@ -191,8 +191,8 @@
ImageHandler.configure(self, imageConfig, deviceConfig)
info = xc.xeninfo()
- if not 'hvm' in info['xen_caps']:
- raise VmError("Not an HVM capable platform, we stop creating!")
+ if not 'hvm' in info['xen_caps']:
+ raise VmError("Not an HVM capable platform, we stop creating!")
self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
self.device_model = sxp.child_value(imageConfig, 'device_model')
@@ -205,28 +205,24 @@
("image/device-model", self.device_model),
("image/display", self.display))
- self.device_channel = None
self.pid = 0
self.dmargs += self.configVNC(imageConfig)
+
+ self.pae = int(sxp.child_value(imageConfig, 'pae', 0))
self.acpi = int(sxp.child_value(imageConfig, 'acpi', 0))
self.apic = int(sxp.child_value(imageConfig, 'apic', 0))
def buildDomain(self):
- # Create an event channel
- self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(),
- remote_dom=0)
- log.info("HVM device model port: %d", self.device_channel)
-
store_evtchn = self.vm.getStorePort()
log.debug("dom = %d", self.vm.getDomid())
log.debug("image = %s", self.kernel)
- log.debug("control_evtchn = %d", self.device_channel)
log.debug("store_evtchn = %d", store_evtchn)
log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024)
log.debug("vcpus = %d", self.vm.getVCpuCount())
+ log.debug("pae = %d", self.pae)
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
@@ -234,10 +230,10 @@
return xc.hvm_build(dom = self.vm.getDomid(),
image = self.kernel,
- control_evtchn = self.device_channel,
store_evtchn = store_evtchn,
memsize = self.vm.getMemoryTarget() / 1024,
vcpus = self.vm.getVCpuCount(),
+ pae = self.pae,
acpi = self.acpi,
apic = self.apic)
@@ -341,7 +337,6 @@
if len(vnc):
args = args + vnc
args = args + ([ "-d", "%d" % self.vm.getDomid(),
- "-p", "%d" % self.device_channel,
"-m", "%s" % (self.vm.getMemoryTarget() / 1024)])
args = args + self.dmargs
env = dict(os.environ)
@@ -379,28 +374,12 @@
def getDomainMemory(self, mem):
"""@see ImageHandler.getDomainMemory"""
page_kb = 4
+ extra_pages = 0
if os.uname()[4] == 'ia64':
page_kb = 16
- # for ioreq_t and xenstore
- static_pages = 2
- return mem + (self.getPageTableSize(mem / 1024) + static_pages) * page_kb
-
- def getPageTableSize(self, mem_mb):
- """Return the pages of memory needed for 1:1 page tables for physical
- mode.
-
- @param mem_mb: size in MB
- @return size in KB
- """
- # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
- if os.uname()[4] == 'x86_64':
- return 5 + ((mem_mb + 1) >> 1)
- elif os.uname()[4] == 'ia64':
- # 1:1 pgtable is allocated on demand ia64, so just return rom size
- # for guest firmware
- return 1024
- else:
- return 1 + ((mem_mb + 3) >> 2)
+ # ROM size for guest firmware, ioreq page and xenstore page
+ extra_pages = 1024 + 2
+ return mem + extra_pages * page_kb
def register_shutdown_watch(self):
""" add xen store watch on control/shutdown """
diff -r d940ec92958d -r 6c43118bdba8 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Fri Feb 24 21:03:07 2006
+++ b/tools/python/xen/xm/create.py Fri Feb 24 22:41:08 2006
@@ -160,6 +160,10 @@
gopts.var('cpus', val='CPUS',
fn=set_int, default=None,
use="CPUS to run the domain on.")
+
+gopts.var('pae', val='PAE',
+ fn=set_int, default=0,
+ use="Disable or enable PAE of HVM domain.")
gopts.var('acpi', val='ACPI',
fn=set_int, default=0,
@@ -545,7 +549,7 @@
def configure_hvm(config_image, vals):
"""Create the config for HVM devices.
"""
- args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
+ args = [ 'device_model', 'pae', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'acpi', 'apic',
'xauthority' ]
diff -r d940ec92958d -r 6c43118bdba8 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Fri Feb 24 21:03:07 2006
+++ b/tools/xenstore/xenstored_core.c Fri Feb 24 22:41:08 2006
@@ -573,14 +573,11 @@
{
struct buffered_data *data;
- data = talloc(ctx, struct buffered_data);
+ data = talloc_zero(ctx, struct buffered_data);
if (data == NULL)
return NULL;
data->inhdr = true;
- data->used = 0;
- data->buffer = NULL;
-
return data;
}
@@ -1394,17 +1391,13 @@
struct node *node;
struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
- node = talloc(NULL, struct node);
+ node = talloc_zero(NULL, struct node);
node->name = name;
node->perms = &perms;
node->num_perms = 1;
- node->data = NULL;
- node->datalen = 0;
node->children = (char *)child;
if (child)
node->childlen = strlen(child) + 1;
- else
- node->childlen = 0;
if (!write_node(NULL, node))
barf_perror("Could not create initial node %s", name);
diff -r d940ec92958d -r 6c43118bdba8 tools/xm-test/README
--- a/tools/xm-test/README Fri Feb 24 21:03:07 2006
+++ b/tools/xm-test/README Fri Feb 24 22:41:08 2006
@@ -48,6 +48,15 @@
Simply copy the initrd-X.Y.img file into ramdisk/ and then run:
# make existing
+
+Or, you can run:
+ # INITRD="http://url.of.initrd.repo/" make existing
+
+You do not need to include the name of the image itself in the URL;
+however, an initrd with the right name (initrd-X.Y.img) and version
+number must exist at that location. The script will determine which
+version of the initrd it needs and try to download the right file from
+that location.
This will set up the link so that xm-test will use the existing
ramdisk. Next, just run "runtest.sh" normally. Note that in general,
diff -r d940ec92958d -r 6c43118bdba8 tools/xm-test/ramdisk/Makefile.am
--- a/tools/xm-test/ramdisk/Makefile.am Fri Feb 24 21:03:07 2006
+++ b/tools/xm-test/ramdisk/Makefile.am Fri Feb 24 22:41:08 2006
@@ -57,6 +57,9 @@
fi
existing:
+ @if test -n "$(INITRD)"; then \
+ wget $(INITRD)/$(XMTEST_VER_IMG); \
+ fi
@if [ -f $(XMTEST_VER_IMG) ] ; then \
ln -sf $(XMTEST_VER_IMG) initrd.img; \
else \
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/ia64/vmx/mmio.c Fri Feb 24 22:41:08 2006
@@ -154,7 +154,7 @@
set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
+ evtchn_send(iopacket_port(v));
vmx_wait_io();
if(dir==IOREQ_READ){ //read
*val=p->u.data;
@@ -186,7 +186,7 @@
set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
+ evtchn_send(iopacket_port(v));
vmx_wait_io();
if(dir==IOREQ_READ){ //read
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/ia64/vmx/vmx_init.c Fri Feb 24 22:41:08 2006
@@ -49,6 +49,7 @@
#include <xen/mm.h>
#include <public/arch-ia64.h>
#include <asm/hvm/vioapic.h>
+#include <public/event_channel.h>
/* Global flag to identify whether Intel vmx feature is on */
u32 vmx_enabled = 0;
@@ -250,9 +251,6 @@
{
vpd_t *vpd;
- /* Allocate resources for vcpu 0 */
- //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));
-
vpd = alloc_vpd();
ASSERT(vpd);
@@ -371,20 +369,15 @@
void vmx_setup_platform(struct domain *d, struct vcpu_guest_context *c)
{
- shared_iopage_t *sp;
-
ASSERT(d != dom0); /* only for non-privileged vti domain */
d->arch.vmx_platform.shared_page_va =
(unsigned long)__va(__gpa_to_mpa(d, IO_PAGE_START));
- sp = get_sp(d);
- //memset((char *)sp,0,PAGE_SIZE);
/* TEMP */
d->arch.vmx_platform.pib_base = 0xfee00000UL;
/* Only open one port for I/O and interrupt emulation */
memset(&d->shared_info->evtchn_mask[0], 0xff,
sizeof(d->shared_info->evtchn_mask));
- clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
/* Initialize the virtual interrupt lines */
vmx_virq_line_init(d);
@@ -393,4 +386,16 @@
hvm_vioapic_init(d);
}
-
+void vmx_do_launch(struct vcpu *v)
+{
+ if (evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0) {
+ printk("VMX domain bind port %d to vcpu %d failed!\n",
+ iopacket_port(v), v->vcpu_id);
+ domain_crash_synchronous();
+ }
+
+ clear_bit(iopacket_port(v),
+ &v->domain->shared_info->evtchn_mask[0]);
+
+ vmx_load_all_rr(v);
+}
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/vmx/vmx_support.c
--- a/xen/arch/ia64/vmx/vmx_support.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/ia64/vmx/vmx_support.c Fri Feb 24 22:41:08 2006
@@ -38,7 +38,7 @@
{
struct vcpu *v = current;
struct domain *d = v->domain;
- int port = iopacket_port(d);
+ int port = iopacket_port(v);
do {
if (!test_bit(port,
@@ -129,7 +129,7 @@
struct domain *d = v->domain;
extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu,
unsigned long *pend_irr);
- int port = iopacket_port(d);
+ int port = iopacket_port(v);
/* I/O emulation is atomic, so it's impossible to see execution flow
* out of vmx_wait_io, when guest is still waiting for response.
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/xen/process.c
--- a/xen/arch/ia64/xen/process.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/ia64/xen/process.c Fri Feb 24 22:41:08 2006
@@ -71,7 +71,7 @@
context_saved(prev);
if (VMX_DOMAIN(current)) {
- vmx_load_all_rr(current);
+ vmx_do_launch(current);
} else {
load_region_regs(current);
vcpu_load_kernel_regs(current);
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/ia64/xen/xenmisc.c
--- a/xen/arch/ia64/xen/xenmisc.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/ia64/xen/xenmisc.c Fri Feb 24 22:41:08 2006
@@ -347,6 +347,10 @@
void continue_running(struct vcpu *same)
{
/* nothing to do */
+}
+
+void arch_dump_domain_info(struct domain *d)
+{
}
void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/domain.c Fri Feb 24 22:41:08 2006
@@ -346,19 +346,22 @@
struct vcpu *v, struct vcpu_guest_context *c)
{
struct domain *d = v->domain;
- unsigned long phys_basetab;
+ unsigned long phys_basetab = INVALID_MFN;
int i, rc;
- /*
- * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
- * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
- * If SS RPL or DPL differs from CS RPL then we'll #GP.
- */
if ( !(c->flags & VGCF_HVM_GUEST) )
{
- if ( ((c->user_regs.cs & 3) == 0) ||
- ((c->user_regs.ss & 3) == 0) )
- return -EINVAL;
+ fixup_guest_selector(c->user_regs.ss);
+ fixup_guest_selector(c->kernel_ss);
+ fixup_guest_selector(c->user_regs.cs);
+
+#ifdef __i386__
+ fixup_guest_selector(c->event_callback_cs);
+ fixup_guest_selector(c->failsafe_callback_cs);
+#endif
+
+ for ( i = 0; i < 256; i++ )
+ fixup_guest_selector(c->trap_ctxt[i].cs);
}
else if ( !hvm_enabled )
return -EINVAL;
@@ -372,6 +375,7 @@
v->arch.flags |= TF_kernel_mode;
memcpy(&v->arch.guest_context, c, sizeof(*c));
+ init_int80_direct_trap(v);
if ( !(c->flags & VGCF_HVM_GUEST) )
{
@@ -398,17 +402,27 @@
if ( v->vcpu_id == 0 )
d->vm_assist = c->vm_assist;
- phys_basetab = c->ctrlreg[3];
- phys_basetab =
- (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
- (phys_basetab & ~PAGE_MASK);
-
- v->arch.guest_table = mk_pagetable(phys_basetab);
+ if ( !(c->flags & VGCF_HVM_GUEST) )
+ {
+ phys_basetab = c->ctrlreg[3];
+ phys_basetab =
+ (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) |
+ (phys_basetab & ~PAGE_MASK);
+
+ v->arch.guest_table = mk_pagetable(phys_basetab);
+ }
if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
return rc;
- if ( shadow_mode_refcounts(d) )
+ if ( c->flags & VGCF_HVM_GUEST )
+ {
+ v->arch.guest_table = mk_pagetable(0);
+
+ if ( !hvm_initialize_guest_resources(v) )
+ return -EINVAL;
+ }
+ else if ( shadow_mode_refcounts(d) )
{
if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) )
{
@@ -416,7 +430,7 @@
return -EINVAL;
}
}
- else if ( !(c->flags & VGCF_HVM_GUEST) )
+ else
{
if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d,
PGT_base_page_table) )
@@ -424,17 +438,6 @@
destroy_gdt(v);
return -EINVAL;
}
- }
-
- if ( c->flags & VGCF_HVM_GUEST )
- {
- /* HVM uses the initially provided page tables as the P2M map. */
- if ( !pagetable_get_paddr(d->arch.phys_table) )
- d->arch.phys_table = v->arch.guest_table;
- v->arch.guest_table = mk_pagetable(0);
-
- if ( !hvm_initialize_guest_resources(v) )
- return -EINVAL;
}
update_pagetables(v);
@@ -610,9 +613,6 @@
struct cpu_user_regs *regs = &ctxt->user_regs;
unsigned int dirty_segment_mask = 0;
- if ( HVM_DOMAIN(v) )
- hvm_save_segments(v);
-
regs->ds = read_segment_register(ds);
regs->es = read_segment_register(es);
regs->fs = read_segment_register(fs);
@@ -682,9 +682,15 @@
stack_regs,
CTXT_SWITCH_STACK_BYTES);
unlazy_fpu(p);
- save_segments(p);
- if ( HVM_DOMAIN(p) )
+ if ( !HVM_DOMAIN(p) )
+ {
+ save_segments(p);
+ }
+ else
+ {
+ hvm_save_segments(p);
hvm_load_msrs();
+ }
}
if ( !is_idle_vcpu(n) )
@@ -980,6 +986,26 @@
relinquish_memory(d, &d->page_list);
}
+void arch_dump_domain_info(struct domain *d)
+{
+ if ( shadow_mode_enabled(d) )
+ {
+ printk(" shadow mode: ");
+ if ( shadow_mode_refcounts(d) )
+ printk("refcounts ");
+ if ( shadow_mode_write_all(d) )
+ printk("write_all ");
+ if ( shadow_mode_log_dirty(d) )
+ printk("log_dirty ");
+ if ( shadow_mode_translate(d) )
+ printk("translate ");
+ if ( shadow_mode_external(d) )
+ printk("external ");
+ if ( shadow_mode_wr_pt_pte(d) )
+ printk("wr_pt_pte ");
+ printk("\n");
+ }
+}
/*
* Local variables:
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/domain_build.c Fri Feb 24 22:41:08 2006
@@ -17,6 +17,7 @@
#include <xen/domain.h>
#include <xen/compile.h>
#include <xen/iocap.h>
+#include <xen/bitops.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -24,6 +25,8 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/shadow.h>
+
+#include <public/version.h>
static long dom0_nrpages;
@@ -56,9 +59,6 @@
static unsigned int opt_dom0_shadow;
boolean_param("dom0_shadow", opt_dom0_shadow);
-static unsigned int opt_dom0_translate;
-boolean_param("dom0_translate", opt_dom0_translate);
-
static char opt_dom0_ioports_disable[200] = "";
string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
@@ -131,6 +131,62 @@
if ( ioports_deny_access(dom0, io_from, io_to) != 0 )
BUG();
+ }
+}
+
+static const char *feature_names[XENFEAT_NR_SUBMAPS*32] = {
+ [XENFEAT_writable_page_tables] = "writable_page_tables",
+ [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables",
+ [XENFEAT_auto_translated_physmap] = "auto_translated_physmap",
+ [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel",
+ [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb"
+};
+
+static void parse_features(
+ const char *feats,
+ uint32_t supported[XENFEAT_NR_SUBMAPS],
+ uint32_t required[XENFEAT_NR_SUBMAPS])
+{
+ const char *end, *p;
+ int i, req;
+
+ if ( (end = strchr(feats, ',')) == NULL )
+ end = feats + strlen(feats);
+
+ while ( feats < end )
+ {
+ p = strchr(feats, '|');
+ if ( (p == NULL) || (p > end) )
+ p = end;
+
+ req = (*feats == '!');
+ if ( req )
+ feats++;
+
+ for ( i = 0; i < XENFEAT_NR_SUBMAPS*32; i++ )
+ {
+ if ( feature_names[i] == NULL )
+ continue;
+
+ if ( strncmp(feature_names[i], feats, p-feats) == 0 )
+ {
+ set_bit(i, supported);
+ if ( req )
+ set_bit(i, required);
+ break;
+ }
+ }
+
+ if ( i == XENFEAT_NR_SUBMAPS*32 )
+ {
+ printk("Unknown kernel feature \"%.*s\".\n",
+ (int)(p-feats), feats);
+ panic("Domain 0 requires an unknown hypervisor feature.\n");
+ }
+
+ feats = p;
+ if ( *feats == '|' )
+ feats++;
}
}
@@ -188,6 +244,10 @@
/* Machine address of next candidate page-table page. */
unsigned long mpt_alloc;
+ /* Features supported. */
+ uint32_t dom0_features_supported[XENFEAT_NR_SUBMAPS] = { 0 };
+ uint32_t dom0_features_required[XENFEAT_NR_SUBMAPS] = { 0 };
+
extern void translate_l2pgtable(
struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn);
@@ -245,8 +305,19 @@
return -EINVAL;
}
- if ( strstr(dsi.xen_section_string, "SHADOW=translate") )
- opt_dom0_translate = 1;
+ if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL )
+ {
+ parse_features(
+ p + strlen("FEATURES="),
+ dom0_features_supported,
+ dom0_features_required);
+ printk("Domain 0 kernel supports features = { %08x }.\n",
+ dom0_features_supported[0]);
+ printk("Domain 0 kernel requires features = { %08x }.\n",
+ dom0_features_required[0]);
+ if ( dom0_features_required[0] )
+ panic("Domain 0 requires an unsupported hypervisor feature.\n");
+ }
/* Align load address to 4MB boundary. */
dsi.v_start &= ~((1UL<<22)-1);
@@ -650,11 +721,6 @@
si->nr_pages = nr_pages;
si->shared_info = virt_to_maddr(d->shared_info);
- if ( opt_dom0_translate )
- {
- si->shared_info = max_page << PAGE_SHIFT;
- set_gpfn_from_mfn(virt_to_maddr(d->shared_info) >> PAGE_SHIFT, max_page);
- }
si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
si->pt_base = vpt_start;
@@ -669,7 +735,7 @@
mfn = pfn + alloc_spfn;
#ifndef NDEBUG
#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
- if ( !opt_dom0_translate && (pfn > REVERSE_START) )
+ if ( pfn > REVERSE_START )
mfn = alloc_epfn - (pfn - REVERSE_START);
#endif
((unsigned long *)vphysmap_start)[pfn] = mfn;
@@ -720,48 +786,10 @@
new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
- if ( opt_dom0_shadow || opt_dom0_translate )
- {
- printk("dom0: shadow enable\n");
- shadow_mode_enable(d, (opt_dom0_translate
- ? SHM_enable | SHM_refcounts | SHM_translate
- : SHM_enable));
- if ( opt_dom0_translate )
- {
- printk("dom0: shadow translate\n");
-#if defined(__i386__) && defined(CONFIG_X86_PAE)
- printk("FIXME: PAE code needed here: %s:%d (%s)\n",
- __FILE__, __LINE__, __FUNCTION__);
- for ( ; ; )
- __asm__ __volatile__ ( "hlt" );
-#else
- /* Hmm, what does this?
- Looks like isn't portable across 32/64 bit and pae/non-pae ...
- -- kraxel */
-
- /* mafetter: This code is mostly a hack in order to be able to
- * test with dom0's which are running with shadow translate.
- * I expect we'll rip this out once we have a stable set of
- * domU clients which use the various shadow modes, but it's
- * useful to leave this here for now...
- */
-
- // map this domain's p2m table into current page table,
- // so that we can easily access it.
- //
- ASSERT( root_get_intpte(idle_pg_table[1]) == 0 );
- ASSERT( pagetable_get_paddr(d->arch.phys_table) );
- idle_pg_table[1] = root_from_paddr(
- pagetable_get_paddr(d->arch.phys_table), __PAGE_HYPERVISOR);
- translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
- pagetable_get_pfn(v->arch.guest_table));
- idle_pg_table[1] = root_empty();
- local_flush_tlb();
-#endif
- }
-
- update_pagetables(v); /* XXX SMP */
- printk("dom0: shadow setup done\n");
+ if ( opt_dom0_shadow )
+ {
+ shadow_mode_enable(d, SHM_enable);
+ update_pagetables(v);
}
rc = 0;
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/hvm.c Fri Feb 24 22:41:08 2006
@@ -124,11 +124,6 @@
domain_crash_synchronous();
}
d->arch.hvm_domain.shared_page_va = (unsigned long)p;
-
- HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d));
-
- clear_bit(iopacket_port(d),
- &d->shared_info->evtchn_mask[0]);
}
static int validate_hvm_info(struct hvm_info_table *t)
@@ -175,10 +170,12 @@
if ( validate_hvm_info(t) ) {
d->arch.hvm_domain.nr_vcpus = t->nr_vcpus;
d->arch.hvm_domain.apic_enabled = t->apic_enabled;
+ d->arch.hvm_domain.pae_enabled = t->pae_enabled;
} else {
printk("Bad hvm info table\n");
d->arch.hvm_domain.nr_vcpus = 1;
d->arch.hvm_domain.apic_enabled = 0;
+ d->arch.hvm_domain.pae_enabled = 0;
}
unmap_domain_page(p);
@@ -188,8 +185,10 @@
{
struct hvm_domain *platform;
- if (!(HVM_DOMAIN(current) && (current->vcpu_id == 0)))
+ if ( !HVM_DOMAIN(current) || (current->vcpu_id != 0) )
return;
+
+ shadow_direct_map_init(d);
hvm_map_io_shared_page(d);
hvm_get_info(d);
@@ -198,7 +197,8 @@
pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request);
register_pic_io_hook();
- if ( hvm_apic_support(d) ) {
+ if ( hvm_apic_support(d) )
+ {
spin_lock_init(&d->arch.hvm_domain.round_robin_lock);
hvm_vioapic_init(d);
}
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/intercept.c Fri Feb 24 22:41:08 2006
@@ -332,8 +332,8 @@
void hlt_timer_fn(void *data)
{
struct vcpu *v = data;
-
- evtchn_set_pending(v, iopacket_port(v->domain));
+
+ evtchn_set_pending(v, iopacket_port(v));
}
static __inline__ void missed_ticks(struct hvm_virpit*vpit)
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/io.c Fri Feb 24 22:41:08 2006
@@ -697,8 +697,8 @@
void hvm_wait_io(void)
{
struct vcpu *v = current;
- struct domain *d = v->domain;
- int port = iopacket_port(d);
+ struct domain *d = v->domain;
+ int port = iopacket_port(v);
for ( ; ; )
{
@@ -729,8 +729,8 @@
void hvm_safe_block(void)
{
struct vcpu *v = current;
- struct domain *d = v->domain;
- int port = iopacket_port(d);
+ struct domain *d = v->domain;
+ int port = iopacket_port(v);
for ( ; ; )
{
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/platform.c Fri Feb 24 22:41:08 2006
@@ -41,8 +41,6 @@
#define DECODE_success 1
#define DECODE_failure 0
-
-extern long evtchn_send(int lport);
#if defined (__x86_64__)
static inline long __get_reg_value(unsigned long reg, int size)
@@ -648,6 +646,8 @@
p->count = count;
p->df = regs->eflags & EF_DF ? 1 : 0;
+ p->io_count++;
+
if (pvalid) {
if (hvm_paging_enabled(current))
p->u.pdata = (void *) gva_to_gpa(value);
@@ -664,18 +664,18 @@
p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
+ evtchn_send(iopacket_port(v));
hvm_wait_io();
}
-void send_mmio_req(unsigned char type, unsigned long gpa,
- unsigned long count, int size, long value, int dir, int pvalid)
+void send_mmio_req(
+ unsigned char type, unsigned long gpa,
+ unsigned long count, int size, long value, int dir, int pvalid)
{
struct vcpu *v = current;
vcpu_iodata_t *vio;
ioreq_t *p;
struct cpu_user_regs *regs;
- extern long evtchn_send(int lport);
regs = current->arch.hvm_vcpu.mmio_op.inst_decoder_regs;
@@ -701,6 +701,8 @@
p->addr = gpa;
p->count = count;
p->df = regs->eflags & EF_DF ? 1 : 0;
+
+ p->io_count++;
if (pvalid) {
if (hvm_paging_enabled(v))
@@ -718,7 +720,7 @@
p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
+ evtchn_send(iopacket_port(v));
hvm_wait_io();
}
@@ -760,12 +762,12 @@
void handle_mmio(unsigned long va, unsigned long gpa)
{
- unsigned long inst_len, inst_addr;
+ unsigned long inst_addr;
struct mmio_op *mmio_opp;
struct cpu_user_regs *regs;
struct instruction mmio_inst;
unsigned char inst[MAX_INST_LEN];
- int i, realmode, ret;
+ int i, realmode, ret, inst_len;
struct vcpu *v = current;
mmio_opp = &v->arch.hvm_vcpu.mmio_op;
@@ -795,7 +797,7 @@
if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) {
printf("handle_mmio: failed to decode instruction\n");
- printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
+ printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %d:",
va, gpa, inst_len);
for (i = 0; i < inst_len; i++)
printf(" %02x", inst[i] & 0xFF);
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/instrlen.c
--- a/xen/arch/x86/hvm/svm/instrlen.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/svm/instrlen.c Fri Feb 24 22:41:08 2006
@@ -2,17 +2,19 @@
* instrlen.c - calculates the instruction length for all operating modes
*
* Travis Betak, travis.betak@xxxxxxx
- * Copyright (c) 2005 AMD
+ * Copyright (c) 2005,2006 AMD
+ * Copyright (c) 2005 Keir Fraser
*
- * Essentially a very, very stripped version of Keir Fraser's work in
- * x86_emulate.c. Used primarily for MMIO.
+ * Essentially a very, very stripped version of Keir Fraser's work in
+ * x86_emulate.c. Used for MMIO.
*/
/*
- * TODO: the way in which we use svm_instrlen is very inefficient as is now
- * stands. it will be worth while to return the actual instruction buffer
- * along with the instruction length since we are getting the instruction length
- * so we know how much of the buffer we need to fetch.
+ * TODO: the way in which we use svm_instrlen is very inefficient as it now
+ * stands. It will be worth while to return the actual instruction buffer
+ * along with the instruction length since one of the reasons we are getting
+ * the instruction length is to know how many instruction bytes we need to
+ * fetch.
*/
#include <xen/config.h>
@@ -22,6 +24,11 @@
#include <asm/regs.h>
#define DPRINTF DPRINTK
#include <asm-x86/x86_emulate.h>
+
+/* read from guest memory */
+extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
+ int length);
+extern void svm_dump_inst(unsigned long eip);
/*
* Opcode effective-address decode tables.
@@ -33,98 +40,101 @@
*/
/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define BYTE_OP (1<<0) /* 8-bit operands. */
+#define ByteOp (1<<0) /* 8-bit operands. */
/* Destination operand type. */
-#define IMPLICIT_OPS (1<<1) /* Implicit in opcode. No generic decode. */
-#define DST_REG (2<<1) /* Register operand. */
-#define DST_MEM (3<<1) /* Memory operand. */
-#define DST_MASK (3<<1)
+#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
+#define DstReg (2<<1) /* Register operand. */
+#define DstMem (3<<1) /* Memory operand. */
+#define DstMask (3<<1)
/* Source operand type. */
-#define SRC_NONE (0<<3) /* No source operand. */
-#define SRC_IMPLICIT (0<<3) /* Source operand is implicit in the opcode. */
-#define SRC_REG (1<<3) /* Register operand. */
-#define SRC_MEM (2<<3) /* Memory operand. */
-#define SRC_IMM (3<<3) /* Immediate operand. */
-#define SRC_IMMBYTE (4<<3) /* 8-bit sign-extended immediate operand. */
-#define SRC_MASK (7<<3)
-/* Generic MODRM decode. */
-#define MODRM (1<<6)
+#define SrcNone (0<<3) /* No source operand. */
+#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
+#define SrcReg (1<<3) /* Register operand. */
+#define SrcMem (2<<3) /* Memory operand. */
+#define SrcMem16 (3<<3) /* Memory operand (16-bit). */
+#define SrcMem32 (4<<3) /* Memory operand (32-bit). */
+#define SrcImm (5<<3) /* Immediate operand. */
+#define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */
+#define SrcMask (7<<3)
+/* Generic ModRM decode. */
+#define ModRM (1<<6)
/* Destination is only written; never read. */
#define Mov (1<<7)
-static u8 opcode_table[256] = {
+static uint8_t opcode_table[256] = {
/* 0x00 - 0x07 */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x08 - 0x0F */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x10 - 0x17 */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x18 - 0x1F */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x20 - 0x27 */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x28 - 0x2F */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x30 - 0x37 */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x38 - 0x3F */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
0, 0, 0, 0,
/* 0x40 - 0x4F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x50 - 0x5F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x60 - 0x6F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x70 - 0x7F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x80 - 0x87 */
- BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM,
- BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM,
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
/* 0x88 - 0x8F */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
- BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
- 0, 0, 0, DST_MEM | SRC_NONE | MODRM | Mov,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
+ 0, 0, 0, DstMem|SrcNone|ModRM|Mov,
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- BYTE_OP | DST_REG | SRC_MEM | Mov, DST_REG | SRC_MEM | Mov,
- BYTE_OP | DST_MEM | SRC_REG | Mov, DST_MEM | SRC_REG | Mov,
- BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
- BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS,
+ ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
+ ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
/* 0xA8 - 0xAF */
- 0, 0, BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
- BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
- BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS,
+ 0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ ByteOp|ImplicitOps, ImplicitOps,
/* 0xB0 - 0xBF */
- SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE,
- SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE,
+ SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
+ SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xC0 - 0xC7 */
- BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM, 0, 0,
- 0, 0, BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM,
+ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
+ 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
/* 0xC8 - 0xCF */
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xD7 */
- BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM,
- BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM,
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
+ ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
0, 0, 0, 0,
/* 0xD8 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0,
@@ -132,31 +142,31 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
- 0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM,
+ 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
/* 0xF8 - 0xFF */
0, 0, 0, 0,
- 0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM
+ 0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
};
-static u8 twobyte_table[256] = {
+static uint8_t twobyte_table[256] = {
/* 0x00 - 0x0F */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
/* 0x10 - 0x1F */
- 0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x20 - 0x2F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
/* 0x48 - 0x4F */
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
- DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
+ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
/* 0x50 - 0x5F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x60 - 0x6F */
@@ -168,20 +178,17 @@
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+ 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+ 0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
/* 0xB0 - 0xB7 */
- BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, 0,
- DST_MEM | SRC_REG | MODRM,
- 0, 0,
- DST_REG | SRC_MEM | MODRM,
- DST_REG | SRC_REG | MODRM,
-
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
+ 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
/* 0xB8 - 0xBF */
- 0, 0, DST_MEM | SRC_IMMBYTE | MODRM, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+ 0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
+ 0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
/* 0xC0 - 0xCF */
- 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xEF */
@@ -189,11 +196,6 @@
/* 0xF0 - 0xFF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-
-/* read from guest memory */
-extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
- int length);
-extern void svm_dump_inst(unsigned long eip);
/*
* insn_fetch - fetch the next 1 to 4 bytes from instruction stream
@@ -219,206 +221,250 @@
(_type)_x; \
})
+
/**
- * get_instruction_length - returns the current instructions length
+ * svm_instrlen - returns the current instruction's length
*
* @regs: guest register state
- * @cr2: target address
- * @ops: guest memory operations
* @mode: guest operating mode
*
* EXTERNAL this routine calculates the length of the current instruction
* pointed to by eip. The guest state is _not_ changed by this routine.
*/
-unsigned long svm_instrlen(struct cpu_user_regs *regs, int mode)
+int svm_instrlen(struct cpu_user_regs *regs, int mode)
{
- u8 b, d, twobyte = 0;
- u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
- unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
- unsigned int i;
+ uint8_t b, d, twobyte = 0, rex_prefix = 0;
+ uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+ unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
int rc = 0;
- u32 length = 0;
- u8 tmp;
-
- /* Copy the registers so we don't alter the guest's present state */
- volatile struct cpu_user_regs _regs = *regs;
-
- /* Check for Real Mode */
- if (mode == 2)
- _regs.eip += (_regs.cs << 4);
-
- /* Legacy prefix check */
- for (i = 0; i < 8; i++) {
- switch (b = insn_fetch(u8, 1, _regs.eip, length)) {
- case 0x66: /* operand-size override */
- op_bytes ^= 6; /* switch between 2/4 bytes */
- break;
- case 0x67: /* address-size override */
- ad_bytes ^= (mode == 8) ? 12 : 6; /* 2/4/8 bytes */
- break;
- case 0x2e: /* CS override */
- case 0x3e: /* DS override */
- case 0x26: /* ES override */
- case 0x64: /* FS override */
- case 0x65: /* GS override */
- case 0x36: /* SS override */
- case 0xf0: /* LOCK */
- case 0xf3: /* REP/REPE/REPZ */
- case 0xf2: /* REPNE/REPNZ */
+ int length = 0;
+ unsigned int tmp;
+
+ /* Shadow copy of register state; the guest's own registers are never modified. */
+ struct cpu_user_regs _regs = *regs;
+
+ /* include CS for 16-bit modes */
+ if (mode == X86EMUL_MODE_REAL || mode == X86EMUL_MODE_PROT16)
+ _regs.eip += (_regs.cs << 4);
+
+ switch ( mode )
+ {
+ case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_PROT16:
+ op_bytes = ad_bytes = 2;
+ break;
+ case X86EMUL_MODE_PROT32:
+ op_bytes = ad_bytes = 4;
+ break;
+#ifdef __x86_64__
+ case X86EMUL_MODE_PROT64:
+ op_bytes = 4;
+ ad_bytes = 8;
+ break;
+#endif
+ default:
+ return -1;
+ }
+
+ /* Legacy prefixes. */
+ for ( i = 0; i < 8; i++ )
+ {
+ switch ( b = insn_fetch(uint8_t, 1, _regs.eip, length) )
+ {
+ case 0x66: /* operand-size override */
+ op_bytes ^= 6; /* switch between 2/4 bytes */
+ break;
+ case 0x67: /* address-size override */
+ if ( mode == X86EMUL_MODE_PROT64 )
+ ad_bytes ^= 12; /* switch between 4/8 bytes */
+ else
+ ad_bytes ^= 6; /* switch between 2/4 bytes */
+ break;
+ case 0x2e: /* CS override */
+ case 0x3e: /* DS override */
+ case 0x26: /* ES override */
+ case 0x64: /* FS override */
+ case 0x65: /* GS override */
+ case 0x36: /* SS override */
+ break;
+ case 0xf0: /* LOCK */
+ lock_prefix = 1;
+ break;
+ case 0xf3: /* REP/REPE/REPZ */
+ rep_prefix = 1;
+ break;
+ case 0xf2: /* REPNE/REPNZ */
break;
default:
goto done_prefixes;
}
}
-
done_prefixes:
- /* REX prefix check */
- if ((mode == 8) && ((b & 0xf0) == 0x40))
- {
- if (b & 8)
- op_bytes = 8; /* REX.W */
- modrm_reg = (b & 4) << 1; /* REX.R */
+ /* Not quite the same as 80386 real mode, but hopefully good enough. */
+ if ( (mode == X86EMUL_MODE_REAL) && (ad_bytes != 2) ) {
+ printf("sonofabitch!! we don't support 32-bit addresses in realmode\n");
+ goto cannot_emulate;
+ }
+
+ /* REX prefix. */
+ if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
+ {
+ rex_prefix = b;
+ if ( b & 8 )
+ op_bytes = 8; /* REX.W */
+ modrm_reg = (b & 4) << 1; /* REX.R */
/* REX.B and REX.X do not need to be decoded. */
- b = insn_fetch(u8, 1, _regs.eip, length);
+ b = insn_fetch(uint8_t, 1, _regs.eip, length);
}
/* Opcode byte(s). */
d = opcode_table[b];
- if (d == 0)
+ if ( d == 0 )
{
/* Two-byte opcode? */
- if (b == 0x0f) {
+ if ( b == 0x0f )
+ {
twobyte = 1;
- b = insn_fetch(u8, 1, _regs.eip, length);
+ b = insn_fetch(uint8_t, 1, _regs.eip, length);
d = twobyte_table[b];
}
/* Unrecognised? */
- if (d == 0)
+ if ( d == 0 )
goto cannot_emulate;
}
- /* MODRM and SIB bytes. */
- if (d & MODRM)
- {
- modrm = insn_fetch(u8, 1, _regs.eip, length);
+ /* ModRM and SIB bytes. */
+ if ( d & ModRM )
+ {
+ modrm = insn_fetch(uint8_t, 1, _regs.eip, length);
modrm_mod |= (modrm & 0xc0) >> 6;
modrm_reg |= (modrm & 0x38) >> 3;
- modrm_rm |= (modrm & 0x07);
- switch (modrm_mod)
- {
- case 0:
- if ((modrm_rm == 4) &&
- (((insn_fetch(u8, 1, _regs.eip,
- length)) & 7) == 5))
+ modrm_rm |= (modrm & 0x07);
+
+ if ( modrm_mod == 3 )
+ {
+ DPRINTF("Cannot parse ModRM.mod == 3.\n");
+ goto cannot_emulate;
+ }
+
+ if ( ad_bytes == 2 )
+ {
+ /* 16-bit ModR/M decode. */
+ switch ( modrm_mod )
{
- length += 4;
- _regs.eip += 4; /* skip SIB.base disp32 */
- }
- else if (modrm_rm == 5)
+ case 0:
+ if ( modrm_rm == 6 )
+ {
+ length += 2;
+ _regs.eip += 2; /* skip disp16 */
+ }
+ break;
+ case 1:
+ length += 1;
+ _regs.eip += 1; /* skip disp8 */
+ break;
+ case 2:
+ length += 2;
+ _regs.eip += 2; /* skip disp16 */
+ break;
+ }
+ }
+ else
+ {
+ /* 32/64-bit ModR/M decode. */
+ switch ( modrm_mod )
{
+ case 0:
+ if ( (modrm_rm == 4) &&
+ (((insn_fetch(uint8_t, 1, _regs.eip, length)) & 7)
+ == 5) )
+ {
+ length += 4;
+ _regs.eip += 4; /* skip disp32 specified by SIB.base */
+ }
+ else if ( modrm_rm == 5 )
+ {
+ length += 4;
+ _regs.eip += 4; /* skip disp32 */
+ }
+ break;
+ case 1:
+ if ( modrm_rm == 4 )
+ {
+ insn_fetch(uint8_t, 1, _regs.eip, length);
+ }
+ length += 1;
+ _regs.eip += 1; /* skip disp8 */
+ break;
+ case 2:
+ if ( modrm_rm == 4 )
+ {
+ insn_fetch(uint8_t, 1, _regs.eip, length);
+ }
length += 4;
_regs.eip += 4; /* skip disp32 */
+ break;
}
- break;
- case 1:
- if (modrm_rm == 4)
- {
- insn_fetch(u8, 1, _regs.eip, length);
- }
- length += 1;
- _regs.eip += 1; /* skip disp8 */
- break;
- case 2:
- if (modrm_rm == 4)
- {
- insn_fetch(u8, 1, _regs.eip, length);
- }
- length += 4;
- _regs.eip += 4; /* skip disp32 */
- break;
- case 3:
- DPRINTF("Cannot parse ModRM.mod == 3.\n");
- goto cannot_emulate;
}
}
/* Decode and fetch the destination operand: register or memory. */
- switch (d & DST_MASK)
- {
- case IMPLICIT_OPS:
+ switch ( d & DstMask )
+ {
+ case ImplicitOps:
/* Special instructions do their own operand decoding. */
goto done;
}
- /* Decode and fetch the source operand: register, memory or immediate */
- switch (d & SRC_MASK)
- {
- case SRC_IMM:
- tmp = (d & BYTE_OP) ? 1 : op_bytes;
- if (tmp == 8)
- tmp = 4;
+ /* Decode and fetch the source operand: register, memory or immediate. */
+ switch ( d & SrcMask )
+ {
+ case SrcImm:
+ tmp = (d & ByteOp) ? 1 : op_bytes;
+ if ( tmp == 8 ) tmp = 4;
/* NB. Immediates are sign-extended as necessary. */
- switch (tmp) {
- case 1:
- insn_fetch(s8, 1, _regs.eip, length);
- break;
- case 2:
- insn_fetch(s16, 2, _regs.eip, length);
- break;
- case 4:
- insn_fetch(s32, 4, _regs.eip, length);
- break;
- }
- break;
- case SRC_IMMBYTE:
- insn_fetch(s8, 1, _regs.eip, length);
- break;
- }
-
- if (twobyte)
+ switch ( tmp )
+ {
+ case 1: insn_fetch(int8_t, 1, _regs.eip, length); break;
+ case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
+ case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
+ }
+ break;
+ case SrcImmByte:
+ insn_fetch(int8_t, 1, _regs.eip, length);
+ break;
+ }
+
+ if ( twobyte )
goto done;
- switch (b)
- {
- case 0xa0:
- case 0xa1: /* mov */
+ switch ( b )
+ {
+ case 0xa0 ... 0xa1: /* mov */
length += ad_bytes;
- _regs.eip += ad_bytes; /* skip src displacement */
- break;
- case 0xa2:
- case 0xa3: /* mov */
+ _regs.eip += ad_bytes; /* skip src displacement */
+ break;
+ case 0xa2 ... 0xa3: /* mov */
length += ad_bytes;
- _regs.eip += ad_bytes; /* skip dst displacement */
- break;
- case 0xf6:
- case 0xf7: /* Grp3 */
- switch (modrm_reg)
- {
- case 0:
- case 1: /* test */
- /*
- * Special case in Grp3: test has an
- * immediate source operand.
- */
- tmp = (d & BYTE_OP) ? 1 : op_bytes;
- if (tmp == 8)
- tmp = 4;
- switch (tmp)
+ _regs.eip += ad_bytes; /* skip dst displacement */
+ break;
+ case 0xf6 ... 0xf7: /* Grp3 */
+ switch ( modrm_reg )
+ {
+ case 0 ... 1: /* test */
+ /* Special case in Grp3: test has an immediate source operand. */
+ tmp = (d & ByteOp) ? 1 : op_bytes;
+ if ( tmp == 8 ) tmp = 4;
+ switch ( tmp )
{
- case 1:
- insn_fetch(s8, 1, _regs.eip, length);
- break;
- case 2:
- insn_fetch(s16, 2, _regs.eip, length);
- break;
- case 4:
- insn_fetch(s32, 4, _regs.eip, length);
- break;
+ case 1: insn_fetch(int8_t, 1, _regs.eip, length); break;
+ case 2: insn_fetch(int16_t, 2, _regs.eip, length); break;
+ case 4: insn_fetch(int32_t, 4, _regs.eip, length); break;
}
goto done;
- }
+ }
break;
}
@@ -429,5 +475,5 @@
DPRINTF("Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
svm_dump_inst(_regs.eip);
- return (unsigned long)-1;
+ return -1;
}
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/svm/svm.c Fri Feb 24 22:41:08 2006
@@ -64,7 +64,6 @@
/*
* External functions, etc. We should move these to some suitable header file(s) */
-extern long evtchn_send(int lport);
extern void do_nmi(struct cpu_user_regs *, unsigned long);
extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
@@ -797,12 +796,13 @@
free_host_save_area(v->arch.hvm_svm.host_save_area);
#endif
- if (v->vcpu_id == 0) {
+ if ( v->vcpu_id == 0 )
+ {
/* unmap IO shared page */
struct domain *d = v->domain;
- if (d->arch.hvm_domain.shared_page_va)
+ if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page((void *)d->arch.hvm_domain.shared_page_va);
- shadow_direct_map_clean(v);
+ shadow_direct_map_clean(d);
}
destroy_vmcb(&v->arch.hvm_svm);
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/svm/vmcb.c Fri Feb 24 22:41:08 2006
@@ -421,6 +421,18 @@
if (v->vcpu_id == 0)
hvm_setup_platform(v->domain);
+ if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 )
+ {
+ printk("HVM domain bind port %d to vcpu %d failed!\n",
+ iopacket_port(v), v->vcpu_id);
+ domain_crash_synchronous();
+ }
+
+ HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v));
+
+ clear_bit(iopacket_port(v),
+ &v->domain->shared_info->evtchn_mask[0]);
+
if (hvm_apic_support(v->domain))
vlapic_init(v);
init_timer(&v->arch.hvm_svm.hlt_timer,
@@ -443,8 +455,6 @@
pt = pagetable_get_paddr(v->domain->arch.phys_table);
printk("%s: phys_table = %lx\n", __func__, pt);
}
-
- shadow_direct_map_init(v);
if ( svm_paging_enabled(v) )
vmcb->cr3 = pagetable_get_paddr(v->arch.guest_table);
@@ -492,7 +502,7 @@
svm_stts(v);
- if ( test_bit(iopacket_port(d), &d->shared_info->evtchn_pending[0]) ||
+ if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) )
hvm_wait_io();
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/vlapic.c Fri Feb 24 22:41:08 2006
@@ -210,7 +210,7 @@
set_bit(vector, &vlapic->tmr[0]);
}
}
- evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
+ evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu));
result = 1;
break;
@@ -834,7 +834,7 @@
}
else
vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++;
- evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
+ evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu));
}
vlapic->timer_current_update = NOW();
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/vmx/io.c Fri Feb 24 22:41:08 2006
@@ -178,7 +178,7 @@
vmx_stts();
- if ( test_bit(iopacket_port(d), &d->shared_info->evtchn_pending[0]) ||
+ if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags) )
hvm_wait_io();
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri Feb 24 22:41:08 2006
@@ -200,6 +200,18 @@
if (v->vcpu_id == 0)
hvm_setup_platform(v->domain);
+ if ( evtchn_bind_vcpu(iopacket_port(v), v->vcpu_id) < 0 )
+ {
+ printk("VMX domain bind port %d to vcpu %d failed!\n",
+ iopacket_port(v), v->vcpu_id);
+ domain_crash_synchronous();
+ }
+
+ HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x", iopacket_port(v));
+
+ clear_bit(iopacket_port(v),
+ &v->domain->shared_info->evtchn_mask[0]);
+
__asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : );
error |= __vmwrite(GUEST_CR0, cr0);
@@ -230,7 +242,6 @@
error |= __vmwrite(GUEST_TR_BASE, 0);
error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
- shadow_direct_map_init(v);
__vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
__vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Feb 24 22:41:08 2006
@@ -81,14 +81,14 @@
void vmx_relinquish_resources(struct vcpu *v)
{
struct hvm_virpit *vpit;
-
+
if (v->vcpu_id == 0) {
/* unmap IO shared page */
struct domain *d = v->domain;
if ( d->arch.hvm_domain.shared_page_va )
unmap_domain_page_global(
(void *)d->arch.hvm_domain.shared_page_va);
- shadow_direct_map_clean(v);
+ shadow_direct_map_clean(d);
}
vmx_request_clear_vmcs(v);
@@ -448,7 +448,6 @@
return 0; /* dummy */
}
-extern long evtchn_send(int lport);
void do_nmi(struct cpu_user_regs *);
static int check_vmx_controls(ctrls, msr)
@@ -643,7 +642,7 @@
}
/* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
-#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
+#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46
static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
{
@@ -662,19 +661,21 @@
cpuid(input, &eax, &ebx, &ecx, &edx);
- if (input == 1)
+ if ( input == 1 )
{
if ( hvm_apic_support(v->domain) &&
!vlapic_global_enabled((VLAPIC(v))) )
clear_bit(X86_FEATURE_APIC, &edx);
#if CONFIG_PAGING_LEVELS < 3
+ clear_bit(X86_FEATURE_PAE, &edx);
clear_bit(X86_FEATURE_PSE, &edx);
- clear_bit(X86_FEATURE_PAE, &edx);
clear_bit(X86_FEATURE_PSE36, &edx);
#else
if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
{
+ if ( !v->domain->arch.hvm_domain.pae_enabled )
+ clear_bit(X86_FEATURE_PAE, &edx);
clear_bit(X86_FEATURE_PSE, &edx);
clear_bit(X86_FEATURE_PSE36, &edx);
}
@@ -1184,8 +1185,12 @@
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
- if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
+ if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
+ {
+ unsigned long cr4;
+
/*
+ * Trying to enable guest paging.
* The guest CR3 must be pointing to the guest physical.
*/
if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
@@ -1197,52 +1202,51 @@
}
#if defined(__x86_64__)
- if (test_bit(VMX_CPU_STATE_LME_ENABLED,
- &v->arch.hvm_vmx.cpu_state) &&
- !test_bit(VMX_CPU_STATE_PAE_ENABLED,
- &v->arch.hvm_vmx.cpu_state)){
- HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
+ if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) &&
+ !test_bit(VMX_CPU_STATE_PAE_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enabled\n");
vmx_inject_exception(v, TRAP_gp_fault, 0);
}
- if (test_bit(VMX_CPU_STATE_LME_ENABLED,
- &v->arch.hvm_vmx.cpu_state)){
- /* Here the PAE is should to be opened */
- HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
+
+ if ( test_bit(VMX_CPU_STATE_LME_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
+ /* PAE should already be enabled at this point */
+ HVM_DBG_LOG(DBG_LEVEL_1, "Enable long mode\n");
set_bit(VMX_CPU_STATE_LMA_ENABLED,
&v->arch.hvm_vmx.cpu_state);
+
__vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
__vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-#if CONFIG_PAGING_LEVELS >= 4
- if(!shadow_set_guest_paging_levels(v->domain, 4)) {
+ if ( !shadow_set_guest_paging_levels(v->domain, 4) ) {
+ printk("Unsupported guest paging levels\n");
+ domain_crash_synchronous(); /* need to take a clean path */
+ }
+ }
+ else
+#endif /* __x86_64__ */
+ {
+#if CONFIG_PAGING_LEVELS >= 3
+ if ( !shadow_set_guest_paging_levels(v->domain, 2) ) {
printk("Unsupported guest paging levels\n");
domain_crash_synchronous(); /* need to take a clean path */
}
#endif
}
- else
-#endif /* __x86_64__ */
+
+ /* update CR4's PAE if needed */
+ __vmread(GUEST_CR4, &cr4);
+ if ( (!(cr4 & X86_CR4_PAE)) &&
+ test_bit(VMX_CPU_STATE_PAE_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
{
-#if CONFIG_PAGING_LEVELS >= 3
- if(!shadow_set_guest_paging_levels(v->domain, 2)) {
- printk("Unsupported guest paging levels\n");
- domain_crash_synchronous(); /* need to take a clean path */
- }
-#endif
- }
-
- {
- unsigned long crn;
- /* update CR4's PAE if needed */
- __vmread(GUEST_CR4, &crn);
- if ( (!(crn & X86_CR4_PAE)) &&
- test_bit(VMX_CPU_STATE_PAE_ENABLED,
- &v->arch.hvm_vmx.cpu_state) )
- {
- HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
- __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
- }
+ HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE in cr4\n");
+ __vmwrite(GUEST_CR4, cr4 | X86_CR4_PAE);
}
/*
@@ -1262,8 +1266,8 @@
v->arch.hvm_vmx.cpu_cr3, mfn);
}
- if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
- if(v->arch.hvm_vmx.cpu_cr3) {
+ if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
+ if ( v->arch.hvm_vmx.cpu_cr3 ) {
put_page(mfn_to_page(get_mfn_from_gpfn(
v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
v->arch.guest_table = mk_pagetable(0);
@@ -1274,7 +1278,8 @@
* real-mode by performing a world switch to VMXAssist whenever
* a partition disables the CR0.PE bit.
*/
- if ((value & X86_CR0_PE) == 0) {
+ if ( (value & X86_CR0_PE) == 0 )
+ {
if ( value & X86_CR0_PG ) {
/* inject GP here */
vmx_inject_exception(v, TRAP_gp_fault, 0);
@@ -1284,8 +1289,9 @@
* Disable paging here.
* Same to PE == 1 && PG == 0
*/
- if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
- &v->arch.hvm_vmx.cpu_state)){
+ if ( test_bit(VMX_CPU_STATE_LMA_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
clear_bit(VMX_CPU_STATE_LMA_ENABLED,
&v->arch.hvm_vmx.cpu_state);
__vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
@@ -1295,19 +1301,21 @@
}
clear_all_shadow_status(v->domain);
- if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
+ if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) {
set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
__vmread(GUEST_RIP, &eip);
HVM_DBG_LOG(DBG_LEVEL_1,
"Transfering control to vmxassist %%eip 0x%lx\n", eip);
return 0; /* do not update eip! */
}
- } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
- &v->arch.hvm_vmx.cpu_state)) {
+ } else if ( test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+ &v->arch.hvm_vmx.cpu_state) )
+ {
__vmread(GUEST_RIP, &eip);
HVM_DBG_LOG(DBG_LEVEL_1,
"Enabling CR0.PE at %%eip 0x%lx\n", eip);
- if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
+ if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
+ {
clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
&v->arch.hvm_vmx.cpu_state);
__vmread(GUEST_RIP, &eip);
@@ -1437,15 +1445,13 @@
}
case 4: /* CR4 */
{
- unsigned long old_cr4;
-
- __vmread(CR4_READ_SHADOW, &old_cr4);
-
- if ( value & X86_CR4_PAE && !(old_cr4 & X86_CR4_PAE) )
+ __vmread(CR4_READ_SHADOW, &old_cr);
+
+ if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
{
set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
- if ( vmx_pgbit_test(v) )
+ if ( vmx_pgbit_test(v) )
{
/* The guest is 32 bit. */
#if CONFIG_PAGING_LEVELS >= 4
@@ -1459,7 +1465,7 @@
if ( !VALID_MFN(mfn = get_mfn_from_gpfn(
v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
- !get_page(mfn_to_page(mfn), v->domain) )
+ !get_page(mfn_to_page(mfn), v->domain) )
{
printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
domain_crash_synchronous(); /* need to take a clean path */
@@ -1488,12 +1494,12 @@
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn =
%lx",
v->arch.hvm_vmx.cpu_cr3, mfn);
#endif
- }
+ }
else
{
/* The guest is 64 bit. */
#if CONFIG_PAGING_LEVELS >= 4
- if ( !shadow_set_guest_paging_levels(v->domain, 4) )
+ if ( !shadow_set_guest_paging_levels(v->domain, 4) )
{
printk("Unsupported guest paging levels\n");
domain_crash_synchronous(); /* need to take a clean path */
@@ -1511,7 +1517,6 @@
clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
}
- __vmread(CR4_READ_SHADOW, &old_cr);
__vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
__vmwrite(CR4_READ_SHADOW, value);
@@ -1751,6 +1756,9 @@
fastcall void smp_call_function_interrupt(void);
fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
+#endif
if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
&& !(vector & INTR_INFO_VALID_MASK))
@@ -1778,6 +1786,11 @@
case ERROR_APIC_VECTOR:
smp_error_interrupt(regs);
break;
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ case THERMAL_APIC_VECTOR:
+ smp_thermal_interrupt(regs);
+ break;
+#endif
default:
regs->entry_vector = vector;
do_IRQ(regs);
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/mm.c Fri Feb 24 22:41:08 2006
@@ -1776,7 +1776,7 @@
pin_page:
if ( shadow_mode_refcounts(FOREIGNDOM) )
- type = PGT_writable_page;
+ break;
okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
if ( unlikely(!okay) )
@@ -1811,6 +1811,9 @@
goto pin_page;
case MMUEXT_UNPIN_TABLE:
+ if ( shadow_mode_refcounts(d) )
+ break;
+
if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
{
MEM_LOG("Mfn %lx bad domain (dom=%p)",
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/shadow.c Fri Feb 24 22:41:08 2006
@@ -3609,7 +3609,7 @@
if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
{
if ( SH_GUEST_32PAE )
- gpfn = hvm_get_guest_ctrl_reg(v, 3);
+ gpfn = (hvm_get_guest_ctrl_reg(v, 3)) >> PAGE_SHIFT;
else
gpfn = pagetable_get_pfn(v->arch.guest_table);
}
@@ -3942,19 +3942,17 @@
* on handling the #PF as such.
*/
if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN )
- {
- goto fail;
- }
+ return 0;
shadow_lock(d);
__direct_get_l3e(v, vpa, &sl3e);
- if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) )
+ if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT) )
{
page = alloc_domheap_page(NULL);
if ( !page )
- goto nomem;
+ goto nomem;
smfn = page_to_mfn(page);
sl3e = l3e_from_pfn(smfn, _PAGE_PRESENT);
@@ -3968,11 +3966,11 @@
__direct_get_l2e(v, vpa, &sl2e);
- if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
+ if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
{
page = alloc_domheap_page(NULL);
if ( !page )
- goto nomem;
+ goto nomem;
smfn = page_to_mfn(page);
sl2e = l2e_from_pfn(smfn, __PAGE_HYPERVISOR | _PAGE_USER);
@@ -3985,20 +3983,17 @@
__direct_get_l1e(v, vpa, &sl1e);
- if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) )
+ if ( !(l1e_get_flags(sl1e) & _PAGE_PRESENT) )
{
sl1e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR | _PAGE_USER);
__direct_set_l1e(v, vpa, &sl1e);
- }
+ }
shadow_unlock(d);
return EXCRET_fault_fixed;
-fail:
- return 0;
-
nomem:
- shadow_direct_map_clean(v);
+ shadow_direct_map_clean(d);
domain_crash_synchronous();
}
#endif
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/shadow32.c Fri Feb 24 22:41:08 2006
@@ -1039,12 +1039,10 @@
* on handling the #PF as such.
*/
if ( (mfn = get_mfn_from_gpfn(vpa >> PAGE_SHIFT)) == INVALID_MFN )
- {
- goto fail;
- }
+ return 0;
shadow_lock(d);
-
+
__direct_get_l2e(v, vpa, &sl2e);
if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) )
@@ -1059,7 +1057,7 @@
sple = (l1_pgentry_t *)map_domain_page(smfn);
memset(sple, 0, PAGE_SIZE);
__direct_set_l2e(v, vpa, sl2e);
- }
+ }
if ( !sple )
sple = (l1_pgentry_t *)map_domain_page(l2e_get_pfn(sl2e));
@@ -1078,54 +1076,55 @@
shadow_unlock(d);
return EXCRET_fault_fixed;
-fail:
- return 0;
-
nomem:
- shadow_direct_map_clean(v);
+ shadow_direct_map_clean(d);
domain_crash_synchronous();
}
-int shadow_direct_map_init(struct vcpu *v)
+int shadow_direct_map_init(struct domain *d)
{
struct page_info *page;
l2_pgentry_t *root;
if ( !(page = alloc_domheap_page(NULL)) )
- goto fail;
+ return 0;
root = map_domain_page(page_to_mfn(page));
memset(root, 0, PAGE_SIZE);
unmap_domain_page(root);
- v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page));
+ d->arch.phys_table = mk_pagetable(page_to_maddr(page));
return 1;
-
-fail:
- return 0;
-}
-
-void shadow_direct_map_clean(struct vcpu *v)
+}
+
+void shadow_direct_map_clean(struct domain *d)
{
int i;
+ unsigned long mfn;
l2_pgentry_t *l2e;
- l2e = map_domain_page(
- pagetable_get_pfn(v->domain->arch.phys_table));
+ mfn = pagetable_get_pfn(d->arch.phys_table);
+
+ /*
+ * We may fail very early before direct map is built.
+ */
+ if ( !mfn )
+ return;
+
+ l2e = map_domain_page(mfn);
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
if ( l2e_get_flags(l2e[i]) & _PAGE_PRESENT )
free_domheap_page(mfn_to_page(l2e_get_pfn(l2e[i])));
}
-
- free_domheap_page(
- mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table)));
+ free_domheap_page(mfn_to_page(mfn));
unmap_domain_page(l2e);
- v->domain->arch.phys_table = mk_pagetable(0);
+
+ d->arch.phys_table = mk_pagetable(0);
}
int __shadow_mode_enable(struct domain *d, unsigned int mode)
@@ -1135,7 +1134,7 @@
if(!new_modes) /* Nothing to do - return success */
return 0;
-
+
// can't take anything away by calling this function.
ASSERT(!(d->arch.shadow_mode & ~mode));
@@ -1630,27 +1629,58 @@
perfc_incrc(get_mfn_from_gpfn_foreign);
- va = gpfn << PAGE_SHIFT;
- tabpfn = pagetable_get_pfn(d->arch.phys_table);
- l2 = map_domain_page(tabpfn);
- l2e = l2[l2_table_offset(va)];
- unmap_domain_page(l2);
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- {
- printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
- __func__, d->domain_id, gpfn, l2e_get_intpte(l2e));
- return INVALID_MFN;
- }
- l1 = map_domain_page(l2e_get_pfn(l2e));
- l1e = l1[l1_table_offset(va)];
- unmap_domain_page(l1);
-
+ if ( shadow_mode_external(d) )
+ {
+ unsigned long mfn;
+ unsigned long *l0;
+
+ va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn));
+
+ tabpfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
+ if ( !tabpfn )
+ return INVALID_MFN;
+
+ l2 = map_domain_page(tabpfn);
+ l2e = l2[l2_table_offset(va)];
+ unmap_domain_page(l2);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ return INVALID_MFN;
+
+ l1 = map_domain_page(l2e_get_pfn(l2e));
+ l1e = l1[l1_table_offset(va)];
+ unmap_domain_page(l1);
+ if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
+ return INVALID_MFN;
+
+ l0 = map_domain_page(l1e_get_pfn(l1e));
+ mfn = l0[gpfn & ((PAGE_SIZE / sizeof(mfn)) - 1)];
+ unmap_domain_page(l0);
+ return mfn;
+ }
+ else
+ {
+ va = gpfn << PAGE_SHIFT;
+ tabpfn = pagetable_get_pfn(d->arch.phys_table);
+ l2 = map_domain_page(tabpfn);
+ l2e = l2[l2_table_offset(va)];
+ unmap_domain_page(l2);
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ {
+ printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
+ __func__, d->domain_id, gpfn, l2e_get_intpte(l2e));
+ return INVALID_MFN;
+ }
+ l1 = map_domain_page(l2e_get_pfn(l2e));
+ l1e = l1[l1_table_offset(va)];
+ unmap_domain_page(l1);
#if 0
- printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx,
l1e=%lx\n",
- __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT,
tabpfn, l2e, l1tab, l1e);
+ printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx,
l1e=%lx\n",
+ __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >>
PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
#endif
- return l1e_get_intpte(l1e);
+ return l1e_get_intpte(l1e);
+ }
+
}
static unsigned long
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/shadow_public.c Fri Feb 24 22:41:08 2006
@@ -36,35 +36,40 @@
#define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) -
((_encoded) >> 16))
-int shadow_direct_map_init(struct vcpu *v)
+int shadow_direct_map_init(struct domain *d)
{
struct page_info *page;
l3_pgentry_t *root;
if ( !(page = alloc_domheap_pages(NULL, 0, ALLOC_DOM_DMA)) )
- goto fail;
+ return 0;
root = map_domain_page(page_to_mfn(page));
memset(root, 0, PAGE_SIZE);
root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR);
- v->domain->arch.phys_table = mk_pagetable(page_to_maddr(page));
+ d->arch.phys_table = mk_pagetable(page_to_maddr(page));
unmap_domain_page(root);
return 1;
-
-fail:
- return 0;
-}
-
-void shadow_direct_map_clean(struct vcpu *v)
-{
+}
+
+void shadow_direct_map_clean(struct domain *d)
+{
+ unsigned long mfn;
l2_pgentry_t *l2e;
l3_pgentry_t *l3e;
int i, j;
- l3e = (l3_pgentry_t *)map_domain_page(
- pagetable_get_pfn(v->domain->arch.phys_table));
+ mfn = pagetable_get_pfn(d->arch.phys_table);
+
+ /*
+ * We may fail very early before direct map is built.
+ */
+ if ( !mfn )
+ return;
+
+ l3e = (l3_pgentry_t *)map_domain_page(mfn);
for ( i = 0; i < PAE_L3_PAGETABLE_ENTRIES; i++ )
{
@@ -81,12 +86,11 @@
free_domheap_page(mfn_to_page(l3e_get_pfn(l3e[i])));
}
}
-
- free_domheap_page(
- mfn_to_page(pagetable_get_pfn(v->domain->arch.phys_table)));
+ free_domheap_page(mfn_to_page(mfn));
unmap_domain_page(l3e);
- v->domain->arch.phys_table = mk_pagetable(0);
+
+ d->arch.phys_table = mk_pagetable(0);
}
/****************************************************************************/
@@ -1790,39 +1794,56 @@
unsigned long va, tabpfn;
l1_pgentry_t *l1, l1e;
l2_pgentry_t *l2, l2e;
+#if CONFIG_PAGING_LEVELS >= 4
+ pgentry_64_t *l4 = NULL;
+ pgentry_64_t l4e = { 0 };
+#endif
+ pgentry_64_t *l3 = NULL;
+ pgentry_64_t l3e = { 0 };
+ unsigned long *l0tab = NULL;
+ unsigned long mfn;
ASSERT(shadow_mode_translate(d));
perfc_incrc(get_mfn_from_gpfn_foreign);
- va = gpfn << PAGE_SHIFT;
- tabpfn = pagetable_get_pfn(d->arch.phys_table);
- l2 = map_domain_page(tabpfn);
+ va = RO_MPT_VIRT_START + (gpfn * sizeof(mfn));
+
+ tabpfn = pagetable_get_pfn(d->vcpu[0]->arch.monitor_table);
+ if ( !tabpfn )
+ return INVALID_MFN;
+
+#if CONFIG_PAGING_LEVELS >= 4
+ l4 = map_domain_page(tabpfn);
+ l4e = l4[l4_table_offset(va)];
+ unmap_domain_page(l4);
+ if ( !(entry_get_flags(l4e) & _PAGE_PRESENT) )
+ return INVALID_MFN;
+
+ l3 = map_domain_page(entry_get_pfn(l4e));
+#else
+ l3 = map_domain_page(tabpfn);
+#endif
+ l3e = l3[l3_table_offset(va)];
+ unmap_domain_page(l3);
+ if ( !(entry_get_flags(l3e) & _PAGE_PRESENT) )
+ return INVALID_MFN;
+ l2 = map_domain_page(entry_get_pfn(l3e));
l2e = l2[l2_table_offset(va)];
unmap_domain_page(l2);
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- {
- printk("%s(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
- __func__, d->domain_id, gpfn, l2e_get_intpte(l2e));
return INVALID_MFN;
- }
+
l1 = map_domain_page(l2e_get_pfn(l2e));
l1e = l1[l1_table_offset(va)];
unmap_domain_page(l1);
-
-#if 0
- printk("%s(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx,
l1e=%lx\n",
- __func__, d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT,
tabpfn, l2e, l1tab, l1e);
-#endif
-
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
- {
- printk("%s(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
- __func__, d->domain_id, gpfn, l1e_get_intpte(l1e));
return INVALID_MFN;
- }
-
- return l1e_get_pfn(l1e);
+
+ l0tab = map_domain_page(l1e_get_pfn(l1e));
+ mfn = l0tab[gpfn & ((PAGE_SIZE / sizeof (mfn)) - 1)];
+ unmap_domain_page(l0tab);
+ return mfn;
}
static u32 remove_all_access_in_page(
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/traps.c Fri Feb 24 22:41:08 2006
@@ -1430,11 +1430,7 @@
if ( cur.address == 0 )
break;
- if ( !VALID_CODESEL(cur.cs) )
- {
- rc = -EPERM;
- break;
- }
+ fixup_guest_selector(cur.cs);
memcpy(&dst[cur.vector], &cur, sizeof(cur));
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/x86_32/mm.c Fri Feb 24 22:41:08 2006
@@ -223,8 +223,7 @@
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
- if ( (ss & 3) != 1 )
- return -EPERM;
+ fixup_guest_selector(ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
@@ -239,6 +238,7 @@
{
unsigned long base, limit;
u32 a = d->a, b = d->b;
+ u16 cs;
/* A not-present descriptor will always fault, so is safe. */
if ( !(b & _SEGMENT_P) )
@@ -250,8 +250,8 @@
* gates (consider a call gate pointing at another kernel descriptor with
* DPL 0 -- this would get the OS ring-0 privileges).
*/
- if ( (b & _SEGMENT_DPL) == 0 )
- goto bad;
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
if ( !(b & _SEGMENT_S) )
{
@@ -271,9 +271,12 @@
if ( (b & _SEGMENT_TYPE) != 0xc00 )
goto bad;
- /* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
+ /* Validate and fix up the target code selector. */
+ cs = a >> 16;
+ fixup_guest_selector(cs);
+ if ( !guest_gate_selector_okay(cs) )
goto bad;
+ a = d->a = (d->a & 0xffffU) | (cs << 16);
/* Reserved bits must be zero. */
if ( (b & 0xe0) != 0 )
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/x86_32/traps.c Fri Feb 24 22:41:08 2006
@@ -254,10 +254,14 @@
/*
* We can't virtualise interrupt gates, as there's no way to get
- * the CPU to automatically clear the events_mask variable.
- */
- if ( TI_GET_IF(ti) )
+ * the CPU to automatically clear the events_mask variable. Also we
+ * must ensure that the CS is safe to poke into an interrupt gate.
+ */
+ if ( TI_GET_IF(ti) || !guest_gate_selector_okay(ti->cs) )
+ {
+ v->arch.int80_desc.a = v->arch.int80_desc.b = 0;
return;
+ }
v->arch.int80_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
v->arch.int80_desc.b =
@@ -274,8 +278,8 @@
{
struct vcpu *d = current;
- if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
- return -EPERM;
+ fixup_guest_selector(event_selector);
+ fixup_guest_selector(failsafe_selector);
d->arch.guest_context.event_callback_cs = event_selector;
d->arch.guest_context.event_callback_eip = event_address;
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/x86_64/entry.S Fri Feb 24 22:41:08 2006
@@ -206,7 +206,7 @@
sti
leaq VCPU_trap_bounce(%rbx),%rdx
movq %rax,TRAPBOUNCE_eip(%rdx)
- movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
+ movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
call create_bounce_frame
jmp test_all_events
1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx)
@@ -229,9 +229,6 @@
1: /* In kernel context already: push new frame at existing %rsp. */
movq UREGS_rsp+8(%rsp),%rsi
andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest.
- testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
- jz 2f
- orb $0x01,UREGS_cs+8(%rsp)
2: andq $~0xf,%rsi # Stack frames are 16-byte aligned.
movq $HYPERVISOR_VIRT_START,%rax
cmpq %rax,%rsi
diff -r d940ec92958d -r 6c43118bdba8 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Fri Feb 24 21:03:07 2006
+++ b/xen/arch/x86/x86_64/mm.c Fri Feb 24 22:41:08 2006
@@ -228,8 +228,7 @@
long do_stack_switch(unsigned long ss, unsigned long esp)
{
- if ( (ss & 3) != 3 )
- return -EPERM;
+ fixup_guest_selector(ss);
current->arch.guest_context.kernel_ss = ss;
current->arch.guest_context.kernel_sp = esp;
return 0;
@@ -292,14 +291,15 @@
int check_descriptor(struct desc_struct *d)
{
u32 a = d->a, b = d->b;
+ u16 cs;
/* A not-present descriptor will always fault, so is safe. */
if ( !(b & _SEGMENT_P) )
goto good;
- /* The guest can only safely be executed in ring 3. */
- if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
- goto bad;
+ /* Check and fix up the DPL. */
+ if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL << 13) )
+ d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL << 13);
/* All code and data segments are okay. No base/limit checking. */
if ( (b & _SEGMENT_S) )
@@ -313,9 +313,12 @@
if ( (b & _SEGMENT_TYPE) != 0xc00 )
goto bad;
- /* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
+ /* Validate and fix up the target code selector. */
+ cs = a >> 16;
+ fixup_guest_selector(cs);
+ if ( !guest_gate_selector_okay(cs) )
goto bad;
+ a = d->a = (d->a & 0xffffU) | (cs << 16);
/* Reserved bits must be zero. */
if ( (b & 0xe0) != 0 )
diff -r d940ec92958d -r 6c43118bdba8 xen/common/elf.c
--- a/xen/common/elf.c Fri Feb 24 21:03:07 2006
+++ b/xen/common/elf.c Fri Feb 24 22:41:08 2006
@@ -61,7 +61,6 @@
continue;
guestinfo = elfbase + shdr->sh_offset;
- printk("Xen-ELF header found: '%s'\n", guestinfo);
if ( (strstr(guestinfo, "LOADER=generic") == NULL) &&
(strstr(guestinfo, "GUEST_OS=linux") == NULL) )
diff -r d940ec92958d -r 6c43118bdba8 xen/common/event_channel.c
--- a/xen/common/event_channel.c Fri Feb 24 21:03:07 2006
+++ b/xen/common/event_channel.c Fri Feb 24 22:41:08 2006
@@ -399,7 +399,7 @@
}
-long evtchn_send(int lport)
+long evtchn_send(unsigned int lport)
{
struct evtchn *lchn, *rchn;
struct domain *ld = current->domain, *rd;
@@ -508,15 +508,13 @@
return rc;
}
-static long evtchn_bind_vcpu(evtchn_bind_vcpu_t *bind)
-{
- struct domain *d = current->domain;
- int port = bind->port;
- int vcpu = bind->vcpu;
+long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id)
+{
+ struct domain *d = current->domain;
struct evtchn *chn;
long rc = 0;
- if ( (vcpu >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu] == NULL) )
+ if ( (vcpu_id >= ARRAY_SIZE(d->vcpu)) || (d->vcpu[vcpu_id] == NULL) )
return -ENOENT;
spin_lock(&d->evtchn_lock);
@@ -533,7 +531,7 @@
case ECS_UNBOUND:
case ECS_INTERDOMAIN:
case ECS_PIRQ:
- chn->notify_vcpu_id = vcpu;
+ chn->notify_vcpu_id = vcpu_id;
break;
default:
rc = -EINVAL;
@@ -638,7 +636,7 @@
break;
case EVTCHNOP_bind_vcpu:
- rc = evtchn_bind_vcpu(&op.u.bind_vcpu);
+ rc = evtchn_bind_vcpu(op.u.bind_vcpu.port, op.u.bind_vcpu.vcpu);
break;
case EVTCHNOP_unmask:
diff -r d940ec92958d -r 6c43118bdba8 xen/common/kernel.c
--- a/xen/common/kernel.c Fri Feb 24 21:03:07 2006
+++ b/xen/common/kernel.c Fri Feb 24 22:41:08 2006
@@ -193,7 +193,8 @@
if ( shadow_mode_translate(current->domain) )
fi.submap |=
(1U << XENFEAT_writable_page_tables) |
- (1U << XENFEAT_auto_translated_physmap);
+ (1U << XENFEAT_auto_translated_physmap) |
+ (1U << XENFEAT_pae_pgdir_above_4gb);
break;
default:
return -EINVAL;
diff -r d940ec92958d -r 6c43118bdba8 xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Fri Feb 24 21:03:07 2006
+++ b/xen/common/keyhandler.c Fri Feb 24 22:41:08 2006
@@ -13,6 +13,7 @@
#include <xen/domain.h>
#include <xen/rangeset.h>
#include <asm/debugger.h>
+#include <asm/shadow.h>
#define KEY_MAX 256
#define STR_MAX 64
@@ -131,6 +132,8 @@
d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7],
d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11],
d->handle[12], d->handle[13], d->handle[14], d->handle[15]);
+
+ arch_dump_domain_info(d);
rangeset_domain_printk(d);
diff -r d940ec92958d -r 6c43118bdba8 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Fri Feb 24 21:03:07 2006
+++ b/xen/common/sched_sedf.c Fri Feb 24 22:41:08 2006
@@ -1609,15 +1609,19 @@
else {
/*time driven domains*/
for_each_vcpu(p, v) {
- /* sanity checking! */
- if(cmd->u.sedf.slice > cmd->u.sedf.period )
+ /*
+ * Sanity checking: note that disabling extra weight requires
+ * that we set a non-zero slice.
+ */
+ if ( (cmd->u.sedf.slice == 0) ||
+ (cmd->u.sedf.slice > cmd->u.sedf.period) )
return -EINVAL;
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
- EDOM_INFO(v)->period = cmd->u.sedf.period;
+ EDOM_INFO(v)->period = cmd->u.sedf.period;
EDOM_INFO(v)->slice_orig =
- EDOM_INFO(v)->slice = cmd->u.sedf.slice;
+ EDOM_INFO(v)->slice = cmd->u.sedf.slice;
}
}
if (sedf_adjust_weights(cmd))
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-ia64/vmx.h Fri Feb 24 22:41:08 2006
@@ -57,9 +57,9 @@
return &((shared_iopage_t
*)d->arch.vmx_platform.shared_page_va)->vcpu_iodata[cpu];
}
-static inline int iopacket_port(struct domain *d)
+static inline int iopacket_port(struct vcpu *v)
{
- return ((shared_iopage_t
*)d->arch.vmx_platform.shared_page_va)->sp_global.eport;
+ return get_vio(v->domain, v->vcpu_id)->vp_eport;
}
static inline shared_iopage_t *get_sp(struct domain *d)
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/desc.h Fri Feb 24 22:41:08 2006
@@ -26,16 +26,28 @@
#define GUEST_KERNEL_RPL 1
#endif
+/* Fix up the RPL of a guest segment selector. */
+#define fixup_guest_selector(sel) \
+ ((sel) = (((sel) & 3) >= GUEST_KERNEL_RPL) ? (sel) : \
+ (((sel) & ~3) | GUEST_KERNEL_RPL))
+
/*
- * Guest OS must provide its own code selectors, or use the one we provide. Any
- * LDT selector value is okay. Note that checking only the RPL is insufficient:
- * if the selector is poked into an interrupt, trap or call gate then the RPL
- * is ignored when the gate is accessed.
+ * We need this function because enforcing the correct guest kernel RPL is
+ * insufficient if the selector is poked into an interrupt, trap or call gate.
+ * The selector RPL is ignored when a gate is accessed. We must therefore make
+ * sure that the selector does not reference a Xen-private segment.
+ *
+ * Note that selectors used only by IRET do not need to be checked. If the
+ * descriptor DPL differs from CS RPL then we'll #GP.
+ *
+ * Stack and data selectors do not need to be checked. If DS, ES, FS, GS are
+ * DPL < CPL then they'll be cleared automatically. If SS RPL or DPL differs
+ * from CS RPL then we'll #GP.
*/
-#define VALID_SEL(_s) \
- (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || ((_s)&4)) && \
- (((_s)&3) == GUEST_KERNEL_RPL))
-#define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
+#define guest_gate_selector_okay(sel) \
+ ((((sel)>>3) < FIRST_RESERVED_GDT_ENTRY) || /* Guest seg? */ \
+ ((sel) == FLAT_KERNEL_CS) || /* Xen default seg? */ \
+ ((sel) & 4)) /* LDT seg? */
/* These are bitmasks for the high 32 bits of a descriptor table entry. */
#define _SEGMENT_TYPE (15<< 8)
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/hvm/domain.h Fri Feb 24 22:41:08 2006
@@ -34,6 +34,7 @@
unsigned long shared_page_va;
unsigned int nr_vcpus;
unsigned int apic_enabled;
+ unsigned int pae_enabled;
struct hvm_virpit vpit;
struct hvm_virpic vpic;
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/hvm/io.h Fri Feb 24 22:41:08 2006
@@ -23,6 +23,7 @@
#include <asm/hvm/vpic.h>
#include <asm/hvm/vioapic.h>
#include <public/hvm/ioreq.h>
+#include <public/event_channel.h>
#define MAX_OPERAND_NUM 2
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/hvm/support.h Fri Feb 24 22:41:08 2006
@@ -40,9 +40,9 @@
return &get_sp(d)->vcpu_iodata[cpu];
}
-static inline int iopacket_port(struct domain *d)
+static inline int iopacket_port(struct vcpu *v)
{
- return get_sp(d)->sp_global.eport;
+ return get_vio(v->domain, v->vcpu_id)->vp_eport;
}
/* XXX these are really VMX specific */
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/processor.h Fri Feb 24 22:41:08 2006
@@ -124,7 +124,6 @@
#define TBF_EXCEPTION_ERRCODE 2
#define TBF_INTERRUPT 8
#define TBF_FAILSAFE 16
-#define TBF_SLOW_IRET 32
/* 'arch_vcpu' flags values */
#define _TF_kernel_mode 0
diff -r d940ec92958d -r 6c43118bdba8 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/asm-x86/shadow.h Fri Feb 24 22:41:08 2006
@@ -115,8 +115,8 @@
#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1)
- (_max)) << 16) | (_min))
#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded)
>> 16))
-extern void shadow_direct_map_clean(struct vcpu *v);
-extern int shadow_direct_map_init(struct vcpu *v);
+extern void shadow_direct_map_clean(struct domain *d);
+extern int shadow_direct_map_init(struct domain *d);
extern int shadow_direct_map_fault(
unsigned long vpa, struct cpu_user_regs *regs);
extern void shadow_mode_init(void);
diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/hvm/hvm_info_table.h
--- a/xen/include/public/hvm/hvm_info_table.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/public/hvm/hvm_info_table.h Fri Feb 24 22:41:08 2006
@@ -17,7 +17,7 @@
uint8_t checksum;
uint8_t acpi_enabled;
uint8_t apic_enabled;
- uint8_t pad[1];
+ uint8_t pae_enabled;
uint32_t nr_vcpus;
};
diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/public/hvm/ioreq.h Fri Feb 24 22:41:08 2006
@@ -53,6 +53,7 @@
uint8_t dir:1; /* 1=read, 0=write */
uint8_t df:1;
uint8_t type; /* I/O type */
+ uint64_t io_count; /* How many IO done on a vcpu */
} ioreq_t;
#define MAX_VECTOR 256
@@ -65,11 +66,13 @@
uint16_t pic_irr;
uint16_t pic_last_irr;
uint16_t pic_clear_irr;
- int eport; /* Event channel port */
} global_iodata_t;
typedef struct {
- ioreq_t vp_ioreq;
+ ioreq_t vp_ioreq;
+ /* Event channel port */
+ unsigned long vp_eport; /* VMX vcpu uses this to notify DM */
+ unsigned long dm_eport; /* DM uses this to notify VMX vcpu */
} vcpu_iodata_t;
typedef struct {
diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/io/xs_wire.h
--- a/xen/include/public/io/xs_wire.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/public/io/xs_wire.h Fri Feb 24 22:41:08 2006
@@ -54,7 +54,7 @@
XSD_ERROR(EROFS),
XSD_ERROR(EBUSY),
XSD_ERROR(EAGAIN),
- XSD_ERROR(EISCONN),
+ XSD_ERROR(EISCONN)
};
struct xsd_sockmsg
@@ -70,7 +70,7 @@
enum xs_watch_type
{
XS_WATCH_PATH = 0,
- XS_WATCH_TOKEN,
+ XS_WATCH_TOKEN
};
/* Inter-domain shared memory communications. */
diff -r d940ec92958d -r 6c43118bdba8 xen/include/public/version.h
--- a/xen/include/public/version.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/public/version.h Fri Feb 24 22:41:08 2006
@@ -18,6 +18,7 @@
/* arg == xen_extraversion_t. */
#define XENVER_extraversion 1
typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
/* arg == xen_compile_info_t. */
#define XENVER_compile_info 2
@@ -30,9 +31,11 @@
#define XENVER_capabilities 3
typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
#define XENVER_changeset 4
typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
#define XENVER_platform_parameters 5
typedef struct xen_platform_parameters {
@@ -45,9 +48,34 @@
uint32_t submap; /* OUT: 32-bit submap */
} xen_feature_info_t;
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
#define XENFEAT_writable_page_tables 0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space is handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
#define XENFEAT_auto_translated_physmap 2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel 3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb 4
#define XENFEAT_NR_SUBMAPS 1
diff -r d940ec92958d -r 6c43118bdba8 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/xen/domain.h Fri Feb 24 22:41:08 2006
@@ -24,4 +24,6 @@
extern void dump_pageframe_info(struct domain *d);
+extern void arch_dump_domain_info(struct domain *d);
+
#endif /* __XEN_DOMAIN_H__ */
diff -r d940ec92958d -r 6c43118bdba8 xen/include/xen/event.h
--- a/xen/include/xen/event.h Fri Feb 24 21:03:07 2006
+++ b/xen/include/xen/event.h Fri Feb 24 22:41:08 2006
@@ -63,4 +63,10 @@
(!!(v)->vcpu_info->evtchn_upcall_pending & \
!(v)->vcpu_info->evtchn_upcall_mask)
+/* Send a notification from a local event-channel port. */
+extern long evtchn_send(unsigned int lport);
+
+/* Bind a local event-channel port to the specified VCPU. */
+extern long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+
#endif /* __XEN_EVENT_H__ */
diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypercall-x86_32.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/extras/mini-os/include/hypercall-x86_32.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,326 @@
+/******************************************************************************
+ * hypercall-x86_32.h
+ *
+ * Copied from XenLinux.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __HYPERCALL_X86_32_H__
+#define __HYPERCALL_X86_32_H__
+
+#include <xen/xen.h>
+#include <xen/sched.h>
+#include <xen/nmi.h>
+#include <mm.h>
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+extern char hypercall_page[PAGE_SIZE];
+
+#define _hypercall0(type, name) \
+({ \
+ long __res; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res) \
+ : \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall1(type, name, a1) \
+({ \
+ long __res, __ign1; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=b" (__ign1) \
+ : "1" ((long)(a1)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall2(type, name, a1, a2) \
+({ \
+ long __res, __ign1, __ign2; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
+ : "1" ((long)(a1)), "2" ((long)(a2)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall3(type, name, a1, a2, a3) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4) \
+({ \
+ long __res, __ign1, __ign2, __ign3, __ign4; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3), "=S" (__ign4) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)), "4" ((long)(a4)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+({ \
+ long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)), "4" ((long)(a4)), \
+ "5" ((long)(a5)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+static inline int
+HYPERVISOR_set_trap_table(
+ trap_info_t *table)
+{
+ return _hypercall1(int, set_trap_table, table);
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmu_update, req, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, int entries)
+{
+ return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ return _hypercall2(int, stack_switch, ss, esp);
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+ return _hypercall4(int, set_callbacks,
+ event_selector, event_address,
+ failsafe_selector, failsafe_address);
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ int set)
+{
+ return _hypercall1(int, fpu_taskswitch, set);
+}
+
+static inline int
+HYPERVISOR_sched_op(
+ int cmd, unsigned long arg)
+{
+ return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+ u64 timeout)
+{ /* The 64-bit timeout is split into two unsigned long halves for the call. */
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); /* low half first, then high half */
+}
+
+static inline int
+HYPERVISOR_dom0_op(
+ dom0_op_t *dom0_op)
+{
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION; /* written into the caller-supplied request before every call */
+ return _hypercall1(int, dom0_op, dom0_op);
+}
+
+static inline int
+HYPERVISOR_set_debugreg(
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+ int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+ u64 ma, u64 desc)
+{
+ return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); /* each u64 is passed as (value, value>>32); _hypercall4 casts every argument to long */
+}
+
+static inline int
+HYPERVISOR_memory_op(
+ unsigned int cmd, void *arg)
+{
+ return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_multicall(
+ void *call_list, int nr_calls)
+{
+ return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long va, pte_t new_val, unsigned long flags)
+{
+ unsigned long pte_hi = 0; /* stays 0 unless CONFIG_X86_PAE is defined */
+#ifdef CONFIG_X86_PAE
+ pte_hi = new_val.pte_high;
+#endif
+ return _hypercall4(int, update_va_mapping, va, /* the PTE is passed as two words: pte_low, then pte_hi */
+ new_val.pte_low, pte_hi, flags);
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+ void *op)
+{
+ return _hypercall1(int, event_channel_op, op);
+}
+
+static inline int
+HYPERVISOR_xen_version(
+ int cmd, void *arg)
+{
+ return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_console_io(
+ int cmd, int count, char *str)
+{
+ return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+ void *physdev_op)
+{
+ return _hypercall1(int, physdev_op, physdev_op);
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ unsigned long pte_hi = 0; /* stays 0 unless CONFIG_X86_PAE is defined */
+#ifdef CONFIG_X86_PAE
+ pte_hi = new_val.pte_high;
+#endif
+ return _hypercall5(int, update_va_mapping_otherdomain, va, /* the PTE is passed as two words, followed by flags and domid */
+ new_val.pte_low, pte_hi, flags, domid);
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int
+HYPERVISOR_vcpu_op(
+ int cmd, int vcpuid, void *extra_args)
+{
+ return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, /* sched_op issued with a third argument (srec), */
+ SHUTDOWN_suspend, srec); /* unlike the two-argument HYPERVISOR_sched_op() wrapper */
+}
+
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+
+#endif /* __HYPERCALL_X86_32_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r d940ec92958d -r 6c43118bdba8 extras/mini-os/include/hypercall-x86_64.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/extras/mini-os/include/hypercall-x86_64.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,326 @@
+/******************************************************************************
+ * hypercall-x86_64.h
+ *
+ * Copied from XenLinux.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * 64-bit updates:
+ * Benjamin Liu <benjamin.liu@xxxxxxxxx>
+ * Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __HYPERCALL_X86_64_H__
+#define __HYPERCALL_X86_64_H__
+
+#include <xen/xen.h>
+#include <xen/sched.h>
+#include <mm.h>
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+extern char hypercall_page[PAGE_SIZE];
+
+#define _hypercall0(type, name) \
+({ \
+ long __res; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res) \
+ : \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall1(type, name, a1) \
+({ \
+ long __res, __ign1; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=D" (__ign1) \
+ : "1" ((long)(a1)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall2(type, name, a1, a2) \
+({ \
+ long __res, __ign1, __ign2; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \
+ : "1" ((long)(a1)), "2" ((long)(a2)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall3(type, name, a1, a2, a3) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) \
+ : "1" ((long)(a1)), "2" ((long)(a2)), \
+ "3" ((long)(a3)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4) \
+({ /* 4-argument hypercall: calls hypercall_page + __HYPERVISOR_<name> * 32 and yields the "a" register cast to type */ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "movq %7,%%r10; " /* operand 7 is a4; it is moved into r10 explicitly */ \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) /* dummy outputs: the asm declares that it overwrites the D/S/d registers */ \
+ : "1" ((long)(a1)), "2" ((long)(a2)), /* a1-a3 are tied to output operands 1-3 */ \
+ "3" ((long)(a3)), "g" ((long)(a4)) \
+ : "memory", "r10" ); /* r10 is written by the movq above, so it is listed as clobbered */ \
+ (type)__res; \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+({ /* 5-argument hypercall: calls hypercall_page + __HYPERVISOR_<name> * 32 and yields the "a" register cast to type */ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "movq %7,%%r10; movq %8,%%r8; " /* operands 7 and 8 are a4 and a5; moved into r10 and r8 explicitly */ \
+ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) /* dummy outputs: the asm declares that it overwrites the D/S/d registers */ \
+ : "1" ((long)(a1)), "2" ((long)(a2)), /* a1-a3 are tied to output operands 1-3 */ \
+ "3" ((long)(a3)), "g" ((long)(a4)), \
+ "g" ((long)(a5)) \
+ : "memory", "r10", "r8" ); /* r10 and r8 are written by the movq pair above, so both are listed as clobbered */ \
+ (type)__res; \
+})
+
+static inline int
+HYPERVISOR_set_trap_table(
+ trap_info_t *table)
+{
+ return _hypercall1(int, set_trap_table, table);
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmu_update, req, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, int entries)
+{
+ return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ return _hypercall2(int, stack_switch, ss, esp);
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+ unsigned long event_address, unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ return _hypercall3(int, set_callbacks,
+ event_address, failsafe_address, syscall_address);
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ int set)
+{
+ return _hypercall1(int, fpu_taskswitch, set);
+}
+
+static inline int
+HYPERVISOR_sched_op(
+ int cmd, unsigned long arg)
+{
+ return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+ u64 timeout)
+{
+ return _hypercall1(long, set_timer_op, timeout);
+}
+
+static inline int
+HYPERVISOR_dom0_op(
+ dom0_op_t *dom0_op)
+{
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION; /* written into the caller-supplied request before every call */
+ return _hypercall1(int, dom0_op, dom0_op);
+}
+
+static inline int
+HYPERVISOR_set_debugreg(
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+ int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+ unsigned long ma, unsigned long word)
+{
+ return _hypercall2(int, update_descriptor, ma, word);
+}
+
+static inline int
+HYPERVISOR_memory_op(
+ unsigned int cmd, void *arg)
+{
+ return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_multicall(
+ void *call_list, int nr_calls)
+{
+ return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long va, pte_t new_val, unsigned long flags)
+{
+ return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+ void *op)
+{
+ return _hypercall1(int, event_channel_op, op);
+}
+
+static inline int
+HYPERVISOR_xen_version(
+ int cmd, void *arg)
+{
+ return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_console_io(
+ int cmd, int count, char *str)
+{
+ return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+ void *physdev_op)
+{
+ return _hypercall1(int, physdev_op, physdev_op);
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ return _hypercall4(int, update_va_mapping_otherdomain, va,
+ new_val.pte, flags, domid);
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int
+HYPERVISOR_vcpu_op(
+ int cmd, int vcpuid, void *extra_args)
+{
+ return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+}
+
+static inline int
+HYPERVISOR_set_segment_base(
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, /* sched_op issued with a third argument (srec), */
+ SHUTDOWN_suspend, srec); /* unlike the two-argument HYPERVISOR_sched_op() wrapper */
+}
+
+static inline int
+HYPERVISOR_nmi_op(
+ unsigned long op,
+ unsigned long arg)
+{
+ return _hypercall2(int, nmi_op, op, arg);
+}
+
+#endif /* __HYPERCALL_X86_64_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/Kconfig.cpu
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Fri Feb 24 22:41:08 2006
@@ -0,0 +1,315 @@
+# CPU selection options and the optimizations that depend on them
+if !X86_ELAN
+
+choice
+ prompt "Processor family"
+ default M686
+
+config M386
+ bool "386"
+ ---help---
+ This is the processor type of your CPU. This information is used for
+ optimizing purposes. In order to compile a kernel that can run on
+ all x86 CPU types (albeit not optimally fast), you can specify
+ "386" here.
+
+ The kernel will not necessarily run on earlier architectures than
+ the one you have chosen, e.g. a Pentium optimized kernel will run on
+ a PPro, but not necessarily on a i486.
+
+ Here are the settings recommended for greatest speed:
+ - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
+ 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels
+ will run on a 386 class machine.
+ - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
+ SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+ - "586" for generic Pentium CPUs lacking the TSC
+ (time stamp counter) register.
+ - "Pentium-Classic" for the Intel Pentium.
+ - "Pentium-MMX" for the Intel Pentium MMX.
+ - "Pentium-Pro" for the Intel Pentium Pro.
+ - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
+ - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
+ - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
+ - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
+ - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+ - "Crusoe" for the Transmeta Crusoe series.
+ - "Efficeon" for the Transmeta Efficeon series.
+ - "Winchip-C6" for original IDT Winchip.
+ - "Winchip-2" for IDT Winchip 2.
+ - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+ - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
+ - "Geode GX/LX" for AMD Geode GX and LX processors.
+ - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
+ - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
+
+ If you don't know what to do, choose "386".
+
+config M486
+ bool "486"
+ help
+ Select this for a 486 series processor, either Intel or one of the
+ compatible processors from AMD, Cyrix, or IBM. Includes DX,
+ DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
+ U5S.
+
+config M586
+ bool "586/K5/5x86/6x86/6x86MX"
+ help
+ Select this for a 586 or 686 series processor such as the AMD K5,
+ the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
+ assume the RDTSC (Read Time Stamp Counter) instruction.
+
+config M586TSC
+ bool "Pentium-Classic"
+ help
+ Select this for a Pentium Classic processor with the RDTSC (Read
+ Time Stamp Counter) instruction for benchmarking.
+
+config M586MMX
+ bool "Pentium-MMX"
+ help
+ Select this for a Pentium with the MMX graphics/multimedia
+ extended instructions.
+
+config M686
+ bool "Pentium-Pro"
+ help
+ Select this for Intel Pentium Pro chips. This enables the use of
+ Pentium Pro extended instructions, and disables the init-time guard
+ against the f00f bug found in earlier Pentiums.
+
+config MPENTIUMII
+ bool "Pentium-II/Celeron(pre-Coppermine)"
+ help
+ Select this for Intel chips based on the Pentium-II and
+ pre-Coppermine Celeron core. This option enables an unaligned
+ copy optimization, compiles the kernel with optimization flags
+ tailored for the chip, and applies any applicable Pentium Pro
+ optimizations.
+
+config MPENTIUMIII
+ bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
+ help
+ Select this for Intel chips based on the Pentium-III and
+ Celeron-Coppermine core. This option enables use of some
+ extended prefetch instructions in addition to the Pentium II
+ extensions.
+
+config MPENTIUMM
+ bool "Pentium M"
+ help
+ Select this for Intel Pentium M (not Pentium-4 M)
+ notebook chips.
+
+config MPENTIUM4
+ bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
+ help
+ Select this for Intel Pentium 4 chips. This includes the
+ Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
+ (not Pentium M) chips. This option enables compile flags
+ optimized for the chip, uses the correct cache shift, and
+ applies any applicable Pentium III optimizations.
+
+config MK6
+ bool "K6/K6-II/K6-III"
+ help
+ Select this for an AMD K6-family processor. Enables use of
+ some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MK7
+ bool "Athlon/Duron/K7"
+ help
+ Select this for an AMD Athlon K7-family processor. Enables use of
+ some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MK8
+ bool "Opteron/Athlon64/Hammer/K8"
+ help
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables
+ use of some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MCRUSOE
+ bool "Crusoe"
+ help
+ Select this for a Transmeta Crusoe processor. Treats the processor
+ like a 586 with TSC, and sets some GCC optimization flags (like a
+ Pentium Pro with no alignment requirements).
+
+config MEFFICEON
+ bool "Efficeon"
+ help
+ Select this for a Transmeta Efficeon processor.
+
+config MWINCHIPC6
+ bool "Winchip-C6"
+ help
+ Select this for an IDT Winchip C6 chip. Linux and GCC
+ treat this chip as a 586TSC with some extended instructions
+ and alignment requirements.
+
+config MWINCHIP2
+ bool "Winchip-2"
+ help
+ Select this for an IDT Winchip-2. Linux and GCC
+ treat this chip as a 586TSC with some extended instructions
+ and alignment requirements.
+
+config MWINCHIP3D
+ bool "Winchip-2A/Winchip-3"
+ help
+ Select this for an IDT Winchip-2A or 3. Linux and GCC
+ treat this chip as a 586TSC with some extended instructions
+ and alignment requirements. Also enable out of order memory
+ stores for this CPU, which can increase performance of some
+ operations.
+
+config MGEODEGX1
+ bool "GeodeGX1"
+ help
+ Select this for a Geode GX1 (Cyrix MediaGX) chip.
+
+config MGEODE_LX
+ bool "Geode GX/LX"
+ help
+ Select this for AMD Geode GX and LX processors.
+
+config MCYRIXIII
+ bool "CyrixIII/VIA-C3"
+ help
+ Select this for a Cyrix III or C3 chip. Presently Linux and GCC
+ treat this chip as a generic 586. Whilst the CPU is 686 class,
+ it lacks the cmov extension which gcc assumes is present when
+ generating 686 code.
+ Note that Nehemiah (Model 9) and above will not boot with this
+ kernel due to them lacking the 3DNow! instructions used in earlier
+ incarnations of the CPU.
+
+config MVIAC3_2
+ bool "VIA C3-2 (Nehemiah)"
+ help
+ Select this for a VIA C3 "Nehemiah". Selecting this enables usage
+ of SSE and tells gcc to treat the CPU as a 686.
+ Note, this kernel will not boot on older (pre model 9) C3s.
+
+endchoice
+
+config X86_GENERIC
+ bool "Generic x86 support"
+ help
+ Instead of just including optimizations for the selected
+ x86 variant (e.g. PII, Crusoe or Athlon), include some more
+ generic optimizations as well. This will make the kernel
+ perform better on x86 CPUs other than that selected.
+
+ This is really intended for distributors who need more
+ generic optimizations.
+
+endif
+
+#
+# Define implied options from the CPU selection here
+#
+config X86_CMPXCHG
+ bool
+ depends on !M386
+ default y
+
+config X86_XADD
+ bool
+ depends on !M386
+ default y
+
+config X86_L1_CACHE_SHIFT
+ int
+ default "7" if MPENTIUM4 || X86_GENERIC
+ default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+ default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
+ default "6" if MK7 || MK8 || MPENTIUMM
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ depends on !M386
+ default y
+
+config GENERIC_CALIBRATE_DELAY
+ bool
+ default y
+
+config X86_PPRO_FENCE
+ bool
+ depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
+ default y
+
+config X86_F00F_BUG
+ bool
+ depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
+ default y
+
+config X86_WP_WORKS_OK
+ bool
+ depends on !M386
+ default y
+
+config X86_INVLPG
+ bool
+ depends on !M386
+ default y
+
+config X86_BSWAP
+ bool
+ depends on !M386
+ default y
+
+config X86_POPAD_OK
+ bool
+ depends on !M386
+ default y
+
+config X86_CMPXCHG64
+ bool
+ depends on !M386 && !M486
+ default y
+
+config X86_ALIGNMENT_16
+ bool
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+ default y
+
+config X86_GOOD_APIC
+ bool
+ depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
+ default y
+
+config X86_INTEL_USERCOPY
+ bool
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
+ default y
+
+config X86_USE_PPRO_CHECKSUM
+ bool
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX
+ default y
+
+config X86_USE_3DNOW
+ bool
+ depends on MCYRIXIII || MK7 || MGEODE_LX
+ default y
+
+config X86_OOSTORE
+ bool
+ depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+ default y
+
+config X86_TSC
+ bool
+ depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
+ default y
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c Fri Feb 24 22:41:08 2006
@@ -0,0 +1,75 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/personality.h>
+#include <linux/suspend.h>
+#include <asm/ucontext.h>
+#include "sigframe.h"
+#include <asm/fixmap.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/elf.h>
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+#define OFFSET(sym, str, mem) \
+ DEFINE(sym, offsetof(struct str, mem));
+
+void foo(void)
+{
+ OFFSET(SIGCONTEXT_eax, sigcontext, eax);
+ OFFSET(SIGCONTEXT_ebx, sigcontext, ebx);
+ OFFSET(SIGCONTEXT_ecx, sigcontext, ecx);
+ OFFSET(SIGCONTEXT_edx, sigcontext, edx);
+ OFFSET(SIGCONTEXT_esi, sigcontext, esi);
+ OFFSET(SIGCONTEXT_edi, sigcontext, edi);
+ OFFSET(SIGCONTEXT_ebp, sigcontext, ebp);
+ OFFSET(SIGCONTEXT_esp, sigcontext, esp);
+ OFFSET(SIGCONTEXT_eip, sigcontext, eip);
+ BLANK();
+
+ OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
+ OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
+ OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
+ OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+ OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
+ OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
+ OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
+ OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
+ BLANK();
+
+ OFFSET(TI_task, thread_info, task);
+ OFFSET(TI_exec_domain, thread_info, exec_domain);
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_cpu, thread_info, cpu);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_restart_block, thread_info, restart_block);
+ BLANK();
+
+ OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
+ OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
+ BLANK();
+
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_X86_SYSENTER
+ /* Offset from the sysenter stack to tss.esp0 */
+ DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+ sizeof(struct tss_struct));
+#endif
+
+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+ DEFINE(VSYSCALL_BASE, VSYSCALL_BASE);
+}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c Fri Feb 24 22:41:08 2006
@@ -0,0 +1,137 @@
+/*
+ * linux/arch/i386/kernel/sysenter.c
+ *
+ * (C) Copyright 2002 Linus Torvalds
+ *
+ * This file contains the needed initializations to support sysenter.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+#include <asm/pgtable.h>
+#include <asm/unistd.h>
+
+extern asmlinkage void sysenter_entry(void);
+
+void enable_sep_cpu(void) /* Programs the three SYSENTER MSRs from the current CPU's init_tss; a no-op without CONFIG_X86_SYSENTER. */
+{
+#ifdef CONFIG_X86_SYSENTER
+ int cpu = get_cpu();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+ if (!boot_cpu_has(X86_FEATURE_SEP)) { /* no SEP feature bit: leave the TSS and MSRs untouched */
+ put_cpu(); /* balances get_cpu() on the early-out path */
+ return;
+ }
+
+ tss->ss1 = __KERNEL_CS;
+ tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; /* address of the first byte past this TSS */
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+ wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
+ put_cpu(); /* balances get_cpu() above */
+#endif
+}
+
+/*
+ * These symbols are defined by vsyscall.o to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vsyscall_int80_start, vsyscall_int80_end;
+extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
+
+int __init sysenter_setup(void) /* Allocates syscall_page and copies one vsyscall DSO image into it; always returns 0. */
+{
+ syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); /* NOTE(review): the result is not checked for NULL before the memcpy() calls below */
+
+#ifdef CONFIG_X86_SYSENTER
+ if (boot_cpu_has(X86_FEATURE_SEP)) { /* SEP available: install the sysenter image */
+ memcpy(syscall_page,
+ &vsyscall_sysenter_start,
+ &vsyscall_sysenter_end - &vsyscall_sysenter_start);
+ return 0;
+ }
+#endif
+
+ memcpy(syscall_page, /* otherwise install the int80 image */
+ &vsyscall_int80_start,
+ &vsyscall_int80_end - &vsyscall_int80_start);
+
+ return 0;
+}
+
+static struct page* /* .nopage hook of syscall_vm_ops: returns the page backing adr, with a reference taken; type is not written. */
+syscall_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
+{
+ struct page *p = virt_to_page(adr - vma->vm_start + syscall_page); /* offset of adr within the VMA, rebased onto syscall_page */
+ get_page(p);
+ return p;
+}
+
+/* Deliberately empty .close hook; it is installed in syscall_vm_ops only to prevent VMA merging. */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct syscall_vm_ops = {
+ .close = syscall_vma_close,
+ .nopage = syscall_nopage,
+};
+
+/* Set up a one-page VMA at VSYSCALL_BASE in current->mm for the vsyscall page. Returns 0, -ENOMEM, or the insert_vm_struct() error; bprm and exstack are unused. */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!vma)
+ return -ENOMEM;
+
+ memset(vma, 0, sizeof(struct vm_area_struct));
+ /* Could randomize here */
+ vma->vm_start = VSYSCALL_BASE;
+ vma->vm_end = VSYSCALL_BASE + PAGE_SIZE;
+ /* MAYWRITE to allow gdb to COW and set breakpoints */
+ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7]; /* the low three flag bits index protection_map */
+ vma->vm_ops = &syscall_vm_ops;
+ vma->vm_mm = mm;
+
+ down_write(&mm->mmap_sem);
+ if ((ret = insert_vm_struct(mm, vma))) { /* insertion failed: drop the lock and free the unused VMA */
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return ret;
+ }
+ mm->total_vm++; /* counted while mmap_sem is still held for writing */
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c Fri Feb 24 22:41:08 2006
@@ -0,0 +1,817 @@
+/*
+ * linux/kernel/vm86.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
+ * stack - Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>
+ *
+ * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle
+ * them correctly. Now the emulation will be in a
+ * consistent state after stackfaults - Kasper Dupont
+ * <kasperd@xxxxxxxxxxx>
+ *
+ * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
+ * <kasperd@xxxxxxxxxxx>
+ *
+ * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
+ * caused by Kasper Dupont's changes - Stas Sergeev
+ *
+ * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
+ * Kasper Dupont <kasperd@xxxxxxxxxxx>
+ *
+ * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
+ * Kasper Dupont <kasperd@xxxxxxxxxxx>
+ *
+ * 9 apr 2002 - Changed stack access macros to jump to a label
+ * instead of returning to userspace. This simplifies
+ * do_int, and is needed by handle_vm86_fault. Kasper
+ * Dupont <kasperd@xxxxxxxxxxx>
+ *
+ */
+
+#include <linux/capability.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/tlbflush.h>
+#include <asm/irq.h>
+
+/*
+ * Known problems:
+ *
+ * Interrupt handling is not guaranteed:
+ * - a real x86 will disable all interrupts for one instruction
+ * after a "mov ss,xx" to make stack handling atomic even without
+ * the 'lss' instruction. We can't guarantee this in v86 mode,
+ * as the next instruction might result in a page fault or similar.
+ * - a real x86 will have interrupts disabled for one instruction
+ * past the 'sti' that enables them. We don't bother with all the
+ * details yet.
+ *
+ * Let's hope these problems do not actually matter for anything.
+ */
+
+
+#define KVM86 ((struct kernel_vm86_struct *)regs) /* reinterprets the local variable named 'regs' */
+#define VMPI KVM86->vm86plus /* shorthand for that struct's vm86plus member */
+
+
+/*
+ * 8- and 16-bit register defines..
+ */
+#define AL(regs) (((unsigned char *)&((regs)->eax))[0]) /* byte 0 of eax */
+#define AH(regs) (((unsigned char *)&((regs)->eax))[1]) /* byte 1 of eax */
+#define IP(regs) (*(unsigned short *)&((regs)->eip)) /* first two bytes of eip */
+#define SP(regs) (*(unsigned short *)&((regs)->esp)) /* first two bytes of esp */
+
+/*
+ * virtual flags (16 and 32-bit versions)
+ */
+#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
+#define VEFLAGS (current->thread.v86flags)
+/* set_flags(X,new,mask): replace the bits of X selected by mask with the same bits of new */
+#define set_flags(X,new,mask) \
+((X) = ((X) & ~(mask)) | ((new) & (mask)))
+
+#define SAFE_MASK (0xDD5) /* eflags bits do_sys_vm86() keeps from the caller-supplied image */
+#define RETURN_MASK (0xDFF) /* eflags bits get_vflags() passes through from regs->eflags */
+/* VM86_REGS_SIZE1: byte offset of orig_eax inside kernel_vm86_regs; VM86_REGS_SIZE2: the rest of the struct */
+#define VM86_REGS_PART2 orig_eax
+#define VM86_REGS_SIZE1 \
+ ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
+#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
+
+struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
+struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
+{
+#ifndef CONFIG_X86_NO_TSS
+	struct tss_struct *tss;
+#endif
+	struct pt_regs *ret;
+	unsigned long tmp;
+
+	/*
+	 * Copy the vm86 registers back to the user's vm86_info and restore
+	 * the pre-vm86 thread state; returns KVM86->regs32. Called from
+	 * entry.S with interrupts disabled, but from process context, so
+	 * enable interrupts here, before trying to access user space.
+	 */
+	local_irq_enable();
+
+	if (!current->thread.vm86_info) {
+		printk("no vm86_info: BAD\n");
+		do_exit(SIGSEGV);
+	}
+	set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+	tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
+	tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
+		&regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
+	tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
+	if (tmp) {	/* any failed copy is fatal for the task */
+		printk("vm86: could not access userspace vm86_info\n");
+		do_exit(SIGSEGV);
+	}
+
+#ifndef CONFIG_X86_NO_TSS
+	tss = &per_cpu(init_tss, get_cpu());
+#endif
+	current->thread.esp0 = current->thread.saved_esp0;
+	current->thread.sysenter_cs = __KERNEL_CS;
+	load_esp0(tss, &current->thread);
+	current->thread.saved_esp0 = 0;	/* zero lets sys_vm86*() accept a new vm86 entry */
+	put_cpu();	/* NOTE(review): get_cpu() above is compiled out under CONFIG_X86_NO_TSS - confirm this stays balanced */
+
+	loadsegment(fs, current->thread.saved_fs);
+	loadsegment(gs, current->thread.saved_gs);
+	ret = KVM86->regs32;
+	return ret;
+}
+
+static void mark_screen_rdonly(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
+ int i;
+ /* Write-protect each present PTE among the 32 starting at address 0xA0000 in mm, then flush the TLB. */
+ pgd = pgd_offset(mm, 0xA0000);
+ if (pgd_none_or_clear_bad(pgd))
+ goto out;
+ pud = pud_offset(pgd, 0xA0000);
+ if (pud_none_or_clear_bad(pud))
+ goto out;
+ pmd = pmd_offset(pud, 0xA0000);
+ if (pmd_none_or_clear_bad(pmd))
+ goto out;
+ pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
+ for (i = 0; i < 32; i++) {
+ if (pte_present(*pte))
+ set_pte(pte, pte_wrprotect(*pte)); /* same entry with write permission removed */
+ pte++;
+ }
+ pte_unmap_unlock(pte, ptl); /* NOTE(review): pte has been advanced 32 entries past the one that was mapped - confirm this is intended */
+out:
+ flush_tlb(); /* also reached from the early-exit paths above */
+}
+
+
+
+static int do_vm86_irq_handling(int subfunction, int irqnumber);	/* IRQ sub-functions of sys_vm86() */
+static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);	/* common vm86 entry path */
+
+asmlinkage int sys_vm86old(struct pt_regs regs)
+{
+	struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx;
+	struct kernel_vm86_struct info; /* declare this _on top_,
+					 * this avoids wasting of stack space.
+					 * This remains on the stack until we
+					 * return to 32 bit user space.
+					 */
+	struct task_struct *tsk;
+	int tmp, ret = -EPERM;
+
+	tsk = current;
+	if (tsk->thread.saved_esp0)	/* set by do_sys_vm86(): already in vm86, refuse */
+		goto out;
+	tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
+	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
+		(long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
+	ret = -EFAULT;
+	if (tmp)
+		goto out;
+	/* the old interface copies nothing into vm86plus: zero everything up to regs32 */
+	memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
+	info.regs32 = &regs;
+	tsk->thread.vm86_info = v86;
+	do_sys_vm86(&info, tsk);
+	ret = 0;	/* we never return here */
+out:
+	return ret;
+}
+
+
+asmlinkage int sys_vm86(struct pt_regs regs)
+{
+	struct kernel_vm86_struct info; /* declare this _on top_,
+					 * this avoids wasting of stack space.
+					 * This remains on the stack until we
+					 * return to 32 bit user space.
+					 */
+	struct task_struct *tsk;
+	int tmp, ret;
+	struct vm86plus_struct __user *v86;
+
+	tsk = current;
+	switch (regs.ebx) {	/* ebx selects the sub-function, ecx carries its argument */
+		case VM86_REQUEST_IRQ:
+		case VM86_FREE_IRQ:
+		case VM86_GET_IRQ_BITS:
+		case VM86_GET_AND_RESET_IRQ:
+			ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx);
+			goto out;
+		case VM86_PLUS_INSTALL_CHECK:
+			/* NOTE: on old vm86 stuff this will return the error
+			   from access_ok(), because the subfunction is
+			   interpreted as (invalid) address to vm86_struct.
+			   So the installation check works.
+			 */
+			ret = 0;
+			goto out;
+	}
+
+	/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
+	ret = -EPERM;
+	if (tsk->thread.saved_esp0)	/* set by do_sys_vm86(): already in vm86, refuse */
+		goto out;
+	v86 = (struct vm86plus_struct __user *)regs.ecx;
+	tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
+	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
+		(long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
+	ret = -EFAULT;
+	if (tmp)
+		goto out;
+	info.regs32 = &regs;
+	info.vm86plus.is_vm86pus = 1;	/* tested by handle_vm86_trap() */
+	tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
+	do_sys_vm86(&info, tsk);
+	ret = 0;	/* we never return here */
+out:
+	return ret;
+}
+
+
+static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
+{
+#ifndef CONFIG_X86_NO_TSS
+	struct tss_struct *tss;
+#endif
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+	info->regs.__null_ds = 0;
+	info->regs.__null_es = 0;
+
+/* we are clearing fs,gs later just before "jmp resume_userspace",
+ * because starting with Linux 2.1.x they are no longer saved/restored
+ */
+
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+	VEFLAGS = info->regs.eflags;
+	info->regs.eflags &= SAFE_MASK;
+	info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
+	info->regs.eflags |= VM_MASK;
+
+	switch (info->cpu_type) {	/* choose which flag bits are kept in the virtual flags (v86mask) */
+		case CPU_286:
+			tsk->thread.v86mask = 0;
+			break;
+		case CPU_386:
+			tsk->thread.v86mask = NT_MASK | IOPL_MASK;
+			break;
+		case CPU_486:
+			tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
+			break;
+		default:
+			tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
+			break;
+	}
+
+/*
+ * Save old state, set default return value (%eax) to 0
+ */
+	info->regs32->eax = 0;
+	tsk->thread.saved_esp0 = tsk->thread.esp0;	/* sys_vm86*() test this to reject a nested entry */
+	savesegment(fs, tsk->thread.saved_fs);
+	savesegment(gs, tsk->thread.saved_gs);
+
+#ifndef CONFIG_X86_NO_TSS
+	tss = &per_cpu(init_tss, get_cpu());
+#endif
+	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+	if (cpu_has_sep)
+		tsk->thread.sysenter_cs = 0;
+	load_esp0(tss, &tsk->thread);
+	put_cpu();	/* NOTE(review): get_cpu() above is compiled out under CONFIG_X86_NO_TSS - confirm this stays balanced */
+
+	tsk->thread.screen_bitmap = info->screen_bitmap;
+	if (info->flags & VM86_SCREEN_BITMAP)
+		mark_screen_rdonly(tsk->mm);
+	__asm__ __volatile__(
+		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
+		"movl %0,%%esp\n\t"
+		"movl %1,%%ebp\n\t"
+		"jmp resume_userspace"
+		: /* no outputs */
+		:"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
+	/* we never return here */
+}
+
+static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
+{
+	struct pt_regs * regs32;
+	/* Leave vm86: save the 16-bit state, put retval in the 32-bit eax, then jump to resume_userspace. */
+	regs32 = save_v86_state(regs16);
+	regs32->eax = retval;
+	__asm__ __volatile__("movl %0,%%esp\n\t"
+		"movl %1,%%ebp\n\t"
+		"jmp resume_userspace"
+		: : "r" (regs32), "r" (current_thread_info()));
+}
+
+static inline void set_IF(struct kernel_vm86_regs * regs)
+{
+ VEFLAGS |= VIF_MASK; /* set the virtual interrupt flag */
+ if (VEFLAGS & VIP_MASK) /* VIP is set as well: leave vm86 with VM86_STI */
+ return_to_32bit(regs, VM86_STI);
+}
+
+static inline void clear_IF(struct kernel_vm86_regs * regs)
+{
+ VEFLAGS &= ~VIF_MASK; /* only the virtual flag in thread.v86flags changes; regs is not used */
+}
+
+static inline void clear_TF(struct kernel_vm86_regs * regs)
+{
+ regs->eflags &= ~TF_MASK; /* clears TF in the saved eflags image, not in VEFLAGS */
+}
+
+static inline void clear_AC(struct kernel_vm86_regs * regs)
+{
+ regs->eflags &= ~AC_MASK; /* clears AC in the saved eflags image, not in VEFLAGS */
+}
+
+/* It is correct to call set_IF(regs) from the set_vflags_*
+ * functions. However someone forgot to call clear_IF(regs)
+ * in the opposite case.
+ * After the command sequence CLI PUSHF STI POPF you should
+ * end up with interrupts disabled, but you ended up with
+ * interrupts enabled.
+ * ( I was testing my own changes, but the only bug I
+ * could find was in a function I had not changed. )
+ * [KD]
+ */
+
+static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
+{
+	set_flags(VEFLAGS, eflags, current->thread.v86mask);	/* v86mask bits go to the virtual flags */
+	set_flags(regs->eflags, eflags, SAFE_MASK);		/* SAFE_MASK bits go to the saved eflags */
+	if (eflags & IF_MASK)	/* IF itself is only ever tracked virtually */
+		set_IF(regs);
+	else
+		clear_IF(regs);
+}
+
+static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
+{
+	set_flags(VFLAGS, flags, current->thread.v86mask);	/* 16-bit view of the virtual flags */
+	set_flags(regs->eflags, flags, SAFE_MASK);		/* SAFE_MASK bits go to the saved eflags */
+	if (flags & IF_MASK)	/* IF itself is only ever tracked virtually */
+		set_IF(regs);
+	else
+		clear_IF(regs);
+}
+
+static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
+{
+	/* saved eflags limited to RETURN_MASK, with the IOPL bits always reported as set */
+	unsigned long image = (regs->eflags & RETURN_MASK) | IOPL_MASK;
+
+	if (VEFLAGS & VIF_MASK)
+		image |= IF_MASK;	/* the virtual IF is presented as IF */
+	return image | (VEFLAGS & current->thread.v86mask);
+}
+
+static inline int is_revectored(int nr, struct revectored_struct * bitmap)
+{
+ __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" /* btl copies bit nr of *bitmap into CF; sbbl turns CF into 0 or -1 */
+ :"=r" (nr)
+ :"m" (*bitmap),"r" (nr));
+ return nr; /* non-zero when bit nr is set in the bitmap */
+}
+
+#define val_byte(val, n) (((__u8 *)&val)[n]) /* byte n of val's in-memory representation */
+/* pushX(base, ptr, val, err_label): per byte, decrement ptr then put_user() at base + ptr, highest byte index first; goto err_label on failure. ptr is modified. */
+#define pushb(base, ptr, val, err_label) \
+ do { \
+ __u8 __val = val; \
+ ptr--; \
+ if (put_user(__val, base + ptr) < 0) \
+ goto err_label; \
+ } while(0)
+
+#define pushw(base, ptr, val, err_label) \
+ do { \
+ __u16 __val = val; \
+ ptr--; \
+ if (put_user(val_byte(__val, 1), base + ptr) < 0) \
+ goto err_label; \
+ ptr--; \
+ if (put_user(val_byte(__val, 0), base + ptr) < 0) \
+ goto err_label; \
+ } while(0)
+
+#define pushl(base, ptr, val, err_label) \
+ do { \
+ __u32 __val = val; \
+ ptr--; \
+ if (put_user(val_byte(__val, 3), base + ptr) < 0) \
+ goto err_label; \
+ ptr--; \
+ if (put_user(val_byte(__val, 2), base + ptr) < 0) \
+ goto err_label; \
+ ptr--; \
+ if (put_user(val_byte(__val, 1), base + ptr) < 0) \
+ goto err_label; \
+ ptr--; \
+ if (put_user(val_byte(__val, 0), base + ptr) < 0) \
+ goto err_label; \
+ } while(0)
+/* popX(base, ptr, err_label): per byte, get_user() from base + ptr then increment ptr, lowest byte index first; evaluates to the value read; goto err_label on failure. */
+#define popb(base, ptr, err_label) \
+ ({ \
+ __u8 __res; \
+ if (get_user(__res, base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ __res; \
+ })
+
+#define popw(base, ptr, err_label) \
+ ({ \
+ __u16 __res; \
+ if (get_user(val_byte(__res, 0), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ if (get_user(val_byte(__res, 1), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ __res; \
+ })
+
+#define popl(base, ptr, err_label) \
+ ({ \
+ __u32 __res; \
+ if (get_user(val_byte(__res, 0), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ if (get_user(val_byte(__res, 1), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ if (get_user(val_byte(__res, 2), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ if (get_user(val_byte(__res, 3), base + ptr) < 0) \
+ goto err_label; \
+ ptr++; \
+ __res; \
+ })
+
+/* There are so many possible reasons for this function to return
+ * VM86_INTx, so adding another doesn't bother me. We can expect
+ * userspace programs to be able to handle it. (Getting a problem
+ * in userspace is always better than an Oops anyway.) [KD]
+ */
+static void do_int(struct kernel_vm86_regs *regs, int i,
+ unsigned char __user * ssp, unsigned short sp)
+{
+ unsigned long __user *intr_ptr;
+ unsigned long segoffs;
+
+ if (regs->cs == BIOSSEG)
+ goto cannot_handle;
+ if (is_revectored(i, &KVM86->int_revectored))
+ goto cannot_handle;
+ if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) /* int 0x21 is additionally filtered by the value in AH */
+ goto cannot_handle;
+ intr_ptr = (unsigned long __user *) (i << 2); /* 4-byte entry i of the table at user address 0 */
+ if (get_user(segoffs, intr_ptr))
+ goto cannot_handle;
+ if ((segoffs >> 16) == BIOSSEG) /* upper 16 bits of the entry: target segment */
+ goto cannot_handle;
+ pushw(ssp, sp, get_vflags(regs), cannot_handle);
+ pushw(ssp, sp, regs->cs, cannot_handle);
+ pushw(ssp, sp, IP(regs), cannot_handle);
+ regs->cs = segoffs >> 16;
+ SP(regs) -= 6; /* three 16-bit words were pushed above */
+ IP(regs) = segoffs & 0xffff; /* lower 16 bits of the entry: target offset */
+ clear_TF(regs);
+ clear_IF(regs);
+ clear_AC(regs);
+ return;
+
+cannot_handle:
+ return_to_32bit(regs, VM86_INTx + (i << 8)); /* hand the interrupt to the 32-bit caller, number in bits 8 and up */
+}
+
+int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
+{
+	if (VMPI.is_vm86pus) {	/* entered through sys_vm86(), which sets this flag */
+		if ( (trapno==3) || (trapno==1) )
+			return_to_32bit(regs, VM86_TRAP + (trapno << 8));
+		do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
+		return 0;
+	}
+	if (trapno !=1)
+		return 1; /* we let this handle by the calling routine */
+	if (current->ptrace & PT_PTRACED) {
+		unsigned long flags;
+		spin_lock_irqsave(&current->sighand->siglock, flags);
+		sigdelset(&current->blocked, SIGTRAP);	/* unblock SIGTRAP for a traced task */
+		recalc_sigpending();
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	}
+	send_sig(SIGTRAP, current, 1);
+	current->thread.trap_no = trapno;
+	current->thread.error_code = error_code;
+	return 0;
+}
+
+void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
+{
+	unsigned char opcode;
+	unsigned char __user *csp;
+	unsigned char __user *ssp;
+	unsigned short ip, sp, orig_flags;
+	int data32, pref_done;
+	/* Emulates pushf, popf, int, iret, cli and sti at cs:ip; any other opcode exits vm86 with VM86_UNKNOWN. */
+#define CHECK_IF_IN_TRAP \
+	if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
+		newflags |= TF_MASK
+#define VM86_FAULT_RETURN do { \
+	if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
+		return_to_32bit(regs, VM86_PICRETURN); \
+	if (orig_flags & TF_MASK) \
+		handle_vm86_trap(regs, 0, 1); \
+	return; } while (0)
+
+	orig_flags = *(unsigned short *)&regs->eflags;
+
+	csp = (unsigned char __user *) (regs->cs << 4);
+	ssp = (unsigned char __user *) (regs->ss << 4);
+	sp = SP(regs);
+	ip = IP(regs);
+
+	data32 = 0;
+	pref_done = 0;
+	do {	/* consume prefix bytes; only 0x66 changes how the instruction is emulated */
+		switch (opcode = popb(csp, ip, simulate_sigsegv)) {
+			case 0x66:	/* 32-bit data */	data32=1; break;
+			case 0x67:	/* 32-bit address */	break;
+			case 0x2e:	/* CS */		break;
+			case 0x3e:	/* DS */		break;
+			case 0x26:	/* ES */		break;
+			case 0x36:	/* SS */		break;
+			case 0x65:	/* GS */		break;
+			case 0x64:	/* FS */		break;
+			case 0xf2:	/* repnz */		break;
+			case 0xf3:	/* rep */		break;
+			default: pref_done = 1;
+		}
+	} while (!pref_done);
+
+	switch (opcode) {
+
+	/* pushf */
+	case 0x9c:
+		if (data32) {
+			pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
+			SP(regs) -= 4;
+		} else {
+			pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
+			SP(regs) -= 2;
+		}
+		IP(regs) = ip;
+		VM86_FAULT_RETURN;
+
+	/* popf */
+	case 0x9d:
+		{
+		unsigned long newflags;
+		if (data32) {
+			newflags=popl(ssp, sp, simulate_sigsegv);
+			SP(regs) += 4;
+		} else {
+			newflags = popw(ssp, sp, simulate_sigsegv);
+			SP(regs) += 2;
+		}
+		IP(regs) = ip;
+		CHECK_IF_IN_TRAP;
+		if (data32) {
+			set_vflags_long(newflags, regs);
+		} else {
+			set_vflags_short(newflags, regs);
+		}
+		VM86_FAULT_RETURN;
+		}
+
+	/* int xx */
+	case 0xcd: {
+		int intno=popb(csp, ip, simulate_sigsegv);
+		IP(regs) = ip;
+		if (VMPI.vm86dbg_active) {
+			if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
+				return_to_32bit(regs, VM86_INTx + (intno << 8));
+		}
+		do_int(regs, intno, ssp, sp);
+		return;
+	}
+
+	/* iret */
+	case 0xcf:
+		{
+		unsigned long newip;
+		unsigned long newcs;
+		unsigned long newflags;
+		if (data32) {
+			newip=popl(ssp, sp, simulate_sigsegv);
+			newcs=popl(ssp, sp, simulate_sigsegv);
+			newflags=popl(ssp, sp, simulate_sigsegv);
+			SP(regs) += 12;
+		} else {
+			newip = popw(ssp, sp, simulate_sigsegv);
+			newcs = popw(ssp, sp, simulate_sigsegv);
+			newflags = popw(ssp, sp, simulate_sigsegv);
+			SP(regs) += 6;
+		}
+		IP(regs) = newip;
+		regs->cs = newcs;
+		CHECK_IF_IN_TRAP;
+		if (data32) {
+			set_vflags_long(newflags, regs);
+		} else {
+			set_vflags_short(newflags, regs);
+		}
+		VM86_FAULT_RETURN;
+		}
+
+	/* cli */
+	case 0xfa:
+		IP(regs) = ip;
+		clear_IF(regs);
+		VM86_FAULT_RETURN;
+
+	/* sti */
+	/*
+	 * Damn. This is incorrect: the 'sti' instruction should actually
+	 * enable interrupts after the /next/ instruction. Not good.
+	 *
+	 * Probably needs some horsing around with the TF flag. Aiee..
+	 */
+	case 0xfb:
+		IP(regs) = ip;
+		set_IF(regs);
+		VM86_FAULT_RETURN;
+
+	default:
+		return_to_32bit(regs, VM86_UNKNOWN);
+	}
+
+	return;
+
+simulate_sigsegv:
+	/* FIXME: After a long discussion with Stas we finally
+	 *        agreed, that this is wrong. Here we should
+	 *        really send a SIGSEGV to the user program.
+	 *        But how do we create the correct context? We
+	 *        are inside a general protection fault handler
+	 *        and has just returned from a page fault handler.
+	 *        The correct context for the signal handler
+	 *        should be a mixture of the two, but how do we
+	 *        get the information? [KD]
+	 */
+	return_to_32bit(regs, VM86_UNKNOWN);
+}
+
+/* ---------------- vm86 special IRQ passing stuff ----------------- */
+
+#define VM86_IRQNAME "vm86irq" /* name passed to request_irq() */
+
+static struct vm86_irqs {
+ struct task_struct *tsk; /* task that requested the IRQ; NULL while the slot is free */
+ int sig; /* signal irq_handler() sends to tsk; 0 means none */
+} vm86_irqs[16];
+
+static DEFINE_SPINLOCK(irqbits_lock); /* held around every update of irqbits */
+static int irqbits; /* bit n set: IRQ n has fired and has not been collected yet */
+
+#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
+ | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
+ | (1 << SIGUNUSED) )
+
+/* vm86 IRQ handler: latch the IRQ in irqbits, signal the owning task and mask the line. */
+static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
+{
+	const int mask = 1 << intno;
+	irqreturn_t result = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irqbits_lock, flags);
+	/* Nothing to do when this IRQ is already latched or has no owner. */
+	if (!(irqbits & mask) && vm86_irqs[intno].tsk) {
+		irqbits |= mask;
+		if (vm86_irqs[intno].sig)
+			send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
+		/*
+		 * IRQ will be re-enabled when user asks for the irq (whether
+		 * polling or as a result of the signal)
+		 */
+		disable_irq_nosync(intno);
+		result = IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&irqbits_lock, flags);
+
+	return result;
+}
+
+static inline void free_vm86_irq(int irqnumber)
+{
+ unsigned long flags;
+
+ free_irq(irqnumber, NULL); /* NULL matches the dev_id given to request_irq() */
+ vm86_irqs[irqnumber].tsk = NULL; /* slot may be requested again */
+
+ spin_lock_irqsave(&irqbits_lock, flags);
+ irqbits &= ~(1 << irqnumber); /* forget a latched occurrence nobody collected */
+ spin_unlock_irqrestore(&irqbits_lock, flags);
+}
+
+void release_vm86_irqs(struct task_struct *task)
+{
+	int irq = FIRST_VM86_IRQ;	/* scan FIRST_VM86_IRQ..LAST_VM86_IRQ inclusive */
+	for (; irq <= LAST_VM86_IRQ; irq++)
+		if (task == vm86_irqs[irq].tsk)
+			free_vm86_irq(irq);	/* release every IRQ still owned by task */
+}
+
+static inline int get_and_reset_irq(int irqnumber)
+{
+	unsigned long flags;
+	int pending;
+
+	/* Only the owning task may collect a valid vm86 IRQ. */
+	if (invalid_vm86_irq(irqnumber) || vm86_irqs[irqnumber].tsk != current)
+		return 0;
+
+	spin_lock_irqsave(&irqbits_lock, flags);
+	/* Fetch and clear the latched bit for this IRQ. */
+	pending = irqbits & (1 << irqnumber);
+	irqbits &= ~pending;
+	if (pending)
+		enable_irq(irqnumber);	/* pairs with disable_irq_nosync() in irq_handler() */
+	spin_unlock_irqrestore(&irqbits_lock, flags);
+
+	return pending ? 1 : 0;
+}
+
+
+static int do_vm86_irq_handling(int subfunction, int irqnumber)
+{
+	int ret;
+	switch (subfunction) {	/* any other subfunction falls out to -EINVAL */
+		case VM86_GET_AND_RESET_IRQ: {
+			return get_and_reset_irq(irqnumber);
+		}
+		case VM86_GET_IRQ_BITS: {
+			return irqbits;
+		}
+		case VM86_REQUEST_IRQ: {
+			int sig = irqnumber >> 8;	/* signal number travels above bit 7 */
+			int irq = irqnumber & 255;	/* IRQ number in the low byte */
+			if (!capable(CAP_SYS_ADMIN)) return -EPERM;
+			/* range check first: shifting by a negative or >= 32 count is undefined */
+			if (sig < 0 || sig >= 32 || !((1 << sig) & ALLOWED_SIGS)) return -EPERM;
+			if (invalid_vm86_irq(irq)) return -EPERM;
+			if (vm86_irqs[irq].tsk) return -EPERM;
+			ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
+			if (ret) return ret;
+			vm86_irqs[irq].sig = sig;
+			vm86_irqs[irq].tsk = current;
+			return irq;
+		}
+		case VM86_FREE_IRQ: {
+			if (invalid_vm86_irq(irqnumber)) return -EPERM;
+			if (!vm86_irqs[irqnumber].tsk) return 0;
+			if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
+			free_vm86_irq(irqnumber);
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S Fri Feb 24 22:41:08 2006
@@ -0,0 +1,17 @@
+#include <linux/init.h>
+
+__INITDATA
+/* Each .incbin below embeds a prebuilt vsyscall image between a pair of global start/end symbols. */
+ .globl vsyscall_int80_start, vsyscall_int80_end
+vsyscall_int80_start:
+ .incbin "arch/i386/kernel/vsyscall-int80.so"
+vsyscall_int80_end:
+
+#ifdef CONFIG_X86_SYSENTER /* the sysenter image is embedded only when this option is set */
+ .globl vsyscall_sysenter_start, vsyscall_sysenter_end
+vsyscall_sysenter_start:
+ .incbin "arch/i386/kernel/vsyscall-sysenter.so"
+vsyscall_sysenter_end:
+#endif
+
+__FINIT
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/asm-offsets.c Fri Feb 24 22:41:08 2006
@@ -0,0 +1,74 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <asm/pda.h>
+#include <asm/processor.h>
+#include <asm/segment.h>
+#include <asm/thread_info.h>
+#include <asm/ia32.h>
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+/* DEFINE emits a "->sym <value> <expression text>" marker into the compiler's asm output; BLANK emits a bare "->" */
+#define BLANK() asm volatile("\n->" : : )
+
+int main(void)	/* body consists only of DEFINE()/BLANK() markers for the generated asm output */
+{
+#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
+	ENTRY(state);
+	ENTRY(flags);
+	ENTRY(thread);
+	ENTRY(pid);
+	BLANK();
+#undef ENTRY
+#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
+	ENTRY(flags);
+	ENTRY(addr_limit);
+	ENTRY(preempt_count);
+	ENTRY(status);
+	BLANK();
+#undef ENTRY
+#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
+	ENTRY(kernelstack);
+	ENTRY(oldrsp);
+	ENTRY(pcurrent);
+	ENTRY(irqcount);
+	ENTRY(cpunumber);
+	ENTRY(irqstackptr);
+	ENTRY(data_offset);
+	BLANK();
+#undef ENTRY
+#ifdef CONFIG_IA32_EMULATION
+#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+	ENTRY(eax);
+	ENTRY(ebx);
+	ENTRY(ecx);
+	ENTRY(edx);
+	ENTRY(esi);
+	ENTRY(edi);
+	ENTRY(ebp);
+	ENTRY(esp);
+	ENTRY(eip);
+	BLANK();
+#undef ENTRY
+	DEFINE(IA32_RT_SIGFRAME_sigcontext,
+	       offsetof (struct rt_sigframe32, uc.uc_mcontext));
+	BLANK();
+#endif
+	DEFINE(pbe_address, offsetof(struct pbe, address));
+	DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+	DEFINE(pbe_next, offsetof(struct pbe, next));
+#ifndef CONFIG_X86_NO_TSS	/* the TSS offset is emitted only when a TSS is configured */
+	BLANK();
+	DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
+#endif
+	return 0;
+}
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c Fri Feb 24 22:41:08 2006
@@ -0,0 +1,52 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS; /* presumably picked up by name inside INIT_TASK() - confirm in <linux/init_task.h> */
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm); /* the only object in this group with external linkage */
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread union (init_thread_union).
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) = /* placed in its own section; see the alignment note above */
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task); /* statically initialised, unlike the slab-allocated tasks mentioned above */
+
+EXPORT_SYMBOL(init_task);
+
+#ifndef CONFIG_X86_NO_TSS
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
+
+#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) /* NOTE(review): only selects a section and is not used in this file - confirm it is still needed */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/drivers/video/Kconfig
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/drivers/video/Kconfig Fri Feb 24 22:41:08 2006
@@ -0,0 +1,1462 @@
+#
+# Video configuration
+#
+
+menu "Graphics support"
+
+config FB
+ tristate "Support for frame buffer devices"
+ ---help---
+ The frame buffer device provides an abstraction for the graphics
+ hardware. It represents the frame buffer of some video hardware and
+ allows application software to access the graphics hardware through
+ a well-defined interface, so the software doesn't need to know
+ anything about the low-level (hardware register) stuff.
+
+ Frame buffer devices work identically across the different
+ architectures supported by Linux and make the implementation of
+ application programs easier and more portable; at this point, an X
+ server exists which uses the frame buffer device exclusively.
+ On several non-X86 architectures, the frame buffer device is the
+ only way to use the graphics hardware.
+
+ The device is accessed through special device nodes, usually located
+ in the /dev directory, i.e. /dev/fb*.
+
+ You need a utility program called fbset to make full use of frame
+ buffer devices. Please read <file:Documentation/fb/framebuffer.txt>
+ and the Framebuffer-HOWTO at
+ <http://www.tahallah.demon.co.uk/programming/prog.html> for more
+ information.
+
+ Say Y here and to the driver for your graphics board below if you
+ are compiling a kernel for a non-x86 architecture.
+
+ If you are compiling for the x86 architecture, you can say Y if you
+ want to play with it, but it is not essential. Please note that
+ running graphical applications that directly touch the hardware
+ (e.g. an accelerated X server) and that are not frame buffer
+ device-aware may cause unexpected results. If unsure, say N.
+
+config FB_CFB_FILLRECT
+ tristate
+ depends on FB
+ default n
+ ---help---
+ Include the cfb_fillrect function for generic software rectangle
+ filling. This is used by drivers that don't provide their own
+ (accelerated) version.
+
+config FB_CFB_COPYAREA
+ tristate
+ depends on FB
+ default n
+ ---help---
+ Include the cfb_copyarea function for generic software area copying.
+ This is used by drivers that don't provide their own (accelerated)
+ version.
+
+config FB_CFB_IMAGEBLIT
+ tristate
+ depends on FB
+ default n
+ ---help---
+ Include the cfb_imageblit function for generic software image
+ blitting. This is used by drivers that don't provide their own
+ (accelerated) version.
+
+config FB_MACMODES
+ tristate
+ depends on FB
+ default n
+
+config FB_MODE_HELPERS
+ bool "Enable Video Mode Handling Helpers"
+ depends on FB
+ default n
+ ---help---
+ This enables functions for handling video modes using the
+ Generalized Timing Formula and the EDID parser. A few drivers rely
+ on this feature such as the radeonfb, rivafb, and the i810fb. If
+ your driver does not take advantage of this feature, choosing Y will
+ just increase the kernel size by about 5K.
+
+config FB_TILEBLITTING
+ bool "Enable Tile Blitting Support"
+ depends on FB
+ default n
+ ---help---
+ This enables tile blitting. Tile blitting is a drawing technique
+ where the screen is divided into rectangular sections (tiles), whereas
+ the standard blitting divides the screen into pixels. Because the
+ default drawing element is a tile, drawing functions will be passed
+ parameters in terms of number of tiles instead of number of pixels.
+ For example, to draw a single character, instead of using bitmaps,
+ an index to an array of bitmaps will be used. To clear or move a
+ rectangular section of a screen, the rectangle will be described in
+ terms of number of tiles in the x- and y-axis.
+
+ This is particularly important to one driver, matroxfb. If
+ unsure, say N.
+
+config FB_CIRRUS
+ tristate "Cirrus Logic support"
+ depends on FB && (ZORRO || PCI)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ This enables support for Cirrus Logic GD542x/543x based boards on
+ Amiga: SD64, Piccolo, Picasso II/II+, Picasso IV, or EGS Spectrum.
+
+ If you have a PCI-based system, this enables support for these
+ chips: GD-543x, GD-544x, GD-5480.
+
+ Please read the file <file:Documentation/fb/cirrusfb.txt>.
+
+ Say N unless you have such a graphics board or plan to get one
+ before you next recompile the kernel.
+
+config FB_PM2
+ tristate "Permedia2 support"
+ depends on FB && ((AMIGA && BROKEN) || PCI)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Permedia2 AGP frame
+ buffer card from ASK, aka `Graphic Blaster Exxtreme'. There is a
+ product page at
+ <http://www.ask.com.hk/product/Permedia%202/permedia2.htm>.
+
+config FB_PM2_FIFO_DISCONNECT
+ bool "enable FIFO disconnect feature"
+ depends on FB_PM2 && PCI
+ help
+ Support the Permedia2 FIFO disconnect feature (see CONFIG_FB_PM2).
+
+config FB_ARMCLCD
+ tristate "ARM PrimeCell PL110 support"
+ depends on FB && ARM && ARM_AMBA
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This framebuffer device driver is for the ARM PrimeCell PL110
+ Colour LCD controller. ARM PrimeCells provide the building
+ blocks for System on a Chip devices.
+
+ If you want to compile this as a module (=code which can be
+ inserted into and removed from the running kernel), say M
+ here and read <file:Documentation/modules.txt>. The module
+ will be called amba-clcd.
+
+config FB_ACORN
+ bool "Acorn VIDC support"
+ depends on (FB = y) && ARM && (ARCH_ACORN || ARCH_CLPS7500)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Acorn VIDC graphics
+ hardware found in Acorn RISC PCs and other ARM-based machines. If
+ unsure, say N.
+
+config FB_CLPS711X
+ bool "CLPS711X LCD support"
+ depends on (FB = y) && ARM && ARCH_CLPS711X
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Say Y to enable the Framebuffer driver for the CLPS7111 and
+ EP7212 processors.
+
+config FB_SA1100
+ bool "SA-1100 LCD support"
+ depends on (FB = y) && ARM && ARCH_SA1100
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is a framebuffer device for the SA-1100 LCD Controller.
+ See <http://www.linux-fbdev.org/> for information on framebuffer
+ devices.
+
+ If you plan to use the LCD display with your SA-1100 system, say
+ Y here.
+
+config FB_IMX
+ tristate "Motorola i.MX LCD support"
+ depends on FB && ARM && ARCH_IMX
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+
+config FB_CYBER2000
+ tristate "CyberPro 2000/2010/5000 support"
+ depends on FB && PCI && (BROKEN || !SPARC64)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This enables support for the Integraphics CyberPro 20x0 and 5000
+ VGA chips used in the Rebel.com Netwinder and other machines.
+ Say Y if you have a NetWinder or a graphics card containing this
+ device, otherwise say N.
+
+config FB_APOLLO
+ bool
+ depends on (FB = y) && APOLLO
+ default y
+ select FB_CFB_FILLRECT
+ select FB_CFB_IMAGEBLIT
+
+config FB_Q40
+ bool
+ depends on (FB = y) && Q40
+ default y
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+
+config FB_AMIGA
+ tristate "Amiga native chipset support"
+ depends on FB && AMIGA
+ help
+ This is the frame buffer device driver for the builtin graphics
+ chipset found in Amigas.
+
+ To compile this driver as a module, choose M here: the
+ module will be called amifb.
+
+config FB_AMIGA_OCS
+ bool "Amiga OCS chipset support"
+ depends on FB_AMIGA
+ help
+ This enables support for the original Agnus and Denise video chips,
+ found in the Amiga 1000 and most A500's and A2000's. If you intend
+ to run Linux on any of these systems, say Y; otherwise say N.
+
+config FB_AMIGA_ECS
+ bool "Amiga ECS chipset support"
+ depends on FB_AMIGA
+ help
+ This enables support for the Enhanced Chip Set, found in later
+ A500's, later A2000's, the A600, the A3000, the A3000T and CDTV. If
+ you intend to run Linux on any of these systems, say Y; otherwise
+ say N.
+
+config FB_AMIGA_AGA
+ bool "Amiga AGA chipset support"
+ depends on FB_AMIGA
+ help
+ This enables support for the Advanced Graphics Architecture (also
+ known as the AGA or AA) Chip Set, found in the A1200, A4000, A4000T
+ and CD32. If you intend to run Linux on any of these systems, say Y;
+ otherwise say N.
+
+config FB_CYBER
+ tristate "Amiga CyberVision 64 support"
+ depends on FB && ZORRO && BROKEN
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This enables support for the Cybervision 64 graphics card from
+ Phase5. Please note that its use is not all that intuitive (i.e. if
+ you have any questions, be sure to ask!). Say N unless you have a
+ Cybervision 64 or plan to get one before you next recompile the
+ kernel. Please note that this driver DOES NOT support the
+ Cybervision 64/3D card, as they use incompatible video chips.
+
+config FB_VIRGE
+ bool "Amiga CyberVision 64/3D support "
+ depends on (FB = y) && ZORRO && BROKEN
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This enables support for the Cybervision 64/3D graphics card from
+ Phase5. Please note that its use is not all that intuitive (i.e. if
+ you have any questions, be sure to ask!). Say N unless you have a
+ Cybervision 64/3D or plan to get one before you next recompile the
+ kernel. Please note that this driver DOES NOT support the older
+ Cybervision 64 card, as they use incompatible video chips.
+
+config FB_RETINAZ3
+ tristate "Amiga Retina Z3 support"
+ depends on (FB = y) && ZORRO && BROKEN
+ help
+ This enables support for the Retina Z3 graphics card. Say N unless
+ you have a Retina Z3 or plan to get one before you next recompile
+ the kernel.
+
+config FB_FM2
+ bool "Amiga FrameMaster II/Rainbow II support"
+ depends on (FB = y) && ZORRO
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Amiga FrameMaster
+ card from BSC (exhibited 1992 but not shipped as a CBM product).
+
+config FB_ARC
+ tristate "Arc Monochrome LCD board support"
+ depends on FB && X86
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This enables support for the Arc Monochrome LCD board. The board
+ is based on the KS-108 lcd controller and is typically a matrix
+ of 2*n chips. This driver was tested with a 128x64 panel. This
+ driver supports it for use with x86 SBCs through a 16 bit GPIO
+ interface (8 bit data, 8 bit control). If you anticipate using
+ this driver, say Y or M; otherwise say N. You must specify the
+ GPIO IO address to be used for setting control and data.
+
+config FB_ATARI
+ bool "Atari native chipset support"
+ depends on (FB = y) && ATARI && BROKEN
+ help
+ This is the frame buffer device driver for the builtin graphics
+ chipset found in Ataris.
+
+config FB_OF
+ bool "Open Firmware frame buffer device support"
+ depends on (FB = y) && (PPC64 || PPC_OF)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES
+ help
+ Say Y if you want support with Open Firmware for your graphics
+ board.
+
+config FB_CONTROL
+ bool "Apple \"control\" display support"
+ depends on (FB = y) && PPC_PMAC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES
+ help
+ This driver supports a frame buffer for the graphics adapter in the
+ Power Macintosh 7300 and others.
+
+config FB_PLATINUM
+ bool "Apple \"platinum\" display support"
+ depends on (FB = y) && PPC_PMAC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES
+ help
+ This driver supports a frame buffer for the "platinum" graphics
+ adapter in some Power Macintoshes.
+
+config FB_VALKYRIE
+ bool "Apple \"valkyrie\" display support"
+ depends on (FB = y) && (MAC || PPC_PMAC)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES
+ help
+ This driver supports a frame buffer for the "valkyrie" graphics
+ adapter in some Power Macintoshes.
+
+config FB_CT65550
+ bool "Chips 65550 display support"
+ depends on (FB = y) && PPC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Chips & Technologies
+ 65550 graphics chip in PowerBooks.
+
+config FB_ASILIANT
+ bool "Asiliant (Chips) 69000 display support"
+ depends on (FB = y) && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+
+config FB_IMSTT
+ bool "IMS Twin Turbo display support"
+ depends on (FB = y) && PCI
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES if PPC
+ help
+ The IMS Twin Turbo is a PCI-based frame buffer card bundled with
+ many Macintosh and compatible computers.
+
+config FB_VGA16
+ tristate "VGA 16-color graphics support"
+ depends on FB && (X86 || PPC)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for VGA 16 color graphic
+ cards. Say Y if you have such a card.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vga16fb.
+
+config FB_STI
+ tristate "HP STI frame buffer device support"
+ depends on FB && PARISC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ default y
+ ---help---
+ STI refers to the HP "Standard Text Interface" which is a set of
+ BIOS routines contained in a ROM chip in HP PA-RISC based machines.
+ Enabling this option will implement the linux framebuffer device
+ using calls to the STI BIOS routines for initialisation.
+
+ If you enable this option, you will get a planar framebuffer device
+ /dev/fb which will work on the most common HP graphic cards of the
+ NGLE family, including the artist chips (in the 7xx and Bxxx series),
+ HCRX, HCRX24, CRX, CRX24 and VisEG series.
+
+ It is safe to enable this option, so you should probably say "Y".
+
+config FB_MAC
+ bool "Generic Macintosh display support"
+ depends on (FB = y) && MAC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES
+
+# bool ' Apple DAFB display support' CONFIG_FB_DAFB
+config FB_HP300
+ bool
+ depends on (FB = y) && HP300
+ select FB_CFB_FILLRECT
+ select FB_CFB_IMAGEBLIT
+ default y
+
+config FB_TGA
+ tristate "TGA framebuffer support"
+ depends on FB && ALPHA
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for generic TGA graphic
+ cards. Say Y if you have one of those.
+
+config FB_VESA
+ bool "VESA VGA graphics support"
+ depends on (FB = y) && X86
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for generic VESA 2.0
+ compliant graphic cards. The older VESA 1.2 cards are not supported.
+ You will get a boot time penguin logo at no additional cost. Please
+ read <file:Documentation/fb/vesafb.txt>. If unsure, say Y.
+
+config VIDEO_SELECT
+ bool
+ depends on FB_VESA
+ default y
+
+config FB_HGA
+ tristate "Hercules mono graphics support"
+ depends on FB && X86
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Say Y here if you have a Hercules mono graphics card.
+
+ To compile this driver as a module, choose M here: the
+ module will be called hgafb.
+
+ As this card technology is 15 years old, most people will answer N
+ here.
+
+config FB_HGA_ACCEL
+ bool "Hercules mono Acceleration functions (EXPERIMENTAL)"
+ depends on FB_HGA && EXPERIMENTAL
+ ---help---
+ This will compile the Hercules mono graphics with
+ acceleration functions.
+
+
+config VIDEO_SELECT
+ bool
+ depends on (FB = y) && X86 && !XEN
+ default y
+
+config FB_SGIVW
+ tristate "SGI Visual Workstation framebuffer support"
+ depends on FB && X86_VISWS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ SGI Visual Workstation support for framebuffer graphics.
+
+config FB_GBE
+ bool "SGI Graphics Backend frame buffer support"
+ depends on (FB = y) && (SGI_IP32 || X86_VISWS)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for SGI Graphics Backend.
+ This chip is used in SGI O2 and Visual Workstation 320/540.
+
+config FB_GBE_MEM
+ int "Video memory size in MB"
+ depends on FB_GBE
+ default 8
+ help
+ This is the amount of memory reserved for the framebuffer,
+ which can be any value between 1MB and 8MB.
+
+config FB_SUN3
+ bool "Sun3 framebuffer support"
+ depends on (FB = y) && (SUN3 || SUN3X) && BROKEN
+
+config FB_SBUS
+ bool "SBUS and UPA framebuffers"
+ depends on (FB = y) && SPARC
+ help
+ Say Y if you want support for SBUS or UPA based frame buffer device.
+
+config FB_BW2
+ bool "BWtwo support"
+ depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the BWtwo frame buffer.
+
+config FB_CG3
+ bool "CGthree support"
+ depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the CGthree frame buffer.
+
+config FB_CG6
+ bool "CGsix (GX,TurboGX) support"
+ depends on (FB = y) && (SPARC && FB_SBUS || (SUN3 || SUN3X) && FB_SUN3)
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the CGsix (GX, TurboGX)
+ frame buffer.
+
+config FB_PVR2
+ tristate "NEC PowerVR 2 display support"
+ depends on FB && SH_DREAMCAST
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ Say Y here if you have a PowerVR 2 card in your box. If you plan to
+ run linux on your Dreamcast, you will have to say Y here.
+ This driver may or may not work on other PowerVR 2 cards, but is
+ totally untested. Use at your own risk. If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pvr2fb.
+
+ You can pass several parameters to the driver at boot time or at
+ module load time. The parameters look like "video=pvr2:XXX", where
+ the meaning of XXX can be found at the end of the main source file
+ (<file:drivers/video/pvr2fb.c>). Please see the file
+ <file:Documentation/fb/pvr2fb.txt>.
+
+config FB_EPSON1355
+ bool "Epson 1355 framebuffer support"
+ depends on (FB = y) && (SUPERH || ARCH_CEIVA)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Build in support for the SED1355 Epson Research Embedded RAMDAC
+ LCD/CRT Controller (since redesignated as the S1D13505) as a
+ framebuffer. Product specs at
+ <http://www.erd.epson.com/vdc/html/products.htm>.
+
+config FB_S1D13XXX
+ tristate "Epson S1D13XXX framebuffer support"
+ depends on FB
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Support for S1D13XXX framebuffer device family (currently only
+ working with S1D13806). Product specs at
+ <http://www.erd.epson.com/vdc/html/legacy_13xxx.htm>
+
+config FB_NVIDIA
+ tristate "nVidia Framebuffer Support"
+ depends on FB && PCI
+ select I2C_ALGOBIT if FB_NVIDIA_I2C
+ select I2C if FB_NVIDIA_I2C
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports graphics boards with the nVidia chips, TNT
+ and newer. For very old chipsets, such as the RIVA128, use
+ the rivafb driver instead.
+ Say Y if you have such a graphics board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nvidiafb.
+
+config FB_NVIDIA_I2C
+ bool "Enable DDC Support"
+ depends on FB_NVIDIA
+ help
+ This enables I2C support for nVidia Chipsets. This is used
+ only for getting EDID information from the attached display
+ allowing for robust video mode handling and switching.
+
+ Because fbdev-2.6 requires that drivers must be able to
+ independently validate video mode parameters, you should say Y
+ here.
+
+config FB_RIVA
+ tristate "nVidia Riva support"
+ depends on FB && PCI
+ select I2C_ALGOBIT if FB_RIVA_I2C
+ select I2C if FB_RIVA_I2C
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports graphics boards with the nVidia Riva/Geforce
+ chips.
+ Say Y if you have such a graphics board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rivafb.
+
+config FB_RIVA_I2C
+ bool "Enable DDC Support"
+ depends on FB_RIVA
+ help
+ This enables I2C support for nVidia Chipsets. This is used
+ only for getting EDID information from the attached display
+ allowing for robust video mode handling and switching.
+
+ Because fbdev-2.6 requires that drivers must be able to
+ independently validate video mode parameters, you should say Y
+ here.
+
+config FB_RIVA_DEBUG
+ bool "Lots of debug output from Riva(nVidia) driver"
+ depends on FB_RIVA
+ default n
+ help
+ Say Y here if you want the Riva driver to output all sorts
+ of debugging information to provide to the maintainer when
+ something goes wrong.
+
+config FB_I810
+ tristate "Intel 810/815 support (EXPERIMENTAL)"
+ depends on FB && EXPERIMENTAL && PCI && X86_32
+ select AGP
+ select AGP_INTEL
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports the on-board graphics built in to the Intel 810
+ and 815 chipsets. Say Y if you have and plan to use such a board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called i810fb.
+
+ For more information, please read
+ <file:Documentation/fb/intel810.txt>
+
+config FB_I810_GTF
+ bool "use VESA Generalized Timing Formula"
+ depends on FB_I810
+ help
+ If you say Y, then the VESA standard, Generalized Timing Formula
+ or GTF, will be used to calculate the required video timing values
+ per video mode. Since the GTF allows nondiscrete timings
+ (nondiscrete being a range of values as opposed to discrete being a
+ set of values), you'll be able to use any combination of horizontal
+ and vertical resolutions, and vertical refresh rates without having
+ to specify your own timing parameters. This is especially useful
+ to maximize the performance of an aging display, or if you just
+ have a display with nonstandard dimensions. A VESA compliant
+ monitor is recommended, but can still work with non-compliant ones.
+ If you need or want this, then select this option. The timings may
+ not be compliant with Intel's recommended values. Use at your own
+ risk.
+
+ If you say N, the driver will revert to discrete video timings
+ using a set recommended by Intel in their documentation.
+
+ If unsure, say N.
+
+config FB_I810_I2C
+ bool "Enable DDC Support"
+ depends on FB_I810 && FB_I810_GTF
+ select I2C
+ select I2C_ALGOBIT
+ help
+
+config FB_INTEL
+ tristate "Intel 830M/845G/852GM/855GM/865G support (EXPERIMENTAL)"
+ depends on FB && EXPERIMENTAL && PCI && X86_32
+ select AGP
+ select AGP_INTEL
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports the on-board graphics built in to the Intel
+ 830M/845G/852GM/855GM/865G chipsets.
+ Say Y if you have and plan to use such a board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called intelfb.
+
+config FB_INTEL_DEBUG
+ bool "Intel driver Debug Messages"
+ depends on FB_INTEL
+ ---help---
+ Say Y here if you want the Intel driver to output all sorts
+ of debugging information to provide to the maintainer when
+ something goes wrong.
+
+config FB_MATROX
+ tristate "Matrox acceleration"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_TILEBLITTING
+ select FB_MACMODES if PPC_PMAC
+ ---help---
+ Say Y here if you have a Matrox Millennium, Matrox Millennium II,
+ Matrox Mystique, Matrox Mystique 220, Matrox Productiva G100, Matrox
+ Mystique G200, Matrox Millennium G200, Matrox Marvel G200 video,
+ Matrox G400, G450 or G550 card in your box.
+
+ To compile this driver as a module, choose M here: the
+ module will be called matroxfb.
+
+ You can pass several parameters to the driver at boot time or at
+ module load time. The parameters look like "video=matrox:XXX", and
+ are described in <file:Documentation/fb/matroxfb.txt>.
+
+config FB_MATROX_MILLENIUM
+ bool "Millennium I/II support"
+ depends on FB_MATROX
+ help
+ Say Y here if you have a Matrox Millennium or Matrox Millennium II
+ video card. If you select "Advanced lowlevel driver options" below,
+ you should check 4 bpp packed pixel, 8 bpp packed pixel, 16 bpp
+ packed pixel, 24 bpp packed pixel and 32 bpp packed pixel. You can
+ also use font widths different from 8.
+
+config FB_MATROX_MYSTIQUE
+ bool "Mystique support"
+ depends on FB_MATROX
+ help
+ Say Y here if you have a Matrox Mystique or Matrox Mystique 220
+ video card. If you select "Advanced lowlevel driver options" below,
+ you should check 8 bpp packed pixel, 16 bpp packed pixel, 24 bpp
+ packed pixel and 32 bpp packed pixel. You can also use font widths
+ different from 8.
+
+config FB_MATROX_G
+ bool "G100/G200/G400/G450/G550 support"
+ depends on FB_MATROX
+ ---help---
+ Say Y here if you have a Matrox G100, G200, G400, G450 or G550 based
+ video card. If you select "Advanced lowlevel driver options", you
+ should check 8 bpp packed pixel, 16 bpp packed pixel, 24 bpp packed
+ pixel and 32 bpp packed pixel. You can also use font widths
+ different from 8.
+
+ If you need support for G400 secondary head, you must first say Y to
+ "I2C support" in the character devices section, and then to
+ "Matrox I2C support" and "G400 second head support" here in the
+ framebuffer section. G450/G550 secondary head and digital output
+ are supported without additional modules.
+
+ The driver starts in monitor mode. You must use the matroxset tool
+ (available at <ftp://platan.vc.cvut.cz/pub/linux/matrox-latest/>) to
+ swap primary and secondary head outputs, or to change output mode.
+ Secondary head driver always starts in 640x480 resolution and you
+ must use fbset to change it.
+
+ Do not forget that second head supports only 16 and 32 bpp
+ packed pixels, so it is a good idea to compile them into the kernel
+ too. You can use only some font widths, as the driver uses generic
+ painting procedures (the secondary head does not use acceleration
+ engine).
+
+ G450/G550 hardware can display TV picture only from secondary CRTC,
+ and it performs no scaling, so picture must have 525 or 625 lines.
+
+config FB_MATROX_I2C
+ tristate "Matrox I2C support"
+ depends on FB_MATROX && I2C
+ select I2C_ALGOBIT
+ ---help---
+ This driver creates I2C buses which are needed for accessing the
+ DDC (I2C) bus present on all Matroxes, an I2C bus which
+ interconnects Matrox optional devices, like MGA-TVO on G200 and
+ G400, and the secondary head DDC bus, present on G400 only.
+
+ You can say Y or M here if you want to experiment with monitor
+ detection code. You must say Y or M here if you want to use either
+ second head of G400 or MGA-TVO on G200 or G400.
+
+ If you compile it as a module, it will create a module named
+ i2c-matroxfb.
+
+config FB_MATROX_MAVEN
+ tristate "G400 second head support"
+ depends on FB_MATROX_G && FB_MATROX_I2C
+ ---help---
+ WARNING !!! This support does not work with G450 !!!
+
+ Say Y or M here if you want to use a secondary head (meaning two
+ monitors in parallel) on G400 or MGA-TVO add-on on G200. Secondary
+ head is not compatible with accelerated XFree 3.3.x SVGA servers -
+ secondary head output is blanked while you are in X. With XFree
+ 3.9.17 preview you can use both heads if you use SVGA over fbdev or
+ the fbdev driver on first head and the fbdev driver on second head.
+
+ If you compile it as a module, two modules are created,
+ matroxfb_crtc2 and matroxfb_maven. Matroxfb_maven is needed for
+ both G200 and G400, matroxfb_crtc2 is needed only by G400. You must
+ also load i2c-matroxfb to get it to run.
+
+ The driver starts in monitor mode and you must use the matroxset
+ tool (available at
+ <ftp://platan.vc.cvut.cz/pub/linux/matrox-latest/>) to switch it to
+ PAL or NTSC or to swap primary and secondary head outputs.
+ Secondary head driver also always starts in 640x480 resolution; you
+ must use fbset to change it.
+
+ Also do not forget that second head supports only 16 and 32 bpp
+ packed pixels, so it is a good idea to compile them into the kernel
+ too. You can use only some font widths, as the driver uses generic
+ painting procedures (the secondary head does not use acceleration
+ engine).
+
+config FB_MATROX_MULTIHEAD
+ bool "Multihead support"
+ depends on FB_MATROX
+ ---help---
+ Say Y here if you have more than one (supported) Matrox device in
+ your computer and you want to use all of them for different monitors
+ ("multihead"). If you have only one device, you should say N because
+ the driver compiled with Y is larger and a bit slower, especially on
+ ia32 (ix86).
+
+ If you said M to "Matrox unified accelerated driver" and N here, you
+ will still be able to use several Matrox devices simultaneously:
+ insert several instances of the module matroxfb into the kernel
+ with insmod, supplying the parameter "dev=N" where N is 0, 1, etc.
+ for the different Matrox devices. This method is slightly faster but
+ uses 40 KB of kernel memory per Matrox card.
+
+ There is no need for enabling 'Matrox multihead support' if you have
+ only one Matrox card in the box.
+
+config FB_RADEON_OLD
+ tristate "ATI Radeon display support (Old driver)"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES if PPC
+ help
+ Choose this option if you want to use an ATI Radeon graphics card as
+ a framebuffer device. There are both PCI and AGP versions. You
+ don't need to choose this to run the Radeon in plain VGA mode.
+ There is a product page at
+ <http://www.ati.com/na/pages/products/pc/radeon32/index.html>.
+
+config FB_RADEON
+ tristate "ATI Radeon display support"
+ depends on FB && PCI
+ select I2C_ALGOBIT if FB_RADEON_I2C
+ select I2C if FB_RADEON_I2C
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES if PPC_OF
+ help
+ Choose this option if you want to use an ATI Radeon graphics card as
+ a framebuffer device. There are both PCI and AGP versions. You
+ don't need to choose this to run the Radeon in plain VGA mode.
+
+ If you say Y here and want DDC/I2C support you must first say Y to
+ "I2C support" and "I2C bit-banging support" in the character devices
+ section.
+
+ If you say M here then "I2C support" and "I2C bit-banging support"
+ can be built either as modules or built-in.
+
+ There is a product page at
+ http://apps.ati.com/ATIcompare/
+config FB_RADEON_I2C
+ bool "DDC/I2C for ATI Radeon support"
+ depends on FB_RADEON
+ default y
+ help
+ Say Y here if you want DDC/I2C support for your Radeon board.
+
+config FB_RADEON_DEBUG
+ bool "Lots of debug output from Radeon driver"
+ depends on FB_RADEON
+ default n
+ help
+ Say Y here if you want the Radeon driver to output all sorts
+ of debugging information to provide to the maintainer when
+ something goes wrong.
+
+config FB_ATY128
+ tristate "ATI Rage128 display support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES if PPC_PMAC
+ help
+ This driver supports graphics boards with the ATI Rage128 chips.
+ Say Y if you have such a graphics board and read
+ <file:Documentation/fb/aty128fb.txt>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called aty128fb.
+
+config FB_ATY
+ tristate "ATI Mach64 display support" if PCI || ATARI
+ depends on FB
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_MACMODES if PPC
+ help
+ This driver supports graphics boards with the ATI Mach64 chips.
+ Say Y if you have such a graphics board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called atyfb.
+
+config FB_ATY_CT
+ bool "Mach64 CT/VT/GT/LT (incl. 3D RAGE) support"
+ depends on PCI && FB_ATY
+ default y if SPARC64 && FB_PCI
+ help
+ Say Y here to support use of ATI's 64-bit Rage boards (or other
+ boards based on the Mach64 CT, VT, GT, and LT chipsets) as a
+ framebuffer device. The ATI product support page for these boards
+ is at <http://support.ati.com/products/pc/mach64/>.
+
+config FB_ATY_GENERIC_LCD
+ bool "Mach64 generic LCD support (EXPERIMENTAL)"
+ depends on FB_ATY_CT
+ help
+ Say Y if you have a laptop with an ATI Rage LT PRO, Rage Mobility,
+ Rage XC, or Rage XL chipset.
+
+config FB_ATY_GX
+ bool "Mach64 GX support" if PCI
+ depends on FB_ATY
+ default y if ATARI
+ help
+ Say Y here to support use of the ATI Mach64 Graphics Expression
+ board (or other boards based on the Mach64 GX chipset) as a
+ framebuffer device. The ATI product support page for these boards
+ is at
+ <http://support.ati.com/products/pc/mach64/graphics_xpression.html>.
+
+config FB_S3TRIO
+ bool "S3 Trio display support"
+ depends on (FB = y) && PPC && BROKEN
+ help
+ If you have a S3 Trio say Y. Say N for S3 Virge.
+
+config FB_SAVAGE
+ tristate "S3 Savage support"
+ depends on FB && PCI && EXPERIMENTAL
+ select I2C_ALGOBIT if FB_SAVAGE_I2C
+ select I2C if FB_SAVAGE_I2C
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports notebooks and computers with S3 Savage PCI/AGP
+ chips.
+
+ Say Y if you have such a graphics card.
+
+ To compile this driver as a module, choose M here; the module
+ will be called savagefb.
+
+config FB_SAVAGE_I2C
+ bool "Enable DDC2 Support"
+ depends on FB_SAVAGE
+ help
+ This enables I2C support for S3 Savage Chipsets. This is used
+ only for getting EDID information from the attached display
+ allowing for robust video mode handling and switching.
+
+ Because fbdev-2.6 requires that drivers must be able to
+ independently validate video mode parameters, you should say Y
+ here.
+
+config FB_SAVAGE_ACCEL
+ bool "Enable Console Acceleration"
+ depends on FB_SAVAGE
+ default n
+ help
+ This option will compile in console acceleration support. If
+ the resulting framebuffer console has bothersome glitches, then
+ choose N here.
+
+config FB_SIS
+ tristate "SiS/XGI display support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the SiS 300, 315, 330
+ and 340 series as well as XGI V3XT, V5, V8, Z7 graphics chipsets.
+ Specs available at <http://www.sis.com> and <http://www.xgitech.com>.
+
+ To compile this driver as a module, choose M here; the module
+ will be called sisfb.
+
+config FB_SIS_300
+ bool "SiS 300 series support"
+ depends on FB_SIS
+ help
+ Say Y here to support use of the SiS 300/305, 540, 630 and 730.
+
+config FB_SIS_315
+ bool "SiS 315/330/340 series and XGI support"
+ depends on FB_SIS
+ help
+ Say Y here to support use of the SiS 315, 330 and 340 series
+ (315/H/PRO, 55x, 650, 651, 740, 330, 661, 741, 760, 761) as well
+ as XGI V3XT, V5, V8 and Z7.
+
+config FB_NEOMAGIC
+ tristate "NeoMagic display support"
+ depends on FB && PCI
+ select FB_MODE_HELPERS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This driver supports notebooks with NeoMagic PCI chips.
+ Say Y if you have such a graphics card.
+
+ To compile this driver as a module, choose M here: the
+ module will be called neofb.
+
+config FB_KYRO
+ tristate "IMG Kyro support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Say Y here if you have a STG4000 / Kyro / PowerVR 3 based
+ graphics board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called kyrofb.
+
+config FB_3DFX
+ tristate "3Dfx Banshee/Voodoo3 display support"
+ depends on FB && PCI
+ select FB_CFB_IMAGEBLIT
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ help
+ This driver supports graphics boards with the 3Dfx Banshee/Voodoo3
+ chips. Say Y if you have such a graphics board.
+
+ To compile this driver as a module, choose M here: the
+ module will be called tdfxfb.
+
+config FB_3DFX_ACCEL
+ bool "3Dfx Banshee/Voodoo3 Acceleration functions (EXPERIMENTAL)"
+ depends on FB_3DFX && EXPERIMENTAL
+ ---help---
+ This will compile the 3Dfx Banshee/Voodoo3 frame buffer device
+ with acceleration functions.
+
+
+config FB_VOODOO1
+ tristate "3Dfx Voodoo Graphics (sst1) support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ Say Y here if you have a 3Dfx Voodoo Graphics (Voodoo1/sst1) or
+ Voodoo2 (cvg) based graphics card.
+
+ To compile this driver as a module, choose M here: the
+ module will be called sstfb.
+
+ WARNING: Do not use any application that uses the 3D engine
+ (namely glide) while using this driver.
+ Please read the <file:Documentation/fb/README-sstfb.txt> for supported
+ options and other important info support.
+
+config FB_CYBLA
+ tristate "Cyberblade/i1 support"
+ depends on FB && PCI && X86_32 && !64BIT
+ select FB_CFB_IMAGEBLIT
+ select VIDEO_SELECT
+ ---help---
+ This driver is supposed to support the Trident Cyberblade/i1
+ graphics core integrated in the VIA VT8601A North Bridge,
+ also known as VIA Apollo PLE133.
+
+ Status:
+ - Developed, tested and working on EPIA 5000 and EPIA 800.
+ - Does work reliably on all systems with CRT/LCD connected to
+ normal VGA ports.
+ - Should work on systems that do use the internal LCD port, but
+ this is absolutely not tested.
+
+ Character imageblit, copyarea and rectangle fill are hw accelerated,
+ ypan scrolling is used by default.
+
+ Please do read <file:Documentation/fb/cyblafb/*>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cyblafb.
+
+config FB_TRIDENT
+ tristate "Trident support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ This driver is supposed to support graphics boards with the
+ Trident CyberXXXX/Image/CyberBlade chips mostly found in laptops
+ but also on some motherboards. For more information, read
+ <file:Documentation/fb/tridentfb.txt>
+
+ Cyberblade/i1 support will be removed soon, use the cyblafb driver
+ instead.
+
+ Say Y if you have such a graphics board.
+
+
+ To compile this driver as a module, choose M here: the
+ module will be called tridentfb.
+
+config FB_TRIDENT_ACCEL
+ bool "Trident Acceleration functions (EXPERIMENTAL)"
+ depends on FB_TRIDENT && EXPERIMENTAL
+ ---help---
+ This will compile the Trident frame buffer device with
+ acceleration functions.
+
+config FB_PM3
+ tristate "Permedia3 support"
+ depends on FB && PCI && BROKEN
+ help
+ This is the frame buffer device driver for the 3DLabs Permedia3
+ chipset, used in Formac ProFormance III, 3DLabs Oxygen VX1 &
+ similar boards, 3DLabs Permedia3 Create!, Appian Jeronimo 2000
+ and maybe other boards.
+
+config FB_AU1100
+ bool "Au1100 LCD Driver"
+ depends on (FB = y) && EXPERIMENTAL && PCI && MIPS && MIPS_PB1100=y
+
+source "drivers/video/geode/Kconfig"
+
+config FB_FFB
+ bool "Creator/Creator3D/Elite3D support"
+ depends on FB_SBUS && SPARC64
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Creator, Creator3D,
+ and Elite3D graphics boards.
+
+config FB_TCX
+ bool "TCX (SS4/SS5 only) support"
+ depends on FB_SBUS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the TCX 24/8bit frame
+ buffer.
+
+config FB_CG14
+ bool "CGfourteen (SX) support"
+ depends on FB_SBUS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the CGfourteen frame
+ buffer on Desktop SPARCsystems with the SX graphics option.
+
+config FB_P9100
+ bool "P9100 (Sparcbook 3 only) support"
+ depends on FB_SBUS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the P9100 card
+ supported on Sparcbook 3 machines.
+
+config FB_LEO
+ bool "Leo (ZX) support"
+ depends on FB_SBUS
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the SBUS-based Sun ZX
+ (leo) frame buffer cards.
+
+config FB_PCI
+ bool "PCI framebuffers"
+ depends on (FB = y) && PCI && SPARC
+
+config FB_IGA
+ bool "IGA 168x display support"
+ depends on SPARC32 && FB_PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the framebuffer device for the INTERGRAPHICS 1680 and
+ successor frame buffer cards.
+
+config FB_HIT
+ tristate "HD64461 Frame Buffer support"
+ depends on FB && HD64461
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This is the frame buffer device driver for the Hitachi HD64461 LCD
+ frame buffer card.
+
+config FB_PMAG_AA
+ bool "PMAG-AA TURBOchannel framebuffer support"
+ depends on (FB = y) && TC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Support for the PMAG-AA TURBOchannel framebuffer card (1280x1024x1)
+ used mainly in the MIPS-based DECstation series.
+
+config FB_PMAG_BA
+ bool "PMAG-BA TURBOchannel framebuffer support"
+ depends on (FB = y) && TC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Support for the PMAG-BA TURBOchannel framebuffer card (1024x864x8)
+ used mainly in the MIPS-based DECstation series.
+
+config FB_PMAGB_B
+ bool "PMAGB-B TURBOchannel framebuffer support"
+ depends on (FB = y) && TC
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Support for the PMAGB-B TURBOchannel framebuffer card used mainly
+ in the MIPS-based DECstation series. The card is currently only
+ supported in 1280x1024x8 mode.
+
+config FB_MAXINE
+ bool "Maxine (Personal DECstation) onboard framebuffer support"
+ depends on (FB = y) && MACH_DECSTATION
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Support for the onboard framebuffer (1024x768x8) in the Personal
+ DECstation series (Personal DECstation 5000/20, /25, /33, /50,
+ Codename "Maxine").
+
+config FB_TX3912
+ bool "TMPTX3912/PR31700 frame buffer support"
+ depends on (FB = y) && NINO
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ The TX3912 is a Toshiba RISC processor based on the MIPS 3900 core
+ see <http://www.toshiba.com/taec/components/Generic/risc/tx3912.htm>.
+
+ Say Y here to enable kernel support for the on-board framebuffer.
+
+config FB_G364
+ bool "G364 frame buffer support"
+ depends on (FB = y) && (MIPS_MAGNUM_4000 || OLIVETTI_M700)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ The G364 driver is the framebuffer used in MIPS Magnum 4000 and
+ Olivetti M700-10 systems.
+
+config FB_68328
+ bool "Motorola 68328 native frame buffer support"
+ depends on FB && (M68328 || M68EZ328 || M68VZ328)
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ Say Y here if you want to support the built-in frame buffer of
+ the Motorola 68328 CPU family.
+
+config FB_PXA
+ tristate "PXA LCD framebuffer support"
+ depends on FB && ARCH_PXA
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ Frame buffer driver for the built-in LCD controller in the Intel
+ PXA2x0 processor.
+
+ This driver is also available as a module ( = code which can be
+ inserted and removed from the running kernel whenever you want). The
+ module will be called pxafb. If you want to compile it as a module,
+ say M here and read <file:Documentation/modules.txt>.
+
+ If unsure, say N.
+
+config FB_PXA_PARAMETERS
+ bool "PXA LCD command line parameters"
+ default n
+ depends on FB_PXA
+ ---help---
+ Enable the use of kernel command line or module parameters
+ to configure the physical properties of the LCD panel when
+ using the PXA LCD driver.
+
+ This option allows you to override the panel parameters
+ supplied by the platform in order to support multiple
+ different models of flatpanel. If you will only be using a
+ single model of flatpanel then you can safely leave this
+ option disabled.
+
+ <file:Documentation/fb/pxafb.txt> describes the available parameters.
+
+config FB_W100
+ tristate "W100 frame buffer support"
+ depends on FB && PXA_SHARPSL
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ Frame buffer driver for the w100 as found on the Sharp SL-Cxx series.
+
+ This driver is also available as a module ( = code which can be
+ inserted and removed from the running kernel whenever you want). The
+ module will be called w100fb. If you want to compile it as a module,
+ say M here and read <file:Documentation/modules.txt>.
+
+ If unsure, say N.
+
+config FB_S3C2410
+ tristate "S3C2410 LCD framebuffer support"
+ depends on FB && ARCH_S3C2410
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ Frame buffer driver for the built-in LCD controller in the Samsung
+ S3C2410 processor.
+
+ This driver is also available as a module ( = code which can be
+ inserted and removed from the running kernel whenever you want). The
+ module will be called s3c2410fb. If you want to compile it as a module,
+ say M here and read <file:Documentation/modules.txt>.
+
+ If unsure, say N.
+config FB_S3C2410_DEBUG
+ bool "S3C2410 lcd debug messages"
+ depends on FB_S3C2410
+ help
+ Turn on debugging messages. Note that you can set/unset at run time
+ through sysfs
+
+config FB_VIRTUAL
+ tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
+ depends on FB
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ ---help---
+ This is a `virtual' frame buffer device. It operates on a chunk of
+ unswappable kernel memory instead of on the memory of a graphics
+ board. This means you cannot see any output sent to this frame
+ buffer device, while it does consume precious memory. The main use
+ of this frame buffer device is testing and debugging the frame
+ buffer subsystem. Do NOT enable it for normal systems! To protect
+ the innocent, it has to be enabled explicitly at boot time using the
+ kernel option `video=vfb:'.
+
+ To compile this driver as a module, choose M here: the
+ module will be called vfb.
+
+ If unsure, say N.
+if VT
+ source "drivers/video/console/Kconfig"
+endif
+
+if FB || SGI_NEWPORT_CONSOLE
+ source "drivers/video/logo/Kconfig"
+endif
+
+if FB && SYSFS
+ source "drivers/video/backlight/Kconfig"
+endif
+
+endmenu
+
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/a.out.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/a.out.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,26 @@
+#ifndef __I386_A_OUT_H__
+#define __I386_A_OUT_H__
+
+struct exec
+{
+ unsigned long a_info; /* Use macros N_MAGIC, etc for access */
+ unsigned a_text; /* length of text, in bytes */
+ unsigned a_data; /* length of data, in bytes */
+ unsigned a_bss; /* length of uninitialized data area for file, in bytes */
+ unsigned a_syms; /* length of symbol table data in file, in bytes */
+ unsigned a_entry; /* start address */
+ unsigned a_trsize; /* length of relocation info for text, in bytes */
+ unsigned a_drsize; /* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a) ((a).a_trsize)
+#define N_DRSIZE(a) ((a).a_drsize)
+#define N_SYMSIZE(a) ((a).a_syms)
+
+#ifdef __KERNEL__
+
+#define STACK_TOP (TASK_SIZE - 3*PAGE_SIZE)
+
+#endif
+
+#endif /* __I386_A_OUT_H__ */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/apic.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/apic.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,147 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <linux/config.h>
+#include <linux/pm.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#define Dprintk(x...)
+
+/*
+ * Debugging macros
+ */
+#define APIC_QUIET 0
+#define APIC_VERBOSE 1
+#define APIC_DEBUG 2
+
+extern int enable_local_apic;
+extern int apic_verbosity;
+
+static inline void lapic_disable(void)
+{
+ enable_local_apic = -1;
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+}
+
+static inline void lapic_enable(void)
+{
+ enable_local_apic = 1;
+}
+
+/*
+ * Define the default level of output to be very little
+ * This can be turned up by using apic=verbose for more
+ * information and apic=debug for _lots_ of information.
+ * apic_verbosity is defined in apic.c
+ */
+#define apic_printk(v, s, a...) do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+ } while (0)
+
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+ *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+ xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+ return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+ while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
+ cpu_relax();
+}
+
+int get_physical_broadcast(void);
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write_around(APIC_EOI, 0);
+}
+
+extern void (*wait_timer_tick)(void);
+
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (int virt_wire_setup);
+extern void disable_local_APIC (void);
+extern void lapic_shutdown (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_boot_APIC_clock (void);
+extern void setup_secondary_APIC_clock (void);
+extern void setup_apic_nmi_watchdog (void);
+extern int reserve_lapic_nmi(void);
+extern void release_lapic_nmi(void);
+extern void disable_timer_nmi_watchdog(void);
+extern void enable_timer_nmi_watchdog(void);
+extern void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+extern void enable_NMI_through_LVT0 (void * dummy);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+extern int disable_timer_pin_1;
+
+#ifndef CONFIG_XEN
+void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
+void switch_APIC_timer_to_ipi(void *cpumask);
+void switch_ipi_to_APIC_timer(void *cpumask);
+#define ARCH_APICTIMER_STOPS_ON_C3 1
+#endif
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-i386/elf.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/elf.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,194 @@
+#ifndef __ASMi386_ELF_H
+#define __ASMi386_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+#include <asm/processor.h>
+#include <asm/system.h> /* for savesegment */
+#include <asm/auxvec.h>
+
+#include <linux/utsname.h>
+
+#define R_386_NONE 0
+#define R_386_32 1
+#define R_386_PC32 2
+#define R_386_GOT32 3
+#define R_386_PLT32 4
+#define R_386_COPY 5
+#define R_386_GLOB_DAT 6
+#define R_386_JMP_SLOT 7
+#define R_386_RELATIVE 8
+#define R_386_GOTOFF 9
+#define R_386_GOTPC 10
+#define R_386_NUM 11
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_i387_struct elf_fpregset_t;
+typedef struct user_fxsr_struct elf_fpxregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+ (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS ELFCLASS32
+#define ELF_DATA ELFDATA2LSB
+#define ELF_ARCH EM_386
+
+/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
+ contains a pointer to a function which might be registered using `atexit'.
+ This provides a mean for the dynamic linker to call DT_FINI functions for
+ shared libraries that have been loaded before the code runs.
+
+ A value of 0 tells we have no such handler.
+
+ We might as well make sure everything else is cleared too (except for %esp),
+ just to make things more deterministic.
+ */
+#define ELF_PLAT_INIT(_r, load_addr) do { \
+ _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
+ _r->esi = 0; _r->edi = 0; _r->ebp = 0; \
+ _r->eax = 0; \
+} while (0)
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE 4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
+
+/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+ now struct_user_regs, they are different) */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs) \
+ pr_reg[0] = regs->ebx; \
+ pr_reg[1] = regs->ecx; \
+ pr_reg[2] = regs->edx; \
+ pr_reg[3] = regs->esi; \
+ pr_reg[4] = regs->edi; \
+ pr_reg[5] = regs->ebp; \
+ pr_reg[6] = regs->eax; \
+ pr_reg[7] = regs->xds; \
+ pr_reg[8] = regs->xes; \
+ savesegment(fs,pr_reg[9]); \
+ savesegment(gs,pr_reg[10]); \
+ pr_reg[11] = regs->orig_eax; \
+ pr_reg[12] = regs->eip; \
+ pr_reg[13] = regs->xcs; \
+ pr_reg[14] = regs->eflags; \
+ pr_reg[15] = regs->esp; \
+ pr_reg[16] = regs->xss;
+
+/* This yields a mask that user programs can use to figure out what
+ instruction set this CPU supports. This could be done in user space,
+ but it's not easy, and we've already done it here. */
+
+#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
+
+/* This yields a string that ld.so will use to load implementation
+ specific libraries for optimization. This is more specific in
+ intent than poking at uname or /proc/cpuinfo.
+
+ For the moment, we have only optimizations for the Intel generations,
+ but that could change... */
+
+#define ELF_PLATFORM (system_utsname.machine)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex, ibcs2) do { } while (0)
+
+/*
+ * An executable for which elf_read_implies_exec() returns TRUE will
+ * have the READ_IMPLIES_EXEC personality flag set automatically.
+ */
+#define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X)
+
+struct task_struct;
+
+extern int dump_task_regs (struct task_struct *, elf_gregset_t *);
+extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
+extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *);
+
+#define ELF_CORE_COPY_TASK_REGS(tsk, elf_regs) dump_task_regs(tsk, elf_regs)
+#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs)
+#define ELF_CORE_COPY_XFPREGS(tsk, elf_xfpregs) dump_task_extended_fpu(tsk, elf_xfpregs)
+
+#define VSYSCALL_BASE (PAGE_OFFSET - 2*PAGE_SIZE)
+#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE)
+#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
+extern void __kernel_vsyscall;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int executable_stack);
+
+#define ARCH_DLINFO \
+do { \
+ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
+} while (0)
+
+/*
+ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the vsyscall DSO contents. Dumping its
+ * contents makes post-mortem fully interpretable later without matching up
+ * the same kernel and hardware config to see what PC values meant.
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_WRITE_EXTRA_PHDRS \
+do { \
+ const struct elf_phdr *const vsyscall_phdrs = \
+ (const struct elf_phdr *) (VSYSCALL_BASE \
+ + VSYSCALL_EHDR->e_phoff); \
+ int i; \
+ Elf32_Off ofs = 0; \
+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ struct elf_phdr phdr = vsyscall_phdrs[i]; \
+ if (phdr.p_type == PT_LOAD) { \
+ BUG_ON(ofs != 0); \
+ ofs = phdr.p_offset = offset; \
+ phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \
+ phdr.p_filesz = phdr.p_memsz; \
+ offset += phdr.p_filesz; \
+ } \
+ else \
+ phdr.p_offset += ofs; \
+ phdr.p_paddr = 0; /* match other core phdrs */ \
+ DUMP_WRITE(&phdr, sizeof(phdr)); \
+ } \
+} while (0)
+#define ELF_CORE_WRITE_EXTRA_DATA \
+do { \
+ const struct elf_phdr *const vsyscall_phdrs = \
+ (const struct elf_phdr *) (VSYSCALL_BASE \
+ + VSYSCALL_EHDR->e_phoff); \
+ int i; \
+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ if (vsyscall_phdrs[i].p_type == PT_LOAD) \
+ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
+ PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \
+ } \
+} while (0)
+
+#endif
+
+#endif
diff -r d940ec92958d -r 6c43118bdba8 linux-2.6-xen-sparse/include/asm-x86_64/apic.h
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/apic.h Fri Feb 24 22:41:08 2006
@@ -0,0 +1,120 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <linux/config.h>
+#include <linux/pm.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#define Dprintk(x...)
+
+/*
+ * Debugging macros
+ */
+#define APIC_QUIET 0
+#define APIC_VERBOSE 1
+#define APIC_DEBUG 2
+
+extern int apic_verbosity;
+extern int apic_runs_main_timer;
+
+/*
+ * Define the default level of output to be very little
+ * This can be turned up by using apic=verbose for more
+ * information and apic=debug for _lots_ of information.
+ * apic_verbosity is defined in apic.c
+ */
+#define apic_printk(v, s, a...) do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+ } while (0)
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+struct pt_regs;
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned int v)
+{
+ *((volatile unsigned int *)(APIC_BASE+reg)) = v;
+}
+
+static __inline unsigned int apic_read(unsigned long reg)
+{
+ return *((volatile unsigned int *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+ while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+static inline void ack_APIC_irq(void)
+{
+ /*
+ * ack_APIC_irq() actually gets compiled as a single instruction:
+ * - a single rmw on Pentium/82489DX
+ * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+ * ... yummie.
+ */
+
+ /* Docs say use 0 for future compatibility */
+ apic_write(APIC_EOI, 0);
+}
+
+extern int get_maxlvt (void);
+extern void clear_local_APIC (void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (int virt_wire_setup);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_boot_APIC_clock (void);
+extern void setup_secondary_APIC_clock (void);
+extern void setup_apic_nmi_watchdog (void);
+extern int reserve_lapic_nmi(void);
+extern void release_lapic_nmi(void);
+extern void disable_timer_nmi_watchdog(void);
+extern void enable_timer_nmi_watchdog(void);
+extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+extern void clustered_apic_check(void);
+
+extern void nmi_watchdog_default(void);
+extern int setup_nmi_watchdog(char *);
+
+extern unsigned int nmi_watchdog;
+#define NMI_DEFAULT -1
+#define NMI_NONE 0
+#define NMI_IO_APIC 1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID 3
+
+extern int disable_timer_pin_1;
+
+extern void setup_threshold_lvt(unsigned long lvt_off);
+
+#ifndef CONFIG_XEN
+void smp_send_timer_broadcast_ipi(void);
+void switch_APIC_timer_to_ipi(void *cpumask);
+void switch_ipi_to_APIC_timer(void *cpumask);
+
+#define ARCH_APICTIMER_STOPS_ON_C3 1
+#endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+extern unsigned boot_cpu_id;
+
+#endif /* __ASM_APIC_H */
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/patches/linux-2.6.16-rc4/i386-mach-io-check-nmi.patch Fri Feb 24 22:41:08 2006
@@ -0,0 +1,45 @@
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c ./arch/i386/kernel/traps.c
+--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c 2006-02-15 20:38:51.000000000 +0000
++++ ./arch/i386/kernel/traps.c 2006-02-15 20:40:43.000000000 +0000
+@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch
+
+ static void io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+- unsigned long i;
+-
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+- reason = (reason & 0xf) | 8;
+- outb(reason, 0x61);
+- i = 2000;
+- while (--i) udelay(1000);
+- reason &= ~8;
+- outb(reason, 0x61);
++ clear_io_check_error(reason);
+ }
+
+ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h ./include/asm-i386/mach-default/mach_traps.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h 2006-01-03 03:21:10.000000000 +0000
++++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-15 20:40:43.000000000 +0000
+@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
+ outb(reason, 0x61);
+ }
+
++static inline void clear_io_check_error(unsigned char reason)
++{
++ unsigned long i;
++
++ reason = (reason & 0xf) | 8;
++ outb(reason, 0x61);
++ i = 2000;
++ while (--i) udelay(1000);
++ reason &= ~8;
++ outb(reason, 0x61);
++}
++
+ static inline unsigned char get_nmi_reason(void)
+ {
+ return inb(0x61);
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/net-csum.patch
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/patches/linux-2.6.16-rc4/net-csum.patch Fri Feb 24 22:41:08 2006
@@ -0,0 +1,41 @@
+diff -pruN
../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c
./net/ipv4/netfilter/ip_nat_proto_tcp.c
+--- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c
2006-02-02 17:39:51.000000000 +0000
++++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:44:18.000000000
+0000
+@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb,
+ if (hdrsize < sizeof(*hdr))
+ return 1;
+
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++ if ((*pskb)->proto_csum_blank) {
++ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
++ } else {
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(oldport ^ 0xFFFF,
+ newport,
+ hdr->check));
++ }
+ return 1;
+ }
+
+diff -pruN
../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c
./net/ipv4/netfilter/ip_nat_proto_udp.c
+--- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c
2006-02-02 17:39:51.000000000 +0000
++++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:44:18.000000000
+0000
+@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+- if (hdr->check) /* 0 is a special case meaning no checksum */
+- hdr->check = ip_nat_cheat_check(~oldip, newip,
++ if (hdr->check) { /* 0 is a special case meaning no checksum */
++ if ((*pskb)->proto_csum_blank) {
++ hdr->check = ip_nat_cheat_check(oldip, ~newip,
hdr->check);
++ } else {
++ hdr->check = ip_nat_cheat_check(~oldip, newip,
+ ip_nat_cheat_check(*portptr ^ 0xFFFF,
+ newport,
+ hdr->check));
++ }
++ }
+ *portptr = newport;
+ return 1;
+ }
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/pmd-shared.patch
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/patches/linux-2.6.16-rc4/pmd-shared.patch Fri Feb 24 22:41:08 2006
@@ -0,0 +1,111 @@
+diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c
./arch/i386/mm/pageattr.c
+--- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c 2006-02-02
17:39:29.000000000 +0000
++++ ./arch/i386/mm/pageattr.c 2006-02-02 17:45:14.000000000 +0000
+@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns
+ unsigned long flags;
+
+ set_pte_atomic(kpte, pte); /* change init_mm */
+- if (PTRS_PER_PMD > 1)
++ if (HAVE_SHARED_KERNEL_PMD)
+ return;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c
./arch/i386/mm/pgtable.c
+--- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c 2006-01-03
03:21:10.000000000 +0000
++++ ./arch/i386/mm/pgtable.c 2006-02-02 17:45:14.000000000 +0000
+@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
+ spin_lock_irqsave(&pgd_lock, flags);
+ }
+
+- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
++ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
++ swapper_pg_dir + USER_PTRS_PER_PGD,
++ KERNEL_PGD_PTRS);
+ if (PTRS_PER_PMD > 1)
+ return;
+
+@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
++
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
++ if (!pmd)
++ goto out_oom;
++ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
++ }
++
++ spin_lock_irqsave(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
++ pgd_t *kpgd = pgd_offset_k(v);
++ pud_t *kpud = pud_offset(kpgd, v);
++ pmd_t *kpmd = pmd_offset(kpud, v);
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memcpy(pmd, kpmd, PAGE_SIZE);
++ }
++ pgd_list_add(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
++
+ return pgd;
+
+ out_oom:
+@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd)
+ int i;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+- if (PTRS_PER_PMD > 1)
+- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+- kmem_cache_free(pmd_cache, (void
*)__va(pgd_val(pgd[i])-1));
++ if (PTRS_PER_PMD > 1) {
++ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ if (!HAVE_SHARED_KERNEL_PMD) {
++ unsigned long flags;
++ spin_lock_irqsave(&pgd_lock, flags);
++ pgd_list_del(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
++ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
++ kmem_cache_free(pmd_cache, pmd);
++ }
++ }
++ }
+ /* in the non-PAE case, free_pgtables() clears user pgd entries */
+ kmem_cache_free(pgd_cache, pgd);
+ }
+diff -pruN
../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h
./include/asm-i386/pgtable-2level-defs.h
+--- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h
2006-01-03 03:21:10.000000000 +0000
++++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-02 17:45:14.000000000
+0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
+ #define _I386_PGTABLE_2LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 0
++
+ /*
+ * traditional i386 two-level paging structure:
+ */
+diff -pruN
../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h
./include/asm-i386/pgtable-3level-defs.h
+--- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h
2006-01-03 03:21:10.000000000 +0000
++++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-02 17:45:14.000000000
+0000
+@@ -1,6 +1,8 @@
+ #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
+ #define _I386_PGTABLE_3LEVEL_DEFS_H
+
++#define HAVE_SHARED_KERNEL_PMD 1
++
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc4/smp-alts.patch
--- /dev/null Fri Feb 24 21:03:07 2006
+++ b/patches/linux-2.6.16-rc4/smp-alts.patch Fri Feb 24 22:41:08 2006
@@ -0,0 +1,591 @@
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig ./arch/i386/Kconfig
+--- ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig 2006-02-15
20:38:51.000000000 +0000
++++ ./arch/i386/Kconfig 2006-02-15 20:45:57.000000000 +0000
+@@ -202,6 +202,19 @@ config SMP
+
+ If you don't know what to do here, say N.
+
++config SMP_ALTERNATIVES
++ bool "SMP alternatives support (EXPERIMENTAL)"
++ depends on SMP && EXPERIMENTAL
++ help
++ Try to reduce the overhead of running an SMP kernel on a uniprocessor
++ host slightly by replacing certain key instruction sequences
++ according to whether we currently have more than one CPU available.
++ This should provide a noticeable boost to performance when
++ running SMP kernels on UP machines, and have negligible impact
++ when running on a true SMP host.
++
++ If unsure, say N.
++
+ config NR_CPUS
+ int "Maximum number of CPUs (2-255)"
+ range 2 255
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile
./arch/i386/kernel/Makefile
+--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile 2006-02-15
20:38:51.000000000 +0000
++++ ./arch/i386/kernel/Makefile 2006-02-15 20:45:57.000000000 +0000
+@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
+ obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
+ obj-$(CONFIG_VM86) += vm86.o
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
++obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o
+
+ EXTRA_AFLAGS := -traditional
+
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c
./arch/i386/kernel/smpalts.c
+--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c 1970-01-01
01:00:00.000000000 +0100
++++ ./arch/i386/kernel/smpalts.c 2006-02-15 20:45:57.000000000 +0000
+@@ -0,0 +1,85 @@
++#include <linux/kernel.h>
++#include <asm/system.h>
++#include <asm/smp_alt.h>
++#include <asm/processor.h>
++#include <asm/string.h>
++
++struct smp_replacement_record {
++ unsigned char targ_size;
++ unsigned char smp1_size;
++ unsigned char smp2_size;
++ unsigned char up_size;
++ unsigned char feature;
++ unsigned char data[0];
++};
++
++struct smp_alternative_record {
++ void *targ_start;
++ struct smp_replacement_record *repl;
++};
++
++extern struct smp_alternative_record __start_smp_alternatives_table,
++ __stop_smp_alternatives_table;
++extern unsigned long __init_begin, __init_end;
++
++void prepare_for_smp(void)
++{
++ struct smp_alternative_record *r;
++ printk(KERN_INFO "Enabling SMP...\n");
++ for (r = &__start_smp_alternatives_table;
++ r != &__stop_smp_alternatives_table;
++ r++) {
++ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++ BUG_ON(r->repl->targ_size < r->repl->up_size);
++ if (system_state == SYSTEM_RUNNING &&
++ r->targ_start >= (void *)&__init_begin &&
++ r->targ_start < (void *)&__init_end)
++ continue;
++ if (r->repl->feature != (unsigned char)-1 &&
++ boot_cpu_has(r->repl->feature)) {
++ memcpy(r->targ_start,
++ r->repl->data + r->repl->smp1_size,
++ r->repl->smp2_size);
++ memset(r->targ_start + r->repl->smp2_size,
++ 0x90,
++ r->repl->targ_size - r->repl->smp2_size);
++ } else {
++ memcpy(r->targ_start,
++ r->repl->data,
++ r->repl->smp1_size);
++ memset(r->targ_start + r->repl->smp1_size,
++ 0x90,
++ r->repl->targ_size - r->repl->smp1_size);
++ }
++ }
++ /* Paranoia */
++ asm volatile ("jmp 1f\n1:");
++ mb();
++}
++
++void unprepare_for_smp(void)
++{
++ struct smp_alternative_record *r;
++ printk(KERN_INFO "Disabling SMP...\n");
++ for (r = &__start_smp_alternatives_table;
++ r != &__stop_smp_alternatives_table;
++ r++) {
++ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
++ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
++ BUG_ON(r->repl->targ_size < r->repl->up_size);
++ if (system_state == SYSTEM_RUNNING &&
++ r->targ_start >= (void *)&__init_begin &&
++ r->targ_start < (void *)&__init_end)
++ continue;
++ memcpy(r->targ_start,
++ r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
++ r->repl->up_size);
++ memset(r->targ_start + r->repl->up_size,
++ 0x90,
++ r->repl->targ_size - r->repl->up_size);
++ }
++ /* Paranoia */
++ asm volatile ("jmp 1f\n1:");
++ mb();
++}
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c
./arch/i386/kernel/smpboot.c
+--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c 2006-02-15
20:38:51.000000000 +0000
++++ ./arch/i386/kernel/smpboot.c 2006-02-15 20:45:57.000000000 +0000
+@@ -1214,6 +1214,11 @@ static void __init smp_boot_cpus(unsigne
+ if (max_cpus <= cpucount+1)
+ continue;
+
++#ifdef CONFIG_SMP_ALTERNATIVES
++ if (kicked == 1)
++ prepare_for_smp();
++#endif
++
+ if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
+ printk("CPU #%d not responding - cannot use it.\n",
+ apicid);
+@@ -1392,6 +1397,11 @@ int __devinit __cpu_up(unsigned int cpu)
+ return -EIO;
+ }
+
++#ifdef CONFIG_SMP_ALTERNATIVES
++ if (num_online_cpus() == 1)
++ prepare_for_smp();
++#endif
++
+ local_irq_enable();
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+ /* Unleash the CPU! */
+diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S
./arch/i386/kernel/vmlinux.lds.S
+--- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S
2006-01-03 03:21:10.000000000 +0000
++++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-15 20:45:57.000000000 +0000
+@@ -34,6 +34,13 @@ SECTIONS
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
+ __stop___ex_table = .;
+
++ . = ALIGN(16);
++ __start_smp_alternatives_table = .;
++ __smp_alternatives : { *(__smp_alternatives) }
++ __stop_smp_alternatives_table = .;
++
++ __smp_replacements : { *(__smp_replacements) }
++
+ RODATA
+
+ /* writeable */
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h
./include/asm-i386/atomic.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h 2006-02-15
20:38:57.000000000 +0000
++++ ./include/asm-i386/atomic.h 2006-02-15 20:45:57.000000000 +0000
+@@ -4,18 +4,13 @@
+ #include <linux/config.h>
+ #include <linux/compiler.h>
+ #include <asm/processor.h>
++#include <asm/smp_alt.h>
+
+ /*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+-#ifdef CONFIG_SMP
+-#define LOCK "lock ; "
+-#else
+-#define LOCK ""
+-#endif
+-
+ /*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h
./include/asm-i386/bitops.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h 2006-02-15
20:38:57.000000000 +0000
++++ ./include/asm-i386/bitops.h 2006-02-15 20:45:57.000000000 +0000
+@@ -7,6 +7,7 @@
+
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+
+ /*
+ * These have to be done with inline assembly: that way the bit-setting
+@@ -16,12 +17,6 @@
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+-#ifdef CONFIG_SMP
+-#define LOCK_PREFIX "lock ; "
+-#else
+-#define LOCK_PREFIX ""
+-#endif
+-
+ #define ADDR (*(volatile long *) addr)
+
+ /**
+@@ -41,7 +36,7 @@
+ */
+ static inline void set_bit(int nr, volatile unsigned long * addr)
+ {
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btsl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr));
+@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol
+ */
+ static inline void clear_bit(int nr, volatile unsigned long * addr)
+ {
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btrl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr));
+@@ -121,7 +116,7 @@ static inline void __change_bit(int nr,
+ */
+ static inline void change_bit(int nr, volatile unsigned long * addr)
+ {
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btcl %1,%0"
+ :"+m" (ADDR)
+ :"Ir" (nr));
+@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n
+ {
+ int oldbit;
+
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btsl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int
+ {
+ int oldbit;
+
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btrl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in
+ {
+ int oldbit;
+
+- __asm__ __volatile__( LOCK_PREFIX
++ __asm__ __volatile__( LOCK
+ "btcl %2,%1\n\tsbbl %0,%0"
+ :"=r" (oldbit),"+m" (ADDR)
+ :"Ir" (nr) : "memory");
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h
./include/asm-i386/futex.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h 2006-02-15
20:38:57.000000000 +0000
++++ ./include/asm-i386/futex.h 2006-02-15 20:45:57.000000000 +0000
+@@ -28,7 +28,7 @@
+ "1: movl %2, %0\n\
+ movl %0, %3\n" \
+ insn "\n" \
+-"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\
++"2: " LOCK "cmpxchgl %3, %2\n\
+ jnz 1b\n\
+ 3: .section .fixup,\"ax\"\n\
+ 4: mov %5, %1\n\
+@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op,
+ #endif
+ switch (op) {
+ case FUTEX_OP_ADD:
+- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret,
++ __futex_atomic_op1(LOCK "xaddl %0, %2", ret,
+ oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h
./include/asm-i386/rwsem.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h 2006-01-03
03:21:10.000000000 +0000
++++ ./include/asm-i386/rwsem.h 2006-02-15 20:45:57.000000000 +0000
+@@ -40,6 +40,7 @@
+
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
++#include <asm/smp_alt.h>
+
+ struct rwsem_waiter;
+
+@@ -99,7 +100,7 @@ static inline void __down_read(struct rw
+ {
+ __asm__ __volatile__(
+ "# beginning down_read\n\t"
+-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old
value */
++LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old
value */
+ " js 2f\n\t" /* jump if we weren't granted the lock */
+ "1:\n\t"
+ LOCK_SECTION_START("")
+@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st
+ " movl %1,%2\n\t"
+ " addl %3,%2\n\t"
+ " jle 2f\n\t"
+-LOCK_PREFIX " cmpxchgl %2,%0\n\t"
++LOCK " cmpxchgl %2,%0\n\t"
+ " jnz 1b\n\t"
+ "2:\n\t"
+ "# ending __down_read_trylock\n\t"
+@@ -150,7 +151,7 @@ static inline void __down_write(struct r
+ tmp = RWSEM_ACTIVE_WRITE_BIAS;
+ __asm__ __volatile__(
+ "# beginning down_write\n\t"
+-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns
the old value */
++LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns
the old value */
+ " testl %%edx,%%edx\n\t" /* was the count 0 before? */
+ " jnz 2f\n\t" /* jump if we weren't granted the lock */
+ "1:\n\t"
+@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s
+ __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+ __asm__ __volatile__(
+ "# beginning __up_read\n\t"
+-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old
value */
++LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old
value */
+ " js 2f\n\t" /* jump if the lock is being waited upon */
+ "1:\n\t"
+ LOCK_SECTION_START("")
+@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_
+ __asm__ __volatile__(
+ "# beginning __up_write\n\t"
+ " movl %2,%%edx\n\t"
+-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition
0xffff0001 -> 0x00000000 */
++LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition
0xffff0001 -> 0x00000000 */
+ " jnz 2f\n\t" /* jump if the lock is being waited upon */
+ "1:\n\t"
+ LOCK_SECTION_START("")
+@@ -239,7 +240,7 @@ static inline void __downgrade_write(str
+ {
+ __asm__ __volatile__(
+ "# beginning __downgrade_write\n\t"
+-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 ->
0xYYYY0001 */
++LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 ->
0xYYYY0001 */
+ " js 2f\n\t" /* jump if the lock is being waited upon */
+ "1:\n\t"
+ LOCK_SECTION_START("")
+@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t"
+ static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+ {
+ __asm__ __volatile__(
+-LOCK_PREFIX "addl %1,%0"
++LOCK "addl %1,%0"
+ : "=m"(sem->count)
+ : "ir"(delta), "m"(sem->count));
+ }
+@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in
+ int tmp = delta;
+
+ __asm__ __volatile__(
+-LOCK_PREFIX "xadd %0,(%2)"
++LOCK "xadd %0,(%2)"
+ : "+r"(tmp), "=m"(sem->count)
+ : "r"(sem), "m"(sem->count)
+ : "memory");
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h
./include/asm-i386/smp_alt.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h 1970-01-01
01:00:00.000000000 +0100
++++ ./include/asm-i386/smp_alt.h 2006-02-15 20:45:57.000000000 +0000
+@@ -0,0 +1,32 @@
++#ifndef __ASM_SMP_ALT_H__
++#define __ASM_SMP_ALT_H__
++
++#include <linux/config.h>
++
++#ifdef CONFIG_SMP
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define LOCK \
++ "6677: nop\n" \
++ ".section __smp_alternatives,\"a\"\n" \
++ ".long 6677b\n" \
++ ".long 6678f\n" \
++ ".previous\n" \
++ ".section __smp_replacements,\"a\"\n" \
++ "6678: .byte 1\n" \
++ ".byte 1\n" \
++ ".byte 0\n" \
++ ".byte 1\n" \
++ ".byte -1\n" \
++ "lock\n" \
++ "nop\n" \
++ ".previous\n"
++void prepare_for_smp(void);
++void unprepare_for_smp(void);
++#else
++#define LOCK "lock ; "
++#endif
++#else
++#define LOCK ""
++#endif
++
++#endif /* __ASM_SMP_ALT_H__ */
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h
./include/asm-i386/spinlock.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h 2006-01-03
03:21:10.000000000 +0000
++++ ./include/asm-i386/spinlock.h 2006-02-15 20:45:57.000000000 +0000
+@@ -6,6 +6,7 @@
+ #include <asm/page.h>
+ #include <linux/config.h>
+ #include <linux/compiler.h>
++#include <asm/smp_alt.h>
+
+ /*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+@@ -23,7 +24,8 @@
+
+ #define __raw_spin_lock_string \
+ "\n1:\t" \
+- "lock ; decb %0\n\t" \
++ LOCK \
++ "decb %0\n\t" \
+ "jns 3f\n" \
+ "2:\t" \
+ "rep;nop\n\t" \
+@@ -34,7 +36,8 @@
+
+ #define __raw_spin_lock_string_flags \
+ "\n1:\t" \
+- "lock ; decb %0\n\t" \
++ LOCK \
++ "decb %0\n\t" \
+ "jns 4f\n\t" \
+ "2:\t" \
+ "testl $0x200, %1\n\t" \
+@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags
+ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ char oldval;
++#ifdef CONFIG_SMP_ALTERNATIVES
+ __asm__ __volatile__(
+- "xchgb %b0,%1"
++ "1:movb %1,%b0\n"
++ "movb $0,%1\n"
++ "2:"
++ ".section __smp_alternatives,\"a\"\n"
++ ".long 1b\n"
++ ".long 3f\n"
++ ".previous\n"
++ ".section __smp_replacements,\"a\"\n"
++ "3: .byte 2b - 1b\n"
++ ".byte 5f-4f\n"
++ ".byte 0\n"
++ ".byte 6f-5f\n"
++ ".byte -1\n"
++ "4: xchgb %b0,%1\n"
++ "5: movb %1,%b0\n"
++ "movb $0,%1\n"
++ "6:\n"
++ ".previous\n"
+ :"=q" (oldval), "=m" (lock->slock)
+ :"0" (0) : "memory");
++#else
++ __asm__ __volatile__(
++ "xchgb %b0,%1\n"
++ :"=q" (oldval), "=m" (lock->slock)
++ :"0" (0) : "memory");
++#endif
+ return oldval > 0;
+ }
+
+@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra
+
+ static inline void __raw_read_unlock(raw_rwlock_t *rw)
+ {
+- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory");
++ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
+ }
+
+ static inline void __raw_write_unlock(raw_rwlock_t *rw)
+ {
+- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0"
++ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
+ : "=m" (rw->lock) : : "memory");
+ }
+
+diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h
./include/asm-i386/system.h
+--- ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h 2006-02-15
20:38:57.000000000 +0000
++++ ./include/asm-i386/system.h 2006-02-15 20:45:57.000000000 +0000
+@@ -5,7 +5,7 @@
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+ #include <asm/cpufeature.h>
+-#include <linux/bitops.h> /* for LOCK_PREFIX */
++#include <asm/smp_alt.h>
+
+ #ifdef __KERNEL__
+
+@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
++ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
++ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
++ __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc
+ unsigned long long new)
+ {
+ unsigned long long prev;
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
++ __asm__ __volatile__(LOCK "cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+@@ -503,11 +503,55 @@ struct alt_instr {
+ #endif
+
+ #ifdef CONFIG_SMP
++#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
++#define smp_alt_mb(instr) \
++__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
++ ".section __smp_alternatives,\"a\"\n" \
++ ".long 6667b\n" \
++ ".long 6673f\n" \
++ ".previous\n" \
++ ".section __smp_replacements,\"a\"\n" \
++ "6673:.byte 6668b-6667b\n" \
++ ".byte 6670f-6669f\n" \
++ ".byte 6671f-6670f\n" \
++ ".byte 0\n" \
++ ".byte %c0\n" \
++ "6669:lock;addl $0,0(%%esp)\n" \
++ "6670:" instr "\n" \
++ "6671:\n" \
++ ".previous\n" \
++ : \
++ : "i" (X86_FEATURE_XMM2) \
++ : "memory")
++#define smp_rmb() smp_alt_mb("lfence")
++#define smp_mb() smp_alt_mb("mfence")
++#define set_mb(var, value) do { \
++unsigned long __set_mb_temp; \
++__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
++ ".section __smp_alternatives,\"a\"\n" \
++ ".long 6667b\n" \
++ ".long 6673f\n" \
++ ".previous\n" \
++ ".section __smp_replacements,\"a\"\n" \
++ "6673: .byte 6668b-6667b\n" \
++ ".byte 6670f-6669f\n" \
++ ".byte 0\n" \
++ ".byte 6671f-6670f\n" \
++ ".byte -1\n" \
++ "6669: xchg %1, %0\n" \
++ "6670:movl %1, %0\n" \
++ "6671:\n" \
++ ".previous\n" \
++ : "=m" (var), "=r" (__set_mb_temp) \
++ : "1" (value) \
++ : "memory"); } while (0)
++#else
+ #define smp_mb() mb()
+ #define smp_rmb() rmb()
++#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
++#endif
+ #define smp_wmb() wmb()
+ #define smp_read_barrier_depends() read_barrier_depends()
+-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
+ #else
+ #define smp_mb() barrier()
+ #define smp_rmb() barrier()
diff -r d940ec92958d -r 6c43118bdba8
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/segment.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/segment.h Fri Feb
24 21:03:07 2006
+++ /dev/null Fri Feb 24 22:41:08 2006
@@ -1,44 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-#include <asm/cache.h>
-
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x1b
-
-#define __KERNEL32_CS 0x3b
-
-/*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
- */
-
-#define __USER32_CS 0x23 /* 4*8+3 */
-#define __USER_DS 0x2b /* 5*8+3 */
-#define __USER_CS 0x33 /* 6*8+3 */
-#define __USER32_DS __USER_DS
-
-#define GDT_ENTRY_TLS 1
-#define GDT_ENTRY_TSS 8 /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-/* 15 free */
-
-#define GDT_ENTRY_TLS_ENTRIES 3
-
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 16
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-
-#endif
diff -r d940ec92958d -r 6c43118bdba8
patches/linux-2.6.16-rc3/i386-mach-io-check-nmi.patch
--- a/patches/linux-2.6.16-rc3/i386-mach-io-check-nmi.patch Fri Feb 24
21:03:07 2006
+++ /dev/null Fri Feb 24 22:41:08 2006
@@ -1,45 +0,0 @@
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c
./arch/i386/kernel/traps.c
---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/traps.c 2006-02-15
20:38:51.000000000 +0000
-+++ ./arch/i386/kernel/traps.c 2006-02-15 20:40:43.000000000 +0000
-@@ -567,18 +567,11 @@ static void mem_parity_error(unsigned ch
-
- static void io_check_error(unsigned char reason, struct pt_regs * regs)
- {
-- unsigned long i;
--
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
- show_registers(regs);
-
- /* Re-enable the IOCK line, wait for a few seconds */
-- reason = (reason & 0xf) | 8;
-- outb(reason, 0x61);
-- i = 2000;
-- while (--i) udelay(1000);
-- reason &= ~8;
-- outb(reason, 0x61);
-+ clear_io_check_error(reason);
- }
-
- static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-diff -pruN
../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h
./include/asm-i386/mach-default/mach_traps.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/mach-default/mach_traps.h
2006-01-03 03:21:10.000000000 +0000
-+++ ./include/asm-i386/mach-default/mach_traps.h 2006-02-15
20:40:43.000000000 +0000
-@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsig
- outb(reason, 0x61);
- }
-
-+static inline void clear_io_check_error(unsigned char reason)
-+{
-+ unsigned long i;
-+
-+ reason = (reason & 0xf) | 8;
-+ outb(reason, 0x61);
-+ i = 2000;
-+ while (--i) udelay(1000);
-+ reason &= ~8;
-+ outb(reason, 0x61);
-+}
-+
- static inline unsigned char get_nmi_reason(void)
- {
- return inb(0x61);
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/net-csum.patch
--- a/patches/linux-2.6.16-rc3/net-csum.patch Fri Feb 24 21:03:07 2006
+++ /dev/null Fri Feb 24 22:41:08 2006
@@ -1,41 +0,0 @@
-diff -pruN
../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c
./net/ipv4/netfilter/ip_nat_proto_tcp.c
---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_tcp.c
2006-02-02 17:39:51.000000000 +0000
-+++ ./net/ipv4/netfilter/ip_nat_proto_tcp.c 2006-02-02 17:44:18.000000000
+0000
-@@ -129,10 +129,14 @@ tcp_manip_pkt(struct sk_buff **pskb,
- if (hdrsize < sizeof(*hdr))
- return 1;
-
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+ if ((*pskb)->proto_csum_blank) {
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
-+ } else {
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
-+ }
- return 1;
- }
-
-diff -pruN
../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c
./net/ipv4/netfilter/ip_nat_proto_udp.c
---- ../pristine-linux-2.6.16-rc1-git4/net/ipv4/netfilter/ip_nat_proto_udp.c
2006-02-02 17:39:51.000000000 +0000
-+++ ./net/ipv4/netfilter/ip_nat_proto_udp.c 2006-02-02 17:44:18.000000000
+0000
-@@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb,
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-- if (hdr->check) /* 0 is a special case meaning no checksum */
-- hdr->check = ip_nat_cheat_check(~oldip, newip,
-+ if (hdr->check) { /* 0 is a special case meaning no checksum */
-+ if ((*pskb)->proto_csum_blank) {
-+ hdr->check = ip_nat_cheat_check(oldip, ~newip,
hdr->check);
-+ } else {
-+ hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
-+ }
-+ }
- *portptr = newport;
- return 1;
- }
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/pmd-shared.patch
--- a/patches/linux-2.6.16-rc3/pmd-shared.patch Fri Feb 24 21:03:07 2006
+++ /dev/null Fri Feb 24 22:41:08 2006
@@ -1,111 +0,0 @@
-diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c
./arch/i386/mm/pageattr.c
---- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pageattr.c 2006-02-02
17:39:29.000000000 +0000
-+++ ./arch/i386/mm/pageattr.c 2006-02-02 17:45:14.000000000 +0000
-@@ -78,7 +78,7 @@ static void set_pmd_pte(pte_t *kpte, uns
- unsigned long flags;
-
- set_pte_atomic(kpte, pte); /* change init_mm */
-- if (PTRS_PER_PMD > 1)
-+ if (HAVE_SHARED_KERNEL_PMD)
- return;
-
- spin_lock_irqsave(&pgd_lock, flags);
-diff -pruN ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c
./arch/i386/mm/pgtable.c
---- ../pristine-linux-2.6.16-rc1-git4/arch/i386/mm/pgtable.c 2006-01-03
03:21:10.000000000 +0000
-+++ ./arch/i386/mm/pgtable.c 2006-02-02 17:45:14.000000000 +0000
-@@ -215,9 +215,10 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
- spin_lock_irqsave(&pgd_lock, flags);
- }
-
-- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-- swapper_pg_dir + USER_PTRS_PER_PGD,
-- KERNEL_PGD_PTRS);
-+ if (PTRS_PER_PMD == 1 || HAVE_SHARED_KERNEL_PMD)
-+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-+ swapper_pg_dir + USER_PTRS_PER_PGD,
-+ KERNEL_PGD_PTRS);
- if (PTRS_PER_PMD > 1)
- return;
-
-@@ -249,6 +250,30 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
- goto out_oom;
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
- }
-+
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-+ if (!pmd)
-+ goto out_oom;
-+ set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
-+ }
-+
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ unsigned long v = (unsigned long)i << PGDIR_SHIFT;
-+ pgd_t *kpgd = pgd_offset_k(v);
-+ pud_t *kpud = pud_offset(kpgd, v);
-+ pmd_t *kpmd = pmd_offset(kpud, v);
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memcpy(pmd, kpmd, PAGE_SIZE);
-+ }
-+ pgd_list_add(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ }
-+
- return pgd;
-
- out_oom:
-@@ -263,9 +288,23 @@ void pgd_free(pgd_t *pgd)
- int i;
-
- /* in the PAE case user pgd entries are overwritten before usage */
-- if (PTRS_PER_PMD > 1)
-- for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-- kmem_cache_free(pmd_cache, (void
*)__va(pgd_val(pgd[i])-1));
-+ if (PTRS_PER_PMD > 1) {
-+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ if (!HAVE_SHARED_KERNEL_PMD) {
-+ unsigned long flags;
-+ spin_lock_irqsave(&pgd_lock, flags);
-+ pgd_list_del(pgd);
-+ spin_unlock_irqrestore(&pgd_lock, flags);
-+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-+ pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-+ kmem_cache_free(pmd_cache, pmd);
-+ }
-+ }
-+ }
- /* in the non-PAE case, free_pgtables() clears user pgd entries */
- kmem_cache_free(pgd_cache, pgd);
- }
-diff -pruN
../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h
./include/asm-i386/pgtable-2level-defs.h
---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-2level-defs.h
2006-01-03 03:21:10.000000000 +0000
-+++ ./include/asm-i386/pgtable-2level-defs.h 2006-02-02 17:45:14.000000000
+0000
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
- #define _I386_PGTABLE_2LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 0
-+
- /*
- * traditional i386 two-level paging structure:
- */
-diff -pruN
../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h
./include/asm-i386/pgtable-3level-defs.h
---- ../pristine-linux-2.6.16-rc1-git4/include/asm-i386/pgtable-3level-defs.h
2006-01-03 03:21:10.000000000 +0000
-+++ ./include/asm-i386/pgtable-3level-defs.h 2006-02-02 17:45:14.000000000
+0000
-@@ -1,6 +1,8 @@
- #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
- #define _I386_PGTABLE_3LEVEL_DEFS_H
-
-+#define HAVE_SHARED_KERNEL_PMD 1
-+
- /*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
diff -r d940ec92958d -r 6c43118bdba8 patches/linux-2.6.16-rc3/smp-alts.patch
--- a/patches/linux-2.6.16-rc3/smp-alts.patch Fri Feb 24 21:03:07 2006
+++ /dev/null Fri Feb 24 22:41:08 2006
@@ -1,591 +0,0 @@
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig ./arch/i386/Kconfig
---- ../pristine-linux-2.6.16-rc3/arch/i386/Kconfig 2006-02-15
20:38:51.000000000 +0000
-+++ ./arch/i386/Kconfig 2006-02-15 20:45:57.000000000 +0000
-@@ -202,6 +202,19 @@ config SMP
-
- If you don't know what to do here, say N.
-
-+config SMP_ALTERNATIVES
-+ bool "SMP alternatives support (EXPERIMENTAL)"
-+ depends on SMP && EXPERIMENTAL
-+ help
-+ Try to reduce the overhead of running an SMP kernel on a uniprocessor
-+ host slightly by replacing certain key instruction sequences
-+ according to whether we currently have more than one CPU available.
-+ This should provide a noticeable boost to performance when
-+ running SMP kernels on UP machines, and have negligible impact
-+ when running on an true SMP host.
-+
-+ If unsure, say N.
-+
- config NR_CPUS
- int "Maximum number of CPUs (2-255)"
- range 2 255
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile
./arch/i386/kernel/Makefile
---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/Makefile 2006-02-15
20:38:51.000000000 +0000
-+++ ./arch/i386/kernel/Makefile 2006-02-15 20:45:57.000000000 +0000
-@@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
- obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
- obj-$(CONFIG_VM86) += vm86.o
- obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-+obj-$(CONFIG_SMP_ALTERNATIVES) += smpalts.o
-
- EXTRA_AFLAGS := -traditional
-
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c
./arch/i386/kernel/smpalts.c
---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpalts.c 1970-01-01
01:00:00.000000000 +0100
-+++ ./arch/i386/kernel/smpalts.c 2006-02-15 20:45:57.000000000 +0000
-@@ -0,0 +1,85 @@
-+#include <linux/kernel.h>
-+#include <asm/system.h>
-+#include <asm/smp_alt.h>
-+#include <asm/processor.h>
-+#include <asm/string.h>
-+
-+struct smp_replacement_record {
-+ unsigned char targ_size;
-+ unsigned char smp1_size;
-+ unsigned char smp2_size;
-+ unsigned char up_size;
-+ unsigned char feature;
-+ unsigned char data[0];
-+};
-+
-+struct smp_alternative_record {
-+ void *targ_start;
-+ struct smp_replacement_record *repl;
-+};
-+
-+extern struct smp_alternative_record __start_smp_alternatives_table,
-+ __stop_smp_alternatives_table;
-+extern unsigned long __init_begin, __init_end;
-+
-+void prepare_for_smp(void)
-+{
-+ struct smp_alternative_record *r;
-+ printk(KERN_INFO "Enabling SMP...\n");
-+ for (r = &__start_smp_alternatives_table;
-+ r != &__stop_smp_alternatives_table;
-+ r++) {
-+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
-+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
-+ BUG_ON(r->repl->targ_size < r->repl->up_size);
-+ if (system_state == SYSTEM_RUNNING &&
-+ r->targ_start >= (void *)&__init_begin &&
-+ r->targ_start < (void *)&__init_end)
-+ continue;
-+ if (r->repl->feature != (unsigned char)-1 &&
-+ boot_cpu_has(r->repl->feature)) {
-+ memcpy(r->targ_start,
-+ r->repl->data + r->repl->smp1_size,
-+ r->repl->smp2_size);
-+ memset(r->targ_start + r->repl->smp2_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->smp2_size);
-+ } else {
-+ memcpy(r->targ_start,
-+ r->repl->data,
-+ r->repl->smp1_size);
-+ memset(r->targ_start + r->repl->smp1_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->smp1_size);
-+ }
-+ }
-+ /* Paranoia */
-+ asm volatile ("jmp 1f\n1:");
-+ mb();
-+}
-+
-+void unprepare_for_smp(void)
-+{
-+ struct smp_alternative_record *r;
-+ printk(KERN_INFO "Disabling SMP...\n");
-+ for (r = &__start_smp_alternatives_table;
-+ r != &__stop_smp_alternatives_table;
-+ r++) {
-+ BUG_ON(r->repl->targ_size < r->repl->smp1_size);
-+ BUG_ON(r->repl->targ_size < r->repl->smp2_size);
-+ BUG_ON(r->repl->targ_size < r->repl->up_size);
-+ if (system_state == SYSTEM_RUNNING &&
-+ r->targ_start >= (void *)&__init_begin &&
-+ r->targ_start < (void *)&__init_end)
-+ continue;
-+ memcpy(r->targ_start,
-+ r->repl->data + r->repl->smp1_size + r->repl->smp2_size,
-+ r->repl->up_size);
-+ memset(r->targ_start + r->repl->up_size,
-+ 0x90,
-+ r->repl->targ_size - r->repl->up_size);
-+ }
-+ /* Paranoia */
-+ asm volatile ("jmp 1f\n1:");
-+ mb();
-+}
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c
./arch/i386/kernel/smpboot.c
---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/smpboot.c 2006-02-15
20:38:51.000000000 +0000
-+++ ./arch/i386/kernel/smpboot.c 2006-02-15 20:45:57.000000000 +0000
-@@ -1214,6 +1214,11 @@ static void __init smp_boot_cpus(unsigne
- if (max_cpus <= cpucount+1)
- continue;
-
-+#ifdef CONFIG_SMP_ALTERNATIVES
-+ if (kicked == 1)
-+ prepare_for_smp();
-+#endif
-+
- if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
-@@ -1392,6 +1397,11 @@ int __devinit __cpu_up(unsigned int cpu)
- return -EIO;
- }
-
-+#ifdef CONFIG_SMP_ALTERNATIVES
-+ if (num_online_cpus() == 1)
-+ prepare_for_smp();
-+#endif
-+
- local_irq_enable();
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Unleash the CPU! */
-diff -pruN ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S
./arch/i386/kernel/vmlinux.lds.S
---- ../pristine-linux-2.6.16-rc3/arch/i386/kernel/vmlinux.lds.S
2006-01-03 03:21:10.000000000 +0000
-+++ ./arch/i386/kernel/vmlinux.lds.S 2006-02-15 20:45:57.000000000 +0000
-@@ -34,6 +34,13 @@ SECTIONS
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
- __stop___ex_table = .;
-
-+ . = ALIGN(16);
-+ __start_smp_alternatives_table = .;
-+ __smp_alternatives : { *(__smp_alternatives) }
-+ __stop_smp_alternatives_table = .;
-+
-+ __smp_replacements : { *(__smp_replacements) }
-+
- RODATA
-
- /* writeable */
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h
./include/asm-i386/atomic.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/atomic.h 2006-02-15
20:38:57.000000000 +0000
-+++ ./include/asm-i386/atomic.h 2006-02-15 20:45:57.000000000 +0000
-@@ -4,18 +4,13 @@
- #include <linux/config.h>
- #include <linux/compiler.h>
- #include <asm/processor.h>
-+#include <asm/smp_alt.h>
-
- /*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
--#ifdef CONFIG_SMP
--#define LOCK "lock ; "
--#else
--#define LOCK ""
--#endif
--
- /*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h
./include/asm-i386/bitops.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/bitops.h 2006-02-15
20:38:57.000000000 +0000
-+++ ./include/asm-i386/bitops.h 2006-02-15 20:45:57.000000000 +0000
-@@ -7,6 +7,7 @@
-
- #include <linux/config.h>
- #include <linux/compiler.h>
-+#include <asm/smp_alt.h>
-
- /*
- * These have to be done with inline assembly: that way the bit-setting
-@@ -16,12 +17,6 @@
- * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
- */
-
--#ifdef CONFIG_SMP
--#define LOCK_PREFIX "lock ; "
--#else
--#define LOCK_PREFIX ""
--#endif
--
- #define ADDR (*(volatile long *) addr)
-
- /**
-@@ -41,7 +36,7 @@
- */
- static inline void set_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btsl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -76,7 +71,7 @@ static inline void __set_bit(int nr, vol
- */
- static inline void clear_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btrl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -121,7 +116,7 @@ static inline void __change_bit(int nr,
- */
- static inline void change_bit(int nr, volatile unsigned long * addr)
- {
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btcl %1,%0"
- :"+m" (ADDR)
- :"Ir" (nr));
-@@ -140,7 +135,7 @@ static inline int test_and_set_bit(int n
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btsl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-@@ -180,7 +175,7 @@ static inline int test_and_clear_bit(int
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btrl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-@@ -231,7 +226,7 @@ static inline int test_and_change_bit(in
- {
- int oldbit;
-
-- __asm__ __volatile__( LOCK_PREFIX
-+ __asm__ __volatile__( LOCK
- "btcl %2,%1\n\tsbbl %0,%0"
- :"=r" (oldbit),"+m" (ADDR)
- :"Ir" (nr) : "memory");
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h
./include/asm-i386/futex.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/futex.h 2006-02-15
20:38:57.000000000 +0000
-+++ ./include/asm-i386/futex.h 2006-02-15 20:45:57.000000000 +0000
-@@ -28,7 +28,7 @@
- "1: movl %2, %0\n\
- movl %0, %3\n" \
- insn "\n" \
--"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\
-+"2: " LOCK "cmpxchgl %3, %2\n\
- jnz 1b\n\
- 3: .section .fixup,\"ax\"\n\
- 4: mov %5, %1\n\
-@@ -68,7 +68,7 @@ futex_atomic_op_inuser (int encoded_op,
- #endif
- switch (op) {
- case FUTEX_OP_ADD:
-- __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret,
-+ __futex_atomic_op1(LOCK "xaddl %0, %2", ret,
- oldval, uaddr, oparg);
- break;
- case FUTEX_OP_OR:
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h
./include/asm-i386/rwsem.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/rwsem.h 2006-01-03
03:21:10.000000000 +0000
-+++ ./include/asm-i386/rwsem.h 2006-02-15 20:45:57.000000000 +0000
-@@ -40,6 +40,7 @@
-
- #include <linux/list.h>
- #include <linux/spinlock.h>
-+#include <asm/smp_alt.h>
-
- struct rwsem_waiter;
-
-@@ -99,7 +100,7 @@ static inline void __down_read(struct rw
- {
- __asm__ __volatile__(
- "# beginning down_read\n\t"
--LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old
value */
-+LOCK " incl (%%eax)\n\t" /* adds 0x00000001, returns the old
value */
- " js 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -130,7 +131,7 @@ static inline int __down_read_trylock(st
- " movl %1,%2\n\t"
- " addl %3,%2\n\t"
- " jle 2f\n\t"
--LOCK_PREFIX " cmpxchgl %2,%0\n\t"
-+LOCK " cmpxchgl %2,%0\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
-@@ -150,7 +151,7 @@ static inline void __down_write(struct r
- tmp = RWSEM_ACTIVE_WRITE_BIAS;
- __asm__ __volatile__(
- "# beginning down_write\n\t"
--LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns
the old value */
-+LOCK " xadd %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns
the old value */
- " testl %%edx,%%edx\n\t" /* was the count 0 before? */
- " jnz 2f\n\t" /* jump if we weren't granted the lock */
- "1:\n\t"
-@@ -188,7 +189,7 @@ static inline void __up_read(struct rw_s
- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
- __asm__ __volatile__(
- "# beginning __up_read\n\t"
--LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old
value */
-+LOCK " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old
value */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -214,7 +215,7 @@ static inline void __up_write(struct rw_
- __asm__ __volatile__(
- "# beginning __up_write\n\t"
- " movl %2,%%edx\n\t"
--LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition
0xffff0001 -> 0x00000000 */
-+LOCK " xaddl %%edx,(%%eax)\n\t" /* tries to transition
0xffff0001 -> 0x00000000 */
- " jnz 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -239,7 +240,7 @@ static inline void __downgrade_write(str
- {
- __asm__ __volatile__(
- "# beginning __downgrade_write\n\t"
--LOCK_PREFIX " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 ->
0xYYYY0001 */
-+LOCK " addl %2,(%%eax)\n\t" /* transitions 0xZZZZ0001 ->
0xYYYY0001 */
- " js 2f\n\t" /* jump if the lock is being waited upon */
- "1:\n\t"
- LOCK_SECTION_START("")
-@@ -263,7 +264,7 @@ LOCK_PREFIX " addl %2,(%%eax)\n\t"
- static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
- {
- __asm__ __volatile__(
--LOCK_PREFIX "addl %1,%0"
-+LOCK "addl %1,%0"
- : "=m"(sem->count)
- : "ir"(delta), "m"(sem->count));
- }
-@@ -276,7 +277,7 @@ static inline int rwsem_atomic_update(in
- int tmp = delta;
-
- __asm__ __volatile__(
--LOCK_PREFIX "xadd %0,(%2)"
-+LOCK "xadd %0,(%2)"
- : "+r"(tmp), "=m"(sem->count)
- : "r"(sem), "m"(sem->count)
- : "memory");
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h
./include/asm-i386/smp_alt.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/smp_alt.h 1970-01-01
01:00:00.000000000 +0100
-+++ ./include/asm-i386/smp_alt.h 2006-02-15 20:45:57.000000000 +0000
-@@ -0,0 +1,32 @@
-+#ifndef __ASM_SMP_ALT_H__
-+#define __ASM_SMP_ALT_H__
-+
-+#include <linux/config.h>
-+
-+#ifdef CONFIG_SMP
-+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-+#define LOCK \
-+ "6677: nop\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6677b\n" \
-+ ".long 6678f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6678: .byte 1\n" \
-+ ".byte 1\n" \
-+ ".byte 0\n" \
-+ ".byte 1\n" \
-+ ".byte -1\n" \
-+ "lock\n" \
-+ "nop\n" \
-+ ".previous\n"
-+void prepare_for_smp(void);
-+void unprepare_for_smp(void);
-+#else
-+#define LOCK "lock ; "
-+#endif
-+#else
-+#define LOCK ""
-+#endif
-+
-+#endif /* __ASM_SMP_ALT_H__ */
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h
./include/asm-i386/spinlock.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/spinlock.h 2006-01-03
03:21:10.000000000 +0000
-+++ ./include/asm-i386/spinlock.h 2006-02-15 20:45:57.000000000 +0000
-@@ -6,6 +6,7 @@
- #include <asm/page.h>
- #include <linux/config.h>
- #include <linux/compiler.h>
-+#include <asm/smp_alt.h>
-
- /*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
-@@ -23,7 +24,8 @@
-
- #define __raw_spin_lock_string \
- "\n1:\t" \
-- "lock ; decb %0\n\t" \
-+ LOCK \
-+ "decb %0\n\t" \
- "jns 3f\n" \
- "2:\t" \
- "rep;nop\n\t" \
-@@ -34,7 +36,8 @@
-
- #define __raw_spin_lock_string_flags \
- "\n1:\t" \
-- "lock ; decb %0\n\t" \
-+ LOCK \
-+ "decb %0\n\t" \
- "jns 4f\n\t" \
- "2:\t" \
- "testl $0x200, %1\n\t" \
-@@ -65,10 +68,34 @@ static inline void __raw_spin_lock_flags
- static inline int __raw_spin_trylock(raw_spinlock_t *lock)
- {
- char oldval;
-+#ifdef CONFIG_SMP_ALTERNATIVES
- __asm__ __volatile__(
-- "xchgb %b0,%1"
-+ "1:movb %1,%b0\n"
-+ "movb $0,%1\n"
-+ "2:"
-+ ".section __smp_alternatives,\"a\"\n"
-+ ".long 1b\n"
-+ ".long 3f\n"
-+ ".previous\n"
-+ ".section __smp_replacements,\"a\"\n"
-+ "3: .byte 2b - 1b\n"
-+ ".byte 5f-4f\n"
-+ ".byte 0\n"
-+ ".byte 6f-5f\n"
-+ ".byte -1\n"
-+ "4: xchgb %b0,%1\n"
-+ "5: movb %1,%b0\n"
-+ "movb $0,%1\n"
-+ "6:\n"
-+ ".previous\n"
- :"=q" (oldval), "=m" (lock->slock)
- :"0" (0) : "memory");
-+#else
-+ __asm__ __volatile__(
-+ "xchgb %b0,%1\n"
-+ :"=q" (oldval), "=m" (lock->slock)
-+ :"0" (0) : "memory");
-+#endif
- return oldval > 0;
- }
-
-@@ -178,12 +205,12 @@ static inline int __raw_write_trylock(ra
-
- static inline void __raw_read_unlock(raw_rwlock_t *rw)
- {
-- asm volatile("lock ; incl %0" :"=m" (rw->lock) : : "memory");
-+ asm volatile(LOCK "incl %0" :"=m" (rw->lock) : : "memory");
- }
-
- static inline void __raw_write_unlock(raw_rwlock_t *rw)
- {
-- asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ", %0"
-+ asm volatile(LOCK "addl $" RW_LOCK_BIAS_STR ", %0"
- : "=m" (rw->lock) : : "memory");
- }
-
-diff -pruN ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h
./include/asm-i386/system.h
---- ../pristine-linux-2.6.16-rc3/include/asm-i386/system.h 2006-02-15
20:38:57.000000000 +0000
-+++ ./include/asm-i386/system.h 2006-02-15 20:45:57.000000000 +0000
-@@ -5,7 +5,7 @@
- #include <linux/kernel.h>
- #include <asm/segment.h>
- #include <asm/cpufeature.h>
--#include <linux/bitops.h> /* for LOCK_PREFIX */
-+#include <asm/smp_alt.h>
-
- #ifdef __KERNEL__
-
-@@ -271,19 +271,19 @@ static inline unsigned long __cmpxchg(vo
- unsigned long prev;
- switch (size) {
- case 1:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgb %b1,%2"
- : "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 2:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
- return prev;
- case 4:
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
-+ __asm__ __volatile__(LOCK "cmpxchgl %1,%2"
- : "=a"(prev)
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
- : "memory");
-@@ -336,7 +336,7 @@ static inline unsigned long long __cmpxc
- unsigned long long new)
- {
- unsigned long long prev;
-- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
-+ __asm__ __volatile__(LOCK "cmpxchg8b %3"
- : "=A"(prev)
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
-@@ -503,11 +503,55 @@ struct alt_instr {
- #endif
-
- #ifdef CONFIG_SMP
-+#if defined(CONFIG_SMP_ALTERNATIVES) && !defined(MODULE)
-+#define smp_alt_mb(instr) \
-+__asm__ __volatile__("6667:\nnop\nnop\nnop\nnop\nnop\nnop\n6668:\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6667b\n" \
-+ ".long 6673f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6673:.byte 6668b-6667b\n" \
-+ ".byte 6670f-6669f\n" \
-+ ".byte 6671f-6670f\n" \
-+ ".byte 0\n" \
-+ ".byte %c0\n" \
-+ "6669:lock;addl $0,0(%%esp)\n" \
-+ "6670:" instr "\n" \
-+ "6671:\n" \
-+ ".previous\n" \
-+ : \
-+ : "i" (X86_FEATURE_XMM2) \
-+ : "memory")
-+#define smp_rmb() smp_alt_mb("lfence")
-+#define smp_mb() smp_alt_mb("mfence")
-+#define set_mb(var, value) do { \
-+unsigned long __set_mb_temp; \
-+__asm__ __volatile__("6667:movl %1, %0\n6668:\n" \
-+ ".section __smp_alternatives,\"a\"\n" \
-+ ".long 6667b\n" \
-+ ".long 6673f\n" \
-+ ".previous\n" \
-+ ".section __smp_replacements,\"a\"\n" \
-+ "6673: .byte 6668b-6667b\n" \
-+ ".byte 6670f-6669f\n" \
-+ ".byte 0\n" \
-+ ".byte 6671f-6670f\n" \
-+ ".byte -1\n" \
-+ "6669: xchg %1, %0\n" \
-+ "6670:movl %1, %0\n" \
-+ "6671:\n" \
-+ ".previous\n" \
-+ : "=m" (var), "=r" (__set_mb_temp) \
-+ : "1" (value) \
-+ : "memory"); } while (0)
-+#else
- #define smp_mb() mb()
- #define smp_rmb() rmb()
-+#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
-+#endif
- #define smp_wmb() wmb()
- #define smp_read_barrier_depends() read_barrier_depends()
--#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
- #else
- #define smp_mb() barrier()
- #define smp_rmb() barrier()
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|