
Re: [Xen-merge] i386 subarch

Here's that output from the script.  The diffstat can serve as a guiding
list of files to be converted (Kconfig and Makefiles can be ignored).


 arch/i386/Kconfig                   |  718 +++++----------------
 arch/i386/Makefile                  |  128 ---
 arch/i386/kernel/Makefile           |   79 +-
 arch/i386/kernel/acpi/Makefile      |   15 
 arch/i386/kernel/acpi/boot.c        |   26 
 arch/i386/kernel/apic.c             | 1201 ------------------------------------
 arch/i386/kernel/cpu/Makefile       |   34 -
 arch/i386/kernel/cpu/common.c       |   58 -
 arch/i386/kernel/cpu/mtrr/Makefile  |   19 
 arch/i386/kernel/cpu/mtrr/main.c    |  629 +-----------------
 arch/i386/kernel/entry.S            |  358 ++++++----
 arch/i386/kernel/head.S             |  457 +------------
 arch/i386/kernel/i386_ksyms.c       |    2 
 arch/i386/kernel/io_apic.c          |  104 ++-
 arch/i386/kernel/ioport.c           |   74 --
 arch/i386/kernel/irq.c              |   66 +
 arch/i386/kernel/ldt.c              |   32 
 arch/i386/kernel/microcode.c        |  375 -----------
 arch/i386/kernel/mpparse.c          |   27 
 arch/i386/kernel/pci-dma.c          |  141 ++++
 arch/i386/kernel/process.c          |  291 +++-----
 arch/i386/kernel/quirks.c           |   11 
 arch/i386/kernel/setup.c            |  241 ++++++-
 arch/i386/kernel/signal.c           |    2 
 arch/i386/kernel/smp.c              |  208 +++---
 arch/i386/kernel/smpboot.c          |  476 ++++++++++++--
 arch/i386/kernel/time.c             |  553 +++++++++++++++-
 arch/i386/kernel/timers/Makefile    |   16 
 arch/i386/kernel/timers/timer_tsc.c |  277 +-------
 arch/i386/kernel/traps.c            |  210 ++----
 arch/i386/kernel/vsyscall.S         |    4 
 arch/i386/mach-default/Makefile     |    9 
 arch/i386/mm/Makefile               |   22 
 arch/i386/mm/fault.c                |   35 -
 arch/i386/mm/highmem.c              |   15 
 arch/i386/mm/hypervisor.c           |  363 ++++++++++
 arch/i386/mm/init.c                 |  131 +++
 arch/i386/mm/ioremap.c              |  312 ++++++---
 arch/i386/mm/pgtable.c              |  309 ++++++++-
 arch/i386/pci/Makefile              |   38 -
 arch/i386/pci/irq.c                 |    5 
 41 files changed, 3673 insertions(+), 4398 deletions(-)
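
(For anyone wanting to regenerate this list as the sparse tree moves: a
minimal sketch, assuming the pristine and sparse trees sit side by side
as in the diff headers below and that diffstat(1) is installed. The diff
options are taken verbatim from this patch; the grep filter is just one
way to drop the ignorable files.)

  # Summarize the differences between the two trees, as above:
  diff -x mkbuildtree -x include -x xen -x SCCS -urPp \
      pristine-linux-2.6.12 linux-2.6-xen-sparse | diffstat

  # List only the candidate files, skipping Kconfig and Makefiles:
  diff -x mkbuildtree -x include -x xen -x SCCS -urPp \
      pristine-linux-2.6.12 linux-2.6-xen-sparse | diffstat -l -p1 \
      | grep -v -e Kconfig -e Makefile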


diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/Kconfig linux-2.6-xen-sparse/arch/i386/Kconfig
--- pristine-linux-2.6.12/arch/i386/Kconfig     2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/Kconfig      2005-07-28 13:17:07.000000000 -0700
@@ -3,7 +3,11 @@
 # see Documentation/kbuild/kconfig-language.txt.
 #
 
-mainmenu "Linux Kernel Configuration"
+menu "X86 Processor Configuration"
+
+config XENARCH
+       string
+       default i386
 
 config X86
        bool
@@ -33,119 +37,6 @@ config GENERIC_IOMAP
        bool
        default y
 
-source "init/Kconfig"
-
-menu "Processor type and features"
-
-choice
-       prompt "Subarchitecture Type"
-       default X86_PC
-
-config X86_PC
-       bool "PC-compatible"
-       help
-         Choose this option if your computer is a standard PC or compatible.
-
-config X86_ELAN
-       bool "AMD Elan"
-       help
-         Select this for an AMD Elan processor.
-
-         Do not use this option for K6/Athlon/Opteron processors!
-
-         If unsure, choose "PC-compatible" instead.
-
-config X86_VOYAGER
-       bool "Voyager (NCR)"
-       help
-         Voyager is an MCA-based 32-way capable SMP architecture proprietary
-         to NCR Corp.  Machine classes 345x/35xx/4100/51xx are Voyager-based.
-
-         *** WARNING ***
-
-         If you do not specifically know you have a Voyager based machine,
-         say N here, otherwise the kernel you build will not be bootable.
-
-config X86_NUMAQ
-       bool "NUMAQ (IBM/Sequent)"
-       select DISCONTIGMEM
-       select NUMA
-       help
-         This option is used for getting Linux to run on a (IBM/Sequent) NUMA
-         multiquad box. This changes the way that processors are bootstrapped,
-         and uses Clustered Logical APIC addressing mode instead of Flat Logical.
-         You will need a new lynxer.elf file to flash your firmware with - send
-         email to <Martin.Bligh@xxxxxxxxxx>.
-
-config X86_SUMMIT
-       bool "Summit/EXA (IBM x440)"
-       depends on SMP
-       help
-         This option is needed for IBM systems that use the Summit/EXA chipset.
-         In particular, it is needed for the x440.
-
-         If you don't have one of these computers, you should say N here.
-
-config X86_BIGSMP
-       bool "Support for other sub-arch SMP systems with more than 8 CPUs"
-       depends on SMP
-       help
-         This option is needed for the systems that have more than 8 CPUs
-         and if the system is not of any sub-arch type above.
-
-         If you don't have such a system, you should say N here.
-
-config X86_VISWS
-       bool "SGI 320/540 (Visual Workstation)"
-       help
-         The SGI Visual Workstation series is an IA32-based workstation
-         based on SGI systems chips with some legacy PC hardware attached.
-
-         Say Y here to create a kernel to run on the SGI 320 or 540.
-
-         A kernel compiled for the Visual Workstation will not run on PCs
-         and vice versa. See <file:Documentation/sgi-visws.txt> for details.
-
-config X86_GENERICARCH
-       bool "Generic architecture (Summit, bigsmp, ES7000, default)"
-       depends on SMP
-       help
-          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
-         It is intended for a generic binary kernel.
-
-config X86_ES7000
-       bool "Support for Unisys ES7000 IA32 series"
-       depends on SMP
-       help
-         Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
-         supposed to run on an IA32-based Unisys ES7000 system.
-         Only choose this option if you have such a system, otherwise you
-         should say N here.
-
-endchoice
-
-config ACPI_SRAT
-       bool
-       default y
-       depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
-
-config X86_SUMMIT_NUMA
-       bool
-       default y
-       depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
-
-config X86_CYCLONE_TIMER
-       bool
-       default y
-       depends on X86_SUMMIT || X86_GENERICARCH
-
-config ES7000_CLUSTERED_APIC
-       bool
-       default y
-       depends on SMP && X86_ES7000 && MPENTIUMIII
-
-if !X86_ELAN
-
 choice
        prompt "Processor family"
        default M686
@@ -347,8 +238,6 @@ config X86_GENERIC
          This is really intended for distributors who need more
          generic optimizations.
 
-endif
-
 #
 # Define implied options from the CPU selection here
 #
@@ -444,19 +333,21 @@ config X86_OOSTORE
        default y
 
 config HPET_TIMER
-       bool "HPET Timer Support"
-       help
-         This enables the use of the HPET for the kernel's internal timer.
-         HPET is the next generation timer replacing legacy 8254s.
-         You can safely choose Y here.  However, HPET will only be
-         activated if the platform and the BIOS support this feature.
-         Otherwise the 8254 will be used for timing services.
-
-         Choose N to continue using the legacy 8254 timer.
+       bool
+       default n
+#config HPET_TIMER
+#      bool "HPET Timer Support"
+#      help
+#        This enables the use of the HPET for the kernel's internal timer.
+#        HPET is the next generation timer replacing legacy 8254s.
+#        You can safely choose Y here.  However, HPET will only be
+#        activated if the platform and the BIOS support this feature.
+#        Otherwise the 8254 will be used for timing services.
+#
+#        Choose N to continue using the legacy 8254 timer.
 
 config HPET_EMULATE_RTC
-       bool "Provide RTC interrupt"
-       depends on HPET_TIMER && RTC=y
+       def_bool HPET_TIMER && RTC=y
 
 config SMP
        bool "Symmetric multi-processing support"
@@ -487,6 +378,19 @@ config SMP
 
          If you don't know what to do here, say N.
 
+config SMP_ALTERNATIVES
+        bool "SMP alternatives support (EXPERIMENTAL)"
+        depends on SMP && EXPERIMENTAL
+        help
+          Try to reduce the overhead of running an SMP kernel on a uniprocessor
+          host slightly by replacing certain key instruction sequences
+          according to whether we currently have more than one CPU available.
+          This should provide a noticeable boost to performance when
+          running SMP kernels on UP machines, and have negligible impact
+          when running on a true SMP host.
+
+          If unsure, say N.
+
 config NR_CPUS
        int "Maximum number of CPUs (2-255)"
        range 2 255
@@ -534,122 +438,47 @@ config PREEMPT_BKL
          Say Y here if you are building a kernel for a desktop system.
          Say N if you are unsure.
 
-config X86_UP_APIC
-       bool "Local APIC support on uniprocessors"
-       depends on !SMP && !(X86_VISWS || X86_VOYAGER)
-       help
-         A local APIC (Advanced Programmable Interrupt Controller) is an
-         integrated interrupt controller in the CPU. If you have a single-CPU
-         system which has a processor with a local APIC, you can say Y here to
-         enable and use it. If you say Y here even though your machine doesn't
-         have a local APIC, then the kernel will still run with no slowdown at
-         all. The local APIC supports CPU-generated self-interrupts (timer,
-         performance counters), and the NMI watchdog which detects hard
-         lockups.
-
-config X86_UP_IOAPIC
-       bool "IO-APIC support on uniprocessors"
-       depends on X86_UP_APIC
-       help
-         An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
-         SMP-capable replacement for PC-style interrupt controllers. Most
-         SMP systems and many recent uniprocessor systems have one.
-
-         If you have a single-CPU system with an IO-APIC, you can say Y here
-         to use it. If you say Y here even though your machine doesn't have
-         an IO-APIC, then the kernel will still run with no slowdown at all.
-
-config X86_LOCAL_APIC
-       bool
-       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
-       default y
-
-config X86_IO_APIC
-       bool
-       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
-       default y
-
-config X86_VISWS_APIC
-       bool
-       depends on X86_VISWS
-       default y
-
-config X86_TSC
-       bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
-       default y
-
-config X86_MCE
-       bool "Machine Check Exception"
-       depends on !X86_VOYAGER
-       ---help---
-         Machine Check Exception support allows the processor to notify the
-         kernel if it detects a problem (e.g. overheating, component failure).
-         The action the kernel takes depends on the severity of the problem,
-         ranging from a warning message on the console, to halting the machine.
-         Your processor must be a Pentium or newer to support this - check the
-         flags in /proc/cpuinfo for mce.  Note that some older Pentium systems
-         have a design flaw which leads to false MCE events - hence MCE is
-         disabled on all P5 processors, unless explicitly enabled with "mce"
-         as a boot argument.  Similarly, if MCE is built in and creates a
-         problem on some new non-standard machine, you can boot with "nomce"
-         to disable it.  MCE support simply ignores non-MCE processors like
-         the 386 and 486, so nearly everyone can say Y here.
-
-config X86_MCE_NONFATAL
-       tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel 
Pentium 4"
-       depends on X86_MCE
-       help
-         Enabling this feature starts a timer that triggers every 5 seconds 
which
-         will look at the machine check registers to see if anything happened.
-         Non-fatal problems automatically get corrected (but still logged).
-         Disable this if you don't want to see these messages.
-         Seeing the messages this option prints out may be indicative of dying 
hardware,
-         or out-of-spec (ie, overclocked) hardware.
-         This option only does something on certain CPUs.
-         (AMD Athlon/Duron and Intel Pentium 4)
-
-config X86_MCE_P4THERMAL
-       bool "check for P4 thermal throttling interrupt."
-       depends on X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
-       help
-         Enabling this feature will cause a message to be printed when the P4
-         enters thermal throttling.
-
-config TOSHIBA
-       tristate "Toshiba Laptop support"
-       ---help---
-         This adds a driver to safely access the System Management Mode of
-         the CPU on Toshiba portables with a genuine Toshiba BIOS. It does
-         not work on models with a Phoenix BIOS. The System Management Mode
-         is used to set the BIOS and power saving options on Toshiba portables.
-
-         For information on utilities to make use of this driver see the
-         Toshiba Linux utilities web site at:
-         <http://www.buzzard.org.uk/toshiba/>.
-
-         Say Y if you intend to run this kernel on a Toshiba portable.
-         Say N otherwise.
-
-config I8K
-       tristate "Dell laptop support"
-       ---help---
-         This adds a driver to safely access the System Management Mode
-         of the CPU on the Dell Inspiron 8000. The System Management Mode
-         is used to read cpu temperature and cooling fan status and to
-         control the fans on the I8K portables.
-
-         This driver has been tested only on the Inspiron 8000 but it may
-         also work with other Dell laptops. You can force loading on other
-         models by passing the parameter `force=1' to the module. Use at
-         your own risk.
-
-         For information on utilities to make use of this driver see the
-         I8K Linux utilities web site at:
-         <http://people.debian.org/~dz/i8k/>
-
-         Say Y if you intend to run this kernel on a Dell Inspiron 8000.
-         Say N otherwise.
+#config X86_TSC
+#       bool
+#      depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
+#       default y
+
+#config X86_MCE
+#       bool "Machine Check Exception"
+#      depends on !X86_VOYAGER
+#       ---help---
+#         Machine Check Exception support allows the processor to notify the
+#         kernel if it detects a problem (e.g. overheating, component failure).
+#         The action the kernel takes depends on the severity of the problem,
+#         ranging from a warning message on the console, to halting the machine.
+#         Your processor must be a Pentium or newer to support this - check the
+#         flags in /proc/cpuinfo for mce.  Note that some older Pentium systems
+#         have a design flaw which leads to false MCE events - hence MCE is
+#         disabled on all P5 processors, unless explicitly enabled with "mce"
+#         as a boot argument.  Similarly, if MCE is built in and creates a
+#         problem on some new non-standard machine, you can boot with "nomce"
+#         to disable it.  MCE support simply ignores non-MCE processors like
+#         the 386 and 486, so nearly everyone can say Y here.
+
+#config X86_MCE_NONFATAL
+#      tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel 
Pentium 4"
+#       depends on X86_MCE
+#       help
+#         Enabling this feature starts a timer that triggers every 5 seconds 
which
+#         will look at the machine check registers to see if anything happened.
+#         Non-fatal problems automatically get corrected (but still logged).
+#         Disable this if you don't want to see these messages.
+#         Seeing the messages this option prints out may be indicative of 
dying hardware,
+#         or out-of-spec (ie, overclocked) hardware.
+#         This option only does something on certain CPUs.
+#         (AMD Athlon/Duron and Intel Pentium 4)
+
+#config X86_MCE_P4THERMAL
+#       bool "check for P4 thermal throttling interrupt."
+#       depends on X86_MCE && (X86_UP_APIC || SMP)
+#       help
+#         Enabling this feature will cause a message to be printed when the P4
+#         enters thermal throttling.
 
 config X86_REBOOTFIXUPS
        bool "Enable X86 board specific fixups for reboot"
@@ -671,6 +500,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+        depends on XEN_PRIVILEGED_GUEST
        ---help---
          If you say Y here and also to "/dev file system support" in the
          'File systems' section, you will be able to update the microcode on
@@ -686,14 +516,14 @@ config MICROCODE
          To compile this driver as a module, choose M here: the
          module will be called microcode.
 
-config X86_MSR
-       tristate "/dev/cpu/*/msr - Model-specific register support"
-       help
-         This device gives privileged processes access to the x86
-         Model-Specific Registers (MSRs).  It is a character device with
-         major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
-         MSR accesses are directed to a specific CPU on multi-processor
-         systems.
+#config X86_MSR
+#       tristate "/dev/cpu/*/msr - Model-specific register support"
+#       help
+#         This device gives privileged processes access to the x86
+#         Model-Specific Registers (MSRs).  It is a character device with
+#         major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+#         MSR accesses are directed to a specific CPU on multi-processor
+#         systems.
 
 config X86_CPUID
        tristate "/dev/cpu/*/cpuid - CPU information support"
@@ -803,95 +633,57 @@ config NEED_NODE_MEMMAP_SIZE
        depends on DISCONTIGMEM
        default y
 
-config HIGHPTE
-       bool "Allocate 3rd-level pagetables from highmem"
-       depends on HIGHMEM4G || HIGHMEM64G
-       help
-         The VM uses one page table entry for each page of physical memory.
-         For systems with a lot of RAM, this can be wasteful of precious
-         low memory.  Setting this option will put user-space page table
-         entries in high memory.
-
-config MATH_EMULATION
-       bool "Math emulation"
-       ---help---
-         Linux can emulate a math coprocessor (used for floating point
-         operations) if you don't have one. 486DX and Pentium processors have
-         a math coprocessor built in, 486SX and 386 do not, unless you added
-         a 487DX or 387, respectively. (The messages during boot time can
-         give you some hints here ["man dmesg"].) Everyone needs either a
-         coprocessor or this emulation.
-
-         If you don't have a math coprocessor, you need to say Y here; if you
-         say Y here even though you have a coprocessor, the coprocessor will
-         be used nevertheless. (This behavior can be changed with the kernel
-         command line option "no387", which comes handy if your coprocessor
-         is broken. Try "man bootparam" or see the documentation of your boot
-         loader (lilo or loadlin) about how to pass options to the kernel at
-         boot time.) This means that it is a good idea to say Y here if you
-         intend to use this kernel on different machines.
-
-         More information about the internals of the Linux math coprocessor
-         emulation can be found in <file:arch/i386/math-emu/README>.
-
-         If you are not sure, say Y; apart from resulting in a 66 KB bigger
-         kernel, it won't hurt.
+#config HIGHPTE
+#      bool "Allocate 3rd-level pagetables from highmem"
+#      depends on HIGHMEM4G || HIGHMEM64G
+#      help
+#        The VM uses one page table entry for each page of physical memory.
+#        For systems with a lot of RAM, this can be wasteful of precious
+#        low memory.  Setting this option will put user-space page table
+#        entries in high memory.
 
 config MTRR
-       bool "MTRR (Memory Type Range Register) support"
-       ---help---
-         On Intel P6 family processors (Pentium Pro, Pentium II and later)
-         the Memory Type Range Registers (MTRRs) may be used to control
-         processor access to memory ranges. This is most useful if you have
-         a video (VGA) card on a PCI or AGP bus. Enabling write-combining
-         allows bus write transfers to be combined into a larger transfer
-         before bursting over the PCI/AGP bus. This can increase performance
-         of image write operations 2.5 times or more. Saying Y here creates a
-         /proc/mtrr file which may be used to manipulate your processor's
-         MTRRs. Typically the X server should use this.
-
-         This code has a reasonably generic interface so that similar
-         control registers on other processors can be easily supported
-         as well:
-
-         The Cyrix 6x86, 6x86MX and M II processors have Address Range
-         Registers (ARRs) which provide a similar functionality to MTRRs. For
-         these, the ARRs are used to emulate the MTRRs.
-         The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
-         MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
-         write-combining. All of these processors are supported by this code
-         and it makes sense to say Y here if you have one of them.
-
-         Saying Y here also fixes a problem with buggy SMP BIOSes which only
-         set the MTRRs for the boot CPU and not for the secondary CPUs. This
-         can lead to all sorts of problems, so it's good to say Y here.
-
-         You can safely say Y even if your machine doesn't have MTRRs, you'll
-         just add about 9 KB to your kernel.
-
-         See <file:Documentation/mtrr.txt> for more information.
-
-config EFI
-       bool "Boot from EFI support (EXPERIMENTAL)"
-       depends on ACPI
-       default n
-       ---help---
-       This enables the the kernel to boot on EFI platforms using
-       system configuration information passed to it from the firmware.
-       This also enables the kernel to use any EFI runtime services that are
-       available (such as the EFI variable services).
-
-       This option is only useful on systems that have EFI firmware
-       and will result in a kernel image that is ~8k larger.  In addition,
-       you must use the latest ELILO loader available at
-       <http://elilo.sourceforge.net> in order to take advantage of
-       kernel initialization using EFI information (neither GRUB nor LILO know
-       anything about EFI).  However, even with this option, the resultant
-       kernel should continue to boot on existing non-EFI platforms.
+       bool
+       depends on XEN_PRIVILEGED_GUEST
+       default y
+
+#config MTRR
+#       bool "MTRR (Memory Type Range Register) support"
+#       ---help---
+#         On Intel P6 family processors (Pentium Pro, Pentium II and later)
+#         the Memory Type Range Registers (MTRRs) may be used to control
+#         processor access to memory ranges. This is most useful if you have
+#         a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+#         allows bus write transfers to be combined into a larger transfer
+#         before bursting over the PCI/AGP bus. This can increase performance
+#         of image write operations 2.5 times or more. Saying Y here creates a
+#         /proc/mtrr file which may be used to manipulate your processor's
+#         MTRRs. Typically the X server should use this.
+#
+#         This code has a reasonably generic interface so that similar
+#         control registers on other processors can be easily supported
+#         as well:
+#
+#         The Cyrix 6x86, 6x86MX and M II processors have Address Range
+#         Registers (ARRs) which provide a similar functionality to MTRRs. For
+#         these, the ARRs are used to emulate the MTRRs.
+#         The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+#         MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
+#         write-combining. All of these processors are supported by this code
+#         and it makes sense to say Y here if you have one of them.
+#
+#         Saying Y here also fixes a problem with buggy SMP BIOSes which only
+#         set the MTRRs for the boot CPU and not for the secondary CPUs. This
+#         can lead to all sorts of problems, so it's good to say Y here.
+#
+#         You can safely say Y even if your machine doesn't have MTRRs, you'll
+#         just add about 9 KB to your kernel.
+#
+#         See <file:Documentation/mtrr.txt> for more information.
 
 config IRQBALANCE
        bool "Enable kernel irq balancing"
-       depends on SMP && X86_IO_APIC
+       depends on SMP && X86_IO_APIC && !XEN
        default y
        help
          The default yes will allow the kernel to do irq load balancing.
@@ -922,186 +714,59 @@ config REGPARM
        generate incorrect output with certain kernel constructs when
        -mregparm=3 is used.
 
-config SECCOMP
-       bool "Enable seccomp to safely compute untrusted bytecode"
-       depends on PROC_FS
+config X86_LOCAL_APIC
+       bool
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
        default y
-       help
-         This kernel feature is useful for number crunching applications
-         that may need to compute untrusted bytecode during their
-         execution. By using pipes or other transports made available to
-         the process as file descriptors supporting the read/write
-         syscalls, it's possible to isolate those applications in
-         their own address space using seccomp. Once seccomp is
-         enabled via /proc/<pid>/seccomp, it cannot be disabled
-         and the task is only allowed to execute a few safe syscalls
-         defined by each seccomp mode.
-
-         If unsure, say Y. Only embedded should say N here.
-
-endmenu
-
-
-menu "Power management options (ACPI, APM)"
-       depends on !X86_VOYAGER
-
-source kernel/power/Kconfig
 
-source "drivers/acpi/Kconfig"
+config X86_IO_APIC
+       bool
+       depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
+       default y
 
-menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
+config X86_VISWS_APIC
+       bool
+       depends on X86_VISWS
+       default y
 
-config APM
-       tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+config HOTPLUG_CPU
+       bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+       depends on SMP && HOTPLUG && EXPERIMENTAL
        ---help---
-         APM is a BIOS specification for saving power using several different
-         techniques. This is mostly useful for battery powered laptops with
-         APM compliant BIOSes. If you say Y here, the system time will be
-         reset after a RESUME operation, the /proc/apm device will provide
-         battery status information, and user-space programs will receive
-         notification of APM "events" (e.g. battery status change).
-
-         If you select "Y" here, you can disable actual use of the APM
-         BIOS by passing the "apm=off" option to the kernel at boot time.
-
-         Note that the APM support is almost completely disabled for
-         machines with more than one CPU.
-
-         In order to use APM, you will need supporting software. For location
-         and more information, read <file:Documentation/pm.txt> and the
-         Battery Powered Linux mini-HOWTO, available from
-         <http://www.tldp.org/docs.html#howto>.
+         Say Y here to experiment with turning CPUs off and on.  CPUs
+         can be controlled through /sys/devices/system/cpu.
 
-         This driver does not spin down disk drives (see the hdparm(8)
-         manpage ("man 8 hdparm") for that), and it doesn't turn off
-         VESA-compliant "green" monitors.
-
-         This driver does not support the TI 4000M TravelMate and the ACER
-         486/DX4/75 because they don't have compliant BIOSes. Many "green"
-         desktop machines also don't have compliant BIOSes, and this driver
-         may cause those machines to panic during the boot phase.
-
-         Generally, if you don't have a battery in your machine, there isn't
-         much point in using this driver and you should say N. If you get
-         random kernel OOPSes or reboots that don't seem to be related to
-         anything, try disabling/enabling this option (or disabling/enabling
-         APM in your BIOS).
-
-         Some other things you should try when experiencing seemingly random,
-         "weird" problems:
-
-         1) make sure that you have enough swap space and that it is
-         enabled.
-         2) pass the "no-hlt" option to the kernel
-         3) switch on floating point emulation in the kernel and pass
-         the "no387" option to the kernel
-         4) pass the "floppy=nodma" option to the kernel
-         5) pass the "mem=4M" option to the kernel (thereby disabling
-         all but the first 4 MB of RAM)
-         6) make sure that the CPU is not over clocked.
-         7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
-         8) disable the cache from your BIOS settings
-         9) install a fan for the video card or exchange video RAM
-         10) install a better fan for the CPU
-         11) exchange RAM chips
-         12) exchange the motherboard.
+         Say N.
 
-         To compile this driver as a module, choose M here: the
-         module will be called apm.
 
-config APM_IGNORE_USER_SUSPEND
-       bool "Ignore USER SUSPEND"
-       depends on APM
-       help
-         This option will ignore USER SUSPEND requests. On machines with a
-         compliant APM BIOS, you want to say N. However, on the NEC Versa M
-         series notebooks, it is necessary to say Y because of a BIOS bug.
-
-config APM_DO_ENABLE
-       bool "Enable PM at boot time"
-       depends on APM
-       ---help---
-         Enable APM features at boot time. From page 36 of the APM BIOS
-         specification: "When disabled, the APM BIOS does not automatically
-         power manage devices, enter the Standby State, enter the Suspend
-         State, or take power saving steps in response to CPU Idle calls."
-         This driver will make CPU Idle calls when Linux is idle (unless this
-         feature is turned off -- see "Do CPU IDLE calls", below). This
-         should always save battery power, but more complicated APM features
-         will be dependent on your BIOS implementation. You may need to turn
-         this option off if your computer hangs at boot time when using APM
-         support, or if it beeps continuously instead of suspending. Turn
-         this off if you have a NEC UltraLite Versa 33/C or a Toshiba
-         T400CDT. This is off by default since most machines do fine without
-         this feature.
-
-config APM_CPU_IDLE
-       bool "Make CPU Idle calls when idle"
-       depends on APM
-       help
-         Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
-         On some machines, this can activate improved power savings, such as
-         a slowed CPU clock rate, when the machine is idle. These idle calls
-         are made after the idle loop has run for some length of time (e.g.,
-         333 mS). On some machines, this will cause a hang at boot time or
-         whenever the CPU becomes idle. (On machines with more than one CPU,
-         this option does nothing.)
-
-config APM_DISPLAY_BLANK
-       bool "Enable console blanking using APM"
-       depends on APM
-       help
-         Enable console blanking using the APM. Some laptops can use this to
-         turn off the LCD backlight when the screen blanker of the Linux
-         virtual console blanks the screen. Note that this is only used by
-         the virtual console screen blanker, and won't turn off the backlight
-         when using the X Window system. This also doesn't have anything to
-         do with your VESA-compliant power-saving monitor. Further, this
-         option doesn't work for all laptops -- it might not turn off your
-         backlight at all, or it might print a lot of errors to the console,
-         especially if you are using gpm.
-
-config APM_RTC_IS_GMT
-       bool "RTC stores time in GMT"
-       depends on APM
-       help
-         Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
-         stores the time in GMT (Greenwich Mean Time). Say N if your RTC
-         stores localtime.
-
-         It is in fact recommended to store GMT in your RTC, because then you
-         don't have to worry about daylight savings time changes. The only
-         reason not to use GMT in your RTC is if you also run a broken OS
-         that doesn't understand GMT.
-
-config APM_ALLOW_INTS
-       bool "Allow interrupts during APM BIOS calls"
-       depends on APM
-       help
-         Normally we disable external interrupts while we are making calls to
-         the APM BIOS as a measure to lessen the effects of a badly behaving
-         BIOS implementation.  The BIOS should reenable interrupts if it
-         needs to.  Unfortunately, some BIOSes do not -- especially those in
-         many of the newer IBM Thinkpads.  If you experience hangs when you
-         suspend, try setting this to Y.  Otherwise, say N.
-
-config APM_REAL_MODE_POWER_OFF
-       bool "Use real mode APM BIOS call to power off"
-       depends on APM
-       help
-         Use real mode APM BIOS calls to switch off the computer. This is
-         a work-around for a number of buggy BIOSes. Switch this option on if
-         your computer crashes instead of powering off properly.
+if XEN_PHYSDEV_ACCESS
 
-endmenu
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
 
-source "arch/i386/kernel/cpu/cpufreq/Kconfig"
+config X86_UP_APIC
+       bool "Local APIC support on uniprocessors"
+       depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+       help
+         A local APIC (Advanced Programmable Interrupt Controller) is an
+         integrated interrupt controller in the CPU. If you have a single-CPU
+         system which has a processor with a local APIC, you can say Y here to
+         enable and use it. If you say Y here even though your machine doesn't
+         have a local APIC, then the kernel will still run with no slowdown at
+         all. The local APIC supports CPU-generated self-interrupts (timer,
+         performance counters), and the NMI watchdog which detects hard
+         lockups.
 
-endmenu
+config X86_UP_IOAPIC
+       bool "IO-APIC support on uniprocessors"
+       depends on X86_UP_APIC
+       help
+         An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
+         SMP-capable replacement for PC-style interrupt controllers. Most
+         SMP systems and many recent uniprocessor systems have one.
 
-menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+         If you have a single-CPU system with an IO-APIC, you can say Y here
+         to use it. If you say Y here even though your machine doesn't have
+         an IO-APIC, then the kernel will still run with no slowdown at all.
 
 config PCI
        bool "PCI support" if !X86_VISWS
@@ -1232,25 +897,7 @@ source "drivers/pci/hotplug/Kconfig"
 
 endmenu
 
-menu "Executable file formats"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/i386/oprofile/Kconfig"
-
-source "arch/i386/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
+endif
 
 #
 # Use the generic interrupt handling code in kernel/irq/:
@@ -1268,10 +915,10 @@ config X86_SMP
        depends on SMP && !X86_VOYAGER
        default y
 
-config X86_HT
-       bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
-       default y
+#config X86_HT
+#      bool
+#      depends on SMP && !(X86_VISWS || X86_VOYAGER)
+#      default y
 
 config X86_BIOS_REBOOT
        bool
@@ -1287,3 +934,22 @@ config PC
        bool
        depends on X86 && !EMBEDDED
        default y
+
+config SECCOMP
+       bool "Enable seccomp to safely compute untrusted bytecode"
+       depends on PROC_FS
+       default y
+       help
+         This kernel feature is useful for number crunching applications
+         that may need to compute untrusted bytecode during their
+         execution. By using pipes or other transports made available to
+         the process as file descriptors supporting the read/write
+         syscalls, it's possible to isolate those applications in
+         their own address space using seccomp. Once seccomp is
+         enabled via /proc/<pid>/seccomp, it cannot be disabled
+         and the task is only allowed to execute a few safe syscalls
+         defined by each seccomp mode.
+
+         If unsure, say Y. Only embedded should say N here.
+
+endmenu
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/acpi/boot.c linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot.c
--- pristine-linux-2.6.12/arch/i386/kernel/acpi/boot.c  2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/acpi/boot.c   2005-07-28 13:17:07.000000000 -0700
@@ -36,6 +36,11 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/mpspec.h>
+#ifdef CONFIG_XEN
+#include <asm/fixmap.h>
+#endif
+
+void (*pm_power_off)(void) = NULL;
 
 #ifdef CONFIG_X86_64
 
@@ -100,7 +105,7 @@ EXPORT_SYMBOL(x86_acpiid_to_apicid);
  */
 enum acpi_irq_model_id         acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
 
 /* rely on all ACPI tables being in the direct mapping */
 char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
@@ -133,8 +138,10 @@ char *__acpi_map_table(unsigned long phy
        unsigned long base, offset, mapped_size;
        int idx;
 
+#ifndef CONFIG_XEN
        if (phys + size < 8*1024*1024) 
                return __va(phys); 
+#endif
 
        offset = phys & (PAGE_SIZE - 1);
        mapped_size = PAGE_SIZE - offset;
@@ -462,18 +469,6 @@ unsigned int acpi_register_gsi(u32 gsi, 
        unsigned int irq;
        unsigned int plat_gsi = gsi;
 
-#ifdef CONFIG_PCI
-       /*
-        * Make sure all (legacy) PCI IRQs are set as level-triggered.
-        */
-       if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-               extern void eisa_set_level_irq(unsigned int irq);
-
-               if (edge_level == ACPI_LEVEL_SENSITIVE)
-                               eisa_set_level_irq(gsi);
-       }
-#endif
-
 #ifdef CONFIG_X86_IO_APIC
        if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
                plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
@@ -513,13 +508,14 @@ acpi_scan_rsdp (
 {
        unsigned long           offset = 0;
        unsigned long           sig_len = sizeof("RSD PTR ") - 1;
+       unsigned long           vstart = (unsigned long)isa_bus_to_virt(start);
 
        /*
         * Scan all 16-byte boundaries of the physical memory region for the
         * RSDP signature.
         */
        for (offset = 0; offset < length; offset += 16) {
-               if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
+               if (strncmp((char *) (vstart + offset), "RSD PTR ", sig_len))
                        continue;
                return (start + offset);
        }
@@ -652,6 +648,8 @@ acpi_find_rsdp (void)
        if (!rsdp_phys)
                rsdp_phys = acpi_scan_rsdp (0xE0000, 0x20000);
 
+       set_fixmap(FIX_ACPI_RSDP_PAGE, rsdp_phys);
+
        return rsdp_phys;
 }
 
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/acpi/Makefile linux-2.6-xen-sparse/arch/i386/kernel/acpi/Makefile
--- pristine-linux-2.6.12/arch/i386/kernel/acpi/Makefile        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/acpi/Makefile 2005-07-28 13:17:07.000000000 -0700
@@ -1,4 +1,13 @@
-obj-$(CONFIG_ACPI_BOOT)                := boot.o
-obj-$(CONFIG_X86_IO_APIC)      += earlyquirk.o
-obj-$(CONFIG_ACPI_SLEEP)       += sleep.o wakeup.o
+obj-$(CONFIG_ACPI_BOOT)                        := boot.o
+c-obj-$(CONFIG_X86_IO_APIC)            += earlyquirk.o
+c-obj-$(CONFIG_ACPI_SLEEP)             += sleep.o wakeup.o
 
+c-link                                  :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/kernel/acpi/$(notdir $@) $@
+
+obj-y  += $(c-obj-y) $(s-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
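
(Side note on the c-obj-y/c-link pattern above: it symlinks the unmodified
pristine sources into the sparse build directory so they compile as-is next
to the Xen-modified files, and clean-files removes the links again. Expanded
by hand for one file it amounts to roughly the following; the srctree path
is hypothetical, so treat this as a sketch of what the rule does, not a
command to run.)

  # Hypothetical expansion of the rule for earlyquirk.o,
  # with $(obj) = arch/i386/kernel/acpi:
  ln -fsn /path/to/srctree/arch/i386/kernel/acpi/earlyquirk.c \
      arch/i386/kernel/acpi/earlyquirk.c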
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/apic.c linux-2.6-xen-sparse/arch/i386/kernel/apic.c
--- pristine-linux-2.6.12/arch/i386/kernel/apic.c       2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/apic.c        2005-07-28 13:17:07.000000000 -0700
@@ -44,8 +44,10 @@
  */
 int apic_verbosity;
 
-
-static void apic_pm_activate(void);
+int get_physical_broadcast(void)
+{
+        return 0xff;
+}
 
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
@@ -65,1212 +67,17 @@ void ack_bad_irq(unsigned int irq)
        ack_APIC_irq();
 }
 
-void __init apic_intr_init(void)
-{
-#ifdef CONFIG_SMP
-       smp_intr_init();
-#endif
-       /* self generated IPI for local APIC timer */
-       set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-       /* IPI vectors for APIC spurious and error interrupts */
-       set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-       set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
-       /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
-       set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-}
-
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer = 0;
-
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
-static int enabled_via_apicbase;
-
-void enable_NMI_through_LVT0 (void * dummy)
-{
-       unsigned int v, ver;
-
-       ver = apic_read(APIC_LVR);
-       ver = GET_APIC_VERSION(ver);
-       v = APIC_DM_NMI;                        /* unmask and set to NMI */
-       if (!APIC_INTEGRATED(ver))              /* 82489DX */
-               v |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write_around(APIC_LVT0, v);
-}
-
-int get_physical_broadcast(void)
-{
-       unsigned int lvr, version;
-       lvr = apic_read(APIC_LVR);
-       version = GET_APIC_VERSION(lvr);
-       if (!APIC_INTEGRATED(version) || version >= 0x14)
-               return 0xff;
-       else
-               return 0xf;
-}
-
-int get_maxlvt(void)
-{
-       unsigned int v, ver, maxlvt;
-
-       v = apic_read(APIC_LVR);
-       ver = GET_APIC_VERSION(v);
-       /* 82489DXs do not report # of LVT entries. */
-       maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
-       return maxlvt;
-}
-
-void clear_local_APIC(void)
-{
-       int maxlvt;
-       unsigned long v;
-
-       maxlvt = get_maxlvt();
-
-       /*
-        * Masking an LVT entry on a P6 can trigger a local APIC error
-        * if the vector is zero. Mask LVTERR first to prevent this.
-        */
-       if (maxlvt >= 3) {
-               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-               apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
-       }
-       /*
-        * Careful: we have to set masks only first to deassert
-        * any level-triggered sources.
-        */
-       v = apic_read(APIC_LVTT);
-       apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT0);
-       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT1);
-       apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
-       if (maxlvt >= 4) {
-               v = apic_read(APIC_LVTPC);
-               apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
-       }
-
-/* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
-       if (maxlvt >= 5) {
-               v = apic_read(APIC_LVTTHMR);
-               apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-       }
-#endif
-       /*
-        * Clean APIC state for other OSs:
-        */
-       apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
-       apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
-       apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
-       if (maxlvt >= 3)
-               apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
-       if (maxlvt >= 4)
-               apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-       if (maxlvt >= 5)
-               apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
-#endif
-       v = GET_APIC_VERSION(apic_read(APIC_LVR));
-       if (APIC_INTEGRATED(v)) {       /* !82489DX */
-               if (maxlvt > 3)         /* Due to Pentium errata 3AP and 11AP. */
-                       apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-}
-
-void __init connect_bsp_APIC(void)
-{
-       if (pic_mode) {
-               /*
-                * Do not trust the local APIC being empty at bootup.
-                */
-               clear_local_APIC();
-               /*
-                * PIC mode, enable APIC mode in the IMCR, i.e.
-                * connect BSP's local APIC to INT and NMI lines.
-                */
-               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-                               "enabling APIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x01, 0x23);
-       }
-       enable_apic_mode();
-}
-
-void disconnect_bsp_APIC(void)
-{
-       if (pic_mode) {
-               /*
-                * Put the board back into PIC mode (has an effect
-                * only on certain older boards).  Note that APIC
-                * interrupts, including IPIs, won't work beyond
-                * this point!  The only exception are INIT IPIs.
-                */
-               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-                               "entering PIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x00, 0x23);
-       }
-}
-
-void disable_local_APIC(void)
-{
-       unsigned long value;
-
-       clear_local_APIC();
-
-       /*
-        * Disable APIC (implies clearing of registers
-        * for 82489DX!).
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_SPIV_APIC_ENABLED;
-       apic_write_around(APIC_SPIV, value);
-
-       if (enabled_via_apicbase) {
-               unsigned int l, h;
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_ENABLE;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
-        * numbers.  If the second one is different, then we
-        * poke at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
-        * Check if the version looks reasonably.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are anymore.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
-
-void __init sync_Arb_IDs(void)
-{
-       /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */
-       unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
-       if (ver >= 0x14)        /* P4 or higher */
-               return;
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-
-       apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
-                               | APIC_DM_INIT);
-}
-
-extern void __error_in_apic_c (void);
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-       unsigned long value, ver;
-
-       /*
-        * Don't do the setup now if we have a SMP BIOS as the
-        * through-I/O-APIC virtual wire mode might be active.
-        */
-       if (smp_found_config || !cpu_has_apic)
-               return;
-
-       value = apic_read(APIC_LVR);
-       ver = GET_APIC_VERSION(value);
-
-       /*
-        * Do not trust the local APIC being empty at bootup.
-        */
-       clear_local_APIC();
-
-       /*
-        * Enable APIC.
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-       
-       /* This bit is reserved on P4/Xeon and should be cleared */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
-               value &= ~APIC_SPIV_FOCUS_DISABLED;
-       else
-               value |= APIC_SPIV_FOCUS_DISABLED;
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write_around(APIC_SPIV, value);
-
-       /*
-        * Set up the virtual wire mode.
-        */
-       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
-       value = APIC_DM_NMI;
-       if (!APIC_INTEGRATED(ver))              /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write_around(APIC_LVT1, value);
-}
-
-void __init setup_local_APIC (void)
-{
-       unsigned long oldvalue, value, ver, maxlvt;
-
-       /* Pound the ESR really hard over the head with a big hammer - mbligh */
-       if (esr_disable) {
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-               apic_write(APIC_ESR, 0);
-       }
-
-       value = apic_read(APIC_LVR);
-       ver = GET_APIC_VERSION(value);
-
-       if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
-               __error_in_apic_c();
-
-       /*
-        * Double-check whether this APIC is really registered.
-        */
-       if (!apic_id_registered())
-               BUG();
-
-       /*
-        * Intel recommends to set DFR, LDR and TPR before enabling
-        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-        * document number 292116).  So here it goes...
-        */
-       init_apic_ldr();
-
-       /*
-        * Set Task Priority to 'accept all'. We never change this
-        * later on.
-        */
-       value = apic_read(APIC_TASKPRI);
-       value &= ~APIC_TPRI_MASK;
-       apic_write_around(APIC_TASKPRI, value);
-
-       /*
-        * Now that we are all set up, enable the APIC
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       /*
-        * Enable APIC
-        */
-       value |= APIC_SPIV_APIC_ENABLED;
-
-       /*
-        * Some unknown Intel IO/APIC (or APIC) errata is biting us with
-        * certain networking cards. If high frequency interrupts are
-        * happening on a particular IOAPIC pin, plus the IOAPIC routing
-        * entry is masked/unmasked at a high rate as well then sooner or
-        * later IOAPIC line gets 'stuck', no more interrupts are received
-        * from the device. If focus CPU is disabled then the hang goes
-        * away, oh well :-(
-        *
-        * [ This bug can be reproduced easily with a level-triggered
-        *   PCI Ne2000 networking cards and PII/PIII processors, dual
-        *   BX chipset. ]
-        */
-       /*
-        * Actually disabling the focus CPU check just makes the hang less
-        * frequent as it makes the interrupt distributon model be more
-        * like LRU than MRU (the short-term load is more even across CPUs).
-        * See also the comment in end_level_ioapic_irq().  --macro
-        */
-#if 1
-       /* Enable focus processor (bit==0) */
-       value &= ~APIC_SPIV_FOCUS_DISABLED;
-#else
-       /* Disable focus processor (bit==1) */
-       value |= APIC_SPIV_FOCUS_DISABLED;
-#endif
-       /*
-        * Set spurious IRQ vector
-        */
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write_around(APIC_SPIV, value);
-
-       /*
-        * Set up LVT0, LVT1:
-        *
-        * set up through-local-APIC on the BP's LINT0. This is not
-        * strictly necessery in pure symmetric-IO mode, but sometimes
-        * we delegate interrupts to the 8259A.
-        */
-       /*
-        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-        */
-       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-       if (!smp_processor_id() && (pic_mode || !value)) {
-               value = APIC_DM_EXTINT;
-               apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-                               smp_processor_id());
-       } else {
-               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-               apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-                               smp_processor_id());
-       }
-       apic_write_around(APIC_LVT0, value);
-
-       /*
-        * only the BP should see the LINT1 NMI signal, obviously.
-        */
-       if (!smp_processor_id())
-               value = APIC_DM_NMI;
-       else
-               value = APIC_DM_NMI | APIC_LVT_MASKED;
-       if (!APIC_INTEGRATED(ver))              /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write_around(APIC_LVT1, value);
-
-       if (APIC_INTEGRATED(ver) && !esr_disable) {             /* !82489DX */
-               maxlvt = get_maxlvt();
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               oldvalue = apic_read(APIC_ESR);
-
-               value = ERROR_APIC_VECTOR;      // enables sending errors
-               apic_write_around(APIC_LVTERR, value);
-               /*
-                * spec says clear errors after enabling vector.
-                */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                               "vector: 0x%08lx  after: 0x%08lx\n",
-                               oldvalue, value);
-       } else {
-               if (esr_disable)        
-                       /* 
-                        * Something untraceble is creating bad interrupts on 
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk("Leaving ESR disabled.\n");
-               else 
-                       printk("No ESR for 82489DX.\n");
-       }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               setup_apic_nmi_watchdog();
-       apic_pm_activate();
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default,
- * disable it again before re-entering the BIOS on shutdown.
- * Otherwise the BIOS may get confused and not power-off.
- */
-void lapic_shutdown(void)
-{
-       if (!cpu_has_apic || !enabled_via_apicbase)
-               return;
-
-       local_irq_disable();
-       disable_local_APIC();
-       local_irq_enable();
-}
-
-#ifdef CONFIG_PM
-
-static struct {
-       int active;
-       /* r/w apic fields */
-       unsigned int apic_id;
-       unsigned int apic_taskpri;
-       unsigned int apic_ldr;
-       unsigned int apic_dfr;
-       unsigned int apic_spiv;
-       unsigned int apic_lvtt;
-       unsigned int apic_lvtpc;
-       unsigned int apic_lvt0;
-       unsigned int apic_lvt1;
-       unsigned int apic_lvterr;
-       unsigned int apic_tmict;
-       unsigned int apic_tdcr;
-       unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       unsigned long flags;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       apic_pm_state.apic_id = apic_read(APIC_ID);
-       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-       apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-       apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-       
-       local_irq_save(flags);
-       disable_local_APIC();
-       local_irq_restore(flags);
-       return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-       unsigned int l, h;
-       unsigned long flags;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       local_irq_save(flags);
-
-       /*
-        * Make sure the APICBASE points to the right address
-        *
-        * FIXME! This will be wrong if we ever support suspend on
-        * SMP! We'll need to do this as part of the CPU restore!
-        */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       l &= ~MSR_IA32_APICBASE_BASE;
-       l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-       wrmsr(MSR_IA32_APICBASE, l, h);
-
-       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-       apic_write(APIC_ID, apic_pm_state.apic_id);
-       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-       apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-       apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-       local_irq_restore(flags);
-       return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-       set_kset_name("lapic"),
-       .resume         = lapic_resume,
-       .suspend        = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-       .id     = 0,
-       .cls    = &lapic_sysclass,
-};
-
-static void __init apic_pm_activate(void)
-{
-       apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-       int error;
-
-       if (!cpu_has_apic)
-               return 0;
-       /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-       error = sysdev_class_register(&lapic_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic);
-       return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else  /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- */
-
-/*
- * Knob to control our willingness to enable the local APIC.
- */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static int __init lapic_disable(char *str)
-{
-       enable_local_apic = -1;
-       clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-       return 0;
-}
-__setup("nolapic", lapic_disable);
-
-static int __init lapic_enable(char *str)
-{
-       enable_local_apic = 1;
-       return 0;
-}
-__setup("lapic", lapic_enable);
-
-static int __init apic_set_verbosity(char *str)
-{
-       if (strcmp("debug", str) == 0)
-               apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", str) == 0)
-               apic_verbosity = APIC_VERBOSE;
-       else
-               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-                               " use apic=verbose or apic=debug", str);
-
-       return 0;
-}
-
-__setup("apic=", apic_set_verbosity);
-
-static int __init detect_init_APIC (void)
-{
-       u32 h, l, features;
-       extern void get_cpu_vendor(struct cpuinfo_x86*);
-
-       /* Disabled by kernel option? */
-       if (enable_local_apic < 0)
-               return -1;
-
-       /* Workaround for us being called before identify_cpu(). */
-       get_cpu_vendor(&boot_cpu_data);
-
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_AMD:
-               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-                   (boot_cpu_data.x86 == 15))      
-                       break;
-               goto no_apic;
-       case X86_VENDOR_INTEL:
-               if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-                   (boot_cpu_data.x86 == 5 && cpu_has_apic))
-                       break;
-               goto no_apic;
-       default:
-               goto no_apic;
-       }
-
-       if (!cpu_has_apic) {
-               /*
-                * Over-ride BIOS and try to enable the local
-                * APIC only if "lapic" specified.
-                */
-               if (enable_local_apic <= 0) {
-                       printk("Local APIC disabled by BIOS -- "
-                              "you can enable it with \"lapic\"\n");
-                       return -1;
-               }
-               /*
-                * Some BIOSes disable the local APIC in the
-                * APIC_BASE MSR. This can only be done in
-                * software for Intel P6 or later and AMD K7
-                * (Model > 1) or later.
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-                       printk("Local APIC disabled by BIOS -- reenabling.\n");
-                       l &= ~MSR_IA32_APICBASE_BASE;
-                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-                       wrmsr(MSR_IA32_APICBASE, l, h);
-                       enabled_via_apicbase = 1;
-               }
-       }
-       /*
-        * The APIC feature bit should now be enabled
-        * in `cpuid'
-        */
-       features = cpuid_edx(1);
-       if (!(features & (1 << X86_FEATURE_APIC))) {
-               printk("Could not enable APIC!\n");
-               return -1;
-       }
-       set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-       /* The BIOS may have set up the APIC at some other address */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       if (l & MSR_IA32_APICBASE_ENABLE)
-               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-       if (nmi_watchdog != NMI_NONE)
-               nmi_watchdog = NMI_LOCAL_APIC;
-
-       printk("Found and enabled local APIC!\n");
-
-       apic_pm_activate();
-
-       return 0;
-
-no_apic:
-       printk("No local APIC present or hardware disabled\n");
-       return -1;
-}
-
-void __init init_apic_mappings(void)
-{
-       unsigned long apic_phys;
-
-       /*
-        * If no local APIC can be found then set up a fake all
-        * zeroes page to simulate the local APIC and another
-        * one for the IO-APIC.
-        */
-       if (!smp_found_config && detect_init_APIC()) {
-               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-               apic_phys = __pa(apic_phys);
-       } else
-               apic_phys = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-              apic_phys);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       if (boot_cpu_physical_apicid == -1U)
-               boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
-#ifdef CONFIG_X86_IO_APIC
-       {
-               unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
-               int i;
-
-               for (i = 0; i < nr_ioapics; i++) {
-                       if (smp_found_config) {
-                               ioapic_phys = mp_ioapics[i].mpc_apicaddr;
-                               if (!ioapic_phys) {
-                                       printk(KERN_ERR
-                                              "WARNING: bogus zero IO-APIC "
-                                              "address found in MPTABLE, "
-                                              "disabling IO/APIC support!\n");
-                                       smp_found_config = 0;
-                                       skip_ioapic_setup = 1;
-                                       goto fake_ioapic_page;
-                               }
-                       } else {
-fake_ioapic_page:
-                               ioapic_phys = (unsigned long)
-                                             alloc_bootmem_pages(PAGE_SIZE);
-                               ioapic_phys = __pa(ioapic_phys);
-                       }
-                       set_fixmap_nocache(idx, ioapic_phys);
-                       printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
-                              __fix_to_virt(idx), ioapic_phys);
-                       idx++;
-               }
-       }
-#endif
-}
-
-/*
- * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
- * per second. We assume that the caller has already set up the local
- * APIC.
- *
- * The APIC timer is not exactly in sync with the external timer chip; it
- * closely follows bus clocks.
- */
-
-/*
- * The timer chip is already set up at HZ interrupts per second here,
- * but we do not accept timer interrupts yet. We only allow the BP
- * to calibrate.
- */
-static unsigned int __init get_8254_timer_count(void)
-{
-       extern spinlock_t i8253_lock;
-       unsigned long flags;
-
-       unsigned int count;
-
-       spin_lock_irqsave(&i8253_lock, flags);
-
-       outb_p(0x00, PIT_MODE);
-       count = inb_p(PIT_CH0);
-       count |= inb_p(PIT_CH0) << 8;
-
-       spin_unlock_irqrestore(&i8253_lock, flags);
-
-       return count;
-}
-
-/* next tick in 8254 can be caught by catching timer wraparound */
-static void __init wait_8254_wraparound(void)
-{
-       unsigned int curr_count, prev_count;
-
-       curr_count = get_8254_timer_count();
-       do {
-               prev_count = curr_count;
-               curr_count = get_8254_timer_count();
-
-               /* workaround for broken Mercury/Neptune */
-               if (prev_count >= curr_count + 0x100)
-                       curr_count = get_8254_timer_count();
-
-       } while (prev_count >= curr_count);
-}
-
-/*
- * Default initialization for 8254 timers. If we use other timers like HPET,
- * we override this later
- */
-void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound;
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-
-#define APIC_DIVISOR 16
-
-static void __setup_APIC_LVTT(unsigned int clocks)
-{
-       unsigned int lvtt_value, tmp_value, ver;
-
-       ver = GET_APIC_VERSION(apic_read(APIC_LVR));
-       lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
-       if (!APIC_INTEGRATED(ver))
-               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-       apic_write_around(APIC_LVTT, lvtt_value);
-
-       /*
-        * Divide PICLK by 16
-        */
-       tmp_value = apic_read(APIC_TDCR);
-       apic_write_around(APIC_TDCR, (tmp_value
-                               & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
-                               | APIC_TDR_DIV_16);
-
-       apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
-}
-
-static void __init setup_APIC_timer(unsigned int clocks)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       /*
-        * Wait for IRQ0's slice:
-        */
-       wait_timer_tick();
-
-       __setup_APIC_LVTT(clocks);
-
-       local_irq_restore(flags);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have the local timer irqs in sync. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-static int __init calibrate_APIC_clock(void)
-{
-       unsigned long long t1 = 0, t2 = 0;
-       long tt1, tt2;
-       long result;
-       int i;
-       const int LOOPS = HZ/10;
-
-       apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
-
-       /*
-        * Put whatever arbitrary (but long enough) timeout
-        * value into the APIC clock, we just want to get the
-        * counter running for calibration.
-        */
-       __setup_APIC_LVTT(1000000000);
-
-       /*
-        * The timer chip counts down to zero. Let's wait
-        * for a wraparound to start exact measurement:
-        * (the current tick might have been already half done)
-        */
-
-       wait_timer_tick();
-
-       /*
-        * We wrapped around just now. Let's start:
-        */
-       if (cpu_has_tsc)
-               rdtscll(t1);
-       tt1 = apic_read(APIC_TMCCT);
-
-       /*
- * Let's wait LOOPS wraparounds:
-        */
-       for (i = 0; i < LOOPS; i++)
-               wait_timer_tick();
-
-       tt2 = apic_read(APIC_TMCCT);
-       if (cpu_has_tsc)
-               rdtscll(t2);
-
-       /*
-        * The APIC bus clock counter is 32 bits only, it
-        * might have overflown, but note that we use signed
-        * longs, thus no extra care needed.
-        *
-        * underflown to be exact, as the timer counts down ;)
-        */
-
-       result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
-
-       if (cpu_has_tsc)
-               apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
-                       "%ld.%04ld MHz.\n",
-                       ((long)(t2-t1)/LOOPS)/(1000000/HZ),
-                       ((long)(t2-t1)/LOOPS)%(1000000/HZ));
-
-       apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
-               "%ld.%04ld MHz.\n",
-               result/(1000000/HZ),
-               result%(1000000/HZ));
-
-       return result;
-}
-
-static unsigned int calibration_result;
-
-void __init setup_boot_APIC_clock(void)
-{
-       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
-       using_apic_timer = 1;
-
-       local_irq_disable();
-
-       calibration_result = calibrate_APIC_clock();
-       /*
-        * Now set up the timer for real.
-        */
-       setup_APIC_timer(calibration_result);
-
-       local_irq_enable();
-}
-
-void __init setup_secondary_APIC_clock(void)
-{
-       setup_APIC_timer(calibration_result);
-}
-
-void __init disable_APIC_timer(void)
-{
-       if (using_apic_timer) {
-               unsigned long v;
-
-               v = apic_read(APIC_LVTT);
-               apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
-       }
-}
-
-void enable_APIC_timer(void)
-{
-       if (using_apic_timer) {
-               unsigned long v;
-
-               v = apic_read(APIC_LVTT);
-               apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
-       }
-}
-
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
-       int i;
-
-       /*
-        * Sanity check. [at least 500 APIC cycles should be
-        * between APIC interrupts as a rule of thumb, to avoid
-        * irqs flooding us]
-        */
-       if ( (!multiplier) || (calibration_result/multiplier < 500))
-               return -EINVAL;
-
-       /* 
-        * Set the new multiplier for each CPU. CPUs don't start using the
-        * new values until the next timer interrupt in which they do process
-        * accounting. At that time they also adjust their APIC timers
-        * accordingly.
-        */
-       for (i = 0; i < NR_CPUS; ++i)
-               per_cpu(prof_multiplier, i) = multiplier;
-
-       return 0;
-}
-
-#undef APIC_DIVISOR
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
-{
-       int cpu = smp_processor_id();
-
-       profile_tick(CPU_PROFILING, regs);
-       if (--per_cpu(prof_counter, cpu) <= 0) {
-               /*
-                * The multiplier may have changed since the last time we got
-                * to this point as a result of the user writing to
-                * /proc/profile. In this case we need to adjust the APIC
-                * timer accordingly.
-                *
-                * Interrupts are already masked off at this point.
-                */
-               per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
-               if (per_cpu(prof_counter, cpu) !=
-                                       per_cpu(prof_old_multiplier, cpu)) {
-                       __setup_APIC_LVTT(
-                                       calibration_result/
-                                       per_cpu(prof_counter, cpu));
-                       per_cpu(prof_old_multiplier, cpu) =
-                                               per_cpu(prof_counter, cpu);
-               }
-
-#ifdef CONFIG_SMP
-               update_process_times(user_mode(regs));
-#endif
-       }
-
-       /*
-        * We take the 'long' return path, and there every subsystem
-        * grabs the appropriate locks (kernel lock / irq lock).
-        *
-        * we might want to decouple profiling from the 'long path',
-        * and do the profiling totally in assembly.
-        *
-        * Currently this isn't too much of an issue (performance wise),
-        * we can take more than 100K local irqs per second on a 100 MHz P5.
-        */
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-       int cpu = smp_processor_id();
-
-       /*
-        * the NMI deadlock-detector uses this.
-        */
-       per_cpu(irq_stat, cpu).apic_timer_irqs++;
-
-       /*
-        * NOTE! We'd better ACK the irq immediately,
-        * because timer handling can be slow.
-        */
-       ack_APIC_irq();
-       /*
-        * update_process_times() expects us to have done irq_enter().
-        * Besides, if we don't, timer interrupts ignore the global
-        * interrupt lock, which is the WrongThing (tm) to do.
-        */
-       irq_enter();
-       smp_local_timer_interrupt(regs);
-       irq_exit();
-}
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-fastcall void smp_spurious_interrupt(struct pt_regs *regs)
-{
-       unsigned long v;
-
-       irq_enter();
-       /*
-        * Check if this really is a spurious interrupt and ACK it
-        * if it is a vectored one.  Just in case...
-        * Spurious interrupts should not be ACKed.
-        */
-       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-               ack_APIC_irq();
-
-       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
-       printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never 
happen.\n",
-                       smp_processor_id());
-       irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-
-fastcall void smp_error_interrupt(struct pt_regs *regs)
-{
-       unsigned long v, v1;
-
-       irq_enter();
-       /* First tickle the hardware, only then report what went on. -- REW */
-       v = apic_read(APIC_ESR);
-       apic_write(APIC_ESR, 0);
-       v1 = apic_read(APIC_ESR);
-       ack_APIC_irq();
-       atomic_inc(&irq_err_count);
-
-       /* Here is what the APIC error bits mean:
-          0: Send CS error
-          1: Receive CS error
-          2: Send accept error
-          3: Receive accept error
-          4: Reserved
-          5: Send illegal vector
-          6: Received illegal vector
-          7: Illegal register address
-       */
-       printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
-               smp_processor_id(), v , v1);
-       irq_exit();
-}
-
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
 int __init APIC_init_uniprocessor (void)
 {
-       if (enable_local_apic < 0)
-               clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-
-       if (!smp_found_config && !cpu_has_apic)
-               return -1;
-
-       /*
-        * Complain if the BIOS pretends there is one.
-        */
-       if (!cpu_has_apic && 
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-                       boot_cpu_physical_apicid);
-               return -1;
-       }
-
-       verify_local_APIC();
-
-       connect_bsp_APIC();
-
-       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
-
-       setup_local_APIC();
-
 #ifdef CONFIG_X86_IO_APIC
        if (smp_found_config)
                if (!skip_ioapic_setup && nr_ioapics)
                        setup_IO_APIC();
 #endif
-       setup_boot_APIC_clock();
 
        return 0;
 }
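
As an aside for reviewers: nearly all of apic.c drops out because Xen owns
the physical local APIC, so the guest no longer programs LVT entries or
calibrates the APIC timer itself.  The removed calibrate_APIC_clock() above
is just counter arithmetic; here is a compilable userspace rendering of that
arithmetic (the TMCCT sample values are made up, APIC_DIVISOR and HZ mirror
the removed code):

    #include <stdio.h>

    #define APIC_DIVISOR 16         /* TDCR divides the bus clock by 16 */
    #define HZ           100        /* timer ticks per second */

    int main(void)
    {
        const int loops = HZ / 10;          /* LOOPS in the removed code */
        long tt1 = 4000000, tt2 = 1000000;  /* hypothetical TMCCT samples
                                               taken LOOPS ticks apart */

        /* Bus clocks elapsed per tick, scaled back up by the divider --
         * the "result" that calibrate_APIC_clock() reports as the host
         * bus clock speed. */
        long result = (tt1 - tt2) * APIC_DIVISOR / loops;

        printf("host bus clock: %ld.%04ld MHz\n",
               result / (1000000 / HZ), result % (1000000 / HZ));
        return 0;
    }
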
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/cpu/common.c linux-2.6-xen-sparse/arch/i386/kernel/cpu/common.c
--- pristine-linux-2.6.12/arch/i386/kernel/cpu/common.c 2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/cpu/common.c  2005-07-28 13:17:07.000000000 -0700
@@ -15,6 +15,7 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #endif
+#include <asm-xen/hypervisor.h>
 
 #include "cpu.h"
 
@@ -32,6 +33,8 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
 
 extern void mcheck_init(struct cpuinfo_x86 *c);
 
+extern void machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c);
+
 extern int disable_pse;
 
 static void default_init(struct cpuinfo_x86 * c)
@@ -409,6 +412,8 @@ void __init identify_cpu(struct cpuinfo_
                                c->x86_vendor, c->x86_model);
        }
 
+       machine_specific_modify_cpu_capabilities(c);
+
        /* Now the feature flags better reflect actual CPU features! */
 
        printk(KERN_DEBUG "CPU: After all inits, caps:");
@@ -554,6 +559,24 @@ void __init early_cpu_init(void)
        disable_pse = 1;
 #endif
 }
+
+void __init cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
+{
+       unsigned long frames[16];
+       unsigned long va;
+       int f;
+
+       for (va = gdt_descr->address, f = 0;
+            va < gdt_descr->address + gdt_descr->size;
+            va += PAGE_SIZE, f++) {
+               frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+               make_page_readonly((void *)va);
+       }
+       if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
+               BUG();
+       lgdt_finish();
+}
+
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
@@ -565,7 +588,6 @@ void __init cpu_init (void)
        int cpu = smp_processor_id();
        struct tss_struct * t = &per_cpu(init_tss, cpu);
        struct thread_struct *thread = &current->thread;
-       __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
 
        if (cpu_test_and_set(cpu, cpu_initialized)) {
                printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -573,7 +595,7 @@ void __init cpu_init (void)
        }
        printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 
-       if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+       if (cpu_has_vme || cpu_has_de)
                clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
        if (tsc_disable && cpu_has_tsc) {
                printk(KERN_NOTICE "Disabling TSC...\n");
@@ -583,30 +605,12 @@ void __init cpu_init (void)
        }
 
        /*
-        * Initialize the per-CPU GDT with the boot GDT,
-        * and set up the GDT descriptor:
-        */
-       memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
-              GDT_SIZE);
-
-       /* Set up GDT entry for 16bit stack */
-       *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
-               ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
-               ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
-               (CPU_16BIT_STACK_SIZE - 1);
-
-       cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
-       cpu_gdt_descr[cpu].address =
-           (unsigned long)&per_cpu(cpu_gdt_table, cpu);
-
-       /*
         * Set up the per-thread TLS descriptor cache:
         */
-       memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
-               GDT_ENTRY_TLS_ENTRIES * 8);
+       memcpy(thread->tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
+              GDT_ENTRY_TLS_ENTRIES * 8);
 
-       __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
-       __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+       cpu_gdt_init(&cpu_gdt_descr[cpu]);
 
        /*
         * Delete NT
@@ -623,19 +627,15 @@ void __init cpu_init (void)
        enter_lazy_tlb(&init_mm, current);
 
        load_esp0(t, thread);
-       set_tss_desc(cpu,t);
-       load_TR_desc();
-       load_LDT(&init_mm.context);
 
-       /* Set up doublefault TSS pointer in the GDT */
-       __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+       load_LDT(&init_mm.context);
 
        /* Clear %fs and %gs. */
        asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
 
        /* Clear all 6 debug registers: */
 
-#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+#define CD(register) HYPERVISOR_set_debugreg(register, 0)
 
        CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
 
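
A note on the common.c hunk above: a Xen guest cannot simply lgdt an
arbitrary table.  cpu_gdt_init() instead makes every page of the GDT
read-only (so that Xen can vet the descriptors and trust them not to
change) and registers the table by machine frame number through
HYPERVISOR_set_gdt(), with the entry count derived as size/8 since each
segment descriptor is 8 bytes.  A rough standalone sketch of just the
frame-list construction -- the struct and the virt_to_machine() stub below
are placeholders, not the real kernel/Xen API:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE  4096UL
    #define PAGE_SHIFT 12

    struct gdt_descr { unsigned long address; unsigned long size; };

    /* Placeholder: the kernel consults the phys-to-machine table here. */
    static unsigned long virt_to_machine(unsigned long va) { return va; }

    /* One machine frame number per page the GDT occupies. */
    static size_t gdt_frames(const struct gdt_descr *d,
                             unsigned long frames[16],
                             unsigned long *entries)
    {
        unsigned long va;
        size_t f = 0;

        for (va = d->address; va < d->address + d->size; va += PAGE_SIZE)
            frames[f++] = virt_to_machine(va) >> PAGE_SHIFT;

        *entries = d->size / 8;     /* 8-byte segment descriptors */
        return f;
    }

    int main(void)
    {
        struct gdt_descr d = { 0x100000, 32 * 8 };  /* fake 32-entry GDT */
        unsigned long frames[16], entries;
        size_t n = gdt_frames(&d, frames, &entries);

        printf("%zu frame(s), %lu entries, mfn[0]=%#lx\n",
               n, entries, frames[0]);
        return 0;
    }
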
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/cpu/Makefile linux-2.6-xen-sparse/arch/i386/kernel/cpu/Makefile
--- pristine-linux-2.6.12/arch/i386/kernel/cpu/Makefile 2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/cpu/Makefile  2005-07-28 13:17:07.000000000 -0700
@@ -2,18 +2,30 @@
 # Makefile for x86-compatible CPU details and quirks
 #
 
-obj-y  :=      common.o proc.o
+CFLAGS += -Iarch/i386/kernel/cpu
 
-obj-y  +=      amd.o
-obj-y  +=      cyrix.o
-obj-y  +=      centaur.o
-obj-y  +=      transmeta.o
-obj-y  +=      intel.o intel_cacheinfo.o
-obj-y  +=      rise.o
-obj-y  +=      nexgen.o
-obj-y  +=      umc.o
+obj-y  :=      common.o
+c-obj-y        +=      proc.o
 
-obj-$(CONFIG_X86_MCE)  +=      mcheck/
+c-obj-y        +=      amd.o
+c-obj-y        +=      cyrix.o
+c-obj-y        +=      centaur.o
+c-obj-y        +=      transmeta.o
+c-obj-y        +=      intel.o intel_cacheinfo.o
+c-obj-y        +=      rise.o
+c-obj-y        +=      nexgen.o
+c-obj-y        +=      umc.o
+
+#obj-$(CONFIG_X86_MCE) +=      ../../../../i386/kernel/cpu/mcheck/
 
 obj-$(CONFIG_MTRR)     +=      mtrr/
-obj-$(CONFIG_CPU_FREQ) +=      cpufreq/
+#obj-$(CONFIG_CPU_FREQ)        +=      ../../../../i386/kernel/cpu/cpufreq/
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/kernel/cpu/$(notdir $@) $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
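
For anyone puzzled by the Makefile churn: this is the sparse-tree build
convention.  Files the Xen port actually modifies stay in obj-y and live in
linux-2.6-xen-sparse; everything listed under c-obj-y remains pristine and
is symlinked in from the native arch/i386 tree at build time by the
$(patsubst %.o,$(obj)/%.c,...) rule (the "@ln -fsn $(srctree)/..." command),
and clean-files removes the links again.  The mtrr/ Makefile further down
follows the same pattern.
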
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/cpu/mtrr/main.c linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main.c
--- pristine-linux-2.6.12/arch/i386/kernel/cpu/mtrr/main.c      2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main.c       2005-07-28 13:17:07.000000000 -0700
@@ -1,116 +1,46 @@
-/*  Generic MTRR (Memory Type Range Register) driver.
-
-    Copyright (C) 1997-2000  Richard Gooch
-    Copyright (c) 2002      Patrick Mochel
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public
-    License as published by the Free Software Foundation; either
-    version 2 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Library General Public License for more details.
-
-    You should have received a copy of the GNU Library General Public
-    License along with this library; if not, write to the Free
-    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-    Richard Gooch may be reached by email at  rgooch@xxxxxxxxxxxxx
-    The postal address is:
-      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-
-    Source: "Pentium Pro Family Developer's Manual, Volume 3:
-    Operating System Writer's Guide" (Intel document number 242692),
-    section 11.11.7
-
-    This was cleaned and made readable by Patrick Mochel <mochel@xxxxxxxx> 
-    on 6-7 March 2002. 
-    Source: Intel Architecture Software Developers Manual, Volume 3: 
-    System Programming Guide; Section 9.11. (1997 edition - PPro).
-*/
-
-#include <linux/module.h>
 #include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
 
 #include <asm/mtrr.h>
-
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
 #include "mtrr.h"
 
-#define MTRR_VERSION            "2.0 (20020519)"
-
-u32 num_var_ranges = 0;
-
-unsigned int *usage_table;
-static DECLARE_MUTEX(main_lock);
-
-u32 size_or_mask, size_and_mask;
-
-static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
-
-struct mtrr_ops * mtrr_if = NULL;
-
-static void set_mtrr(unsigned int reg, unsigned long base,
-                    unsigned long size, mtrr_type type);
+void generic_get_mtrr(unsigned int reg, unsigned long *base,
+                     unsigned int *size, mtrr_type * type)
+{
+       dom0_op_t op;
 
-extern int arr3_protected;
+       op.cmd = DOM0_READ_MEMTYPE;
+       op.u.read_memtype.reg = reg;
+       (void)HYPERVISOR_dom0_op(&op);
 
-void set_mtrr_ops(struct mtrr_ops * ops)
-{
-       if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
-               mtrr_ops[ops->vendor] = ops;
+       *size = op.u.read_memtype.nr_pfns;
+       *base = op.u.read_memtype.pfn;
+       *type = op.u.read_memtype.type;
 }
 
-/*  Returns non-zero if we have the write-combining memory type  */
-static int have_wrcomb(void)
-{
-       struct pci_dev *dev;
-       u8 rev;
-       
-       if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
-               /* ServerWorks LE chipsets < rev 6 have problems with write-combining
-                  Don't allow it and leave room for other chipsets to be tagged */
-               if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
-                   dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
-                       pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
-                       if (rev <= 5) {
-                               printk(KERN_INFO "mtrr: Serverworks LE rev < 6 
detected. Write-combining disabled.\n");
-                               pci_dev_put(dev);
-                               return 0;
-                       }
-               }
-               /* Intel 450NX errata # 23. Non ascending cacheline evictions to
-                  write combining memory may result in data corruption */
-               if (dev->vendor == PCI_VENDOR_ID_INTEL &&
-                   dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
-                       printk(KERN_INFO "mtrr: Intel 450NX MMC detected. 
Write-combining disabled.\n");
-                       pci_dev_put(dev);
-                       return 0;
-               }
-               pci_dev_put(dev);
-       }               
-       return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
-}
+struct mtrr_ops generic_mtrr_ops = {
+       .use_intel_if      = 1,
+       .get               = generic_get_mtrr,
+};
+
+struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
+unsigned int num_var_ranges;
+unsigned int *usage_table;
 
-/*  This function returns the number of variable MTRRs  */
 static void __init set_num_var_ranges(void)
 {
-       unsigned long config = 0, dummy;
+       dom0_op_t op;
 
-       if (use_intel()) {
-               rdmsr(MTRRcap_MSR, config, dummy);
-       } else if (is_cpu(AMD))
-               config = 2;
-       else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
-               config = 8;
-       num_var_ranges = config & 0xff;
+       for (num_var_ranges = 0; ; num_var_ranges++) {
+               op.cmd = DOM0_READ_MEMTYPE;
+               op.u.read_memtype.reg = num_var_ranges;
+               if (HYPERVISOR_dom0_op(&op) != 0)
+                       break;
+       }
 }
 
 static void __init init_table(void)
@@ -124,293 +54,28 @@ static void __init init_table(void)
                return;
        }
        for (i = 0; i < max; i++)
-               usage_table[i] = 1;
-}
-
-struct set_mtrr_data {
-       atomic_t        count;
-       atomic_t        gate;
-       unsigned long   smp_base;
-       unsigned long   smp_size;
-       unsigned int    smp_reg;
-       mtrr_type       smp_type;
-};
-
-#ifdef CONFIG_SMP
-
-static void ipi_handler(void *info)
-/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
-    [RETURNS] Nothing.
-*/
-{
-       struct set_mtrr_data *data = info;
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       atomic_dec(&data->count);
-       while(!atomic_read(&data->gate))
-               cpu_relax();
-
-       /*  The master has cleared me to execute  */
-       if (data->smp_reg != ~0U) 
-               mtrr_if->set(data->smp_reg, data->smp_base, 
-                            data->smp_size, data->smp_type);
-       else
-               mtrr_if->set_all();
-
-       atomic_dec(&data->count);
-       while(atomic_read(&data->gate))
-               cpu_relax();
-
-       atomic_dec(&data->count);
-       local_irq_restore(flags);
+               usage_table[i] = 0;
 }
 
-#endif
-
-/**
- * set_mtrr - update mtrrs on all processors
- * @reg:       mtrr in question
- * @base:      mtrr base
- * @size:      mtrr size
- * @type:      mtrr type
- *
- * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
- * 
- * 1. Send IPI to do the following:
- * 2. Disable Interrupts
- * 3. Wait for all procs to do so 
- * 4. Enter no-fill cache mode
- * 5. Flush caches
- * 6. Clear PGE bit
- * 7. Flush all TLBs
- * 8. Disable all range registers
- * 9. Update the MTRRs
- * 10. Enable all range registers
- * 11. Flush all TLBs and caches again
- * 12. Enter normal cache mode and reenable caching
- * 13. Set PGE 
- * 14. Wait for buddies to catch up
- * 15. Enable interrupts.
- * 
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
- * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
- * Meanwhile, they are waiting for that flag to be set. Once it's set, each 
- * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it
- * differently, so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate to
- * be reset.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
- * Everyone then enables interrupts and we all continue on.
- *
- * Note that the mechanism is the same for UP systems, too; all the SMP stuff
- * becomes nops.
- */
-static void set_mtrr(unsigned int reg, unsigned long base,
-                    unsigned long size, mtrr_type type)
-{
-       struct set_mtrr_data data;
-       unsigned long flags;
-
-       data.smp_reg = reg;
-       data.smp_base = base;
-       data.smp_size = size;
-       data.smp_type = type;
-       atomic_set(&data.count, num_booting_cpus() - 1);
-       atomic_set(&data.gate,0);
-
-       /*  Start the ball rolling on other CPUs  */
-       if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
-               panic("mtrr: timed out waiting for other CPUs\n");
-
-       local_irq_save(flags);
-
-       while(atomic_read(&data.count))
-               cpu_relax();
-
-       /* ok, reset count and toggle gate */
-       atomic_set(&data.count, num_booting_cpus() - 1);
-       atomic_set(&data.gate,1);
-
-       /* do our MTRR business */
-
-       /* HACK!
-        * We use this same function to initialize the mtrrs on boot.
-        * The state of the boot cpu's mtrrs has been saved, and we want
-        * to replicate across all the APs. 
-        * If we're doing that @reg is set to something special...
-        */
-       if (reg != ~0U) 
-               mtrr_if->set(reg,base,size,type);
-
-       /* wait for the others */
-       while(atomic_read(&data.count))
-               cpu_relax();
-
-       atomic_set(&data.count, num_booting_cpus() - 1);
-       atomic_set(&data.gate,0);
-
-       /*
-        * Wait here for everyone to have seen the gate change
-        * So we're the last ones to touch 'data'
-        */
-       while(atomic_read(&data.count))
-               cpu_relax();
-
-       local_irq_restore(flags);
-}
-
-/**
- *     mtrr_add_page - Add a memory type region
- *     @base: Physical base address of region in pages (4 KB)
- *     @size: Physical size of region in pages (4 KB)
- *     @type: Type of MTRR desired
- *     @increment: If this is true do usage counting on the region
- *
- *     Memory type region registers control the caching on newer Intel and
- *     non-Intel processors. This function allows drivers to request that an
- *     MTRR be added. The details and hardware specifics of each processor's
- *     implementation are hidden from the caller, but nevertheless the 
- *     caller should expect to need to provide a power of two size on an
- *     equivalent power of two boundary.
- *
- *     If the region cannot be added either because all regions are in use
- *     or the CPU cannot support it a negative value is returned. On success
- *     the register number for this entry is returned, but should be treated
- *     as a cookie only.
- *
- *     On a multiprocessor machine the changes are made to all processors.
- *     This is required on x86 by the Intel processors.
- *
- *     The available types are
- *
- *     %MTRR_TYPE_UNCACHABLE   -       No caching
- *
- *     %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
- *
- *     %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
- *
- *     %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
- *
- *     BUGS: Needs a quiet flag for the cases where drivers do not mind
- *     failures and do not wish system log messages to be sent.
- */
-
 int mtrr_add_page(unsigned long base, unsigned long size, 
                  unsigned int type, char increment)
 {
-       int i;
-       mtrr_type ltype;
-       unsigned long lbase;
-       unsigned int lsize;
        int error;
+       dom0_op_t op;
 
-       if (!mtrr_if)
-               return -ENXIO;
-               
-       if ((error = mtrr_if->validate_add_page(base,size,type)))
+       op.cmd = DOM0_ADD_MEMTYPE;
+       op.u.add_memtype.pfn     = base;
+       op.u.add_memtype.nr_pfns = size;
+       op.u.add_memtype.type    = type;
+       if ((error = HYPERVISOR_dom0_op(&op)))
                return error;
 
-       if (type >= MTRR_NUM_TYPES) {
-               printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
-               return -EINVAL;
-       }
-
-       /*  If the type is WC, check that this processor supports it  */
-       if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-               printk(KERN_WARNING
-                      "mtrr: your processor doesn't support 
write-combining\n");
-               return -ENOSYS;
-       }
-
-       if (base & size_or_mask || size & size_or_mask) {
-               printk(KERN_WARNING "mtrr: base or size exceeds the MTRR 
width\n");
-               return -EINVAL;
-       }
+       if (increment)
+               ++usage_table[op.u.add_memtype.reg];
 
-       error = -EINVAL;
-
-       /*  Search for existing MTRR  */
-       down(&main_lock);
-       for (i = 0; i < num_var_ranges; ++i) {
-               mtrr_if->get(i, &lbase, &lsize, &ltype);
-               if (base >= lbase + lsize)
-                       continue;
-               if ((base < lbase) && (base + size <= lbase))
-                       continue;
-               /*  At this point we know there is some kind of overlap/enclosure  */
-               if ((base < lbase) || (base + size > lbase + lsize)) {
-                       printk(KERN_WARNING
-                              "mtrr: 0x%lx000,0x%lx000 overlaps existing"
-                              " 0x%lx000,0x%x000\n", base, size, lbase,
-                              lsize);
-                       goto out;
-               }
-               /*  New region is enclosed by an existing region  */
-               if (ltype != type) {
-                       if (type == MTRR_TYPE_UNCACHABLE)
-                               continue;
-                       printk (KERN_WARNING "mtrr: type mismatch for 
%lx000,%lx000 old: %s new: %s\n",
-                            base, size, mtrr_attrib_to_str(ltype),
-                            mtrr_attrib_to_str(type));
-                       goto out;
-               }
-               if (increment)
-                       ++usage_table[i];
-               error = i;
-               goto out;
-       }
-       /*  Search for an empty MTRR  */
-       i = mtrr_if->get_free_region(base, size);
-       if (i >= 0) {
-               set_mtrr(i, base, size, type);
-               usage_table[i] = 1;
-       } else
-               printk(KERN_INFO "mtrr: no more MTRRs available\n");
-       error = i;
- out:
-       up(&main_lock);
-       return error;
+       return op.u.add_memtype.reg;
 }
 
-/**
- *     mtrr_add - Add a memory type region
- *     @base: Physical base address of region
- *     @size: Physical size of region
- *     @type: Type of MTRR desired
- *     @increment: If this is true do usage counting on the region
- *
- *     Memory type region registers control the caching on newer Intel and
- *     non-Intel processors. This function allows drivers to request that an
- *     MTRR be added. The details and hardware specifics of each processor's
- *     implementation are hidden from the caller, but nevertheless the 
- *     caller should expect to need to provide a power of two size on an
- *     equivalent power of two boundary.
- *
- *     If the region cannot be added either because all regions are in use
- *     or the CPU cannot support it a negative value is returned. On success
- *     the register number for this entry is returned, but should be treated
- *     as a cookie only.
- *
- *     On a multiprocessor machine the changes are made to all processors.
- *     This is required on x86 by the Intel processors.
- *
- *     The available types are
- *
- *     %MTRR_TYPE_UNCACHABLE   -       No caching
- *
- *     %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
- *
- *     %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
- *
- *     %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
- *
- *     BUGS: Needs a quiet flag for the cases where drivers do not mind
- *     failures and do not wish system log messages to be sent.
- */
-
 int
 mtrr_add(unsigned long base, unsigned long size, unsigned int type,
         char increment)
@@ -424,21 +89,6 @@ mtrr_add(unsigned long base, unsigned lo
                             increment);
 }
 
-/**
- *     mtrr_del_page - delete a memory type region
- *     @reg: Register returned by mtrr_add
- *     @base: Physical base address
- *     @size: Size of region
- *
- *     If register is supplied then base and size are ignored. This is
- *     how drivers should call it.
- *
- *     Releases an MTRR region. If the usage count drops to zero the 
- *     register is freed and the region returns to default state.
- *     On success the register is returned, on failure a negative error
- *     code.
- */
-
 int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 {
        int i, max;
@@ -446,12 +96,9 @@ int mtrr_del_page(int reg, unsigned long
        unsigned long lbase;
        unsigned int lsize;
        int error = -EINVAL;
-
-       if (!mtrr_if)
-               return -ENXIO;
+       dom0_op_t op;
 
        max = num_var_ranges;
-       down(&main_lock);
        if (reg < 0) {
                /*  Search for existing MTRR  */
                for (i = 0; i < max; ++i) {
@@ -467,46 +114,20 @@ int mtrr_del_page(int reg, unsigned long
                        goto out;
                }
        }
-       if (reg >= max) {
-               printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
-               goto out;
-       }
-       if (is_cpu(CYRIX) && !use_intel()) {
-               if ((reg == 3) && arr3_protected) {
-                       printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
-                       goto out;
-               }
-       }
-       mtrr_if->get(reg, &lbase, &lsize, &ltype);
-       if (lsize < 1) {
-               printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
-               goto out;
-       }
        if (usage_table[reg] < 1) {
                printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
                goto out;
        }
-       if (--usage_table[reg] < 1)
-               set_mtrr(reg, 0, 0, 0);
+       if (--usage_table[reg] < 1) {
+               op.cmd = DOM0_DEL_MEMTYPE;
+               op.u.del_memtype.handle = 0;
+               op.u.add_memtype.reg    = reg;
+               (void)HYPERVISOR_dom0_op(&op);
+       }
        error = reg;
  out:
-       up(&main_lock);
        return error;
 }
-/**
- *     mtrr_del - delete a memory type region
- *     @reg: Register returned by mtrr_add
- *     @base: Physical base address
- *     @size: Size of region
- *
- *     If register is supplied then base and size are ignored. This is
- *     how drivers should call it.
- *
- *     Releases an MTRR region. If the usage count drops to zero the 
- *     register is freed and the region returns to default state.
- *     On success the register is returned, on failure a negative error
- *     code.
- */
 
 int
 mtrr_del(int reg, unsigned long base, unsigned long size)
@@ -522,157 +143,23 @@ mtrr_del(int reg, unsigned long base, un
 EXPORT_SYMBOL(mtrr_add);
 EXPORT_SYMBOL(mtrr_del);
 
-/* HACK ALERT!
- * These should be called implicitly, but we can't yet until all the initcall
- * stuff is done...
- */
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
-static void __init init_ifs(void)
-{
-       amd_init_mtrr();
-       cyrix_init_mtrr();
-       centaur_init_mtrr();
-}
-
-static void __init init_other_cpus(void)
+static int __init mtrr_init(void)
 {
-       if (use_intel())
-               get_mtrr_state();
-
-       /* bring up the other processors */
-       set_mtrr(~0U,0,0,0);
-
-       if (use_intel()) {
-               finalize_mtrr_state();
-               mtrr_state_warn();
-       }
-}
+       struct cpuinfo_x86 *c = &boot_cpu_data;
 
+       if (!(xen_start_info.flags & SIF_PRIVILEGED))
+               return -ENODEV;
 
-struct mtrr_value {
-       mtrr_type       ltype;
-       unsigned long   lbase;
-       unsigned int    lsize;
-};
+       if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+           (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+               return -ENODEV;
 
-static struct mtrr_value * mtrr_state;
+       set_num_var_ranges();
+       init_table();
 
-static int mtrr_save(struct sys_device * sysdev, u32 state)
-{
-       int i;
-       int size = num_var_ranges * sizeof(struct mtrr_value);
-
-       mtrr_state = kmalloc(size,GFP_ATOMIC);
-       if (mtrr_state)
-               memset(mtrr_state,0,size);
-       else
-               return -ENOMEM;
-
-       for (i = 0; i < num_var_ranges; i++) {
-               mtrr_if->get(i,
-                            &mtrr_state[i].lbase,
-                            &mtrr_state[i].lsize,
-                            &mtrr_state[i].ltype);
-       }
        return 0;
 }
 
-static int mtrr_restore(struct sys_device * sysdev)
-{
-       int i;
-
-       for (i = 0; i < num_var_ranges; i++) {
-               if (mtrr_state[i].lsize) 
-                       set_mtrr(i,
-                                mtrr_state[i].lbase,
-                                mtrr_state[i].lsize,
-                                mtrr_state[i].ltype);
-       }
-       kfree(mtrr_state);
-       return 0;
-}
-
-
-
-static struct sysdev_driver mtrr_sysdev_driver = {
-       .suspend        = mtrr_save,
-       .resume         = mtrr_restore,
-};
-
-
-/**
- * mtrr_init - initialize mtrrs on the boot CPU
- *
- * This needs to be called early; before any of the other CPUs are 
- * initialized (i.e. before smp_init()).
- * 
- */
-static int __init mtrr_init(void)
-{
-       init_ifs();
-
-       if (cpu_has_mtrr) {
-               mtrr_if = &generic_mtrr_ops;
-               size_or_mask = 0xff000000;      /* 36 bits */
-               size_and_mask = 0x00f00000;
-
-               /* This is an AMD specific MSR, but we assume(hope?) that
-                  Intel will implement it too when they extend the address
-                  bus of the Xeon. */
-               if (cpuid_eax(0x80000000) >= 0x80000008) {
-                       u32 phys_addr;
-                       phys_addr = cpuid_eax(0x80000008) & 0xff;
-                       size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
-                       size_and_mask = ~size_or_mask & 0xfff00000;
-               } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
-                          boot_cpu_data.x86 == 6) {
-                       /* VIA C* family have Intel style MTRRs, but
-                          don't support PAE */
-                       size_or_mask = 0xfff00000;      /* 32 bits */
-                       size_and_mask = 0;
-               }
-       } else {
-               switch (boot_cpu_data.x86_vendor) {
-               case X86_VENDOR_AMD:
-                       if (cpu_has_k6_mtrr) {
-                               /* Pre-Athlon (K6) AMD CPU MTRRs */
-                               mtrr_if = mtrr_ops[X86_VENDOR_AMD];
-                               size_or_mask = 0xfff00000;      /* 32 bits */
-                               size_and_mask = 0;
-                       }
-                       break;
-               case X86_VENDOR_CENTAUR:
-                       if (cpu_has_centaur_mcr) {
-                               mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
-                               size_or_mask = 0xfff00000;      /* 32 bits */
-                               size_and_mask = 0;
-                       }
-                       break;
-               case X86_VENDOR_CYRIX:
-                       if (cpu_has_cyrix_arr) {
-                               mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
-                               size_or_mask = 0xfff00000;      /* 32 bits */
-                               size_and_mask = 0;
-                       }
-                       break;
-               default:
-                       break;
-               }
-       }
-       printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION);
-
-       if (mtrr_if) {
-               set_num_var_ranges();
-               init_table();
-               init_other_cpus();
-
-               return sysdev_driver_register(&cpu_sysdev_class,
-                                             &mtrr_sysdev_driver);
-       }
-       return -ENXIO;
-}
-
 subsys_initcall(mtrr_init);
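
Worth spelling out what the mtrr/main.c hunks do: only a privileged domain
may touch MTRRs now (note the SIF_PRIVILEGED check), and it does so through
DOM0_ADD_MEMTYPE, DOM0_DEL_MEMTYPE and DOM0_READ_MEMTYPE hypercalls, so the
old cross-CPU set_mtrr() rendezvous disappears entirely.  Since the deleted
comment block describes that rendezvous only in prose, here is a compilable
userspace rendering of the count/gate protocol it removed, using C11
atomics and threads in place of IPIs (NCPUS and do_update() are stand-ins;
build with -pthread):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NCPUS 4

    static atomic_int count;
    static atomic_int gate;

    static void do_update(int cpu)      /* stands in for mtrr_if->set() */
    {
        printf("cpu %d updating\n", cpu);
    }

    static void *helper(void *arg)      /* the removed ipi_handler() */
    {
        int cpu = (int)(long)arg;

        atomic_fetch_sub(&count, 1);    /* "interrupts are off here"    */
        while (!atomic_load(&gate))
            ;                           /* wait for master's clearance  */
        do_update(cpu);
        atomic_fetch_sub(&count, 1);    /* "my update is done"          */
        while (atomic_load(&gate))
            ;                           /* wait for the gate to drop    */
        atomic_fetch_sub(&count, 1);    /* "seen it; done touching data" */
        return NULL;
    }

    int main(void)                      /* the removed set_mtrr() master */
    {
        pthread_t t[NCPUS - 1];
        long i;

        atomic_store(&count, NCPUS - 1);
        atomic_store(&gate, 0);
        for (i = 0; i < NCPUS - 1; i++)
            pthread_create(&t[i], NULL, helper, (void *)(i + 1));

        while (atomic_load(&count)) ;   /* everyone checked in     */
        atomic_store(&count, NCPUS - 1);
        atomic_store(&gate, 1);         /* open the gate           */
        do_update(0);                   /* master's own update     */
        while (atomic_load(&count)) ;   /* all updates complete    */
        atomic_store(&count, NCPUS - 1);
        atomic_store(&gate, 0);         /* drop the gate           */
        while (atomic_load(&count)) ;   /* last one out            */

        for (i = 0; i < NCPUS - 1; i++)
            pthread_join(t[i], NULL);
        return 0;
    }
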
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/cpu/mtrr/Makefile linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/Makefile
--- pristine-linux-2.6.12/arch/i386/kernel/cpu/mtrr/Makefile    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/Makefile     2005-07-28 13:17:07.000000000 -0700
@@ -1,5 +1,16 @@
-obj-y          := main.o if.o generic.o state.o
-obj-y          += amd.o
-obj-y          += cyrix.o
-obj-y          += centaur.o
+obj-y  := main.o
+c-obj-y        := if.o
 
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): $(obj)/mtrr.h
+       @ln -fsn $(srctree)/arch/i386/kernel/cpu/mtrr/$(notdir $@) $@
+
+$(patsubst %.o,$(obj)/%.c,$(obj-y)): $(obj)/mtrr.h
+
+$(obj)/mtrr.h:
+       @ln -fsn $(srctree)/arch/i386/kernel/cpu/mtrr/mtrr.h $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/entry.S linux-2.6-xen-sparse/arch/i386/kernel/entry.S
--- pristine-linux-2.6.12/arch/i386/kernel/entry.S      2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/entry.S       2005-07-28 13:17:07.000000000 -0700
@@ -47,8 +47,8 @@
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page.h>
-#include <asm/desc.h>
 #include "irq_vectors.h"
+#include <asm-xen/xen-public/xen.h>
 
 #define nr_syscalls ((syscall_table_size)/4)
 
@@ -64,6 +64,7 @@ ES            = 0x20
 ORIG_EAX       = 0x24
 EIP            = 0x28
 CS             = 0x2C
+EVENT_MASK     = 0x2E
 EFLAGS         = 0x30
 OLDESP         = 0x34
 OLDSS          = 0x38
@@ -75,11 +76,43 @@ DF_MASK             = 0x00000400 
 NT_MASK                = 0x00004000
 VM_MASK                = 0x00020000
 
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending          /* 0 */
+#define evtchn_upcall_mask             1
+
+#define sizeof_vcpu_shift              3
+
+#ifdef CONFIG_SMP
+#define preempt_disable(reg)   incl TI_preempt_count(reg)
+#define preempt_enable(reg)    decl TI_preempt_count(reg)
+#define XEN_GET_VCPU_INFO(reg) preempt_disable(%ebp)                   ; \
+                               movl TI_cpu(%ebp),reg                   ; \
+                               shl  $sizeof_vcpu_shift,reg             ; \
+                               addl HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%ebp)
+#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
+#else
+#define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
+#define XEN_PUT_VCPU_INFO(reg)
+#define XEN_PUT_VCPU_INFO_fixup
+#endif
+
+#define XEN_LOCKED_BLOCK_EVENTS(reg)   movb $1,evtchn_upcall_mask(reg)
+#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
+#define XEN_BLOCK_EVENTS(reg)  XEN_GET_VCPU_INFO(reg)                  ; \
+                               XEN_LOCKED_BLOCK_EVENTS(reg)            ; \
+                               XEN_PUT_VCPU_INFO(reg)
+#define XEN_UNBLOCK_EVENTS(reg)        XEN_GET_VCPU_INFO(reg)                  ; \
+                               XEN_LOCKED_UNBLOCK_EVENTS(reg)          ; \
+                               XEN_PUT_VCPU_INFO(reg)
+#define XEN_TEST_PENDING(reg)  testb $0xFF,evtchn_upcall_pending(reg)
+
 #ifdef CONFIG_PREEMPT
-#define preempt_stop           cli
+#define preempt_stop           GET_THREAD_INFO(%ebp)                   ; \
+                               XEN_BLOCK_EVENTS(%esi)
 #else
 #define preempt_stop
-#define resume_kernel          restore_nocheck
+#define resume_kernel          restore_all
 #endif
 
 #define SAVE_ALL \
@@ -123,6 +156,23 @@ VM_MASK            = 0x00020000
 .previous
 
 
+#define RESTORE_ALL    \
+       RESTORE_REGS    \
+       addl $4, %esp;  \
+1:     iret;           \
+.section .fixup,"ax";   \
+2:     movl $(__USER_DS), %edx; \
+       movl %edx, %ds; \
+       movl %edx, %es; \
+       movl $11,%eax;  \
+       call do_exit;   \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,2b;    \
+.previous
+
+
 ENTRY(ret_from_fork)
        pushl %eax
        call schedule_tail
@@ -145,10 +195,10 @@ ret_from_intr:
        GET_THREAD_INFO(%ebp)
        movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
        movb CS(%esp), %al
-       testl $(VM_MASK | 3), %eax
-       jz resume_kernel
+       testl $(VM_MASK | 2), %eax
+       jz resume_kernel                # returning to kernel or vm86-space
 ENTRY(resume_userspace)
-       cli                             # make sure we don't miss an interrupt
+       XEN_BLOCK_EVENTS(%esi)          # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        movl TI_flags(%ebp), %ecx
@@ -159,15 +209,15 @@ ENTRY(resume_userspace)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-       cli
+       XEN_BLOCK_EVENTS(%esi)
        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
-       jnz restore_nocheck
+       jnz restore_all
 need_resched:
        movl TI_flags(%ebp), %ecx       # need_resched set ?
        testb $_TIF_NEED_RESCHED, %cl
        jz restore_all
-       testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
-       jz restore_all
+       testb $0xFF,EVENT_MASK(%esp)    # interrupts off (exception path) ?
+       jnz restore_all
        call preempt_schedule_irq
        jmp need_resched
 #endif
@@ -202,8 +252,7 @@ sysenter_past_esp:
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
 
-       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-       testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -227,8 +276,7 @@ ENTRY(system_call)
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
                                        # system call tracing in operation
-       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
-       testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
        jnz syscall_trace_entry
        cmpl $(nr_syscalls), %eax
        jae syscall_badsys
@@ -236,63 +284,31 @@ syscall_call:
        call *sys_call_table(,%eax,4)
        movl %eax,EAX(%esp)             # store the return value
 syscall_exit:
-       cli                             # make sure we don't miss an interrupt
+       XEN_BLOCK_EVENTS(%esi)          # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        movl TI_flags(%ebp), %ecx
        testw $_TIF_ALLWORK_MASK, %cx   # current->work
        jne syscall_exit_work
-
 restore_all:
-       movl EFLAGS(%esp), %eax         # mix EFLAGS, SS and CS
-       # Warning: OLDSS(%esp) contains the wrong/random values if we
-       # are returning to the kernel.
-       # See comments in process.c:copy_thread() for details.
-       movb OLDSS(%esp), %ah
-       movb CS(%esp), %al
-       andl $(VM_MASK | (4 << 8) | 3), %eax
-       cmpl $((4 << 8) | 3), %eax
-       je ldt_ss                       # returning to user-space with LDT SS
-restore_nocheck:
-       RESTORE_REGS
-       addl $4, %esp
-1:     iret
-.section .fixup,"ax"
-iret_exc:
-       sti
-       pushl $0                        # no error code
-       pushl $do_iret_error
-       jmp error_code
-.previous
-.section __ex_table,"a"
-       .align 4
-       .long 1b,iret_exc
-.previous
+       testl $VM_MASK, EFLAGS(%esp)
+       jnz resume_vm86
+       movb EVENT_MASK(%esp), %al
+       notb %al                        # %al == ~saved_mask
+       XEN_GET_VCPU_INFO(%esi)
+       andb evtchn_upcall_mask(%esi),%al
+       andb $1,%al                     # %al == mask & ~saved_mask
+       jnz restore_all_enable_events   #     != 0 => reenable event delivery
+       XEN_PUT_VCPU_INFO(%esi)
+       RESTORE_ALL
 
-ldt_ss:
-       larl OLDSS(%esp), %eax
-       jnz restore_nocheck
-       testl $0x00400000, %eax         # returning to 32bit stack?
-       jnz restore_nocheck             # allright, normal return
-       /* If returning to userspace with 16bit stack,
-        * try to fix the higher word of ESP, as the CPU
-        * won't restore it.
-        * This is an "official" bug of all the x86-compatible
-        * CPUs, which we can try to work around to make
-        * dosemu and wine happy. */
-       subl $8, %esp           # reserve space for switch16 pointer
-       cli
-       movl %esp, %eax
-       /* Set up the 16bit stack frame with switch32 pointer on top,
-        * and a switch16 pointer on top of the current frame. */
-       call setup_x86_bogus_stack
+resume_vm86:
+       XEN_UNBLOCK_EVENTS(%esi)
        RESTORE_REGS
-       lss 20+4(%esp), %esp    # switch to 16bit stack
-1:     iret
-.section __ex_table,"a"
-       .align 4
-       .long 1b,iret_exc
-.previous
+       movl %eax,(%esp)
+       movl $__HYPERVISOR_switch_vm86,%eax
+       int $0x82
+       ud2
 
        # perform work that needs to be done immediately before resumption
        ALIGN
@@ -301,7 +317,7 @@ work_pending:
        jz work_notifysig
 work_resched:
        call schedule
-       cli                             # make sure we don't miss an interrupt
+       XEN_BLOCK_EVENTS(%esi)          # make sure we don't miss an interrupt
                                        # setting need_resched or sigpending
                                        # between sampling and the iret
        movl TI_flags(%ebp), %ecx
@@ -348,7 +364,7 @@ syscall_trace_entry:
 syscall_exit_work:
        testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
        jz work_pending
-       sti                             # could let do_syscall_trace() call
+       XEN_UNBLOCK_EVENTS(%esi)        # could let do_syscall_trace() call
                                        # schedule() instead
        movl %esp, %eax
        movl $1, %edx
@@ -368,27 +384,7 @@ syscall_badsys:
        movl $-ENOSYS,EAX(%esp)
        jmp resume_userspace
 
-#define FIXUP_ESPFIX_STACK \
-       movl %esp, %eax; \
-       /* switch to 32bit stack using the pointer on top of 16bit stack */ \
-       lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
-       /* copy data from 16bit stack to 32bit stack */ \
-       call fixup_x86_bogus_stack; \
-       /* put ESP to the proper location */ \
-       movl %eax, %esp;
-#define UNWIND_ESPFIX_STACK \
-       pushl %eax; \
-       movl %ss, %eax; \
-       /* see if on 16bit stack */ \
-       cmpw $__ESPFIX_SS, %ax; \
-       jne 28f; \
-       movl $__KERNEL_DS, %edx; \
-       movl %edx, %ds; \
-       movl %edx, %es; \
-       /* switch to 32bit stack */ \
-       FIXUP_ESPFIX_STACK \
-28:    popl %eax;
-
+#if 0 /* XEN */
 /*
  * Build the entry stubs and pointer table with
  * some assembler magic.
@@ -426,6 +422,7 @@ ENTRY(name)                         \
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
+#endif /* XEN */
 
 ENTRY(divide_error)
        pushl $0                        # no error code
@@ -443,9 +440,7 @@ error_code:
        pushl %ecx
        pushl %ebx
        cld
-       pushl %es
-       UNWIND_ESPFIX_STACK
-       popl %ecx
+       movl %es, %ecx
        movl ES(%esp), %edi             # get the function address
        movl ORIG_EAX(%esp), %edx       # get the error code
        movl %eax, ORIG_EAX(%esp)
@@ -457,6 +452,118 @@ error_code:
        call *%edi
        jmp ret_from_exception
 
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+       pushl %eax
+       SAVE_ALL
+       movl EIP(%esp),%eax
+       cmpl $scrit,%eax
+       jb   11f
+       cmpl $ecrit,%eax
+       jb   critical_region_fixup
+11:    push %esp
+       call evtchn_do_upcall
+       add  $4,%esp
+       jmp  ret_from_intr
+
+        ALIGN
+restore_all_enable_events:  
+       XEN_LOCKED_UNBLOCK_EVENTS(%esi)
+scrit: /**** START OF CRITICAL REGION ****/
+       XEN_TEST_PENDING(%esi)
+       jnz  14f                        # process more events if necessary...
+       XEN_PUT_VCPU_INFO(%esi)
+       RESTORE_ALL
+14:    XEN_LOCKED_BLOCK_EVENTS(%esi)
+       XEN_PUT_VCPU_INFO(%esi)
+       jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame. 
+critical_region_fixup:
+       addl $critical_fixup_table-scrit,%eax
+       movzbl (%eax),%eax              # %eax contains num bytes popped
+       cmpb $0xff,%al                  # 0xff => vcpu_info critical region
+       jne  15f
+       GET_THREAD_INFO(%ebp)
+       XEN_PUT_VCPU_INFO(%esi)         # abort vcpu_info critical region
+        xorl %eax,%eax
+15:    mov  %esp,%esi
+       add  %eax,%esi                  # %esi points at end of src region
+       mov  %esp,%edi
+       add  $0x34,%edi                 # %edi points at end of dst region
+       mov  %eax,%ecx
+       shr  $2,%ecx                    # convert words to bytes
+       je   17f                        # skip loop if nothing to copy
+16:    subl $4,%esi                    # pre-decrementing copy loop
+       subl $4,%edi
+       movl (%esi),%eax
+       movl %eax,(%edi)
+       loop 16b
+17:    movl %edi,%esp                  # final %edi is top of merged stack
+       jmp  11b
+
+critical_fixup_table:
+       .byte 0xff,0xff,0xff            # testb $0xff,(%esi) = XEN_TEST_PENDING
+       .byte 0xff,0xff                 # jnz  14f
+       XEN_PUT_VCPU_INFO_fixup
+       .byte 0x00                      # pop  %ebx
+       .byte 0x04                      # pop  %ecx
+       .byte 0x08                      # pop  %edx
+       .byte 0x0c                      # pop  %esi
+       .byte 0x10                      # pop  %edi
+       .byte 0x14                      # pop  %ebp
+       .byte 0x18                      # pop  %eax
+       .byte 0x1c                      # pop  %ds
+       .byte 0x20                      # pop  %es
+       .byte 0x24,0x24,0x24            # add  $4,%esp
+       .byte 0x28                      # iret
+       .byte 0xff,0xff,0xff,0xff       # movb $1,1(%esi)
+       XEN_PUT_VCPU_INFO_fixup
+       .byte 0x00,0x00                 # jmp  11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:     popl %ds
+2:     popl %es
+3:     popl %fs
+4:     popl %gs
+       subl $4,%esp
+       SAVE_ALL
+       jmp  ret_from_exception
+.section .fixup,"ax";  \
+6:     movl $0,(%esp); \
+       jmp 1b;         \
+7:     movl $0,(%esp); \
+       jmp 2b;         \
+8:     movl $0,(%esp); \
+       jmp 3b;         \
+9:     movl $0,(%esp); \
+       jmp 4b;         \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,6b;    \
+       .long 2b,7b;    \
+       .long 3b,8b;    \
+       .long 4b,9b;    \
+.previous
+
 ENTRY(coprocessor_error)
        pushl $0
        pushl $do_coprocessor_error
@@ -470,17 +577,9 @@ ENTRY(simd_coprocessor_error)
 ENTRY(device_not_available)
        pushl $-1                       # mark this as an int
        SAVE_ALL
-       movl %cr0, %eax
-       testl $0x4, %eax                # EM (math emulation bit)
-       jne device_not_available_emulate
        preempt_stop
        call math_state_restore
        jmp ret_from_exception
-device_not_available_emulate:
-       pushl $0                        # temporary storage for ORIG_EIP
-       call math_emulate
-       addl $4, %esp
-       jmp ret_from_exception
 
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -516,6 +615,7 @@ debug_stack_correct:
        call do_debug
        jmp ret_from_exception
 
+#if 0 /* XEN */
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
@@ -525,11 +625,6 @@ debug_stack_correct:
  * fault happened on the sysenter path.
  */
 ENTRY(nmi)
-       pushl %eax
-       movl %ss, %eax
-       cmpw $__ESPFIX_SS, %ax
-       popl %eax
-       je nmi_16bit_stack
        cmpl $sysenter_entry,(%esp)
        je nmi_stack_fixup
        pushl %eax
@@ -549,7 +644,7 @@ nmi_stack_correct:
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_nmi
-       jmp restore_all
+       RESTORE_ALL
 
 nmi_stack_fixup:
        FIX_STACK(12,nmi_stack_correct, 1)
@@ -564,29 +659,7 @@ nmi_debug_stack_check:
 nmi_debug_stack_fixup:
        FIX_STACK(24,nmi_stack_correct, 1)
        jmp nmi_stack_correct
-
-nmi_16bit_stack:
-       /* create the pointer to lss back */
-       pushl %ss
-       pushl %esp
-       movzwl %sp, %esp
-       addw $4, (%esp)
-       /* copy the iret frame of 12 bytes */
-       .rept 3
-       pushl 16(%esp)
-       .endr
-       pushl %eax
-       SAVE_ALL
-       FIXUP_ESPFIX_STACK              # %eax == %esp
-       xorl %edx,%edx                  # zero error code
-       call do_nmi
-       RESTORE_REGS
-       lss 12+4(%esp), %esp            # back to 16bit stack
-1:     iret
-.section __ex_table,"a"
-       .align 4
-       .long 1b,iret_exc
-.previous
+#endif /* XEN */
 
 ENTRY(int3)
        pushl $-1                       # mark this as an int
@@ -636,9 +709,33 @@ ENTRY(alignment_check)
        pushl $do_alignment_check
        jmp error_code
 
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
+# fastcall register usage:  %eax = pt_regs, %edx = error code,
+#                          %ecx = fault address
 ENTRY(page_fault)
-       pushl $do_page_fault
-       jmp error_code
+       pushl %ds
+       pushl %eax
+       xorl %eax, %eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl %eax                       /* eax = -1 */
+       pushl %ecx
+       pushl %ebx
+       cld
+       movl %es,%edi
+       movl ES(%esp), %ecx             /* get the faulting address */
+       movl ORIG_EAX(%esp), %edx       /* get the error code */
+       movl %eax, ORIG_EAX(%esp)
+       movl %edi, ES(%esp)
+       movl $(__KERNEL_DS),%eax
+       movl %eax, %ds
+       movl %eax, %es
+       movl %esp,%eax                  /* pt_regs pointer */
+       call do_page_fault
+       jmp ret_from_exception
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -647,9 +744,8 @@ ENTRY(machine_check)
        jmp error_code
 #endif
 
-ENTRY(spurious_interrupt_bug)
-       pushl $0
-       pushl $do_spurious_interrupt_bug
+ENTRY(fixup_4gb_segment)
+       pushl $do_fixup_4gb_segment
        jmp error_code
 
 #include "syscall_table.S"
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/head.S linux-2.6-xen-sparse/arch/i386/kernel/head.S
--- pristine-linux-2.6.12/arch/i386/kernel/head.S       2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/head.S        2005-07-28 13:17:07.000000000 -0700
@@ -1,24 +1,25 @@
-/*
- *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  Enhanced CPU detection and feature setting code by Mike Jagdis
- *  and Martin Mares, November 1997.
- */
 
-.text
 #include <linux/config.h>
+
+.section __xen_guest
+       .ascii  "GUEST_OS=linux,GUEST_VER=2.6"
+       .ascii  ",XEN_VER=3.0"
+       .ascii  ",VIRT_BASE=0xC0000000"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes"
+#else
+       .ascii  ",PAE=no"
+#endif
+       .ascii  ",LOADER=generic"
+       .byte   0
+
+.text
 #include <linux/threads.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/asm_offsets.h>
-#include <asm/setup.h>
+#include <asm-xen/xen-public/arch-x86_32.h>
 
 /*
  * References to members of the new_cpu_data structure.
@@ -33,239 +34,24 @@
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
 
-/*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.  We need one bit for
- * each possible page, but only in low memory, which means
- * 2^32/4096/8 = 128K worst case (4G/4G split.)
- *
- * Modulo rounding, each megabyte assigned here requires a kilobyte of
- * memory, which is currently unreclaimed.
- *
- * This should be a multiple of a page.
- */
-#define INIT_MAP_BEYOND_END    (128*1024)
-
-
-/*
- * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
- * %esi points to the real-mode code as a 32-bit pointer.
- * CS and DS must be 4 GB flat segments, but we don't depend on
- * any particular GDT layout, because we load our own as soon as we
- * can.
- */
 ENTRY(startup_32)
-
-/*
- * Set segments to known values.
- */
        cld
-       lgdt boot_gdt_descr - __PAGE_OFFSET
-       movl $(__BOOT_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       movl %eax,%fs
-       movl %eax,%gs
 
-/*
- * Clear BSS first so that there are no surprises...
- * No need to cld as DF is already clear from cld above...
- */
-       xorl %eax,%eax
-       movl $__bss_start - __PAGE_OFFSET,%edi
-       movl $__bss_stop - __PAGE_OFFSET,%ecx
-       subl %edi,%ecx
-       shrl $2,%ecx
-       rep ; stosl
-
-/*
- * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
- * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
- *
- * Warning: don't use %esi or the stack in this code.  However, %esp
- * can be used as a GPR if you really need it...
- */
-page_pde_offset = (__PAGE_OFFSET >> 20);
-
-       movl $(pg0 - __PAGE_OFFSET), %edi
-       movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
-       movl $0x007, %eax                       /* 0x007 = PRESENT+RW+USER */
-10:
-       leal 0x007(%edi),%ecx                   /* Create PDE entry */
-       movl %ecx,(%edx)                        /* Store identity PDE entry */
-       movl %ecx,page_pde_offset(%edx)         /* Store kernel PDE entry */
-       addl $4,%edx
-       movl $1024, %ecx
-11:
-       stosl
-       addl $0x1000,%eax
-       loop 11b
-       /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
-       /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
-       leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
-       cmpl %ebp,%eax
-       jb 10b
-       movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
+       /* Copy the necessary stuff from xen_start_info structure. */
+       mov  $xen_start_info_union,%edi
+       mov  $512,%ecx
+       rep movsl
 
 #ifdef CONFIG_SMP
-       xorl %ebx,%ebx                          /* This is the boot CPU (BSP) */
-       jmp 3f
-
-/*
- * Non-boot CPU entry point; entered from trampoline.S
- * We can't lgdt here, because lgdt itself uses a data segment, but
- * we know the trampoline has already loaded the boot_gdt_table GDT
- * for us.
- */
 ENTRY(startup_32_smp)
        cld
-       movl $(__BOOT_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       movl %eax,%fs
-       movl %eax,%gs
-
-/*
- *     New page tables may be in 4Mbyte page mode and may
- *     be using the global pages. 
- *
- *     NOTE! If we are on a 486 we may have no cr4 at all!
- *     So we do not try to touch it unless we really have
- *     some bits in it to set.  This won't work if the BSP
- *     implements cr4 but this AP does not -- very unlikely
- *     but be warned!  The same applies to the pse feature
- *     if not equally supported. --macro
- *
- *     NOTE! We have to correct for the fact that we're
- *     not yet offset PAGE_OFFSET..
- */
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
-       movl cr4_bits,%edx
-       andl %edx,%edx
-       jz 6f
-       movl %cr4,%eax          # Turn on paging options (PSE,PAE,..)
-       orl %edx,%eax
-       movl %eax,%cr4
-
-       btl $5, %eax            # check if PAE is enabled
-       jnc 6f
-
-       /* Check if extended functions are implemented */
-       movl $0x80000000, %eax
-       cpuid
-       cmpl $0x80000000, %eax
-       jbe 6f
-       mov $0x80000001, %eax
-       cpuid
-       /* Execute Disable bit supported? */
-       btl $20, %edx
-       jnc 6f
-
-       /* Setup EFER (Extended Feature Enable Register) */
-       movl $0xc0000080, %ecx
-       rdmsr
-
-       btsl $11, %eax
-       /* Make changes effective */
-       wrmsr
-
-6:
-       /* This is a secondary processor (AP) */
-       xorl %ebx,%ebx
-       incl %ebx
-
-3:
 #endif /* CONFIG_SMP */
 
-/*
- * Enable paging
- */
-       movl $swapper_pg_dir-__PAGE_OFFSET,%eax
-       movl %eax,%cr3          /* set the page table pointer.. */
-       movl %cr0,%eax
-       orl $0x80000000,%eax
-       movl %eax,%cr0          /* ..and set paging (PG) bit */
-       ljmp $__BOOT_CS,$1f     /* Clear prefetch and normalize %eip */
-1:
        /* Set up the stack pointer */
        lss stack_start,%esp
 
-/*
- * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
- */
-       pushl $0
-       popfl
-
-#ifdef CONFIG_SMP
-       andl %ebx,%ebx
-       jz  1f                          /* Initial CPU cleans BSS */
-       jmp checkCPUtype
-1:
-#endif /* CONFIG_SMP */
-
-/*
- * start system 32-bit setup. We need to re-do some of the things done
- * in 16-bit mode for the "real" operations.
- */
-       call setup_idt
-
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- */
-       movl $boot_params,%edi
-       movl $(PARAM_SIZE/4),%ecx
-       cld
-       rep
-       movsl
-       movl boot_params+NEW_CL_POINTER,%esi
-       andl %esi,%esi
-       jnz 2f                  # New command line protocol
-       cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
-       jne 1f
-       movzwl OLD_CL_OFFSET,%esi
-       addl $(OLD_CL_BASE_ADDR),%esi
-2:
-       movl $saved_command_line,%edi
-       movl $(COMMAND_LINE_SIZE/4),%ecx
-       rep
-       movsl
-1:
 checkCPUtype:
 
-       movl $-1,X86_CPUID              #  -1 for no CPUID initially
-
-/* check if it is 486 or 386. */
-/*
- * XXX - this does a lot of unnecessary setup.  Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
- */
-
-       movb $3,X86             # at least 386
-       pushfl                  # push EFLAGS
-       popl %eax               # get EFLAGS
-       movl %eax,%ecx          # save original EFLAGS
-       xorl $0x240000,%eax     # flip AC and ID bits in EFLAGS
-       pushl %eax              # copy to EFLAGS
-       popfl                   # set EFLAGS
-       pushfl                  # get new EFLAGS
-       popl %eax               # put it in eax
-       xorl %ecx,%eax          # change in flags
-       pushl %ecx              # restore original EFLAGS
-       popfl
-       testl $0x40000,%eax     # check if AC bit changed
-       je is386
-
-       movb $4,X86             # at least 486
-       testl $0x200000,%eax    # check if ID bit changed
-       je is486
-
        /* get vendor info */
        xorl %eax,%eax                  # call CPUID with 0 -> return vendor ID
        cpuid
@@ -274,9 +60,6 @@ checkCPUtype:
        movl %edx,X86_VENDOR_ID+4       # next 4 chars
        movl %ecx,X86_VENDOR_ID+8       # last 4 chars
 
-       orl %eax,%eax                   # do we have processor info as well?
-       je is486
-
        movl $1,%eax            # Use the CPUID instruction to get CPU type
        cpuid
        movb %al,%cl            # save reg for future use
@@ -289,32 +72,13 @@ checkCPUtype:
        movb %cl,X86_MASK
        movl %edx,X86_CAPABILITY
 
-is486: movl $0x50022,%ecx      # set AM, WP, NE and MP
-       jmp 2f
-
-is386: movl $2,%ecx            # set MP
-2:     movl %cr0,%eax
-       andl $0x80000011,%eax   # Save PG,PE,ET
-       orl %ecx,%eax
-       movl %eax,%cr0
-
-       call check_x87
        incb ready
-       lgdt cpu_gdt_descr
-       lidt idt_descr
-       ljmp $(__KERNEL_CS),$1f
-1:     movl $(__KERNEL_DS),%eax        # reload all the segment registers
-       movl %eax,%ss                   # after changing gdt.
-
-       movl $(__USER_DS),%eax          # DS/ES contains default USER segment
-       movl %eax,%ds
-       movl %eax,%es
 
        xorl %eax,%eax                  # Clear FS/GS and LDT
        movl %eax,%fs
        movl %eax,%gs
-       lldt %ax
        cld                     # gcc2 wants the direction flag cleared at all times
+
 #ifdef CONFIG_SMP
        movb ready, %cl 
        cmpb $1,%cl
@@ -329,100 +93,18 @@ L6:
        jmp L6                  # main should never return here, but
                                # just in case, we know what happens.
 
-/*
- * We depend on ET to be correct. This checks for 287/387.
- */
-check_x87:
-       movb $0,X86_HARD_MATH
-       clts
-       fninit
-       fstsw %ax
-       cmpb $0,%al
-       je 1f
-       movl %cr0,%eax          /* no coprocessor: have to set bits */
-       xorl $4,%eax            /* set EM */
-       movl %eax,%cr0
-       ret
-       ALIGN
-1:     movb $1,X86_HARD_MATH
-       .byte 0xDB,0xE4         /* fsetpm for 287, ignored by 387 */
-       ret
+ENTRY(lgdt_finish)
+       movl $(__KERNEL_DS),%eax        # reload all the segment registers
+       movw %ax,%ss                    # after changing gdt.
 
-/*
- *  setup_idt
- *
- *  sets up a idt with 256 entries pointing to
- *  ignore_int, interrupt gates. It doesn't actually load
- *  idt - that can be done only after paging has been enabled
- *  and the kernel moved to PAGE_OFFSET. Interrupts
- *  are enabled elsewhere, when we can be relatively
- *  sure everything is ok.
- *
- *  Warning: %esi is live across this function.
- */
-setup_idt:
-       lea ignore_int,%edx
-       movl $(__KERNEL_CS << 16),%eax
-       movw %dx,%ax            /* selector = 0x0010 = cs */
-       movw $0x8E00,%dx        /* interrupt gate - dpl=0, present */
-
-       lea idt_table,%edi
-       mov $256,%ecx
-rp_sidt:
-       movl %eax,(%edi)
-       movl %edx,4(%edi)
-       addl $8,%edi
-       dec %ecx
-       jne rp_sidt
-       ret
+       movl $(__USER_DS),%eax          # DS/ES contains default USER segment
+       movw %ax,%ds
+       movw %ax,%es
 
-/* This is the default interrupt "handler" :-) */
-       ALIGN
-ignore_int:
-       cld
-#ifdef CONFIG_PRINTK
+       popl %eax                       # reload CS by intersegment return
+       pushl $(__KERNEL_CS)
        pushl %eax
-       pushl %ecx
-       pushl %edx
-       pushl %es
-       pushl %ds
-       movl $(__KERNEL_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       pushl 16(%esp)
-       pushl 24(%esp)
-       pushl 32(%esp)
-       pushl 40(%esp)
-       pushl $int_msg
-       call printk
-       addl $(5*4),%esp
-       popl %ds
-       popl %es
-       popl %edx
-       popl %ecx
-       popl %eax
-#endif
-       iret
-
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
-/*
- * BSS section
- */
-.section ".bss.page_aligned","w"
-ENTRY(swapper_pg_dir)
-       .fill 1024,4,0
-ENTRY(empty_zero_page)
-       .fill 4096,1,0
-
-/*
- * This starts the data section.
- */
-.data
+       lret
 
 ENTRY(stack_start)
        .long init_thread_union+THREAD_SIZE
@@ -430,27 +112,10 @@ ENTRY(stack_start)
 
 ready: .byte 0
 
-int_msg:
-       .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
-
-/*
- * The IDT and GDT 'descriptors' are a strange 48-bit object
- * only used by the lidt and lgdt instructions. They are not
- * like usual segment descriptors - they consist of a 16-bit
- * segment size, and 32-bit linear address value:
- */
-
-.globl boot_gdt_descr
 .globl idt_descr
 .globl cpu_gdt_descr
 
        ALIGN
-# early boot GDT descriptor (must use 1:1 address mapping)
-       .word 0                         # 32 bit align gdt_desc.address
-boot_gdt_descr:
-       .word __BOOT_DS+7
-       .long boot_gdt_table - __PAGE_OFFSET
-
        .word 0                         # 32-bit align idt_desc.address
 idt_descr:
        .word IDT_ENTRIES*8-1           # idt contains 256 entries
@@ -459,25 +124,18 @@ idt_descr:
 # boot GDT descriptor (later on used by CPU#0):
        .word 0                         # 32 bit align gdt_desc.address
 cpu_gdt_descr:
-       .word GDT_ENTRIES*8-1
+       .word GDT_SIZE
        .long cpu_gdt_table
 
        .fill NR_CPUS-1,8,0             # space for the other GDT descriptors
 
-/*
- * The boot_gdt_table must mirror the equivalent in setup.S and is
- * used only for booting.
- */
-       .align L1_CACHE_BYTES
-ENTRY(boot_gdt_table)
-       .fill GDT_ENTRY_BOOT_CS,8,0
-       .quad 0x00cf9a000000ffff        /* kernel 4GB code at 0x00000000 */
-       .quad 0x00cf92000000ffff        /* kernel 4GB data at 0x00000000 */
+.org 0x1000
+ENTRY(empty_zero_page)
 
-/*
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
- */
-       .align PAGE_SIZE_asm
+.org 0x2000
+ENTRY(swapper_pg_dir)
+
+.org 0x3000
 ENTRY(cpu_gdt_table)
        .quad 0x0000000000000000        /* NULL descriptor */
        .quad 0x0000000000000000        /* 0x0b reserved */
@@ -492,32 +150,49 @@ ENTRY(cpu_gdt_table)
        .quad 0x0000000000000000        /* 0x53 reserved */
        .quad 0x0000000000000000        /* 0x5b reserved */
 
-       .quad 0x00cf9a000000ffff        /* 0x60 kernel 4GB code at 0x00000000 */
-       .quad 0x00cf92000000ffff        /* 0x68 kernel 4GB data at 0x00000000 */
-       .quad 0x00cffa000000ffff        /* 0x73 user 4GB code at 0x00000000 */
-       .quad 0x00cff2000000ffff        /* 0x7b user 4GB data at 0x00000000 */
+#ifdef CONFIG_X86_PAE
+       .quad 0x00cfbb00000067ff        /* 0x60 kernel 4GB code at 0x00000000 */
+       .quad 0x00cfb300000067ff        /* 0x68 kernel 4GB data at 0x00000000 */
+       .quad 0x00cffb00000067ff        /* 0x73 user 4GB code at 0x00000000 */
+       .quad 0x00cff300000067ff        /* 0x7b user 4GB data at 0x00000000 */
+#else
+       .quad 0x00cfbb000000c3ff        /* 0x60 kernel 4GB code at 0x00000000 */
+       .quad 0x00cfb3000000c3ff        /* 0x68 kernel 4GB data at 0x00000000 */
+       .quad 0x00cffb000000c3ff        /* 0x73 user 4GB code at 0x00000000 */
+       .quad 0x00cff3000000c3ff        /* 0x7b user 4GB data at 0x00000000 */
+#endif
 
        .quad 0x0000000000000000        /* 0x80 TSS descriptor */
        .quad 0x0000000000000000        /* 0x88 LDT descriptor */
 
        /* Segments used for calling PnP BIOS */
-       .quad 0x00c09a0000000000        /* 0x90 32-bit code */
-       .quad 0x00809a0000000000        /* 0x98 16-bit code */
-       .quad 0x0080920000000000        /* 0xa0 16-bit data */
-       .quad 0x0080920000000000        /* 0xa8 16-bit data */
-       .quad 0x0080920000000000        /* 0xb0 16-bit data */
+       .quad 0x0000000000000000        /* 0x90 32-bit code */
+       .quad 0x0000000000000000        /* 0x98 16-bit code */
+       .quad 0x0000000000000000        /* 0xa0 16-bit data */
+       .quad 0x0000000000000000        /* 0xa8 16-bit data */
+       .quad 0x0000000000000000        /* 0xb0 16-bit data */
        /*
         * The APM segments have byte granularity and their bases
         * and limits are set at run time.
         */
-       .quad 0x00409a0000000000        /* 0xb8 APM CS    code */
-       .quad 0x00009a0000000000        /* 0xc0 APM CS 16 code (16 bit) */
-       .quad 0x0040920000000000        /* 0xc8 APM DS    data */
+       .quad 0x0000000000000000        /* 0xb8 APM CS    code */
+       .quad 0x0000000000000000        /* 0xc0 APM CS 16 code (16 bit) */
+       .quad 0x0000000000000000        /* 0xc8 APM DS    data */
 
-       .quad 0x0000920000000000        /* 0xd0 - ESPFIX 16-bit SS */
+       .quad 0x0000000000000000        /* 0xd0 - unused */
        .quad 0x0000000000000000        /* 0xd8 - unused */
        .quad 0x0000000000000000        /* 0xe0 - unused */
        .quad 0x0000000000000000        /* 0xe8 - unused */
        .quad 0x0000000000000000        /* 0xf0 - unused */
        .quad 0x0000000000000000        /* 0xf8 - GDT entry 31: double-fault TSS */
+       .fill GDT_ENTRIES-32,8,0
 
+.org 0x4000
+ENTRY(default_ldt)
+
+.org 0x5000
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
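
head.S loses all the native boot work (BSS clearing, boot page tables, GDT/IDT
setup, CPU probing) because the domain builder hands startup_32 an environment
with paging already enabled; all that remains is copying the start-of-day data
(512 longs out of xen_start_info_union) and loading a stack. The new
__xen_guest section is the contract with that builder: a NUL-terminated,
comma-separated key=value string which, in the non-PAE case, assembles to

  GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=3.0,VIRT_BASE=0xC0000000,PAE=no,LOADER=generic

A sketch of the kind of lookup a loader could run over that string;
guest_tag() is a hypothetical helper for illustration, not the actual
domain-builder code:

#include <stdio.h>
#include <string.h>

/* Hypothetical: look up one KEY=value pair in a __xen_guest string. */
static const char *guest_tag(const char *hdr, const char *key,
                             char *buf, size_t len)
{
        size_t klen = strlen(key);
        const char *p = hdr;

        while (p && *p) {
                if (!strncmp(p, key, klen) && p[klen] == '=') {
                        size_t n = strcspn(p + klen + 1, ",");
                        if (n >= len)
                                n = len - 1;
                        memcpy(buf, p + klen + 1, n);
                        buf[n] = '\0';
                        return buf;
                }
                p = strchr(p, ',');     /* skip to the next pair */
                if (p)
                        p++;
        }
        return NULL;
}

int main(void)
{
        char v[32];
        const char *hdr = "GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=3.0,"
                          "VIRT_BASE=0xC0000000,PAE=no,LOADER=generic";

        printf("%s\n", guest_tag(hdr, "VIRT_BASE", v, sizeof v));
        return 0;                       /* prints 0xC0000000 */
}
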
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/i386_ksyms.c linux-2.6-xen-sparse/arch/i386/kernel/i386_ksyms.c
--- pristine-linux-2.6.12/arch/i386/kernel/i386_ksyms.c 2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/i386_ksyms.c  2005-07-28 13:17:07.000000000 -0700
@@ -76,7 +76,9 @@ EXPORT_SYMBOL(ioremap_nocache);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(pm_idle);
+#ifdef CONFIG_ACPI_BOOT
 EXPORT_SYMBOL(pm_power_off);
+#endif
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/io_apic.c linux-2.6-xen-sparse/arch/i386/kernel/io_apic.c
--- pristine-linux-2.6.12/arch/i386/kernel/io_apic.c    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/io_apic.c     2005-07-28 13:17:07.000000000 -0700
@@ -42,6 +42,48 @@
 
 #include "io_ports.h"
 
+#ifdef CONFIG_XEN
+
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq)  ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+{
+       physdev_op_t op;
+       int ret;
+
+       op.cmd = PHYSDEVOP_APIC_READ;
+       op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+       op.u.apic_op.offset = reg;
+       ret = HYPERVISOR_physdev_op(&op);
+       if (ret)
+               return ret;
+       return op.u.apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       physdev_op_t op;
+
+       op.cmd = PHYSDEVOP_APIC_WRITE;
+       op.u.apic_op.apic = mp_ioapics[apic].mpc_apicid;
+       op.u.apic_op.offset = reg;
+       op.u.apic_op.value = value;
+       HYPERVISOR_physdev_op(&op);
+}
+
+#define io_apic_read(a,r)    xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#endif /* CONFIG_XEN */
+
 int (*ioapic_renumber_irq)(int ioapic, int irq);
 atomic_t irq_mis_count;
 
@@ -107,6 +149,7 @@ static void add_pin_to_irq(unsigned int 
        entry->pin = pin;
 }
 
+#ifndef CONFIG_XEN
 /*
  * Reroute an IRQ to a different pin.
  */
@@ -243,6 +286,9 @@ static void set_ioapic_affinity_irq(unsi
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
+#else
+#define clear_IO_APIC() ((void)0)
+#endif
 
 #if defined(CONFIG_IRQBALANCE)
 # include <asm/processor.h>    /* kernel_thread() */
@@ -664,6 +710,7 @@ static inline void move_irq(int irq) { }
 #ifndef CONFIG_SMP
 void fastcall send_IPI_self(int vector)
 {
+#ifndef CONFIG_XEN
        unsigned int cfg;
 
        /*
@@ -675,6 +722,7 @@ void fastcall send_IPI_self(int vector)
         * Send the IPI. The write to APIC_ICR fires this off.
         */
        apic_write_around(APIC_ICR, cfg);
+#endif
 }
 #endif /* !CONFIG_SMP */
 
@@ -744,6 +792,7 @@ static int find_irq_entry(int apic, int 
        return -1;
 }
 
+#ifndef CONFIG_XEN
 /*
  * Find the pin to which IRQ[irq] (ISA) is connected
  */
@@ -766,6 +815,7 @@ static int find_isa_irq_pin(int irq, int
        }
        return -1;
 }
+#endif
 
 /*
  * Find a specific PCI IRQ entry.
@@ -813,6 +863,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
        return best_guess;
 }
 
+#ifndef CONFIG_XEN
 /*
  * This function currently is only a helper for the i386 smp boot process where 
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -836,6 +887,7 @@ void __init setup_ioapic_dest(void)
 
        }
 }
+#endif /* !CONFIG_XEN */
 
 /*
  * EISA Edge/Level control register, ELCR
@@ -1125,26 +1177,22 @@ static inline int IO_APIC_irq_trigger(in
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS]; /* = { FIRST_DEVICE_VECTOR , 0 }; */
 
 int assign_irq_vector(int irq)
 {
-       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       static int current_vector = FIRST_DEVICE_VECTOR;
+       physdev_op_t op;
 
        BUG_ON(irq >= NR_IRQ_VECTORS);
        if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
                return IO_APIC_VECTOR(irq);
-next:
-       current_vector += 8;
-       if (current_vector == SYSCALL_VECTOR)
-               goto next;
-
-       if (current_vector >= FIRST_SYSTEM_VECTOR) {
-               offset++;
-               if (!(offset%8))
-                       return -ENOSPC;
-               current_vector = FIRST_DEVICE_VECTOR + offset;
-       }
+
+       op.cmd = PHYSDEVOP_ASSIGN_VECTOR;
+       op.u.irq_op.irq = irq;
+       if (HYPERVISOR_physdev_op(&op))
+               return -ENOSPC;
+       current_vector = op.u.irq_op.vector;
 
        vector_irq[current_vector] = irq;
        if (irq != AUTO_ASSIGN)
@@ -1153,6 +1201,7 @@ next:
        return current_vector;
 }
 
+#ifndef CONFIG_XEN
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_edge_type;
 
@@ -1178,6 +1227,9 @@ static inline void ioapic_register_intr(
                set_intr_gate(vector, interrupt[irq]);
        }
 }
+#else
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif
 
 static void __init setup_IO_APIC_irqs(void)
 {
@@ -1233,7 +1285,7 @@ static void __init setup_IO_APIC_irqs(vo
                else
                        add_pin_to_irq(irq, apic, pin);
 
-               if (!apic && !IO_APIC_IRQ(irq))
+               if (/*!apic &&*/ !IO_APIC_IRQ(irq))
                        continue;
 
                if (IO_APIC_IRQ(irq)) {
@@ -1258,6 +1310,7 @@ static void __init setup_IO_APIC_irqs(vo
 /*
  * Set up the 8259A-master output pin:
  */
+#ifndef CONFIG_XEN
 static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 {
        struct IO_APIC_route_entry entry;
@@ -1452,8 +1505,6 @@ void __init print_IO_APIC(void)
        return;
 }
 
-#if 0
-
 static void print_APIC_bitfield (int base)
 {
        unsigned int v;
@@ -1595,8 +1646,9 @@ void /*__init*/ print_PIC(void)
        v = inb(0x4d1) << 8 | inb(0x4d0);
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
-
-#endif  /*  0  */
+#else
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
 
 static void __init enable_IO_APIC(void)
 {
@@ -1638,7 +1690,9 @@ void disable_IO_APIC(void)
         */
        clear_IO_APIC();
 
+#ifndef CONFIG_XEN
        disconnect_bsp_APIC();
+#endif
 }
 
 /*
@@ -1648,7 +1702,7 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@xxxxxxxx>  Tue Dec 21 12:25:05 CST 1999
  */
 
-#ifndef CONFIG_X86_NUMAQ
+#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
 static void __init setup_ioapic_ids_from_mpc(void)
 {
        union IO_APIC_reg_00 reg_00;
@@ -1755,6 +1809,7 @@ static void __init setup_ioapic_ids_from
 static void __init setup_ioapic_ids_from_mpc(void) { }
 #endif
 
+#ifndef CONFIG_XEN
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1979,6 +2034,7 @@ static struct hw_interrupt_type ioapic_l
        .end            = end_level_ioapic,
        .set_affinity   = set_ioapic_affinity,
 };
+#endif /* !CONFIG_XEN */
 
 static inline void init_IO_APIC_traps(void)
 {
@@ -2010,13 +2066,16 @@ static inline void init_IO_APIC_traps(vo
                         */
                        if (irq < 16)
                                make_8259A_irq(irq);
+#ifndef CONFIG_XEN
                        else
                                /* Strange. Oh, well.. */
                                irq_desc[irq].handler = &no_irq_type;
+#endif
                }
        }
 }
 
+#ifndef CONFIG_XEN
 static void enable_lapic_irq (unsigned int irq)
 {
        unsigned long v;
@@ -2243,6 +2302,9 @@ static inline void check_timer(void)
        panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
                "report.  Then try booting with the 'noapic' option");
 }
+#else
+#define check_timer() ((void)0)
+#endif
 
 /*
  *
@@ -2269,7 +2331,9 @@ void __init setup_IO_APIC(void)
         */
        if (!acpi_ioapic)
                setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
        sync_Arb_IDs();
+#endif
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
@@ -2391,6 +2455,7 @@ device_initcall(ioapic_init_sysfs);
 
 int __init io_apic_get_unique_id (int ioapic, int apic_id)
 {
+#ifndef CONFIG_XEN
        union IO_APIC_reg_00 reg_00;
        static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
        physid_mask_t tmp;
@@ -2457,6 +2522,7 @@ int __init io_apic_get_unique_id (int io
 
        apic_printk(APIC_VERBOSE, KERN_INFO
                        "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
 
        return apic_id;
 }
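
Two mechanisms carry the io_apic.c conversion: every register access is
interposed (the io_apic_read/io_apic_write macros at the top of the hunk
forward to PHYSDEVOP_APIC_READ/PHYSDEVOP_APIC_WRITE hypercalls), and vector
allocation moves into Xen, replacing the old local search loop in
assign_irq_vector() with PHYSDEVOP_ASSIGN_VECTOR plus a per-IRQ cache. The
caching shape in isolation, with hypervisor_assign_vector() as a hypothetical
stand-in for the hypercall:

#include <stdio.h>

#define NR_IRQS 256

static int irq_vector[NR_IRQS];

/* Hypothetical stand-in for the PHYSDEVOP_ASSIGN_VECTOR hypercall;
 * a deterministic fake so the sketch is self-contained. */
static int hypervisor_assign_vector(int irq)
{
        return 0x20 + irq;
}

/* Cache whatever the hypervisor picked; never choose a vector locally,
 * which is what the deleted search loop used to do. */
static int assign_irq_vector(int irq)
{
        if (irq_vector[irq] > 0)
                return irq_vector[irq];

        irq_vector[irq] = hypervisor_assign_vector(irq);
        return irq_vector[irq];
}

int main(void)
{
        printf("irq 9 -> vector %#x\n", assign_irq_vector(9));
        return 0;
}
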
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/ioport.c linux-2.6-xen-sparse/arch/i386/kernel/ioport.c
--- pristine-linux-2.6.12/arch/i386/kernel/ioport.c     2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/ioport.c      2005-07-28 13:17:07.000000000 -0700
@@ -15,6 +15,7 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/thread_info.h>
+#include <asm-xen/xen-public/physdev.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
@@ -56,10 +57,9 @@ static void set_bitmap(unsigned long *bi
  */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 {
-       unsigned long i, max_long, bytes, bytes_updated;
        struct thread_struct * t = &current->thread;
-       struct tss_struct * tss;
        unsigned long *bitmap;
+       physdev_op_t op;
 
        if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
                return -EINVAL;
@@ -78,41 +78,15 @@ asmlinkage long sys_ioperm(unsigned long
 
                memset(bitmap, 0xff, IO_BITMAP_BYTES);
                t->io_bitmap_ptr = bitmap;
-       }
 
-       /*
-        * do it in the per-thread copy and in the TSS ...
-        *
-        * Disable preemption via get_cpu() - we must not switch away
-        * because the ->io_bitmap_max value must match the bitmap
-        * contents:
-        */
-       tss = &per_cpu(init_tss, get_cpu());
+               op.cmd = PHYSDEVOP_SET_IOBITMAP;
+               op.u.set_iobitmap.bitmap   = (unsigned long)bitmap;
+               op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
+               HYPERVISOR_physdev_op(&op);
+       }
 
        set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 
-       /*
-        * Search for a (possibly new) maximum. This is simple and stupid,
-        * to keep it obviously correct:
-        */
-       max_long = 0;
-       for (i = 0; i < IO_BITMAP_LONGS; i++)
-               if (t->io_bitmap_ptr[i] != ~0UL)
-                       max_long = i;
-
-       bytes = (max_long + 1) * sizeof(long);
-       bytes_updated = max(bytes, t->io_bitmap_max);
-
-       t->io_bitmap_max = bytes;
-
-       /*
-        * Sets the lazy trigger so that the next I/O operation will
-        * reload the correct bitmap.
-        */
-       tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-
-       put_cpu();
-
        return 0;
 }
 
@@ -127,21 +101,29 @@ asmlinkage long sys_ioperm(unsigned long
  * code.
  */
 
-asmlinkage long sys_iopl(unsigned long unused)
+asmlinkage long sys_iopl(unsigned int new_io_pl)
 {
-       volatile struct pt_regs * regs = (struct pt_regs *) &unused;
-       unsigned int level = regs->ebx;
-       unsigned int old = (regs->eflags >> 12) & 3;
+       unsigned int old_io_pl = current->thread.io_pl;
+       physdev_op_t op;
 
-       if (level > 3)
+       if (new_io_pl > 3)
                return -EINVAL;
-       /* Trying to gain more privileges? */
-       if (level > old) {
-               if (!capable(CAP_SYS_RAWIO))
-                       return -EPERM;
-       }
-       regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
-       /* Make sure we return the long way (not sysenter) */
-       set_thread_flag(TIF_IRET);
+
+       /* Need "raw I/O" privileges for direct port access. */
+       if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       /* Maintain OS privileges even if user attempts to relinquish them. */
+       if (new_io_pl == 0)
+               new_io_pl = 1;
+
+       /* Change our version of the privilege levels. */
+       current->thread.io_pl = new_io_pl;
+
+       /* Force the change at ring 0. */
+       op.cmd             = PHYSDEVOP_SET_IOPL;
+       op.u.set_iopl.iopl = new_io_pl;
+       HYPERVISOR_physdev_op(&op);
+
        return 0;
 }
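
sys_ioperm() and sys_iopl() keep their user-visible semantics; only the
mechanism changes. The bitmap and the privilege level now have to be installed
by the hypervisor (PHYSDEVOP_SET_IOBITMAP, PHYSDEVOP_SET_IOPL) because the
guest kernel no longer owns the TSS or EFLAGS.IOPL, and a requested IOPL of 0
is quietly raised to 1 so the ring-1 kernel keeps its own port access. From
userspace the classic sequence is unchanged (root or CAP_SYS_RAWIO required):

/* Userspace view, unchanged by this patch.  x86 only; build with
 * gcc -O demo.c (the port helpers in sys/io.h are inline functions). */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
        if (iopl(3)) {                  /* lands in the new sys_iopl() */
                perror("iopl");
                return 1;
        }
        outb(0x00, 0x80);               /* harmless write to the POST port */
        return 0;
}
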
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/irq.c linux-2.6-xen-sparse/arch/i386/kernel/irq.c
--- pristine-linux-2.6.12/arch/i386/kernel/irq.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/irq.c 2005-07-28 13:17:07.000000000 -0700
@@ -15,6 +15,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -51,7 +54,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
 fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {      
        /* high bits used in ret_from_ code */
-       int irq = regs->orig_eax & 0xff;
+       int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
 #ifdef CONFIG_4KSTACKS
        union irq_ctx *curctx, *irqctx;
        u32 *isp;
@@ -210,9 +213,8 @@ int show_interrupts(struct seq_file *p, 
 
        if (i == 0) {
                seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
+               for_each_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
                seq_putc(p, '\n');
        }
 
@@ -225,9 +227,8 @@ int show_interrupts(struct seq_file *p, 
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
                seq_printf(p, " %14s", irq_desc[i].handler->typename);
                seq_printf(p, "  %s", action->name);
@@ -240,16 +241,13 @@ skip:
                spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        } else if (i == NR_IRQS) {
                seq_printf(p, "NMI: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", nmi_count(j));
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", nmi_count(j));
                seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
                seq_printf(p, "LOC: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ",
-                                       per_cpu(irq_stat,j).apic_timer_irqs);
+               for_each_cpu(j)
+                       seq_printf(p, "%10u ", per_cpu(irq_stat, j).apic_timer_irqs);
                seq_putc(p, '\n');
 #endif
                seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +257,43 @@ skip:
        }
        return 0;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+       unsigned int irq;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               cpumask_t mask;
+               if (irq == 2)
+                       continue;
+
+               cpus_and(mask, irq_affinity[irq], map);
+               if (any_online_cpu(mask) == NR_CPUS) {
+                       printk("Breaking affinity for irq %i\n", irq);
+                       mask = map;
+               }
+               if (irq_desc[irq].handler->set_affinity)
+                       irq_desc[irq].handler->set_affinity(irq, mask);
+               else if (irq_desc[irq].action)
+                       printk("Cannot set affinity for irq %i\n", irq);
+       }
+
+#if 0
+       barrier();
+       /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+          [note the nop - the interrupt-enable boundary on x86 is two
+          instructions from sti] - to flush out pending hardirqs and
+          IPIs. After this point nothing is supposed to reach this CPU." */
+       __asm__ __volatile__("sti; nop; cli");
+       barrier();
+#else
+       /* That doesn't seem sufficient.  Give it 1ms. */
+       local_irq_enable();
+       mdelay(1);
+       local_irq_disable();
+#endif
+}
+#endif
+
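
fixup_irqs() is CPU-hotplug support: before a CPU goes down, every IRQ whose
affinity no longer intersects the surviving set is rebound to the whole
surviving map, then interrupts are enabled for a millisecond to drain anything
already in flight (the commented-out sti; nop; cli sequence was judged
insufficient). The mask arithmetic in isolation, with a plain bitmask standing
in for cpumask_t:

#include <stdio.h>

/* Keep the old binding if it still intersects the surviving CPUs,
 * otherwise break affinity and allow the whole surviving set. */
static unsigned long fixup_affinity(unsigned long affinity,
                                    unsigned long surviving)
{
        unsigned long mask = affinity & surviving;      /* cpus_and() */

        if (mask == 0)          /* no target CPU left online */
                mask = surviving;
        return mask;
}

int main(void)
{
        /* IRQ bound to CPU2 only while CPU2 goes offline: falls back
         * to the survivors, CPU0|CPU1. */
        printf("%#lx\n", fixup_affinity(1UL << 2, 0x3UL));
        return 0;
}
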
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/ldt.c linux-2.6-xen-sparse/arch/i386/kernel/ldt.c
--- pristine-linux-2.6.12/arch/i386/kernel/ldt.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/ldt.c 2005-07-28 13:17:07.000000000 -0700
@@ -18,6 +18,7 @@
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)
@@ -58,16 +59,20 @@ static int alloc_ldt(mm_context_t *pc, i
 #ifdef CONFIG_SMP
                cpumask_t mask;
                preempt_disable();
+#endif
+               make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
+                                   PAGE_SIZE);
                load_LDT(pc);
+#ifdef CONFIG_SMP
                mask = cpumask_of_cpu(smp_processor_id());
                if (!cpus_equal(current->mm->cpu_vm_mask, mask))
                        smp_call_function(flush_ldt, NULL, 1, 1);
                preempt_enable();
-#else
-               load_LDT(pc);
 #endif
        }
        if (oldsize) {
+               make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
+                       PAGE_SIZE);
                if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
                        vfree(oldldt);
                else
@@ -82,6 +87,8 @@ static inline int copy_ldt(mm_context_t 
        if (err < 0)
                return err;
        memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+       make_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
+                           PAGE_SIZE);
        return 0;
 }
 
@@ -94,14 +101,19 @@ int init_new_context(struct task_struct 
        struct mm_struct * old_mm;
        int retval = 0;
 
+       memset(&mm->context, 0, sizeof(mm->context));
        init_MUTEX(&mm->context.sem);
-       mm->context.size = 0;
        old_mm = current->mm;
        if (old_mm && old_mm->context.size > 0) {
                down(&old_mm->context.sem);
                retval = copy_ldt(&mm->context, &old_mm->context);
                up(&old_mm->context.sem);
        }
+       if (retval == 0) {
+               spin_lock(&mm_unpinned_lock);
+               list_add(&mm->context.unpinned, &mm_unpinned);
+               spin_unlock(&mm_unpinned_lock);
+       }
        return retval;
 }
 
@@ -113,12 +125,20 @@ void destroy_context(struct mm_struct *m
        if (mm->context.size) {
                if (mm == current->active_mm)
                        clear_LDT();
+               make_pages_writable(mm->context.ldt, 
+                                   (mm->context.size * LDT_ENTRY_SIZE) /
+                                   PAGE_SIZE);
                if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
                        vfree(mm->context.ldt);
                else
                        kfree(mm->context.ldt);
                mm->context.size = 0;
        }
+       if (!mm->context.pinned) {
+               spin_lock(&mm_unpinned_lock);
+               list_del(&mm->context.unpinned);
+               spin_unlock(&mm_unpinned_lock);
+       }
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
@@ -178,6 +198,7 @@ static int write_ldt(void __user * ptr, 
 {
        struct mm_struct * mm = current->mm;
        __u32 entry_1, entry_2, *lp;
+       unsigned long mach_lp;
        int error;
        struct user_desc ldt_info;
 
@@ -206,6 +227,7 @@ static int write_ldt(void __user * ptr, 
        }
 
        lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
+       mach_lp = arbitrary_virt_to_machine(lp);
 
        /* Allow LDTs to be cleared by the user. */
        if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
@@ -223,9 +245,7 @@ static int write_ldt(void __user * ptr, 
 
        /* Install the new entry ...  */
 install:
-       *lp     = entry_1;
-       *(lp+1) = entry_2;
-       error = 0;
+       error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
 
 out_unlock:
        up(&mm->context.sem);
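
The pattern worth noting in the ldt.c hunks: Xen requires the pages backing a
loaded LDT to be read-only in the guest (hence the make_pages_readonly /
make_pages_writable pairs), and individual descriptors must go through a
hypercall instead of a direct store. Boiled down to a sketch -- hypothetical
helper name, but the same calls as in write_ldt() above:

	static int install_ldt_entry(void *lp, __u32 entry_1, __u32 entry_2)
	{
		/* Sketch only: the LDT page is read-only to us, so Xen
		 * validates the descriptor and performs the store itself. */
		unsigned long mach_lp = arbitrary_virt_to_machine(lp);
		return HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
	}
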
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/Makefile linux-2.6-xen-sparse/arch/i386/kernel/Makefile
--- pristine-linux-2.6.12/arch/i386/kernel/Makefile     2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/Makefile      2005-07-28 13:17:07.000000000 -0700
@@ -2,41 +2,52 @@
 # Makefile for the linux kernel.
 #
 
-extra-y := head.o init_task.o vmlinux.lds
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
-obj-y  := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
-               ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
-               pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-               doublefault.o quirks.o
+CFLAGS += -Iarch/$(XENARCH)/kernel
+
+extra-y := head.o init_task.o
+
+obj-y  := process.o signal.o entry.o traps.o \
+               time.o ioport.o ldt.o setup.o \
+               pci-dma.o i386_ksyms.o irq.o quirks.o
+
+c-obj-y        := semaphore.o vm86.o \
+               ptrace.o sys_i386.o \
+               i387.o dmi_scan.o bootflag.o \
+               doublefault.o
+s-obj-y        :=
 
 obj-y                          += cpu/
-obj-y                          += timers/
+#obj-y                         += timers/
 obj-$(CONFIG_ACPI_BOOT)                += acpi/
-obj-$(CONFIG_X86_BIOS_REBOOT)  += reboot.o
-obj-$(CONFIG_MCA)              += mca.o
-obj-$(CONFIG_X86_MSR)          += msr.o
-obj-$(CONFIG_X86_CPUID)                += cpuid.o
+#c-obj-$(CONFIG_X86_BIOS_REBOOT)       += reboot.o
+c-obj-$(CONFIG_MCA)            += mca.o
+c-obj-$(CONFIG_X86_MSR)                += msr.o
+c-obj-$(CONFIG_X86_CPUID)      += cpuid.o
 obj-$(CONFIG_MICROCODE)                += microcode.o
-obj-$(CONFIG_APM)              += apm.o
+c-obj-$(CONFIG_APM)            += apm.o
 obj-$(CONFIG_X86_SMP)          += smp.o smpboot.o
-obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline.o
+#obj-$(CONFIG_X86_TRAMPOLINE)  += trampoline.o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
+c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
-obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o
-obj-$(CONFIG_X86_NUMAQ)                += numaq.o
-obj-$(CONFIG_X86_SUMMIT_NUMA)  += summit.o
-obj-$(CONFIG_KPROBES)          += kprobes.o
-obj-$(CONFIG_MODULES)          += module.o
-obj-y                          += sysenter.o vsyscall.o
-obj-$(CONFIG_ACPI_SRAT)        += srat.o
-obj-$(CONFIG_HPET_TIMER)       += time_hpet.o
-obj-$(CONFIG_EFI)              += efi.o efi_stub.o
-obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
+c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
+c-obj-$(CONFIG_X86_NUMAQ)      += numaq.o
+c-obj-$(CONFIG_X86_SUMMIT_NUMA)        += summit.o
+c-obj-$(CONFIG_MODULES)                += module.o
+c-obj-y                                += sysenter.o
+obj-y                          += vsyscall.o
+c-obj-$(CONFIG_ACPI_SRAT)      += srat.o
+c-obj-$(CONFIG_HPET_TIMER)     += time_hpet.o
+c-obj-$(CONFIG_EFI)            += efi.o efi_stub.o
+c-obj-$(CONFIG_EARLY_PRINTK)   += early_printk.o
+c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
 
 EXTRA_AFLAGS   := -traditional
 
-obj-$(CONFIG_SCx200)           += scx200.o
+c-obj-$(CONFIG_SCx200)         += scx200.o
 
 # vsyscall.o contains the vsyscall DSO images as __initdata.
 # We must build both images before we can assemble it.
@@ -58,7 +69,7 @@ SYSCFLAGS_vsyscall-int80.so   = $(vsyscall
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-                     $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
+                     $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -70,5 +81,21 @@ $(obj)/built-in.o: ld_flags += -R $(obj)
 
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-                       $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
+                       $(obj)/vsyscall-sysenter.o FORCE
        $(call if_changed,syscall)
+
+c-link := init_task.o
+s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
+       @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
+
+$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
+
+$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
+
+obj-y  += $(c-obj-y) $(s-obj-y)
+obj-m  += $(c-obj-m)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-m) $(c-obj-) $(c-link))
+clean-files += $(patsubst %.o,%.S,$(s-obj-y) $(s-obj-) $(s-link))
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/microcode.c linux-2.6-xen-sparse/arch/i386/kernel/microcode.c
--- pristine-linux-2.6.12/arch/i386/kernel/microcode.c  2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/microcode.c   2005-07-28 13:17:07.000000000 -0700
@@ -18,55 +18,6 @@
  *     modify it under the terms of the GNU General Public License
  *     as published by the Free Software Foundation; either version
  *     2 of the License, or (at your option) any later version.
- *
- *     1.0     16 Feb 2000, Tigran Aivazian <tigran@xxxxxxx>
- *             Initial release.
- *     1.01    18 Feb 2000, Tigran Aivazian <tigran@xxxxxxx>
- *             Added read() support + cleanups.
- *     1.02    21 Feb 2000, Tigran Aivazian <tigran@xxxxxxx>
- *             Added 'device trimming' support. open(O_WRONLY) zeroes
- *             and frees the saved copy of applied microcode.
- *     1.03    29 Feb 2000, Tigran Aivazian <tigran@xxxxxxx>
- *             Made to use devfs (/dev/cpu/microcode) + cleanups.
- *     1.04    06 Jun 2000, Simon Trimmer <simon@xxxxxxxxxxx>
- *             Added misc device support (now uses both devfs and misc).
- *             Added MICROCODE_IOCFREE ioctl to clear memory.
- *     1.05    09 Jun 2000, Simon Trimmer <simon@xxxxxxxxxxx>
- *             Messages for error cases (non Intel & no suitable microcode).
- *     1.06    03 Aug 2000, Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Removed ->release(). Removed exclusive open and status bitmap.
- *             Added microcode_rwsem to serialize read()/write()/ioctl().
- *             Removed global kernel lock usage.
- *     1.07    07 Sep 2000, Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Write 0 to 0x8B msr and then cpuid before reading revision,
- *             so that it works even if there were no update done by the
- *             BIOS. Otherwise, reading from 0x8B gives junk (which happened
- *             to be 0 on my machine which is why it worked even when I
- *             disabled update by the BIOS)
- *             Thanks to Eric W. Biederman <ebiederman@xxxxxxxx> for the fix.
- *     1.08    11 Dec 2000, Richard Schaal <richard.schaal@xxxxxxxxx> and
- *                          Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Intel Pentium 4 processor support and bugfixes.
- *     1.09    30 Oct 2001, Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Bugfix for HT (Hyper-Threading) enabled processors
- *             whereby processor resources are shared by all logical processors
- *             in a single CPU package.
- *     1.10    28 Feb 2002 Asit K Mallick <asit.k.mallick@xxxxxxxxx> and
- *             Tigran Aivazian <tigran@xxxxxxxxxxx>,
- *             Serialize updates as required on HT processors due to speculative
- *             nature of implementation.
- *     1.11    22 Mar 2002 Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Fix the panic when writing zero-length microcode chunk.
- *     1.12    29 Sep 2003 Nitin Kamble <nitin.a.kamble@xxxxxxxxx>, 
- *             Jun Nakajima <jun.nakajima@xxxxxxxxx>
- *             Support for the microcode updates in the new format.
- *     1.13    10 Oct 2003 Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
- *             because we no longer hold a copy of applied microcode 
- *             in kernel memory.
- *     1.14    25 Jun 2004 Tigran Aivazian <tigran@xxxxxxxxxxx>
- *             Fix sigmatch() macro to handle old CPUs with pf == 0.
- *             Thanks to Stuart Swales for pointing out this bug.
  */
 
 //#define DEBUG /* pr_debug */
@@ -79,6 +30,7 @@
 #include <linux/miscdevice.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
+#include <linux/syscalls.h>
 
 #include <asm/msr.h>
 #include <asm/uaccess.h>
@@ -88,342 +40,41 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Mi
 MODULE_AUTHOR("Tigran Aivazian <tigran@xxxxxxxxxxx>");
 MODULE_LICENSE("GPL");
 
-#define MICROCODE_VERSION      "1.14"
+#define MICROCODE_VERSION      "1.14-xen"
 
 #define DEFAULT_UCODE_DATASIZE         (2000)    /* 2000 bytes */
 #define MC_HEADER_SIZE         (sizeof (microcode_header_t))     /* 48 bytes */
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-#define EXT_HEADER_SIZE                (sizeof (struct extended_sigtable)) /* 20 bytes */
-#define EXT_SIGNATURE_SIZE     (sizeof (struct extended_signature)) /* 12 bytes */
-#define DWSIZE                 (sizeof (u32))
-#define get_totalsize(mc) \
-       (((microcode_t *)mc)->hdr.totalsize ? \
-        ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
-#define get_datasize(mc) \
-       (((microcode_t *)mc)->hdr.datasize ? \
-        ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define sigmatch(s1, s2, p1, p2) \
-       (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
 
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DECLARE_MUTEX(microcode_sem);
 
 static void __user *user_buffer;       /* user area microcode data buffer */
 static unsigned int user_buffer_size;  /* it's size */
-
-typedef enum mc_error_code {
-       MC_SUCCESS      = 0,
-       MC_NOTFOUND     = 1,
-       MC_MARKED       = 2,
-       MC_ALLOCATED    = 3,
-} mc_error_code_t;
-
-static struct ucode_cpu_info {
-       unsigned int sig;
-       unsigned int pf;
-       unsigned int rev;
-       unsigned int cksum;
-       mc_error_code_t err;
-       microcode_t *mc;
-} ucode_cpu_info[NR_CPUS];
                                
 static int microcode_open (struct inode *unused1, struct file *unused2)
 {
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-static void collect_cpu_info (void *unused)
-{
-       int cpu_num = smp_processor_id();
-       struct cpuinfo_x86 *c = cpu_data + cpu_num;
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-       unsigned int val[2];
-
-       uci->sig = uci->pf = uci->rev = uci->cksum = 0;
-       uci->err = MC_NOTFOUND; 
-       uci->mc = NULL;
-
-       if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
-               cpu_has(c, X86_FEATURE_IA64)) {
-               printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
-               return;
-       } else {
-               uci->sig = cpuid_eax(0x00000001);
-
-               if ((c->x86_model >= 5) || (c->x86 > 6)) {
-                       /* get processor flags from MSR 0x17 */
-                       rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-                       uci->pf = 1 << ((val[1] >> 18) & 7);
-               }
-       }
-
-       wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-       __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
-       /* get the current revision from MSR 0x8B */
-       rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
-       pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-                       uci->sig, uci->pf, uci->rev);
-}
-
-static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
-{
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
-       pr_debug("Microcode Found.\n");
-       pr_debug("   Header Revision 0x%x\n", mc_header->hdrver);
-       pr_debug("   Loader Revision 0x%x\n", mc_header->ldrver);
-       pr_debug("   Revision 0x%x \n", mc_header->rev);
-       pr_debug("   Date %x/%x/%x\n",
-               ((mc_header->date >> 24 ) & 0xff),
-               ((mc_header->date >> 16 ) & 0xff),
-               (mc_header->date & 0xFFFF));
-       pr_debug("   Signature 0x%x\n", sig);
-       pr_debug("   Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
-               ((sig >> 12) & 0x3),
-               ((sig >> 8) & 0xf),
-               ((sig >> 4) & 0xf),
-               ((sig & 0xf)));
-       pr_debug("   Processor Flags 0x%x\n", pf);
-       pr_debug("   Checksum 0x%x\n", cksum);
-
-       if (mc_header->rev < uci->rev) {
-               printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision"
-                      " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
-               goto out;
-       } else if (mc_header->rev == uci->rev) {
-               /* notify the caller of success on this cpu */
-               uci->err = MC_SUCCESS;
-               printk(KERN_ERR "microcode: CPU%d already at revision"
-                       " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
-               goto out;
-       }
-
-       pr_debug("microcode: CPU%d found a matching microcode update with "
-               " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
-       uci->cksum = cksum;
-       uci->pf = pf; /* keep the original mc pf for cksum calculation */
-       uci->err = MC_MARKED; /* found the match */
-out:
-       return;
-}
-
-static int find_matching_ucodes (void) 
-{
-       int cursor = 0;
-       int error = 0;
-
-       while (cursor + MC_HEADER_SIZE < user_buffer_size) {
-               microcode_header_t mc_header;
-               void *newmc = NULL;
-               int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
-
-               if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
-                       printk(KERN_ERR "microcode: error! Can not read user data\n");
-                       error = -EFAULT;
-                       goto out;
-               }
-
-               total_size = get_totalsize(&mc_header);
-               if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
-                       printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-                       error = -EINVAL;
-                       goto out;
-               }
-
-               data_size = get_datasize(&mc_header);
-               if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
-                       printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-                       error = -EINVAL;
-                       goto out;
-               }
-
-               if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
-                       printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
-                       error = -EINVAL;
-                       goto out;
-               }
-               
-               for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
-                       struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-                       if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
-                               continue;
-
-                       if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf))
-                               mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
-               }
-
-               ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-               if (ext_table_size) {
-                       struct extended_sigtable ext_header;
-                       struct extended_signature ext_sig;
-                       int ext_sigcount;
-
-                       if ((ext_table_size < EXT_HEADER_SIZE) 
-                                       || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-                               printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-                               error = -EINVAL;
-                               goto out;
-                       }
-                       if (copy_from_user(&ext_header, user_buffer + cursor 
-                                       + MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
-                               printk(KERN_ERR "microcode: error! Can not read user data\n");
-                               error = -EFAULT;
-                               goto out;
-                       }
-                       if (ext_table_size != exttable_size(&ext_header)) {
-                               printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-                               error = -EFAULT;
-                               goto out;
-                       }
-
-                       ext_sigcount = ext_header.count;
-                       
-                       for (i = 0; i < ext_sigcount; i++) {
-                               if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE 
-                                               + EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
-                                       printk(KERN_ERR "microcode: error! Can not read user data\n");
-                                       error = -EFAULT;
-                                       goto out;
-                               }
-                               for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) {
-                                       struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-                                       if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
-                                               continue;
-                                       if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) {
-                                               mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
-                                       }
-                               }
-                       }
-               }
-               /* now check if any cpu has matched */
-               for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) {
-                       if (ucode_cpu_info[cpu_num].err == MC_MARKED) { 
-                               struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-                               if (!allocated_flag) {
-                                       allocated_flag = 1;
-                                       newmc = vmalloc(total_size);
-                                       if (!newmc) {
-                                               printk(KERN_ERR "microcode: error! Can not allocate memory\n");
-                                               error = -ENOMEM;
-                                               goto out;
-                                       }
-                                       if (copy_from_user(newmc + MC_HEADER_SIZE, 
-                                                               user_buffer + cursor + MC_HEADER_SIZE, 
-                                                               total_size - MC_HEADER_SIZE)) {
-                                               printk(KERN_ERR "microcode: error! Can not read user data\n");
-                                               vfree(newmc);
-                                               error = -EFAULT;
-                                               goto out;
-                                       }
-                                       memcpy(newmc, &mc_header, MC_HEADER_SIZE);
-                                       /* check extended table checksum */
-                                       if (ext_table_size) {
-                                               int ext_table_sum = 0;
-                                               int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
-                                               i = ext_table_size / DWSIZE;
-                                               while (i--) ext_table_sum += ext_tablep[i];
-                                               if (ext_table_sum) {
-                                                       printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
-                                                       vfree(newmc);
-                                                       error = -EINVAL;
-                                                       goto out;
-                                               }
-                                       }
-
-                                       /* calculate the checksum */
-                                       i = (MC_HEADER_SIZE + data_size) / DWSIZE;
-                                       while (i--) sum += ((int *)newmc)[i];
-                                       sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
-                               }
-                               ucode_cpu_info[cpu_num].mc = newmc;
-                               ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
-                               if (sum + uci->sig + uci->pf + uci->cksum != 0) {
-                                       printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
-                                       error = -EINVAL;
-                                       goto out;
-                               }
-                       }
-               }
-               cursor += total_size; /* goto the next update patch */
-       } /* end of while */
-out:
-       return error;
-}
-
-static void do_update_one (void * unused)
-{
-       unsigned long flags;
-       unsigned int val[2];
-       int cpu_num = smp_processor_id();
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
-       if (uci->mc == NULL) {
-               printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
-               return;
-       }
-
-       /* serialize access to the physical write to MSR 0x79 */
-       spin_lock_irqsave(&microcode_update_lock, flags);          
-
-       /* write microcode via MSR 0x79 */
-       wrmsr(MSR_IA32_UCODE_WRITE,
-               (unsigned long) uci->mc->bits, 
-               (unsigned long) uci->mc->bits >> 16 >> 16);
-       wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-       __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
-       /* get the current revision from MSR 0x8B */
-       rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-       /* notify the caller of success on this cpu */
-       uci->err = MC_SUCCESS;
-       spin_unlock_irqrestore(&microcode_update_lock, flags);
-       printk(KERN_INFO "microcode: CPU%d updated from revision "
-              "0x%x to 0x%x, date = %08x \n", 
-              cpu_num, uci->rev, val[1], uci->mc->hdr.date);
-       return;
-}
 
 static int do_microcode_update (void)
 {
-       int i, error;
+       int err;
+       dom0_op_t op;
 
-       if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
-               printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
-               error = -EIO;
-               goto out;
-       }
+       err = sys_mlock((unsigned long)user_buffer, user_buffer_size);
+       if (err != 0)
+               return err;
 
-       if ((error = find_matching_ucodes())) {
-               printk(KERN_ERR "microcode: Error in the microcode data\n");
-               goto out_free;
-       }
+       op.cmd = DOM0_MICROCODE;
+       op.u.microcode.data = user_buffer;
+       op.u.microcode.length = user_buffer_size;
+       err = HYPERVISOR_dom0_op(&op);
 
-       if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
-               printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
-               error = -EIO;
-       }
+       (void)sys_munlock((unsigned long)user_buffer, user_buffer_size);
 
-out_free:
-       for (i = 0; i < num_online_cpus(); i++) {
-               if (ucode_cpu_info[i].mc) {
-                       int j;
-                       void *tmp = ucode_cpu_info[i].mc;
-                       vfree(tmp);
-                       for (j = i; j < num_online_cpus(); j++) {
-                               if (ucode_cpu_info[j].mc == tmp)
-                                       ucode_cpu_info[j].mc = NULL;
-                       }
-               }
-       }
-out:
-       return error;
+       return err;
 }
 
 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
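
Most of microcode.c simply disappears because the update is a machine-wide
operation that belongs to the hypervisor: the guest pins the user buffer and
hands it to Xen, which does the per-CPU matching and MSR writes itself. The
whole driver reduces to the pattern below -- a sketch with a hypothetical
wrapper name, using the same calls as do_microcode_update() above:

	static int xen_microcode_update(void __user *buf, size_t len)
	{
		dom0_op_t op;
		int err;

		/* Pin the pages so Xen can read the buffer safely. */
		err = sys_mlock((unsigned long)buf, len);
		if (err != 0)
			return err;

		op.cmd = DOM0_MICROCODE;
		op.u.microcode.data = buf;
		op.u.microcode.length = len;
		err = HYPERVISOR_dom0_op(&op);

		(void)sys_munlock((unsigned long)buf, len);
		return err;
	}
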
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/mpparse.c linux-2.6-xen-sparse/arch/i386/kernel/mpparse.c
--- pristine-linux-2.6.12/arch/i386/kernel/mpparse.c    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/mpparse.c     2005-07-28 13:17:07.000000000 -0700
@@ -109,7 +109,7 @@ static int MP_valid_apicid(int apicid, i
 {
        return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf;
 }
-#else
+#elif !defined(CONFIG_XEN)
 static int MP_valid_apicid(int apicid, int version)
 {
        if (version >= 0x14)
@@ -119,6 +119,7 @@ static int MP_valid_apicid(int apicid, i
 }
 #endif
 
+#ifndef CONFIG_XEN
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
        int ver, apicid;
@@ -217,6 +218,12 @@ static void __init MP_processor_info (st
        apic_version[m->mpc_apicid] = ver;
        bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
 }
+#else
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       num_processors++;
+}
+#endif /* CONFIG_XEN */
 
 static void __init MP_bus_info (struct mpc_config_bus *m)
 {
@@ -690,7 +697,7 @@ void __init get_smp_config (void)
                 * Read the physical hardware table.  Anything here will
                 * override the defaults.
                 */
-               if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+               if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
                        smp_found_config = 0;
                        printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
                        printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
@@ -725,7 +732,7 @@ void __init get_smp_config (void)
 
 static int __init smp_scan_config (unsigned long base, unsigned long length)
 {
-       unsigned long *bp = phys_to_virt(base);
+       unsigned long *bp = isa_bus_to_virt(base);
        struct intel_mp_floating *mpf;
 
        Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
@@ -741,6 +748,7 @@ static int __init smp_scan_config (unsig
                                || (mpf->mpf_specification == 4)) ) {
 
                        smp_found_config = 1;
+#ifndef CONFIG_XEN
                        printk(KERN_INFO "found SMP MP-table at %08lx\n",
                                                virt_to_phys(mpf));
                        reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
@@ -760,6 +768,10 @@ static int __init smp_scan_config (unsig
                                        size = end - mpf->mpf_physptr;
                                reserve_bootmem(mpf->mpf_physptr, size);
                        }
+#else
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                               ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base);
+#endif
 
                        mpf_found = mpf;
                        return 1;
@@ -803,9 +815,11 @@ void __init find_smp_config (void)
         * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
         */
 
+#ifndef CONFIG_XEN
        address = get_bios_ebda();
        if (address)
                smp_scan_config(address, 0x400);
+#endif
 }
 
 /* --------------------------------------------------------------------------
@@ -817,14 +831,14 @@ void __init find_smp_config (void)
 void __init mp_register_lapic_address (
        u64                     address)
 {
+#ifndef CONFIG_XEN
        mp_lapic_addr = (unsigned long) address;
 
-       set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
        if (boot_cpu_physical_apicid == -1U)
                boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 
        Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
 }
 
 
@@ -844,6 +858,7 @@ void __init mp_register_lapic (
        if (id == boot_cpu_physical_apicid)
                boot_cpu = 1;
 
+#ifndef CONFIG_XEN
        processor.mpc_type = MP_PROCESSOR;
        processor.mpc_apicid = id;
        processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -854,6 +869,7 @@ void __init mp_register_lapic (
        processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
        processor.mpc_reserved[0] = 0;
        processor.mpc_reserved[1] = 0;
+#endif
 
        MP_processor_info(&processor);
 }
@@ -913,7 +929,6 @@ void __init mp_register_ioapic (
        mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
        mp_ioapics[idx].mpc_apicaddr = address;
 
-       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
        mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
        mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
        
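
The phys_to_virt() -> isa_bus_to_virt() substitutions are the key mpparse
change: a Xen guest's pseudo-physical addresses are not machine addresses, so
tables living in the machine's BIOS/ISA region below 1MB have to be reached
through the dedicated ISA mapping rather than the kernel direct map. Roughly,
with a hypothetical helper:

	static void *map_bios_area(unsigned long base)
	{
		/* Sketch: under Xen the direct map would translate 'base'
		 * as a pseudo-physical frame and miss the real BIOS data. */
		return isa_bus_to_virt(base);   /* not phys_to_virt(base) */
	}
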
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/pci-dma.c linux-2.6-xen-sparse/arch/i386/kernel/pci-dma.c
--- pristine-linux-2.6.12/arch/i386/kernel/pci-dma.c    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/pci-dma.c     2005-07-28 13:17:07.000000000 -0700
@@ -11,7 +11,10 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/pci.h>
+#include <linux/version.h>
 #include <asm/io.h>
+#include <asm-xen/balloon.h>
+#include <asm/tlbflush.h>
 
 struct dma_coherent_mem {
        void            *virt_base;
@@ -26,7 +29,8 @@ void *dma_alloc_coherent(struct device *
 {
        void *ret;
        struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-       int order = get_order(size);
+       unsigned int order = get_order(size);
+       unsigned long vstart;
        /* ignore region specifiers */
        gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
 
@@ -46,11 +50,14 @@ void *dma_alloc_coherent(struct device *
        if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
                gfp |= GFP_DMA;
 
-       ret = (void *)__get_free_pages(gfp, order);
+       vstart = __get_free_pages(gfp, order);
+       ret = (void *)vstart;
 
        if (ret != NULL) {
+               xen_contig_memory(vstart, order);
+
                memset(ret, 0, size);
-               *dma_handle = virt_to_phys(ret);
+               *dma_handle = virt_to_bus(ret);
        }
        return ret;
 }
@@ -145,3 +152,131 @@ void *dma_mark_declared_memory_occupied(
        return mem->virt_base + (pos << PAGE_SHIFT);
 }
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+
+static LIST_HEAD(dma_map_head);
+static DEFINE_SPINLOCK(dma_map_lock);
+struct dma_map_entry {
+       struct list_head list;
+       dma_addr_t dma;
+       char *bounce, *host;
+       size_t size;
+};
+#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d)))
+
+dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+              enum dma_data_direction direction)
+{
+       struct dma_map_entry *ent;
+       void *bnc;
+       dma_addr_t dma;
+       unsigned long flags;
+
+       BUG_ON(direction == DMA_NONE);
+
+       /*
+        * Even if size is sub-page, the buffer may still straddle a page
+        * boundary. Take into account buffer start offset. All other calls are
+        * conservative and always search the dma_map list if it's non-empty.
+        */
+       if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) {
+               dma = virt_to_bus(ptr);
+       } else {
+               BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, 0)) == NULL);
+               BUG_ON((ent = kmalloc(sizeof(*ent), GFP_KERNEL)) == NULL);
+               if (direction != DMA_FROM_DEVICE)
+                       memcpy(bnc, ptr, size);
+               ent->dma    = dma;
+               ent->bounce = bnc;
+               ent->host   = ptr;
+               ent->size   = size;
+               spin_lock_irqsave(&dma_map_lock, flags);
+               list_add(&ent->list, &dma_map_head);
+               spin_unlock_irqrestore(&dma_map_lock, flags);
+       }
+
+       flush_write_buffers();
+       return dma;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+                enum dma_data_direction direction)
+{
+       struct dma_map_entry *ent;
+       unsigned long flags;
+
+       BUG_ON(direction == DMA_NONE);
+
+       /* Fast-path check: are there any multi-page DMA mappings? */
+       if (!list_empty(&dma_map_head)) {
+               spin_lock_irqsave(&dma_map_lock, flags);
+               list_for_each_entry ( ent, &dma_map_head, list ) {
+                       if (DMA_MAP_MATCHES(ent, dma_addr)) {
+                               list_del(&ent->list);
+                               break;
+                       }
+               }
+               spin_unlock_irqrestore(&dma_map_lock, flags);
+               if (&ent->list != &dma_map_head) {
+                       BUG_ON(dma_addr != ent->dma);
+                       BUG_ON(size != ent->size);
+                       if (direction != DMA_TO_DEVICE)
+                               memcpy(ent->host, ent->bounce, size);
+                       dma_free_coherent(dev, size, ent->bounce, ent->dma);
+                       kfree(ent);
+               }
+       }
+}
+EXPORT_SYMBOL(dma_unmap_single);
+
+void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+                       enum dma_data_direction direction)
+{
+       struct dma_map_entry *ent;
+       unsigned long flags, off;
+
+       /* Fast-path check: are there any multi-page DMA mappings? */
+       if (!list_empty(&dma_map_head)) {
+               spin_lock_irqsave(&dma_map_lock, flags);
+               list_for_each_entry ( ent, &dma_map_head, list )
+                       if (DMA_MAP_MATCHES(ent, dma_handle))
+                               break;
+               spin_unlock_irqrestore(&dma_map_lock, flags);
+               if (&ent->list != &dma_map_head) {
+                       off = dma_handle - ent->dma;
+                       BUG_ON((off + size) > ent->size);
+                       /*if (direction != DMA_TO_DEVICE)*/
+                               memcpy(ent->host+off, ent->bounce+off, size);
+               }
+       }
+}
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+                           enum dma_data_direction direction)
+{
+       struct dma_map_entry *ent;
+       unsigned long flags, off;
+
+       /* Fast-path check: are there any multi-page DMA mappings? */
+       if (!list_empty(&dma_map_head)) {
+               spin_lock_irqsave(&dma_map_lock, flags);
+               list_for_each_entry ( ent, &dma_map_head, list )
+                       if (DMA_MAP_MATCHES(ent, dma_handle))
+                               break;
+               spin_unlock_irqrestore(&dma_map_lock, flags);
+               if (&ent->list != &dma_map_head) {
+                       off = dma_handle - ent->dma;
+                       BUG_ON((off + size) > ent->size);
+                       /*if (direction != DMA_FROM_DEVICE)*/
+                               memcpy(ent->bounce+off, ent->host+off, size);
+               }
+       }
+
+       flush_write_buffers();
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
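
The additions above give i386/Xen a software bounce path: guest-contiguous
pages are not machine-contiguous, so any buffer straddling a page boundary is
copied into a dma_alloc_coherent() region (made machine-contiguous by
xen_contig_memory()) and tracked on dma_map_head, while sub-page buffers map
directly with virt_to_bus(). From a driver's point of view the usual sequence
is unchanged -- a sketch with a hypothetical caller:

	static void example_rx(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

		/* ... program the device with 'dma', wait for completion ... */

		/* For DMA_FROM_DEVICE the unmap copies bounce -> host. */
		dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);
	}
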
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/process.c linux-2.6-xen-sparse/arch/i386/kernel/process.c
--- pristine-linux-2.6.12/arch/i386/kernel/process.c    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/process.c     2005-07-28 13:17:07.000000000 -0700
@@ -13,6 +13,7 @@
 
 #include <stdarg.h>
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -47,6 +48,7 @@
 #include <asm/i387.h>
 #include <asm/irq.h>
 #include <asm/desc.h>
+#include <asm-xen/xen-public/physdev.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
@@ -54,6 +56,9 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 static int hlt_counter;
@@ -89,54 +94,48 @@ void enable_hlt(void)
 
 EXPORT_SYMBOL(enable_hlt);
 
-/*
- * We use this if we don't have any better
- * idle routine..
- */
-void default_idle(void)
+/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
+extern void stop_hz_timer(void);
+extern void start_hz_timer(void);
+void xen_idle(void)
 {
-       if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-               local_irq_disable();
-               if (!need_resched())
-                       safe_halt();
-               else
-                       local_irq_enable();
+       local_irq_disable();
+
+       if (need_resched()) {
+               local_irq_enable();
        } else {
-               cpu_relax();
+               stop_hz_timer();
+               HYPERVISOR_block(); /* implicit local_irq_enable() */
+               start_hz_timer();
        }
 }
 
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
-       int oldval;
-
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+       /* Ack it */
+       __get_cpu_var(cpu_state) = CPU_DEAD;
+
+       /* We shouldn't have to disable interrupts while dead, but
+        * some interrupts just don't seem to go away, and this makes
+        * it "work" for testing purposes. */
+       /* Death loop */
+       while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+               HYPERVISOR_yield();
+
+       local_irq_disable();
+       __flush_tlb_all();
+       cpu_set(smp_processor_id(), cpu_online_map);
        local_irq_enable();
-
-       /*
-        * Deal with another CPU just having chosen a thread to
-        * run here:
-        */
-       oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-       if (!oldval) {
-               set_thread_flag(TIF_POLLING_NRFLAG);
-               asm volatile(
-                       "2:"
-                       "testl %0, %1;"
-                       "rep; nop;"
-                       "je 2b;"
-                       : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
-
-               clear_thread_flag(TIF_POLLING_NRFLAG);
-       } else {
-               set_need_resched();
-       }
 }
+#else
+static inline void play_dead(void)
+{
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 
 /*
  * The idle thread. There's no useful work to be
@@ -146,22 +145,26 @@ static void poll_idle (void)
  */
 void cpu_idle (void)
 {
+       int cpu = _smp_processor_id();
+
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
-                       void (*idle)(void);
 
                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;
-
                        rmb();
-                       idle = pm_idle;
 
-                       if (!idle)
-                               idle = default_idle;
+                       if (cpu_is_offline(cpu)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
+                               /* Tell hypervisor to take vcpu down. */
+                               HYPERVISOR_vcpu_down(cpu);
+#endif
+                               play_dead();
+         }
 
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
-                       idle();
+                       xen_idle();
                }
                schedule();
        }
@@ -195,74 +198,18 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- */
-static void mwait_idle(void)
-{
-       local_irq_enable();
-
-       if (!need_resched()) {
-               set_thread_flag(TIF_POLLING_NRFLAG);
-               do {
-                       __monitor((void *)&current_thread_info()->flags, 0, 0);
-                       if (need_resched())
-                               break;
-                       __mwait(0, 0);
-               } while (!need_resched());
-               clear_thread_flag(TIF_POLLING_NRFLAG);
-       }
-}
-
-void __init select_idle_routine(const struct cpuinfo_x86 *c)
-{
-       if (cpu_has(c, X86_FEATURE_MWAIT)) {
-               printk("monitor/mwait feature present.\n");
-               /*
-                * Skip, if setup has overridden idle.
-                * One CPU supports mwait => All CPUs supports mwait
-                */
-               if (!pm_idle) {
-                       printk("using mwait in idle threads.\n");
-                       pm_idle = mwait_idle;
-               }
-       }
-}
-
-static int __init idle_setup (char *str)
-{
-       if (!strncmp(str, "poll", 4)) {
-               printk("using polling idle threads.\n");
-               pm_idle = poll_idle;
-#ifdef CONFIG_X86_SMP
-               if (smp_num_siblings > 1)
-                       printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
-#endif
-       } else if (!strncmp(str, "halt", 4)) {
-               printk("using halt in idle threads.\n");
-               pm_idle = default_idle;
-       }
-
-       boot_option_idle_override = 1;
-       return 1;
-}
-
-__setup("idle=", idle_setup);
+/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
+/* Always use xen_idle() instead. */
+void __init select_idle_routine(const struct cpuinfo_x86 *c) {}
 
 void show_regs(struct pt_regs * regs)
 {
-       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-
        printk("\n");
        printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
        printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
        print_symbol("EIP is at %s\n", regs->eip);
 
-       if (regs->xcs & 3)
+       if (regs->xcs & 2)
                printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
        printk(" EFLAGS: %08lx    %s  (%s)\n",
               regs->eflags, print_tainted(), system_utsname.release);
@@ -273,17 +220,6 @@ void show_regs(struct pt_regs * regs)
        printk(" DS: %04x ES: %04x\n",
                0xffff & regs->xds,0xffff & regs->xes);
 
-       __asm__("movl %%cr0, %0": "=r" (cr0));
-       __asm__("movl %%cr2, %0": "=r" (cr2));
-       __asm__("movl %%cr3, %0": "=r" (cr3));
-       /* This could fault if %cr4 does not exist */
-       __asm__("1: movl %%cr4, %0              \n"
-               "2:                             \n"
-               ".section __ex_table,\"a\"      \n"
-               ".long 1b,2b                    \n"
-               ".previous                      \n"
-               : "=r" (cr4): "0" (0));
-       printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
        show_trace(NULL, &regs->esp);
 }
 
@@ -336,20 +272,11 @@ void exit_thread(void)
 
        /* The process may have allocated an io port bitmap... nuke it. */
        if (unlikely(NULL != t->io_bitmap_ptr)) {
-               int cpu = get_cpu();
-               struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
+               physdev_op_t op = { 0 };
+               op.cmd = PHYSDEVOP_SET_IOBITMAP;
+               HYPERVISOR_physdev_op(&op);
                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
-               /*
-                * Careful, clear this in the TSS too:
-                */
-               memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
-               t->io_bitmap_max = 0;
-               tss->io_bitmap_owner = NULL;
-               tss->io_bitmap_max = 0;
-               tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-               put_cpu();
        }
 }
 
@@ -458,6 +385,8 @@ int copy_thread(int nr, unsigned long cl
                desc->b = LDT_entry_b(&info);
        }
 
+       p->thread.io_pl = current->thread.io_pl;
+
        err = 0;
  out:
        if (err && p->thread.io_bitmap_ptr) {
@@ -525,40 +454,10 @@ int dump_task_regs(struct task_struct *t
 
        elf_core_copy_regs(regs, &ptregs);
 
+       boot_option_idle_override = 1;
        return 1;
 }
 
-static inline void
-handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
-{
-       if (!next->io_bitmap_ptr) {
-               /*
-                * Disable the bitmap via an invalid offset. We still cache
-                * the previous bitmap owner and the IO bitmap contents:
-                */
-               tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-               return;
-       }
-       if (likely(next == tss->io_bitmap_owner)) {
-               /*
-                * Previous owner of the bitmap (hence the bitmap content)
-                * matches the next task, we dont have to do anything but
-                * to set a valid offset in the TSS:
-                */
-               tss->io_bitmap_base = IO_BITMAP_OFFSET;
-               return;
-       }
-       /*
-        * Lazy TSS's I/O bitmap copy. We set an invalid offset here
-        * and we let the task to get a GPF in case an I/O instruction
-        * is performed.  The handler of the GPF will verify that the
-        * faulting task has a valid I/O bitmap and, it true, does the
-        * real copy and restart the instruction.  This will save us
-        * redundant copies when the currently switched task does not
-        * perform any I/O during its timeslice.
-        */
-       tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
 
 /*
  *     switch_to(x,yn) should switch tasks from x to y.
@@ -593,32 +492,77 @@ struct task_struct fastcall * __switch_t
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       physdev_op_t iopl_op, iobmp_op;
+       multicall_entry_t _mcl[8], *mcl = _mcl;
 
-       /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+       /* XEN NOTE: FS/GS saved in switch_mm(), not here. */
 
-       __unlazy_fpu(prev_p);
+       /*
+        * This is basically '__unlazy_fpu', except that we queue a
+        * multicall to indicate FPU task switch, rather than
+        * synchronously trapping to Xen.
+        */
+       if (prev_p->thread_info->status & TS_USEDFPU) {
+               __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
+               mcl->op      = __HYPERVISOR_fpu_taskswitch;
+               mcl->args[0] = 1;
+               mcl++;
+       }
 
        /*
         * Reload esp0, LDT and the page table pointer:
+        * This is load_esp0(tss, next) with a multicall.
         */
-       load_esp0(tss, next);
+       tss->esp0 = next->esp0;
+       mcl->op      = __HYPERVISOR_stack_switch;
+       mcl->args[0] = tss->ss0;
+       mcl->args[1] = tss->esp0;
+       mcl++;
 
        /*
         * Load the per-thread Thread-Local Storage descriptor.
+        * This is load_TLS(next, cpu) with multicalls.
         */
-       load_TLS(next, cpu);
+#define C(i) do {                                                       \
+       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
+                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
+               mcl->op      = __HYPERVISOR_update_descriptor;          \
+               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
+                                        [GDT_ENTRY_TLS_MIN + i]);      \
+               mcl->args[1] = ((u32 *)&next->tls_array[i])[0];         \
+               mcl->args[2] = ((u32 *)&next->tls_array[i])[1];         \
+               mcl++;                                                  \
+       }                                                               \
+} while (0)
+       C(0); C(1); C(2);
+#undef C
+
+       if (unlikely(prev->io_pl != next->io_pl)) {
+               iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
+               iopl_op.u.set_iopl.iopl = next->io_pl;
+               mcl->op      = __HYPERVISOR_physdev_op;
+               mcl->args[0] = (unsigned long)&iopl_op;
+               mcl++;
+       }
+
+       if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+               iobmp_op.cmd                     =
+                       PHYSDEVOP_SET_IOBITMAP;
+               iobmp_op.u.set_iobitmap.bitmap   =
+                       (unsigned long)next->io_bitmap_ptr;
+               iobmp_op.u.set_iobitmap.nr_ports =
+                       next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+               mcl->op      = __HYPERVISOR_physdev_op;
+               mcl->args[0] = (unsigned long)&iobmp_op;
+               mcl++;
+       }
 
-       /*
-        * Save away %fs and %gs. No need to save %es and %ds, as
-        * those are always kernel segments while inside the kernel.
-        */
-       asm volatile("mov %%fs,%0":"=m" (prev->fs));
-       asm volatile("mov %%gs,%0":"=m" (prev->gs));
+       (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
 
        /*
         * Restore %fs and %gs if needed.
         */
-       if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
+       if (unlikely(next->fs | next->gs)) {
                loadsegment(fs, next->fs);
                loadsegment(gs, next->gs);
        }
@@ -636,9 +580,6 @@ struct task_struct fastcall * __switch_t
                loaddebug(next, 7);
        }
 
-       if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
-               handle_io_bitmap(next, tss);
-
        return prev_p;
 }
 
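
The interesting part of the __switch_to() rewrite is the multicall batching:
every privileged update a context switch needs (FPU taskswitch, kernel stack
switch, TLS descriptors, IOPL, IO bitmap) is queued into one array and issued
with a single HYPERVISOR_multicall(), so the switch pays for one guest-to-Xen
transition instead of one per operation. Skeleton of the idea -- hypothetical
helper, fixed two-entry batch:

	static void switch_stack_and_fpu(struct tss_struct *tss)
	{
		multicall_entry_t mcl[2];

		/* Queue the ring-1 stack switch... */
		mcl[0].op      = __HYPERVISOR_stack_switch;
		mcl[0].args[0] = tss->ss0;
		mcl[0].args[1] = tss->esp0;

		/* ...and the FPU taskswitch, then trap to Xen once. */
		mcl[1].op      = __HYPERVISOR_fpu_taskswitch;
		mcl[1].args[0] = 1;

		(void)HYPERVISOR_multicall(mcl, 2);
	}
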
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/quirks.c linux-2.6-xen-sparse/arch/i386/kernel/quirks.c
--- pristine-linux-2.6.12/arch/i386/kernel/quirks.c     2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/quirks.c      2005-07-28 13:17:07.000000000 -0700
@@ -32,14 +32,11 @@ static void __devinit quirk_intel_irqbal
        raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
 
        if (!(word & (1 << 13))) {
+               dom0_op_t op;
                printk(KERN_INFO "Disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-               irqbalance_disable("");
-#endif
-               noirqdebug_setup("");
-#ifdef CONFIG_PROC_FS
-               no_irq_affinity = 1;
-#endif
+               op.cmd = DOM0_PLATFORM_QUIRK;
+               op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
+               (void)HYPERVISOR_dom0_op(&op);
        }
 
        config &= ~0x2;
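
Since interrupt balancing and affinity are owned by Xen rather than the
guest, the quirk cannot be applied locally and is forwarded as a dom0_op. The
same three-line shape recurs wherever the sparse tree delegates a platform
decision to the hypervisor -- sketch with a hypothetical function name:

	static void delegate_irqbalance_quirk(void)
	{
		dom0_op_t op;
		op.cmd = DOM0_PLATFORM_QUIRK;
		op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
		(void)HYPERVISOR_dom0_op(&op);
	}
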
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/setup.c linux-2.6-xen-sparse/arch/i386/kernel/setup.c
--- pristine-linux-2.6.12/arch/i386/kernel/setup.c      2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/setup.c       2005-07-28 13:17:07.000000000 -0700
@@ -41,6 +41,9 @@
 #include <linux/init.h>
 #include <linux/edd.h>
 #include <linux/nodemask.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
 #include <video/edid.h>
 #include <asm/e820.h>
 #include <asm/mpspec.h>
@@ -50,13 +53,18 @@
 #include <asm/io_apic.h>
 #include <asm/ist.h>
 #include <asm/io.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/physdev.h>
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
-/* This value is set up by the early boot code to point to the value
-   immediately after the boot time page tables.  It contains a *physical*
-   address, and must not be in the .bss segment! */
-unsigned long init_pg_tables_end __initdata = ~0UL;
+/* Allows setting of maximum possible memory size  */
+static unsigned long xen_override_max_pfn;
+
+static int xen_panic_event(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xen_panic_block = {
+       xen_panic_event, NULL, 0 /* try to go last */
+};
 
 int disable_pse __initdata = 0;
 
@@ -70,9 +78,9 @@ EXPORT_SYMBOL(efi_enabled);
 #endif
 
 /* cpu data as detected by the assembly code in head.S */
-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
 /* common cpu data for all cpus */
-struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
 
 unsigned long mmu_cr4_features;
 
@@ -146,6 +154,7 @@ static struct resource code_resource = {
        .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static struct resource system_rom_resource = {
        .name   = "System ROM",
        .start  = 0xf0000,
@@ -201,6 +210,7 @@ static struct resource video_rom_resourc
        .end    = 0xc7fff,
        .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
 };
+#endif
 
 static struct resource video_ram_resource = {
        .name   = "Video RAM area",
@@ -259,6 +269,7 @@ static struct resource standard_io_resou
 #define STANDARD_IO_RESOURCES \
        (sizeof standard_io_resources / sizeof standard_io_resources[0])
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
 
 static int __init romchecksum(unsigned char *rom, unsigned long length)
@@ -276,6 +287,10 @@ static void __init probe_roms(void)
        unsigned char *rom;
        int           i;
 
+       /* Nothing to do if not running in dom0. */
+       if (!(xen_start_info.flags & SIF_INITDOMAIN))
+               return;
+
        /* video rom */
        upper = adapter_rom_resources[0].start;
        for (start = video_rom_resource.start; start < upper; start += 2048) {
@@ -334,6 +349,20 @@ static void __init probe_roms(void)
                start = adapter_rom_resources[i++].end & ~2047UL;
        }
 }
+#endif
+
+/*
+ * Point at the empty zero page to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
 
 static void __init limit_regions(unsigned long long size)
 {
@@ -414,6 +443,7 @@ static void __init print_memory_map(char
        }
 }
 
+#if 0
 /*
  * Sanitize the BIOS e820 map.
  *
@@ -633,6 +663,7 @@ static int __init copy_e820_map(struct e
        } while (biosmap++,--nr_map);
        return 0;
 }
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -666,11 +697,14 @@ static inline void copy_edd(void)
 static void __init parse_cmdline_early (char ** cmdline_p)
 {
        char c = ' ', *to = command_line, *from = saved_command_line;
-       int len = 0;
+       int len = 0, max_cmdline;
        int userdef = 0;
 
+       if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+               max_cmdline = COMMAND_LINE_SIZE;
+       memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
        /* Save unparsed command line copy for /proc/cmdline */
-       saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+       saved_command_line[max_cmdline-1] = '\0';
 
        for (;;) {
                if (c != ' ')
@@ -702,8 +736,13 @@ static void __init parse_cmdline_early (
                                unsigned long long mem_size;
  
                                mem_size = memparse(from+4, &from);
+#if 0
                                limit_regions(mem_size);
                                userdef=1;
+#else
+                               xen_override_max_pfn =
+                                       (unsigned long)(mem_size>>PAGE_SHIFT);
+#endif
                        }
                }
 
@@ -744,7 +783,7 @@ static void __init parse_cmdline_early (
                        noexec_setup(from + 7);
 
 
-#ifdef  CONFIG_X86_SMP
+#ifdef  CONFIG_X86_MPPARSE
                /*
                 * If the BIOS enumerates physical processors before logical,
                 * maxcpus=N at enumeration-time can be used to disable HT.
@@ -846,6 +885,7 @@ static void __init parse_cmdline_early (
        }
 }
 
+#if 0 /* !XEN */
 /*
  * Callback for efi_memory_walk.
  */
@@ -889,6 +929,15 @@ void __init find_max_pfn(void)
                        max_pfn = end;
        }
 }
+#else
+/* We don't use the fake e820 because we need to respond to user override. */
+void __init find_max_pfn(void)
+{
+       if ( xen_override_max_pfn < xen_start_info.nr_pages )
+               xen_override_max_pfn = xen_start_info.nr_pages;
+       max_pfn = xen_override_max_pfn;
+}
+#endif /* XEN */
 
 /*
  * Determine low and high memory ranges:
@@ -1011,6 +1060,7 @@ static void __init register_bootmem_low_
        }
 }
 
+#ifndef CONFIG_XEN
 /*
  * workaround for Dell systems that neglect to reserve EBDA
  */
@@ -1021,16 +1071,18 @@ static void __init reserve_ebda_region(v
        if (addr)
                reserve_bootmem(addr, PAGE_SIZE);       
 }
+#endif
 
 #ifndef CONFIG_DISCONTIGMEM
 void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
 {
+
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       min_low_pfn = PFN_UP(init_pg_tables_end);
+       min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
 
        find_max_pfn();
 
@@ -1057,7 +1109,14 @@ void __init zone_sizes_init(void)
        unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
        unsigned int max_dma, low;
 
-       max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+       /*
+        * XEN: Our notion of "DMA memory" is fake when running over Xen.
+        * We simply put all RAM in the DMA zone so that those drivers which
+        * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
+        * Those drivers that *do* require lowmem are screwed anyway when
+        * running over Xen!
+        */
+       max_dma = max_low_pfn;
        low = max_low_pfn;
 
        if (low < max_dma)
@@ -1095,6 +1154,7 @@ void __init setup_bootmem_allocator(void
        reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
                         bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
 
+#ifndef CONFIG_XEN
        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
@@ -1125,20 +1185,15 @@ void __init setup_bootmem_allocator(void
         */
        acpi_reserve_bootmem();
 #endif
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
-       /*
-        * Find and reserve possible boot-time SMP configuration:
-        */
-       find_smp_config();
-#endif
+#endif /* !CONFIG_XEN */
 
 #ifdef CONFIG_BLK_DEV_INITRD
-       if (LOADER_TYPE && INITRD_START) {
+       if (xen_start_info.mod_start) {
                if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-                       reserve_bootmem(INITRD_START, INITRD_SIZE);
-                       initrd_start =
-                               INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+                       /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
+                       initrd_start = INITRD_START + PAGE_OFFSET;
                        initrd_end = initrd_start+INITRD_SIZE;
+                       initrd_below_start_ok = 1;
                }
                else {
                        printk(KERN_ERR "initrd extends beyond end of memory "
@@ -1149,6 +1204,8 @@ void __init setup_bootmem_allocator(void
                }
        }
 #endif
+
+       phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
 }
 
 /*
@@ -1178,7 +1235,9 @@ legacy_init_iomem_resources(struct resou
 {
        int i;
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        probe_roms();
+#endif
        for (i = 0; i < e820.nr_map; i++) {
                struct resource *res;
                if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
@@ -1220,8 +1279,9 @@ static void __init register_memory(void)
        else
                legacy_init_iomem_resources(&code_resource, &data_resource);
 
-       /* EFI systems may still have VGA */
-       request_resource(&iomem_resource, &video_ram_resource);
+       if (xen_start_info.flags & SIF_INITDOMAIN)
+               /* EFI systems may still have VGA */
+               request_resource(&iomem_resource, &video_ram_resource);
 
        /* request I/O space for devices used on all i[345]86 PCs */
        for (i = 0; i < STANDARD_IO_RESOURCES; i++)
@@ -1396,10 +1456,23 @@ static void set_mca_bus(int x) { }
  */
 void __init setup_arch(char **cmdline_p)
 {
+       int i, j;
+       physdev_op_t op;
        unsigned long max_low_pfn;
 
+       /* Force a quick death if the kernel panics. */
+       extern int panic_timeout;
+       if (panic_timeout == 0)
+               panic_timeout = 1;
+
+       /* Register a call for panic conditions. */
+       notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+
+       HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
+       HYPERVISOR_vm_assist(VMASST_CMD_enable,
+                            VMASST_TYPE_writable_pagetables);
+
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
-       pre_setup_arch_hook();
        early_cpu_init();
 
        /*
@@ -1414,7 +1487,10 @@ void __init setup_arch(char **cmdline_p)
                efi_enabled = 1;
 #endif
 
-       ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+       /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
+          properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
+       */
+       ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
        drive_info = DRIVE_INFO;
        screen_info = SCREEN_INFO;
        edid_info = EDID_INFO;
@@ -1429,6 +1505,16 @@ void __init setup_arch(char **cmdline_p)
        }
        bootloader_type = LOADER_TYPE;
 
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+       /* This is drawn from a dump from vgacon:startup in standard Linux. */
+       screen_info.orig_video_mode = 3; 
+       screen_info.orig_video_isVGA = 1;
+       screen_info.orig_video_lines = 25;
+       screen_info.orig_video_cols = 80;
+       screen_info.orig_video_ega_bx = 3;
+       screen_info.orig_video_points = 16;
+#endif
+
 #ifdef CONFIG_BLK_DEV_RAM
        rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
@@ -1449,12 +1535,14 @@ void __init setup_arch(char **cmdline_p)
        init_mm.start_code = (unsigned long) _text;
        init_mm.end_code = (unsigned long) _etext;
        init_mm.end_data = (unsigned long) _edata;
-       init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
+       init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) +
+                      xen_start_info.nr_pt_frames) << PAGE_SHIFT;
 
-       code_resource.start = virt_to_phys(_text);
-       code_resource.end = virt_to_phys(_etext)-1;
-       data_resource.start = virt_to_phys(_etext);
-       data_resource.end = virt_to_phys(_edata)-1;
+       /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
+       /*code_resource.start = virt_to_phys(_text);*/
+       /*code_resource.end = virt_to_phys(_etext)-1;*/
+       /*data_resource.start = virt_to_phys(_etext);*/
+       /*data_resource.end = virt_to_phys(_edata)-1;*/
 
        parse_cmdline_early(cmdline_p);
 
@@ -1477,6 +1565,51 @@ void __init setup_arch(char **cmdline_p)
        remapped_pgdat_init();
        zone_sizes_init();
 
+#ifdef CONFIG_X86_FIND_SMP_CONFIG
+       /*
+        * Find and reserve possible boot-time SMP configuration:
+        */
+       find_smp_config();
+#endif
+
+       /* Make sure we have a correctly sized P->M table. */
+       if (max_pfn != xen_start_info.nr_pages) {
+               phys_to_machine_mapping = alloc_bootmem_low_pages(
+                       max_pfn * sizeof(unsigned long));
+
+               if (max_pfn > xen_start_info.nr_pages) {
+                       /* set to INVALID_P2M_ENTRY */
+                       memset(phys_to_machine_mapping, ~0,
+                               max_pfn * sizeof(unsigned long));
+                       memcpy(phys_to_machine_mapping,
+                               (unsigned long *)xen_start_info.mfn_list,
+                               xen_start_info.nr_pages * sizeof(unsigned long));
+               } else {
+                       memcpy(phys_to_machine_mapping,
+                               (unsigned long *)xen_start_info.mfn_list,
+                               max_pfn * sizeof(unsigned long));
+                       if (HYPERVISOR_dom_mem_op(
+                               MEMOP_decrease_reservation,
+                               (unsigned long *)xen_start_info.mfn_list + max_pfn,
+                               xen_start_info.nr_pages - max_pfn, 0) !=
+                           (xen_start_info.nr_pages - max_pfn)) BUG();
+               }
+               free_bootmem(
+                       __pa(xen_start_info.mfn_list), 
+                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                       sizeof(unsigned long))));
+       }
+
+       pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
+       for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+       {       
+            pfn_to_mfn_frame_list[j] = 
+                 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+       }
+       HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
+            virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+
+
        /*
         * NOTE: at this point the bootmem allocator is fully available.
         */
@@ -1502,6 +1635,18 @@ void __init setup_arch(char **cmdline_p)
        if (efi_enabled)
                efi_map_memmap();
 
+       op.cmd             = PHYSDEVOP_SET_IOPL;
+       op.u.set_iopl.iopl = current->thread.io_pl = 1;
+       HYPERVISOR_physdev_op(&op);
+
+#ifdef CONFIG_ACPI_BOOT
+       if (!(xen_start_info.flags & SIF_INITDOMAIN)) {
+               printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
+               acpi_disabled = 1;
+               acpi_ht = 0;
+       }
+#endif
+
 #ifdef CONFIG_ACPI_BOOT
        /*
         * Parse the ACPI tables for possible boot-time SMP configuration.
@@ -1515,16 +1660,46 @@ void __init setup_arch(char **cmdline_p)
                get_smp_config();
 #endif
 
+       /* XXX Disable irqdebug until we have a way to avoid interrupt
+        * conflicts. */
+       noirqdebug_setup("");
+
        register_memory();
 
+       if (xen_start_info.flags & SIF_INITDOMAIN) {
+               if (!(xen_start_info.flags & SIF_PRIVILEGED))
+                       panic("Xen granted us console access "
+                             "but not privileged status");
+
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-       if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
-               conswitchp = &vga_con;
+               if (!efi_enabled ||
+                   (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+                       conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
-       conswitchp = &dummy_con;
+               conswitchp = &dummy_con;
+#endif
 #endif
+       } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+               extern const struct consw xennull_con;
+               extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+               /* disable VGA driver */
+               ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
 #endif
+               conswitchp = &xennull_con;
+               console_use_vt = 0;
+#endif
+       }
+}
+
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       HYPERVISOR_crash();    
+       /* we're never actually going to get here... */
+       return NOTIFY_DONE;
 }
 
 #include "setup_arch_post.h"
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/signal.c linux-2.6-xen-sparse/arch/i386/kernel/signal.c
--- pristine-linux-2.6.12/arch/i386/kernel/signal.c     2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/signal.c      2005-08-02 00:59:44.000000000 -0700
@@ -599,7 +599,7 @@ int fastcall do_signal(struct pt_regs *r
         * kernel mode. Just return without doing anything
         * if so.
         */
-       if ((regs->xcs & 3) != 3)
+       if ((regs->xcs & 2) != 2)
                return 1;
 
        if (current->flags & PF_FREEZE) {
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c
--- pristine-linux-2.6.12/arch/i386/kernel/smpboot.c    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/smpboot.c     2005-07-28 13:17:07.000000000 -0700
@@ -44,6 +44,9 @@
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
 #include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -51,7 +54,11 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 
-#include <mach_apic.h>
+#include <asm/smp_alt.h>
+
+#ifndef CONFIG_X86_IO_APIC
+#define Dprintk(args...)
+#endif
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
@@ -79,6 +86,7 @@ u8 x86_cpu_to_apicid[NR_CPUS] =
                        { [0 ... NR_CPUS-1] = 0xff };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 
+#if 0
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -87,9 +95,19 @@ extern unsigned char trampoline_data [];
 extern unsigned char trampoline_end  [];
 static unsigned char *trampoline_base;
 static int trampoline_exec;
+#endif
 
-static void map_cpu_to_logical_apicid(void);
+#ifdef CONFIG_HOTPLUG_CPU
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+#endif
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
 
+#if 0
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -101,6 +119,9 @@ static unsigned long __init setup_trampo
        memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(trampoline_base);
 }
+#endif
+
+static void map_cpu_to_logical_apicid(void);
 
 /*
  * We are called very early to get the low memory for the
@@ -108,6 +129,15 @@ static unsigned long __init setup_trampo
  */
 void __init smp_alloc_memory(void)
 {
+#if 1
+       int cpu;
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+#else
        trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
        /*
         * Has to be in very low memory so we can execute
@@ -119,6 +149,7 @@ void __init smp_alloc_memory(void)
         * Make the SMP trampoline executable:
         */
        trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+#endif
 }
 
 /*
@@ -179,6 +210,7 @@ valid_k7:
        ;
 }
 
+#if 0
 /*
  * TSC synchronization.
  *
@@ -315,6 +347,7 @@ static void __init synchronize_tsc_ap (v
        }
 }
 #undef NR_LOOPS
+#endif
 
 extern void calibrate_delay(void);
 
@@ -325,6 +358,7 @@ static void __init smp_callin(void)
        int cpuid, phys_id;
        unsigned long timeout;
 
+#if 0
        /*
         * If waken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
@@ -332,11 +366,12 @@ static void __init smp_callin(void)
         * lock up on an APIC access.
         */
        wait_for_init_deassert(&init_deasserted);
+#endif
 
        /*
         * (This works even if the APIC is not enabled.)
         */
-       phys_id = GET_APIC_ID(apic_read(APIC_ID));
+       phys_id = smp_processor_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                printk("huh, phys CPU#%d, CPU#%d already present??\n",
@@ -372,6 +407,7 @@ static void __init smp_callin(void)
                BUG();
        }
 
+#if 0
        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
@@ -382,6 +418,7 @@ static void __init smp_callin(void)
        Dprintk("CALLIN, before setup_local_APIC().\n");
        smp_callin_clear_local_apic();
        setup_local_APIC();
+#endif
        map_cpu_to_logical_apicid();
 
        /*
@@ -395,22 +432,49 @@ static void __init smp_callin(void)
         */
        smp_store_cpu_info(cpuid);
 
+#if 0
        disable_APIC_timer();
+#endif
 
        /*
         * Allow the master to continue.
         */
        cpu_set(cpuid, cpu_callin_map);
 
+#if 0
        /*
         *      Synchronize the TSC with the BP
         */
        if (cpu_has_tsc && cpu_khz)
                synchronize_tsc_ap();
+#endif
 }
 
 static int cpucount;
 
+
+static irqreturn_t ldebug_interrupt(
+       int irq, void *dev_id, struct pt_regs *regs)
+{
+       return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+       sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+       BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+                          SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+
+extern void local_setup_timer(void);
+
 /*
  * Activate a secondary processor.
  */
@@ -425,13 +489,10 @@ static void __init start_secondary(void 
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
-       setup_secondary_APIC_clock();
-       if (nmi_watchdog == NMI_IO_APIC) {
-               disable_8259A_irq(0);
-               enable_NMI_through_LVT0(NULL);
-               enable_8259A_irq(0);
-       }
-       enable_APIC_timer();
+       local_setup_timer();
+       ldebug_setup();
+       smp_intr_init();
+       local_irq_enable();
        /*
         * low-memory mappings have been cleared, flush them from
         * the local TLBs too.
@@ -510,7 +571,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] = { [0 
 static void map_cpu_to_logical_apicid(void)
 {
        int cpu = smp_processor_id();
-       int apicid = logical_smp_processor_id();
+       int apicid = smp_processor_id();
 
        cpu_2_logical_apicid[cpu] = apicid;
        map_cpu_to_node(cpu, apicid_to_node(apicid));
@@ -560,6 +621,7 @@ static inline void __inquire_remote_apic
 }
 #endif
 
+#if 0
 #ifdef WAKE_SECONDARY_VIA_NMI
 /* 
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
@@ -745,6 +807,7 @@ wakeup_secondary_cpu(int phys_apicid, un
        return (send_status | accept_status);
 }
 #endif /* WAKE_SECONDARY_VIA_INIT */
+#endif
 
 extern cpumask_t cpu_initialized;
 
@@ -759,7 +822,15 @@ static int __init do_boot_cpu(int apicid
        unsigned long boot_error;
        int timeout, cpu;
        unsigned long start_eip;
+#if 0
        unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+       vcpu_guest_context_t ctxt;
+       extern void startup_32_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern void smp_trap_init(trap_info_t *);
+       int i;
 
        cpu = ++cpucount;
        /*
@@ -771,7 +842,7 @@ static int __init do_boot_cpu(int apicid
                panic("failed fork for CPU %d", cpu);
        idle->thread.eip = (unsigned long) start_secondary;
        /* start_eip had better be page-aligned! */
-       start_eip = setup_trampoline();
+       start_eip = (unsigned long)startup_32_smp;
 
        /* So we see what's up   */
        printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
@@ -787,6 +858,107 @@ static int __init do_boot_cpu(int apicid
 
        atomic_set(&init_deasserted, 0);
 
+#if 1
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       printk("GDT: copying %d bytes from %lx to %lx\n",
+               cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
+               cpu_gdt_descr[cpu].address); 
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+              (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.user_regs.ds = __USER_DS;
+       ctxt.user_regs.es = __USER_DS;
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS;
+       ctxt.user_regs.cs = __KERNEL_CS;
+       ctxt.user_regs.eip = start_eip;
+       ctxt.user_regs.esp = idle->thread.esp;
+       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = idle->thread.esp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_cs     = __KERNEL_CS;
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_cs  = __KERNEL_CS;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+       printk("boot error: %ld\n", boot_error);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+#else
        Dprintk("Setting warm reset code and vector.\n");
 
        store_NMI_vector(&nmi_high, &nmi_low);
@@ -844,6 +1016,7 @@ static int __init do_boot_cpu(int apicid
 
        /* mark "stuck" area as not stuck */
        *((volatile unsigned long *)trampoline_base) = 0;
+#endif
 
        return boot_error;
 }
@@ -882,7 +1055,9 @@ static void smp_tune_scheduling (void)
  * Cycle through the processors sending APIC IPIs to boot each.
  */
 
+#if 0
 static int boot_cpu_logical_apicid;
+#endif
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
 
@@ -892,8 +1067,11 @@ EXPORT_SYMBOL(cpu_core_map);
 
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
-       int apicid, cpu, bit, kicked;
+       int cpu, kicked;
        unsigned long bogosum = 0;
+#if 0
+       int apicid, bit;
+#endif
 
        /*
         * Setup boot CPU information
@@ -902,9 +1080,15 @@ static void __init smp_boot_cpus(unsigne
        printk("CPU%d: ", 0);
        print_cpu_info(&cpu_data[0]);
 
+#if 0
        boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
        boot_cpu_logical_apicid = logical_smp_processor_id();
        x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+       // boot_cpu_physical_apicid = 0;
+       // boot_cpu_logical_apicid = 0;
+       x86_cpu_to_apicid[0] = 0;
+#endif
 
        current_thread_info()->cpu = 0;
        smp_tune_scheduling();
@@ -914,6 +1098,7 @@ static void __init smp_boot_cpus(unsigne
        cpus_clear(cpu_core_map[0]);
        cpu_set(0, cpu_core_map[0]);
 
+#ifdef CONFIG_X86_IO_APIC
        /*
         * If we couldn't find an SMP configuration at boot time,
         * get out of here now!
@@ -921,16 +1106,22 @@ static void __init smp_boot_cpus(unsigne
        if (!smp_found_config && !acpi_lapic) {
                printk(KERN_NOTICE "SMP motherboard not detected.\n");
                smpboot_clear_io_apic_irqs();
+#if 0
                phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
                if (APIC_init_uniprocessor())
                        printk(KERN_NOTICE "Local APIC not detected."
                                           " Using dummy APIC emulation.\n");
+#endif
                map_cpu_to_logical_apicid();
                cpu_set(0, cpu_sibling_map[0]);
                cpu_set(0, cpu_core_map[0]);
                return;
        }
+#endif
 
+#if 0
        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
@@ -953,27 +1144,35 @@ static void __init smp_boot_cpus(unsigne
                phys_cpu_present_map = physid_mask_of_physid(0);
                cpu_set(0, cpu_sibling_map[0]);
                cpu_set(0, cpu_core_map[0]);
+               cpu_set(0, cpu_sibling_map[0]);
+               cpu_set(0, cpu_core_map[0]);
                return;
        }
 
        verify_local_APIC();
+#endif
 
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
-               smp_found_config = 0;
+               HYPERVISOR_shared_info->n_vcpu = 1;
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy 
APIC emulation.\n");
                smpboot_clear_io_apic_irqs();
+#if 0
                phys_cpu_present_map = physid_mask_of_physid(0);
-               cpu_set(0, cpu_sibling_map[0]);
-               cpu_set(0, cpu_core_map[0]);
+#endif
                return;
        }
 
+       smp_intr_init();
+
+#if 0
        connect_bsp_APIC();
        setup_local_APIC();
+#endif
        map_cpu_to_logical_apicid();
+#if 0
 
 
        setup_portio_remap();
@@ -986,32 +1185,33 @@ static void __init smp_boot_cpus(unsigne
         * clustered apic ID.
         */
        Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+       Dprintk("CPU present map: %lx\n",
+               (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
 
        kicked = 1;
-       for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
-               apicid = cpu_present_to_apicid(bit);
-               /*
-                * Don't even attempt to start the boot CPU!
-                */
-               if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
-                       continue;
-
-               if (!check_apicid_present(bit))
-                       continue;
+       for (cpu = 1; kicked < NR_CPUS &&
+                    cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
                if (max_cpus <= cpucount+1)
                        continue;
 
-               if (do_boot_cpu(apicid))
+#ifdef CONFIG_SMP_ALTERNATIVES
+               if (kicked == 1)
+                       prepare_for_smp();
+#endif
+               if (do_boot_cpu(cpu))
                        printk("CPU #%d not responding - cannot use it.\n",
-                                                               apicid);
+                                                               cpu);
                else
                        ++kicked;
        }
 
+#if 0
        /*
         * Cleanup possible dangling ends...
         */
        smpboot_restore_warm_reset_vector();
+#endif
 
        /*
         * Allow the user to impress friends.
@@ -1078,7 +1278,6 @@ static void __init smp_boot_cpus(unsigne
                        printk(KERN_WARNING "WARNING: %d siblings found for 
CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
                        smp_num_siblings = siblings;
                }
-
                if (c->x86_num_cores > 1) {
                        for (i = 0; i < NR_CPUS; i++) {
                                if (!cpu_isset(i, cpu_callout_map))
@@ -1094,6 +1293,7 @@ static void __init smp_boot_cpus(unsigne
 
        smpboot_setup_io_apic();
 
+#if 0
        setup_boot_APIC_clock();
 
        /*
@@ -1101,12 +1301,16 @@ static void __init smp_boot_cpus(unsigne
         */
        if (cpu_has_tsc && cpucount && cpu_khz)
                synchronize_tsc_bp();
+#endif
 }
 
 /* These are wrappers to interface to the new boot process.  Someone
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+       smp_commenced_mask = cpumask_of_cpu(0);
+       cpu_callin_map = cpumask_of_cpu(0);
+       mb();
        smp_boot_cpus(max_cpus);
 }
 
@@ -1116,20 +1320,189 @@ void __devinit smp_prepare_boot_cpu(void
        cpu_set(smp_processor_id(), cpu_callout_map);
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm-xen/ctrl_if.h>
+
+/* hotplug down/up function pointer and target vcpu */
+struct vcpu_hotplug_handler_t {
+       void (*fn)(int vcpu);
+       u32 vcpu;
+};
+static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
+
+/* must be called with the cpucontrol mutex held */
+static int __devinit cpu_enable(unsigned int cpu)
+{
+#ifdef CONFIG_SMP_ALTERNATIVES
+       if (num_online_cpus() == 1)
+               prepare_for_smp();
+#endif
+
+       /* get the target out of its holding state */
+       per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+       wmb();
+
+       /* wait for the processor to ack it. timeout? */
+       while (!cpu_online(cpu))
+               cpu_relax();
+
+       fixup_irqs(cpu_online_map);
+
+       /* counter the disable in fixup_irqs() */
+       local_irq_enable();
+       return 0;
+}
+
+int __cpu_disable(void)
 {
-       /* This only works at boot for x86.  See "rewrite" above. */
-       if (cpu_isset(cpu, smp_commenced_mask)) {
-               local_irq_enable();
-               return -ENOSYS;
+       cpumask_t map = cpu_online_map;
+       int cpu = smp_processor_id();
+
+       /*
+        * Perhaps use cpufreq to drop frequency, but that could go
+        * into generic code.
+        *
+        * We won't take down the boot processor on i386 due to some
+        * interrupts only being able to be serviced by the BSP.
+        * Especially so if we're not using an IOAPIC   -zwane
+        */
+       if (cpu == 0)
+               return -EBUSY;
+
+       cpu_clear(cpu, map);
+       fixup_irqs(map);
+
+       /* It's now safe to remove this processor from the online map */
+       cpu_clear(cpu, cpu_online_map);
+
+#ifdef CONFIG_SMP_ALTERNATIVES
+       if (num_online_cpus() == 1)
+               unprepare_for_smp();
+#endif
+
+       return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+       /* We don't do anything here: idle task is faking death itself. */
+       unsigned int i;
+
+       for (i = 0; i < 10; i++) {
+               /* They ack this in play_dead by setting CPU_DEAD */
+               if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+                       return;
+               current->state = TASK_UNINTERRUPTIBLE;
+               schedule_timeout(HZ/10);
+       }
+       printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+
+static int vcpu_hotplug_cpu_process(void *unused)
+{
+       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
+
+       if (handler->fn) {
+               (*(handler->fn))(handler->vcpu);
+               handler->fn = NULL;
        }
+       return 0;
+}
+
+static void __vcpu_hotplug_handler(void *unused)
+{
+       int err;
+
+       err = kernel_thread(vcpu_hotplug_cpu_process, 
+                           NULL, CLONE_FS | CLONE_FILES);
+       if (err < 0)
+               printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
+
+}
+
+static void vcpu_hotplug_event_handler(ctrl_msg_t *msg, unsigned long id)
+{
+       static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
+       vcpu_hotplug_t *req = (vcpu_hotplug_t *)&msg->msg[0];
+       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
+       ssize_t ret;
+
+       if (msg->length != sizeof(vcpu_hotplug_t))
+               goto parse_error;
+
+       /* grab target vcpu from msg */
+       handler->vcpu = req->vcpu;
+
+       /* determine which function to call based on msg subtype */
+       switch (msg->subtype) {
+        case CMSG_VCPU_HOTPLUG_OFF:
+               handler->fn = (void *)&cpu_down;
+               ret = schedule_work(&vcpu_hotplug_work);
+               req->status = (u32) ret;
+               break;
+        case CMSG_VCPU_HOTPLUG_ON:
+               handler->fn = (void *)&cpu_up;
+               ret = schedule_work(&vcpu_hotplug_work);
+               req->status = (u32) ret;
+               break;
+        default:
+               goto parse_error;
+       }
+
+       ctrl_if_send_response(msg);
+       return;
+ parse_error:
+       msg->length = 0;
+       ctrl_if_send_response(msg);
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+       struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
+
+       handler->fn = NULL;
+       ctrl_if_register_receiver(CMSG_VCPU_HOTPLUG,
+                                 vcpu_hotplug_event_handler, 0);
+
+       return 0;
+}
+
+__initcall(setup_vcpu_hotplug_event);
 
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+       return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+       /* We said "no" in __cpu_disable */
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
        /* In case one didn't come up */
        if (!cpu_isset(cpu, cpu_callin_map)) {
+               printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
                local_irq_enable();
                return -EIO;
        }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_XEN
+       /* Tell hypervisor to bring vcpu up. */
+       HYPERVISOR_vcpu_up(cpu);
+#endif
+       /* Already up, and in cpu_quiescent now? */
+       if (cpu_isset(cpu, smp_commenced_mask)) {
+               cpu_enable(cpu);
+               return 0;
+       }
+#endif
+
        local_irq_enable();
        /* Unleash the CPU! */
        cpu_set(cpu, smp_commenced_mask);
@@ -1140,6 +1513,8 @@ int __devinit __cpu_up(unsigned int cpu)
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
+#if 1
+#else
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
@@ -1148,25 +1523,26 @@ void __init smp_cpus_done(unsigned int m
         * Disable executability of the SMP trampoline:
         */
        set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
 void __init smp_intr_init(void)
 {
-       /*
-        * IRQ0 must be given a fixed assignment and initialized,
-        * because it's used before the IO-APIC is set up.
-        */
-       set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
-       /*
-        * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-        * IPI, driven by wakeup.
-        */
-       set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-       /* IPI for invalidation */
-       set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+       int cpu = smp_processor_id();
 
-       /* IPI for generic function call */
-       set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+       per_cpu(resched_irq, cpu) =
+               bind_ipi_on_cpu_to_irq(RESCHEDULE_VECTOR);
+       sprintf(resched_name[cpu], "resched%d", cpu);
+       BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+                          SA_INTERRUPT, resched_name[cpu], NULL));
+
+       per_cpu(callfunc_irq, cpu) =
+               bind_ipi_on_cpu_to_irq(CALL_FUNCTION_VECTOR);
+       sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+       BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+                          smp_call_function_interrupt,
+                          SA_INTERRUPT, callfunc_name[cpu], NULL));
 }
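
Net effect in smpboot.c: the real-mode trampoline, the INIT/SIPI dance
and the TSC sync all disappear; a secondary comes up by handing Xen a
fully-populated vcpu_guest_context_t and making one hypercall.  Boiled
down from do_boot_cpu() above (condensed sketch; GDT, trap table and
callback setup plus error handling elided):

        vcpu_guest_context_t ctxt;

        memset(&ctxt, 0, sizeof(ctxt));
        ctxt.user_regs.cs  = __KERNEL_CS;
        ctxt.user_regs.eip = (unsigned long)startup_32_smp;    /* AP entry point */
        ctxt.user_regs.esp = idle->thread.esp;                 /* initial stack  */
        ctxt.kernel_ss     = __KERNEL_DS;
        ctxt.kernel_sp     = idle->thread.esp;
        ctxt.ctrlreg[3]    = (unsigned long)virt_to_machine(swapper_pg_dir);

        boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);  /* Xen does the rest */
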
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/smp.c linux-2.6-xen-sparse/arch/i386/kernel/smp.c
--- pristine-linux-2.6.12/arch/i386/kernel/smp.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/smp.c 2005-07-28 13:17:07.000000000 -0700
@@ -19,10 +19,16 @@
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
+#include <linux/cpu.h>
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
+#if 0
 #include <mach_apic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
 
 /*
  *     Some notes on x86 processor bugs affecting SMP operation:
@@ -121,31 +127,49 @@ static inline int __prepare_ICR2 (unsign
        return SET_APIC_DEST_FIELD(mask);
 }
 
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
 {
-       /*
-        * Subtle. In the case of the 'never do double writes' workaround
-        * we have to lock out interrupts to be safe.  As we don't care
-        * of the value read we use an atomic rmw access to avoid costly
-        * cli/sti.  Otherwise we use an even cheaper single atomic write
-        * to the APIC.
-        */
-       unsigned int cfg;
+       unsigned int evtchn;
 
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, 
vector, evtchn);
+       if (evtchn) {
+#if 0
+               shared_info_t *s = HYPERVISOR_shared_info;
+               while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
+                      synch_test_bit(evtchn, &s->evtchn_mask[0]))
+                       ;
+#endif
+               notify_via_evtchn(evtchn);
+       } else
+               printk("send_IPI to unbound port %d/%d",
+                      cpu, vector);
+}
 
-       /*
-        * No need to touch the target chip field
-        */
-       cfg = __prepare_ICR(shortcut, vector);
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+       int cpu;
 
-       /*
-        * Send the IPI. The write to APIC_ICR fires this off.
-        */
-       apic_write_around(APIC_ICR, cfg);
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default:
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
 }
 
 void fastcall send_IPI_self(int vector)
@@ -156,81 +180,32 @@ void fastcall send_IPI_self(int vector)
 /*
  * This is only used on smaller machines.
  */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
 {
-       unsigned long mask = cpus_addr(cpumask)[0];
-       unsigned long cfg;
        unsigned long flags;
+       unsigned int cpu;
 
        local_irq_save(flags);
-               
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-               
-       /*
-        * prepare target chip field
-        */
-       cfg = __prepare_ICR2(mask);
-       apic_write_around(APIC_ICR2, cfg);
-               
-       /*
-        * program the ICR 
-        */
-       cfg = __prepare_ICR(0, vector);
-                       
-       /*
-        * Send the IPI. The write to APIC_ICR fires this off.
-        */
-       apic_write_around(APIC_ICR, cfg);
+       WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, mask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
 
        local_irq_restore(flags);
 }
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector)
 {
-       unsigned long cfg, flags;
-       unsigned int query_cpu;
-
-       /*
-        * Hack. The clustered APIC addressing mode doesn't allow us to send 
-        * to an arbitrary mask, so I do a unicasts to each CPU instead. This 
-        * should be modified to do 1 message per cluster ID - mbligh
-        */ 
 
-       local_irq_save(flags);
-
-       for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
-               if (cpu_isset(query_cpu, mask)) {
-               
-                       /*
-                        * Wait for idle.
-                        */
-                       apic_wait_icr_idle();
-               
-                       /*
-                        * prepare target chip field
-                        */
-                       cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
-                       apic_write_around(APIC_ICR2, cfg);
-               
-                       /*
-                        * program the ICR 
-                        */
-                       cfg = __prepare_ICR(0, vector);
-                       
-                       /*
-                        * Send the IPI. The write to APIC_ICR fires this off.
-                        */
-                       apic_write_around(APIC_ICR, cfg);
-               }
-       }
-       local_irq_restore(flags);
+       send_IPI_mask_bitmask(mask, vector);
 }
 
 #include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
 
+#if 0 /* XEN */
 /*
  *     Smarter SMP flushing macros. 
  *             c/o Linus Torvalds.
@@ -308,7 +283,8 @@ static inline void leave_mm (unsigned lo
  * 2) Leave the mm if we are in the lazy tlb mode.
  */
 
-fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
 {
        unsigned long cpu;
 
@@ -334,32 +310,33 @@ fastcall void smp_invalidate_interrupt(s
                } else
                        leave_mm(cpu);
        }
-       ack_APIC_irq();
        smp_mb__before_clear_bit();
        cpu_clear(cpu, flush_cpumask);
        smp_mb__after_clear_bit();
 out:
        put_cpu_no_resched();
+
+       return IRQ_HANDLED;
 }
 
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                                                unsigned long va)
 {
-       cpumask_t tmp;
        /*
         * A couple of (to be removed) sanity checks:
         *
-        * - we do not send IPIs to not-yet booted CPUs.
         * - current CPU must not be in mask
         * - mask must exist :)
         */
        BUG_ON(cpus_empty(cpumask));
-
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(cpumask, tmp));
        BUG_ON(cpu_isset(smp_processor_id(), cpumask));
        BUG_ON(!mm);
 
+       /* If a CPU which we ran on has gone down, OK. */
+       cpus_and(cpumask, cpumask, cpu_online_map);
+       if (cpus_empty(cpumask))
+               return;
+
        /*
         * i'm not happy about this global shared spinlock in the
         * MM hot path, but we'll see how contended it is.
@@ -443,7 +420,7 @@ void flush_tlb_page(struct vm_area_struc
        if (current->active_mm == mm) {
                if(current->mm)
                        __flush_tlb_one(va);
-                else
+               else
                        leave_mm(smp_processor_id());
        }
 
@@ -467,6 +444,22 @@ void flush_tlb_all(void)
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
 
+#else
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
+{ return 0; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm(struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+
+#endif /* XEN */
+
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
@@ -474,6 +467,7 @@ void flush_tlb_all(void)
  */
 void smp_send_reschedule(int cpu)
 {
+       WARN_ON(cpu_is_offline(cpu));
        send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
@@ -514,10 +508,16 @@ int smp_call_function (void (*func) (voi
  */
 {
        struct call_data_struct data;
-       int cpus = num_online_cpus()-1;
+       int cpus;
 
-       if (!cpus)
+       /* Holding any lock stops cpus from going down. */
+       spin_lock(&call_lock);
+       cpus = num_online_cpus()-1;
+
+       if (!cpus) {
+               spin_unlock(&call_lock);
                return 0;
+       }
 
        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());
@@ -529,7 +529,6 @@ int smp_call_function (void (*func) (voi
        if (wait)
                atomic_set(&data.finished, 0);
 
-       spin_lock(&call_lock);
        call_data = &data;
        mb();
        
@@ -538,11 +537,11 @@ int smp_call_function (void (*func) (voi
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
-               cpu_relax();
+               barrier();
 
        if (wait)
                while (atomic_read(&data.finished) != cpus)
-                       cpu_relax();
+                       barrier();
        spin_unlock(&call_lock);
 
        return 0;
@@ -555,7 +554,11 @@ static void stop_this_cpu (void * dummy)
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
+#if 1
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        if (cpu_data[smp_processor_id()].hlt_works_ok)
                for(;;) __asm__("hlt");
        for (;;);
@@ -570,7 +573,11 @@ void smp_send_stop(void)
        smp_call_function(stop_this_cpu, NULL, 1, 0);
 
        local_irq_disable();
+#if 1
+       xxprint("smp_send_stop disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable();
 }
 
@@ -579,18 +586,21 @@ void smp_send_stop(void)
  * all the work is done automatically when
  * we return from the interrupt.
  */
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
 {
-       ack_APIC_irq();
+
+       return IRQ_HANDLED;
 }
 
-fastcall void smp_call_function_interrupt(struct pt_regs *regs)
+#include <linux/kallsyms.h>
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+                                       struct pt_regs *regs)
 {
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;
 
-       ack_APIC_irq();
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
@@ -608,5 +618,7 @@ fastcall void smp_call_function_interrup
                mb();
                atomic_inc(&call_data->finished);
        }
+
+       return IRQ_HANDLED;
 }
 
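And the point of the smp.c rewrite: an IPI under Xen is just an
event-channel notification.  Each (cpu, vector) pair was bound to a
channel by smp_intr_init()/bind_ipi_on_cpu_to_irq() in smpboot.c, so
sending reduces to the fast path of __send_IPI_one() above:

        evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
        if (evtchn)
                notify_via_evtchn(evtchn);      /* Xen raises it on the target VCPU */

which is also why the IPI handlers lose their ack_APIC_irq() calls and
become ordinary irqreturn_t handlers registered with request_irq().
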
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/time.c linux-2.6-xen-sparse/arch/i386/kernel/time.c
--- pristine-linux-2.6.12/arch/i386/kernel/time.c       2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/time.c        2005-07-28 13:17:07.000000000 -0700
@@ -46,6 +46,8 @@
 #include <linux/bcd.h>
 #include <linux/efi.h>
 #include <linux/mca.h>
+#include <linux/sysctl.h>
+#include <linux/percpu.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
@@ -71,13 +73,24 @@
 extern spinlock_t i8259A_lock;
 int pit_latch_buggy;              /* extern */
 
-#include "do_timer.h"
-
 u64 jiffies_64 = INITIAL_JIFFIES;
 
 EXPORT_SYMBOL(jiffies_64);
 
+#if defined(__x86_64__)
+unsigned long vxtime_hz = PIT_TICK_RATE;
+struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
+volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
+struct timespec __xtime __section_xtime;
+struct timezone __sys_tz __section_sys_tz;
+#endif
+
+#if defined(__x86_64__)
+unsigned int cpu_khz;  /* Detected as we calibrate the TSC */
+#else
 unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+#endif
 
 extern unsigned long wall_jiffies;
 
@@ -86,7 +99,210 @@ DEFINE_SPINLOCK(rtc_lock);
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
-struct timer_opts *cur_timer = &timer_none;
+extern struct init_timer_opts timer_tsc_init;
+extern struct timer_opts timer_tsc;
+struct timer_opts *cur_timer = &timer_tsc;
+
+/* These are periodically updated in shared_info, and then copied here. */
+struct shadow_time_info {
+       u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+       u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+       u32 tsc_to_nsec_mul;
+       u32 tsc_to_usec_mul;
+       int tsc_shift;
+       u32 version;
+};
+static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
+static struct timeval shadow_tv;
+
+/* Keep track of last time we did processing/updating of jiffies and xtime. */
+static u64 processed_system_time;   /* System time (ns) at last processing. */
+static DEFINE_PER_CPU(u64, processed_system_time);
+
+#define NS_PER_TICK (1000000000ULL/HZ)
+
+#define HANDLE_USEC_UNDERFLOW(_tv) do {                \
+       while ((_tv).tv_usec < 0) {             \
+               (_tv).tv_usec += USEC_PER_SEC;  \
+               (_tv).tv_sec--;                 \
+       }                                       \
+} while (0)
+#define HANDLE_USEC_OVERFLOW(_tv) do {         \
+       while ((_tv).tv_usec >= USEC_PER_SEC) { \
+               (_tv).tv_usec -= USEC_PER_SEC;  \
+               (_tv).tv_sec++;                 \
+       }                                       \
+} while (0)
+static inline void __normalize_time(time_t *sec, s64 *nsec)
+{
+       while (*nsec >= NSEC_PER_SEC) {
+               (*nsec) -= NSEC_PER_SEC;
+               (*sec)++;
+       }
+       while (*nsec < 0) {
+               (*nsec) += NSEC_PER_SEC;
+               (*sec)--;
+       }
+}
+
+/* Does this guest OS track Xen time, or set its wall clock independently? */
+static int independent_wallclock = 0;
+static int __init __independent_wallclock(char *str)
+{
+       independent_wallclock = 1;
+       return 1;
+}
+__setup("independent_wallclock", __independent_wallclock);
+#define INDEPENDENT_WALLCLOCK() \
+    (independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
+
+int tsc_disable __initdata = 0;
+
+static void delay_tsc(unsigned long loops)
+{
+       unsigned long bclock, now;
+       
+       rdtscl(bclock);
+       do
+       {
+               rep_nop();
+               rdtscl(now);
+       } while ((now-bclock) < loops);
+}
+
+struct timer_opts timer_tsc = {
+       .name = "tsc",
+       .delay = delay_tsc,
+};
+
+static inline u32 down_shift(u64 time, int shift)
+{
+       if ( shift < 0 )
+               return (u32)(time >> -shift);
+       return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+       u32 product_int, product_frac;
+       __asm__ (
+               "mul %3"
+               : "=a" (product_frac), "=d" (product_int)
+               : "0" (multiplicand), "r" (multiplier) );
+       return product_int;
+}
+
+void init_cpu_khz(void)
+{
+       u64 __cpu_khz = 1000000ULL << 32;
+       struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
+       do_div(__cpu_khz, info->tsc_to_system_mul);
+       cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
+       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+              cpu_khz / 1000, cpu_khz % 1000);
+}
+
+static u64 get_nsec_offset(struct shadow_time_info *shadow)
+{
+       u64 now;
+       u32 delta;
+       rdtscll(now);
+       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+       return mul_frac(delta, shadow->tsc_to_nsec_mul);
+}
+
+static unsigned long get_usec_offset(struct shadow_time_info *shadow)
+{
+       u64 now;
+       u32 delta;
+       rdtscll(now);
+       delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+       return mul_frac(delta, shadow->tsc_to_usec_mul);
+}
+
+static void update_wallclock(void)
+{
+       shared_info_t *s = HYPERVISOR_shared_info;
+       long wtm_nsec, xtime_nsec;
+       time_t wtm_sec, xtime_sec;
+       u64 tmp, usec;
+
+       shadow_tv.tv_sec  = s->wc_sec;
+       shadow_tv.tv_usec = s->wc_usec;
+
+       if (INDEPENDENT_WALLCLOCK())
+               return;
+
+       if ((time_status & STA_UNSYNC) != 0)
+               return;
+
+       /* Adjust wall-clock time base based on wall_jiffies ticks. */
+       usec = processed_system_time;
+       do_div(usec, 1000);
+       usec += (u64)shadow_tv.tv_sec * 1000000ULL;
+       usec += (u64)shadow_tv.tv_usec;
+       usec -= (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+
+       /* Split wallclock base into seconds and nanoseconds. */
+       tmp = usec;
+       xtime_nsec = do_div(tmp, 1000000) * 1000ULL;
+       xtime_sec  = (time_t)tmp;
+
+       wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
+       wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
+
+       set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
+       set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+}
+
+/*
+ * Reads a consistent set of time-base values from Xen, into a shadow data
+ * area. Must be called with the xtime_lock held for writing.
+ */
+static void __get_time_values_from_xen(void)
+{
+       shared_info_t           *s = HYPERVISOR_shared_info;
+       struct vcpu_time_info   *src;
+       struct shadow_time_info *dst;
+
+       src = &s->vcpu_time[smp_processor_id()];
+       dst = &per_cpu(shadow_time, smp_processor_id());
+
+       do {
+               dst->version = src->time_version2;
+               rmb();
+               dst->tsc_timestamp     = src->tsc_timestamp;
+               dst->system_timestamp  = src->system_time;
+               dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+               dst->tsc_shift         = src->tsc_shift;
+               rmb();
+       }
+       while (dst->version != src->time_version1);
+
+       dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
+
+       if ((shadow_tv.tv_sec != s->wc_sec) ||
+           (shadow_tv.tv_usec != s->wc_usec))
+               update_wallclock();
+}
+
+static inline int time_values_up_to_date(int cpu)
+{
+       struct vcpu_time_info   *src;
+       struct shadow_time_info *dst;
+
+       src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
+       dst = &per_cpu(shadow_time, smp_processor_id());
+
+       return (dst->version == src->time_version2);
+}
+
+#define TIME_VALUES_UP_TO_DATE \
+ ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
 
 /*
  * This is a special lock that is owned by the CPU and holds the index
@@ -126,13 +342,20 @@ void do_gettimeofday(struct timeval *tv)
        unsigned long seq;
        unsigned long usec, sec;
        unsigned long max_ntp_tick;
+       unsigned long flags;
+       s64 nsec;
+       unsigned int cpu;
+       struct shadow_time_info *shadow;
+
+       cpu = get_cpu();
+       shadow = &per_cpu(shadow_time, cpu);
 
        do {
                unsigned long lost;
 
                seq = read_seqbegin(&xtime_lock);
 
-               usec = cur_timer->get_offset();
+               usec = get_usec_offset(shadow);
                lost = jiffies - wall_jiffies;
 
                /*
@@ -151,11 +374,31 @@ void do_gettimeofday(struct timeval *tv)
                        usec += lost * (USEC_PER_SEC / HZ);
 
                sec = xtime.tv_sec;
-               usec += (xtime.tv_nsec / 1000);
+               usec += (xtime.tv_nsec / NSEC_PER_USEC);
+
+               nsec = shadow->system_timestamp - processed_system_time;
+               __normalize_time(&sec, &nsec);
+               usec += (long)nsec / NSEC_PER_USEC;
+
+               if (unlikely(!time_values_up_to_date(cpu))) {
+                       /*
+                        * We may have blocked for a long time,
+                        * rendering our calculations invalid
+                        * (e.g. the time delta may have
+                        * overflowed). Detect that and recalculate
+                        * with fresh values.
+                        */
+                       write_seqlock_irqsave(&xtime_lock, flags);
+                       __get_time_values_from_xen();
+                       write_sequnlock_irqrestore(&xtime_lock, flags);
+                       continue;
+               }
        } while (read_seqretry(&xtime_lock, seq));
 
-       while (usec >= 1000000) {
-               usec -= 1000000;
+       put_cpu();
+
+       while (usec >= USEC_PER_SEC) {
+               usec -= USEC_PER_SEC;
                sec++;
        }
 
@@ -168,21 +411,49 @@ EXPORT_SYMBOL(do_gettimeofday);
 int do_settimeofday(struct timespec *tv)
 {
        time_t wtm_sec, sec = tv->tv_sec;
-       long wtm_nsec, nsec = tv->tv_nsec;
+       long wtm_nsec;
+       s64 nsec;
+       struct timespec xentime;
+       unsigned int cpu;
+       struct shadow_time_info *shadow;
 
        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
+       if (!INDEPENDENT_WALLCLOCK())
+               return 0; /* Silent failure? */
+
+       cpu = get_cpu();
+       shadow = &per_cpu(shadow_time, cpu);
+
        write_seqlock_irq(&xtime_lock);
+
+       /*
+        * Ensure we don't get blocked for a long time so that our time delta
+        * overflows. If that were to happen then our shadow time values would
+        * be stale, so we can retry with fresh ones.
+        */
+ again:
+       nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+       if (unlikely(!time_values_up_to_date(cpu))) {
+               __get_time_values_from_xen();
+               goto again;
+       }
+
+       __normalize_time(&sec, &nsec);
+       set_normalized_timespec(&xentime, sec, nsec);
+
        /*
         * This is revolting. We need to set "xtime" correctly. However, the
         * value in this location is the value at the most recent update of
         * wall time.  Discover what correction gettimeofday() would have
         * made, and then undo it!
         */
-       nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
        nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
 
+       nsec -= (shadow->system_timestamp - processed_system_time);
+
+       __normalize_time(&sec, &nsec);
        wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
        wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
 
@@ -193,13 +464,29 @@ int do_settimeofday(struct timespec *tv)
        time_status |= STA_UNSYNC;
        time_maxerror = NTP_PHASE_LIMIT;
        time_esterror = NTP_PHASE_LIMIT;
-       write_sequnlock_irq(&xtime_lock);
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       if (xen_start_info.flags & SIF_INITDOMAIN) {
+               dom0_op_t op;
+               op.cmd = DOM0_SETTIME;
+               op.u.settime.secs        = xentime.tv_sec;
+               op.u.settime.usecs       = xentime.tv_nsec / NSEC_PER_USEC;
+               op.u.settime.system_time = shadow->system_timestamp;
+               write_sequnlock_irq(&xtime_lock);
+               HYPERVISOR_dom0_op(&op);
+       } else
+#endif
+               write_sequnlock_irq(&xtime_lock);
+
+       put_cpu();
+
        clock_was_set();
        return 0;
 }
 
 EXPORT_SYMBOL(do_settimeofday);
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static int set_rtc_mmss(unsigned long nowtime)
 {
        int retval;
@@ -216,9 +503,12 @@ static int set_rtc_mmss(unsigned long no
 
        return retval;
 }
-
-
-int timer_ack;
+#else
+static int set_rtc_mmss(unsigned long nowtime)
+{
+       return 0;
+}
+#endif
 
 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
  *             Note: This function is required to return accurate
@@ -226,10 +516,31 @@ int timer_ack;
  */
 unsigned long long monotonic_clock(void)
 {
-       return cur_timer->monotonic_clock();
+       int cpu = get_cpu();
+       struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+       s64 off;
+       unsigned long flags;
+       
+       for ( ; ; ) {
+               off = get_nsec_offset(shadow);
+               if (time_values_up_to_date(cpu))
+                       break;
+               write_seqlock_irqsave(&xtime_lock, flags);
+               __get_time_values_from_xen();
+               write_sequnlock_irqrestore(&xtime_lock, flags);
+       }
+
+       put_cpu();
+
+       return shadow->system_timestamp + off;
 }
 EXPORT_SYMBOL(monotonic_clock);
 
+unsigned long long sched_clock(void)
+{
+       return monotonic_clock();
+}
+
 #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
 unsigned long profile_pc(struct pt_regs *regs)
 {
@@ -250,37 +561,47 @@ EXPORT_SYMBOL(profile_pc);
 static inline void do_timer_interrupt(int irq, void *dev_id,
                                        struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_IO_APIC
-       if (timer_ack) {
-               /*
-                * Subtle, when I/O APICs are used we have to ack timer IRQ
-                * manually to reset the IRR bit for do_slow_gettimeoffset().
-                * This will also deassert NMI lines for the watchdog if run
-                * on an 82489DX-based system.
-                */
-               spin_lock(&i8259A_lock);
-               outb(0x0c, PIC_MASTER_OCW3);
-               /* Ack the IRQ; AEOI will end it automatically. */
-               inb(PIC_MASTER_POLL);
-               spin_unlock(&i8259A_lock);
-       }
-#endif
+       s64 delta, delta_cpu;
+       int cpu = smp_processor_id();
+       struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+
+       do {
+               __get_time_values_from_xen();
 
-       do_timer_interrupt_hook(regs);
+               delta = delta_cpu = 
+                       shadow->system_timestamp + get_nsec_offset(shadow);
+               delta     -= processed_system_time;
+               delta_cpu -= per_cpu(processed_system_time, cpu);
+       }
+       while (!time_values_up_to_date(cpu));
 
+       if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
+               printk("Timer ISR/%d: Time went backwards: "
+                      "delta=%lld cpu_delta=%lld shadow=%lld "
+                      "off=%lld processed=%lld cpu_processed=%lld\n",
+                      cpu, delta, delta_cpu, shadow->system_timestamp,
+                      (s64)get_nsec_offset(shadow),
+                      processed_system_time,
+                      per_cpu(processed_system_time, cpu));
+               for (cpu = 0; cpu < num_online_cpus(); cpu++)
+                       printk(" %d: %lld\n", cpu,
+                              per_cpu(processed_system_time, cpu));
+               return;
+       }
 
-       if (MCA_bus) {
-               /* The PS/2 uses level-triggered interrupts.  You can't
-               turn them off, nor would you want to (any attempt to
-               enable edge-triggered interrupts usually gets intercepted by a
-               special hardware circuit).  Hence we have to acknowledge
-               the timer interrupt.  Through some incredibly stupid
-               design idea, the reset for IRQ 0 is done by setting the
-               high bit of the PPI port B (0x61).  Note that some PS/2s,
-               notably the 55SX, work fine if this is removed.  */
+       /* System-wide jiffy work. */
+       while (delta >= NS_PER_TICK) {
+               delta -= NS_PER_TICK;
+               processed_system_time += NS_PER_TICK;
+               do_timer(regs);
+       }
 
-               irq = inb_p( 0x61 );    /* read the current state */
-               outb_p( irq|0x80, 0x61 );       /* reset the IRQ */
+       /* Local CPU jiffy work. */
+       while (delta_cpu >= NS_PER_TICK) {
+               delta_cpu -= NS_PER_TICK;
+               per_cpu(processed_system_time, cpu) += NS_PER_TICK;
+               update_process_times(user_mode(regs));
+               profile_tick(CPU_PROFILING, regs);
        }
 }
 
@@ -299,11 +620,7 @@ irqreturn_t timer_interrupt(int irq, voi
         * locally disabled. -arca
         */
        write_seqlock(&xtime_lock);
-
-       cur_timer->mark_offset();
- 
        do_timer_interrupt(irq, NULL, regs);
-
        write_sequnlock(&xtime_lock);
        return IRQ_HANDLED;
 }
@@ -452,6 +769,14 @@ static void __init hpet_time_init(void)
 }
 #endif
 
+/* Dynamically-mapped IRQ. */
+static DEFINE_PER_CPU(int, timer_irq);
+
+static struct irqaction irq_timer = {
+       timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
+       NULL, NULL
+};
+
 void __init time_init(void)
 {
 #ifdef CONFIG_HPET_TIMER
@@ -464,13 +789,141 @@ void __init time_init(void)
                return;
        }
 #endif
-       xtime.tv_sec = get_cmos_time();
-       xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+       __get_time_values_from_xen();
+       xtime.tv_sec = shadow_tv.tv_sec;
+       xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
        set_normalized_timespec(&wall_to_monotonic,
                -xtime.tv_sec, -xtime.tv_nsec);
+       processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
+       per_cpu(processed_system_time, 0) = processed_system_time;
 
-       cur_timer = select_timer();
-       printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+       init_cpu_khz();
 
-       time_init_hook();
+#if defined(__x86_64__)
+       vxtime.mode = VXTIME_TSC;
+       vxtime.quot = (1000000L << 32) / vxtime_hz;
+       vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+       vxtime.hz = vxtime_hz;
+       sync_core();
+       rdtscll(vxtime.last_tsc);
+#endif
+
+       per_cpu(timer_irq, 0) = bind_virq_to_irq(VIRQ_TIMER);
+       (void)setup_irq(per_cpu(timer_irq, 0), &irq_timer);
+}
+
+/* Convert jiffies to system time. */
+static inline u64 jiffies_to_st(unsigned long j) 
+{
+       unsigned long seq;
+       long delta;
+       u64 st;
+
+       do {
+               seq = read_seqbegin(&xtime_lock);
+               delta = j - jiffies;
+               /* NB. The next check can trigger in some wrap-around cases,
+                * but that's ok: we'll just end up with a shorter timeout. */
+               if (delta < 1)
+                       delta = 1;
+               st = processed_system_time + (delta * NS_PER_TICK);
+       } while (read_seqretry(&xtime_lock, seq));
+
+       return st;
 }
+
+/*
+ * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
+ * These functions are based on implementations from arch/s390/kernel/time.c
+ */
+void stop_hz_timer(void)
+{
+       unsigned int cpu = smp_processor_id();
+       unsigned long j;
+
+       /* s390 does this /before/ checking rcu_pending(). We copy them. */
+       cpu_set(cpu, nohz_cpu_mask);
+
+       /* Leave ourselves in 'tick mode' if rcu or softirq pending. */
+       if (rcu_pending(cpu) || local_softirq_pending()) {
+               cpu_clear(cpu, nohz_cpu_mask);
+               j = jiffies + 1;
+       } else {
+               j = next_timer_interrupt();
+       }
+
+       BUG_ON(HYPERVISOR_set_timer_op(jiffies_to_st(j)) != 0);
+}
+
+void start_hz_timer(void)
+{
+       cpu_clear(smp_processor_id(), nohz_cpu_mask);
+}
+
+void time_suspend(void)
+{
+       /* nothing */
+}
+
+/* No locking required. We are only CPU running, and interrupts are off. */
+void time_resume(void)
+{
+       init_cpu_khz();
+
+       /* Get timebases for new environment. */ 
+       __get_time_values_from_xen();
+
+       /* Reset our own concept of passage of system time. */
+       processed_system_time =
+               per_cpu(shadow_time, smp_processor_id()).system_timestamp;
+       per_cpu(processed_system_time, 0) = processed_system_time;
+}
+
+#ifdef CONFIG_SMP
+static char timer_name[NR_CPUS][15];
+void local_setup_timer(void)
+{
+       int seq, cpu = smp_processor_id();
+
+       do {
+               seq = read_seqbegin(&xtime_lock);
+               per_cpu(processed_system_time, cpu) = 
+                       per_cpu(shadow_time, cpu).system_timestamp;
+       } while (read_seqretry(&xtime_lock, seq));
+
+       per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
+       sprintf(timer_name[cpu], "timer%d", cpu);
+       BUG_ON(request_irq(per_cpu(timer_irq, cpu), timer_interrupt,
+                          SA_INTERRUPT, timer_name[cpu], NULL));
+}
+#endif
+
+/*
+ * /proc/sys/xen: This really belongs in another file. It can stay here for
+ * now however.
+ */
+static ctl_table xen_subtable[] = {
+       {1, "independent_wallclock", &independent_wallclock,
+        sizeof(independent_wallclock), 0644, NULL, proc_dointvec},
+       {0}
+};
+static ctl_table xen_table[] = {
+       {123, "xen", NULL, 0, 0555, xen_subtable},
+       {0}
+};
+static int __init xen_sysctl_init(void)
+{
+       (void)register_sysctl_table(xen_table, 0);
+       return 0;
+}
+__initcall(xen_sysctl_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
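
A note on the time.c hunk above: the guest never touches the PIT or CMOS
directly; it extrapolates from a per-VCPU shadow of Xen's time record. As
a minimal sketch (not a drop-in function -- it simply composes down_shift(),
mul_frac() and the shadow_time_info fields exactly as defined in the hunk),
"current system time" is:

    /* ns since boot = Xen's last stamp + scaled TSC delta */
    static u64 sketch_system_time(struct shadow_time_info *shadow)
    {
            u64 now;
            u32 delta;

            rdtscll(now);
            /* cycles since Xen wrote its timestamp, pre-shifted */
            delta = down_shift(now - shadow->tsc_timestamp,
                               shadow->tsc_shift);
            /* mul_frac(): (delta * tsc_to_nsec_mul) >> 32 */
            return shadow->system_timestamp +
                   mul_frac(delta, shadow->tsc_to_nsec_mul);
    }

The version fields read in __get_time_values_from_xen() make this a lockless
consistent snapshot: if Xen updates the record mid-copy, the two version
reads disagree and the copy is retried.
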
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/timers/Makefile linux-2.6-xen-sparse/arch/i386/kernel/timers/Makefile
--- pristine-linux-2.6.12/arch/i386/kernel/timers/Makefile      2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/timers/Makefile       2005-07-28 13:17:07.000000000 -0700
@@ -2,8 +2,16 @@
 # Makefile for x86 timers
 #
 
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
-obj-$(CONFIG_X86_CYCLONE_TIMER)        += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER)       += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER)     += timer_pm.o
+obj-y :=       timer_tsc.o
+c-obj-y :=
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/kernel/timers/$(notdir $@) $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/timers/timer_tsc.c linux-2.6-xen-sparse/arch/i386/kernel/timers/timer_tsc.c
--- pristine-linux-2.6.12/arch/i386/kernel/timers/timer_tsc.c   2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/timers/timer_tsc.c    2005-07-28 13:17:07.000000000 -0700
@@ -1,10 +1,6 @@
 /*
  * This code largely moved from arch/i386/kernel/time.c.
  * See comments there for proper credits.
- *
- * 2004-06-25    Jesper Juhl
- *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- *      failing to inline.
  */
 
 #include <linux/spinlock.h>
@@ -38,12 +34,9 @@ int tsc_disable __initdata = 0;
 extern spinlock_t i8253_lock;
 
 static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
 
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
 static unsigned long long monotonic_base;
+static u32 monotonic_offset;
 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
 
 /* convert from cycles(64bits) => nanoseconds (64bits)
@@ -74,8 +67,6 @@ static inline unsigned long long cycles_
        return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
 }
 
-static int count2; /* counter for mark_offset_tsc() */
-
 /* Cached *multiplier* to convert TSC counts to microseconds.
  * (see the equation below).
  * Equal to 2^32 * (1 / (clocks per usec) ).
@@ -83,6 +74,9 @@ static int count2; /* counter for mark_o
  */
 static unsigned long fast_gettimeoffset_quotient;
 
+extern u32 shadow_tsc_stamp;
+extern u64 shadow_system_time;
+
 static unsigned long get_offset_tsc(void)
 {
        register unsigned long eax, edx;
@@ -92,7 +86,7 @@ static unsigned long get_offset_tsc(void
        rdtsc(eax,edx);
 
        /* .. relative to previous jiffy (32 bits is enough) */
-       eax -= last_tsc_low;    /* tsc_low delta */
+       eax -= shadow_tsc_stamp;
 
        /*
          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
@@ -109,7 +103,7 @@ static unsigned long get_offset_tsc(void
                 "0" (eax));
 
        /* our adjusted time offset in microseconds */
-       return delay_at_last_interrupt + edx;
+       return edx;
 }
 
 static unsigned long long monotonic_clock_tsc(void)
@@ -120,7 +114,7 @@ static unsigned long long monotonic_cloc
        /* atomically read monotonic base & last_offset */
        do {
                seq = read_seqbegin(&monotonic_lock);
-               last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+               last_offset = monotonic_offset;
                base = monotonic_base;
        } while (read_seqretry(&monotonic_lock, seq));
 
@@ -155,6 +149,17 @@ unsigned long long sched_clock(void)
        return cycles_2_ns(this_offset);
 }
 
+
+static void mark_offset_tsc(void)
+{
+
+       /* update the monotonic base value */
+       write_seqlock(&monotonic_lock);
+       monotonic_base = shadow_system_time;
+       monotonic_offset = shadow_tsc_stamp;
+       write_sequnlock(&monotonic_lock);
+}
+
 static void delay_tsc(unsigned long loops)
 {
        unsigned long bclock, now;
@@ -320,245 +325,39 @@ core_initcall(cpufreq_tsc);
 static inline void cpufreq_delayed_get(void) { return; }
 #endif 
 
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
-       unsigned long cpu_khz_old = cpu_khz;
-
-       if (cpu_has_tsc) {
-               init_cpu_khz();
-               cpu_data[0].loops_per_jiffy =
-                   cpufreq_scale(cpu_data[0].loops_per_jiffy,
-                                 cpu_khz_old,
-                                 cpu_khz);
-               return 0;
-       } else
-               return -ENODEV;
-#else
-       return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);
 
-static void mark_offset_tsc(void)
+static int init_tsc(char* override)
 {
-       unsigned long lost,delay;
-       unsigned long delta = last_tsc_low;
-       int count;
-       int countmp;
-       static int count1 = 0;
-       unsigned long long this_offset, last_offset;
-       static int lost_count = 0;
-
-       write_seqlock(&monotonic_lock);
-       last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-       /*
-        * It is important that these two operations happen almost at
-        * the same time. We do the RDTSC stuff first, since it's
-        * faster. To avoid any inconsistencies, we need interrupts
-        * disabled locally.
-        */
-
-       /*
-        * Interrupts are just disabled locally since the timer irq
-        * has the SA_INTERRUPT flag set. -arca
-        */
-
-       /* read Pentium cycle counter */
-
-       rdtsc(last_tsc_low, last_tsc_high);
-
-       spin_lock(&i8253_lock);
-       outb_p(0x00, PIT_MODE);     /* latch the count ASAP */
-
-       count = inb_p(PIT_CH0);    /* read the latched count */
-       count |= inb(PIT_CH0) << 8;
-
-       /*
-        * VIA686a test code... reset the latch if count > max + 1
-        * from timer_pit.c - cjb
-        */
-       if (count > LATCH) {
-               outb_p(0x34, PIT_MODE);
-               outb_p(LATCH & 0xff, PIT_CH0);
-               outb(LATCH >> 8, PIT_CH0);
-               count = LATCH - 1;
-       }
-
-       spin_unlock(&i8253_lock);
+       u64 __cpu_khz;
 
-       if (pit_latch_buggy) {
-               /* get center value of last 3 time lutch */
-               if ((count2 >= count && count >= count1)
-                   || (count1 >= count && count >= count2)) {
-                       count2 = count1; count1 = count;
-               } else if ((count1 >= count2 && count2 >= count)
-                          || (count >= count2 && count2 >= count1)) {
-                       countmp = count;count = count2;
-                       count2 = count1;count1 = countmp;
-               } else {
-                       count2 = count1; count1 = count; count = count1;
-               }
-       }
+       __cpu_khz = HYPERVISOR_shared_info->cpu_freq;
+       do_div(__cpu_khz, 1000);
+       cpu_khz = (u32)__cpu_khz;
+       printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n", 
+              cpu_khz / 1000, cpu_khz % 1000);
 
-       /* lost tick compensation */
-       delta = last_tsc_low - delta;
+       /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
+          (2^32 * 1 / (clocks/us)) */
        {
-               register unsigned long eax, edx;
-               eax = delta;
-               __asm__("mull %2"
-               :"=a" (eax), "=d" (edx)
-               :"rm" (fast_gettimeoffset_quotient),
-                "0" (eax));
-               delta = edx;
-       }
-       delta += delay_at_last_interrupt;
-       lost = delta/(1000000/HZ);
-       delay = delta%(1000000/HZ);
-       if (lost >= 2) {
-               jiffies_64 += lost-1;
-
-               /* sanity check to ensure we're not always losing ticks */
-               if (lost_count++ > 100) {
-                       printk(KERN_WARNING "Losing too many ticks!\n");
-                       printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
-                       printk(KERN_WARNING "Possible reasons for this are:\n");
-                       printk(KERN_WARNING "  You're running with Speedstep,\n");
-                       printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
-                       printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
-                       printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
-                       clock_fallback();
-               }
-               /* ... but give the TSC a fair chance */
-               if (lost_count > 25)
-                       cpufreq_delayed_get();
-       } else
-               lost_count = 0;
-       /* update the monotonic base value */
-       this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
-       monotonic_base += cycles_2_ns(this_offset - last_offset);
-       write_sequnlock(&monotonic_lock);
-
-       /* calculate delay_at_last_interrupt */
-       count = ((LATCH-1) - count) * TICK_SIZE;
-       delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-       /* catch corner case where tick rollover occured
-        * between tsc and pit reads (as noted when
-        * usec delta is > 90% # of usecs/tick)
-        */
-       if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
-               jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
-       /* check clock override */
-       if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
-               if (is_hpet_enabled()) {
-                       printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
-               } else
-#endif
-               {
-                       return -ENODEV;
-               }
+               unsigned long eax=0, edx=1000;
+               __asm__("divl %2"
+                   :"=a" (fast_gettimeoffset_quotient), "=d" (edx)
+                   :"r" (cpu_khz),
+                   "0" (eax), "1" (edx));
        }
 
-       /*
-        * If we have APM enabled or the CPU clock speed is variable
-        * (CPU stops clock on HLT or slows clock to save power)
-        * then the TSC timestamps may diverge by up to 1 jiffy from
-        * 'real time' but nothing will break.
-        * The most frequent case is that the CPU is "woken" from a halt
-        * state by the timer interrupt itself, so we get 0 error. In the
-        * rare cases where a driver would "wake" the CPU and request a
-        * timestamp, the maximum error is < 1 jiffy. But timestamps are
-        * still perfectly ordered.
-        * Note that the TSC counter will be reset if APM suspends
-        * to disk; this won't break the kernel, though, 'cuz we're
-        * smart.  See arch/i386/kernel/apm.c.
-        */
-       /*
-        *      Firstly we have to do a CPU check for chips with
-        *      a potentially buggy TSC. At this point we haven't run
-        *      the ident/bugs checks so we must run this hook as it
-        *      may turn off the TSC flag.
-        *
-        *      NOTE: this doesn't yet handle SMP 486 machines where only
-        *      some CPU's have a TSC. Thats never worked and nobody has
-        *      moaned if you have the only one in the world - you fix it!
-        */
-
-       count2 = LATCH; /* initialize counter for mark_offset_tsc() */
+       set_cyc2ns_scale(cpu_khz/1000);
 
-       if (cpu_has_tsc) {
-               unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
-               if (is_hpet_enabled() && hpet_use_timer) {
-                       unsigned long result, remain;
-                       printk("Using TSC for gettimeofday\n");
-                       tsc_quotient = calibrate_tsc_hpet(NULL);
-                       timer_tsc.mark_offset = &mark_offset_tsc_hpet;
-                       /*
-                        * Math to calculate hpet to usec multiplier
-                        * Look for the comments at get_offset_tsc_hpet()
-                        */
-                       ASM_DIV64_REG(result, remain, hpet_tick,
-                                       0, KERNEL_TICK_USEC);
-                       if (remain > (hpet_tick >> 1))
-                               result++; /* rounding the result */
+       use_tsc = 1;
 
-                       hpet_usec_quotient = result;
-               } else
-#endif
-               {
-                       tsc_quotient = calibrate_tsc();
-               }
-
-               if (tsc_quotient) {
-                       fast_gettimeoffset_quotient = tsc_quotient;
-                       use_tsc = 1;
-                       /*
-                        *      We could be more selective here I suspect
-                        *      and just enable this for the next intel chips ?
-                        */
-                       /* report CPU clock rate in Hz.
-                        * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
-                        * clock/second. Our precision is about 100 ppm.
-                        */
-                       {       unsigned long eax=0, edx=1000;
-                               __asm__("divl %2"
-                               :"=a" (cpu_khz), "=d" (edx)
-                               :"r" (tsc_quotient),
-                               "0" (eax), "1" (edx));
-                               printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
-                       }
-                       set_cyc2ns_scale(cpu_khz/1000);
-                       return 0;
-               }
-       }
-       return -ENODEV;
+       return 0;
 }
 
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
 static int __init tsc_setup(char *str)
 {
-       tsc_disable = 1;
+       printk(KERN_WARNING "notsc: cannot disable TSC in Xen/Linux.\n");
        return 1;
 }
-#else
-static int __init tsc_setup(char *str)
-{
-       printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
-                               "cannot disable TSC.\n");
-       return 1;
-}
-#endif
 __setup("notsc", tsc_setup);
 
 
@@ -566,7 +365,7 @@ __setup("notsc", tsc_setup);
 /************************************************************/
 
 /* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
+struct timer_opts timer_tsc = {
        .name = "tsc",
        .mark_offset = mark_offset_tsc, 
        .get_offset = get_offset_tsc,
@@ -574,7 +373,7 @@ static struct timer_opts timer_tsc = {
        .delay = delay_tsc,
 };
 
-struct init_timer_opts __initdata timer_tsc_init = {
+struct init_timer_opts timer_tsc_init = {
        .init = init_tsc,
        .opts = &timer_tsc,
 };
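
The divl in init_tsc() above is just a 64/32-bit divide. In plain C
(illustrative only; the asm form avoids pulling in the compiler's 64-bit
division helpers):

    /* fast_gettimeoffset_quotient == 2^32 / (TSC clocks per usec) */
    u64 tmp = 1000ULL << 32;                     /* 10^3 * 2^32 */
    fast_gettimeoffset_quotient = (u32)(tmp / cpu_khz);

get_offset_tsc() then multiplies a 32-bit cycle delta by this quotient and
keeps only the high 32 bits of the product, which yields the offset in
microseconds with no division on the fast path.
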
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/traps.c linux-2.6-xen-sparse/arch/i386/kernel/traps.c
--- pristine-linux-2.6.12/arch/i386/kernel/traps.c      2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/traps.c       2005-07-28 13:17:07.000000000 -0700
@@ -58,9 +58,6 @@
 
 asmlinkage int system_call(void);
 
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
-               { 0, 0 }, { 0, 0 } };
-
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
 
@@ -88,7 +85,7 @@ asmlinkage void page_fault(void);
 asmlinkage void coprocessor_error(void);
 asmlinkage void simd_coprocessor_error(void);
 asmlinkage void alignment_check(void);
-asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void fixup_4gb_segment(void);
 asmlinkage void machine_check(void);
 
 static int kstack_depth_to_print = 24;
@@ -209,7 +206,7 @@ void show_registers(struct pt_regs *regs
 
        esp = (unsigned long) (&regs->esp);
        ss = __KERNEL_DS;
-       if (regs->xcs & 3) {
+       if (regs->xcs & 2) {
                in_kernel = 0;
                esp = regs->esp;
                ss = regs->xss & 0xffff;
@@ -265,7 +262,7 @@ static void handle_BUG(struct pt_regs *r
        char c;
        unsigned long eip;
 
-       if (regs->xcs & 3)
+       if (regs->xcs & 2)
                goto no_bug;            /* Not in kernel */
 
        eip = regs->eip;
@@ -353,7 +350,7 @@ void die(const char * str, struct pt_reg
 
 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
 {
-       if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs))
+       if (!(regs->eflags & VM_MASK) && !(2 & regs->xcs))
                die(str, regs, err);
 }
 
@@ -366,7 +363,7 @@ static void do_trap(int trapnr, int sign
                goto trap_signal;
        }
 
-       if (!(regs->xcs & 3))
+       if (!(regs->xcs & 2))
                goto kernel_trap;
 
        trap_signal: {
@@ -446,49 +443,37 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
 DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_VM86_ERROR( 7, SIGSEGV, "device not available", device_not_available)
 DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
+#ifdef CONFIG_X86_MCE
+DO_ERROR(18, SIGBUS, "machine check", machine_check)
+#endif
 
 fastcall void do_general_protection(struct pt_regs * regs, long error_code)
 {
-       int cpu = get_cpu();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
-       struct thread_struct *thread = &current->thread;
-
        /*
-        * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
-        * invalid offset set (the LAZY one) and the faulting thread has
-        * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
-        * and we set the offset field correctly. Then we let the CPU to
-        * restart the faulting instruction.
-        */
-       if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
-           thread->io_bitmap_ptr) {
-               memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
-                      thread->io_bitmap_max);
-               /*
-                * If the previously set map was extending to higher ports
-                * than the current one, pad extra space with 0xff (no access).
-                */
-               if (thread->io_bitmap_max < tss->io_bitmap_max)
-                       memset((char *) tss->io_bitmap +
-                               thread->io_bitmap_max, 0xff,
-                               tss->io_bitmap_max - thread->io_bitmap_max);
-               tss->io_bitmap_max = thread->io_bitmap_max;
-               tss->io_bitmap_base = IO_BITMAP_OFFSET;
-               put_cpu();
-               return;
+        * If we trapped on an LDT access then ensure that the default_ldt is
+        * loaded, if nothing else. We load default_ldt lazily because LDT
+        * switching costs time and many applications don't need it.
+        */
+       if (unlikely((error_code & 6) == 4)) {
+               unsigned long ldt;
+               __asm__ __volatile__ ("sldt %0" : "=r" (ldt));
+               if (ldt == 0) {
+                       xen_set_ldt((unsigned long)&default_ldt[0], 5);
+                       return;
+               }
        }
-       put_cpu();
 
        if (regs->eflags & VM_MASK)
                goto gp_in_vm86;
 
-       if (!(regs->xcs & 3))
+       if (!(regs->xcs & 2))
                goto gp_in_kernel;
 
        current->thread.error_code = error_code;
@@ -624,6 +609,14 @@ fastcall void do_nmi(struct pt_regs * re
        nmi_enter();
 
        cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+       if (!cpu_online(cpu)) {
+               nmi_exit();
+               return;
+       }
+#endif
+
        ++nmi_count(cpu);
 
        if (!nmi_callback(regs, cpu))
@@ -682,14 +675,16 @@ fastcall void do_debug(struct pt_regs * 
        unsigned int condition;
        struct task_struct *tsk = current;
 
-       __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+       condition = HYPERVISOR_get_debugreg(6);
 
        if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
                                        SIGTRAP) == NOTIFY_STOP)
                return;
+#if 0
        /* It's safe to allow irq's after DR6 has been saved */
        if (regs->eflags & X86_EFLAGS_IF)
                local_irq_enable();
+#endif
 
        /* Mask out spurious debug traps due to lazy DR7 setting */
        if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
@@ -713,7 +708,7 @@ fastcall void do_debug(struct pt_regs * 
                 * check for kernel mode by just checking the CPL
                 * of CS.
                 */
-               if ((regs->xcs & 3) == 0)
+               if ((regs->xcs & 2) == 0)
                        goto clear_TF_reenable;
        }
 
@@ -724,9 +719,7 @@ fastcall void do_debug(struct pt_regs * 
         * the signal is delivered.
         */
 clear_dr7:
-       __asm__("movl %0,%%db7"
-               : /* no output */
-               : "r" (0));
+       HYPERVISOR_set_debugreg(7, 0);
        return;
 
 debug_vm86:
@@ -878,15 +871,6 @@ fastcall void do_simd_coprocessor_error(
        }
 }
 
-fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
-                                         long error_code)
-{
-#if 0
-       /* No need to warn about this any longer. */
-       printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-#endif
-}
-
 fastcall void setup_x86_bogus_stack(unsigned char * stk)
 {
        unsigned long *switch16_ptr, *switch32_ptr;
@@ -947,7 +931,7 @@ asmlinkage void math_state_restore(struc
        struct thread_info *thread = current_thread_info();
        struct task_struct *tsk = thread->task;
 
-       clts();         /* Allow maths ops (or we recurse) */
+       /* NB. 'clts' is done for us by Xen during virtual trap. */
        if (!tsk_used_math(tsk))
                init_fpu(tsk);
        restore_fpu(tsk);
@@ -980,100 +964,58 @@ void __init trap_init_f00f_bug(void)
 }
 #endif
 
-#define _set_gate(gate_addr,type,dpl,addr,seg) \
-do { \
-  int __d0, __d1; \
-  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
-       "movw %4,%%dx\n\t" \
-       "movl %%eax,%0\n\t" \
-       "movl %%edx,%1" \
-       :"=m" (*((long *) (gate_addr))), \
-        "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
-       :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-        "3" ((char *) (addr)),"2" ((seg) << 16)); \
-} while (0)
-
-
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
-void set_intr_gate(unsigned int n, void *addr)
-{
-       _set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
-}
-
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
-       _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
-}
-
-static void __init set_trap_gate(unsigned int n, void *addr)
-{
-       _set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
-}
-
-static void __init set_system_gate(unsigned int n, void *addr)
-{
-       _set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
-}
-
-static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
-       _set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
-}
 
+/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */
+static trap_info_t trap_table[] = {
+       {  0, 0, __KERNEL_CS, (unsigned long)divide_error               },
+       {  1, 0, __KERNEL_CS, (unsigned long)debug                      },
+       {  3, 3, __KERNEL_CS, (unsigned long)int3                       },
+       {  4, 3, __KERNEL_CS, (unsigned long)overflow                   },
+       {  5, 3, __KERNEL_CS, (unsigned long)bounds                     },
+       {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                 },
+       {  7, 0, __KERNEL_CS, (unsigned long)device_not_available       },
+       {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+       { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                },
+       { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present        },
+       { 12, 0, __KERNEL_CS, (unsigned long)stack_segment              },
+       { 13, 0, __KERNEL_CS, (unsigned long)general_protection         },
+       { 14, 0, __KERNEL_CS, (unsigned long)page_fault                 },
+       { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment          },
+       { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error          },
+       { 17, 0, __KERNEL_CS, (unsigned long)alignment_check            },
+#ifdef CONFIG_X86_MCE
+       { 18, 0, __KERNEL_CS, (unsigned long)machine_check              },
+#endif
+       { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error     },
+       { SYSCALL_VECTOR,  3, __KERNEL_CS, (unsigned long)system_call   },
+       {  0, 0,           0, 0                                         }
+};
 
 void __init trap_init(void)
 {
-#ifdef CONFIG_EISA
-       void __iomem *p = ioremap(0x0FFFD9, 4);
-       if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
-               EISA_bus = 1;
-       }
-       iounmap(p);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-       init_apic_mappings();
-#endif
-
-       set_trap_gate(0,&divide_error);
-       set_intr_gate(1,&debug);
-       set_intr_gate(2,&nmi);
-       set_system_intr_gate(3, &int3); /* int3-5 can be called from all */
-       set_system_gate(4,&overflow);
-       set_system_gate(5,&bounds);
-       set_trap_gate(6,&invalid_op);
-       set_trap_gate(7,&device_not_available);
-       set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
-       set_trap_gate(9,&coprocessor_segment_overrun);
-       set_trap_gate(10,&invalid_TSS);
-       set_trap_gate(11,&segment_not_present);
-       set_trap_gate(12,&stack_segment);
-       set_trap_gate(13,&general_protection);
-       set_intr_gate(14,&page_fault);
-       set_trap_gate(15,&spurious_interrupt_bug);
-       set_trap_gate(16,&coprocessor_error);
-       set_trap_gate(17,&alignment_check);
-#ifdef CONFIG_X86_MCE
-       set_trap_gate(18,&machine_check);
-#endif
-       set_trap_gate(19,&simd_coprocessor_error);
+       HYPERVISOR_set_trap_table(trap_table);
 
-       set_system_gate(SYSCALL_VECTOR,&system_call);
+       /*
+        * default LDT is a single-entry callgate to lcall7 for iBCS
+        * and a callgate to lcall27 for Solaris/x86 binaries
+        */
+       make_lowmem_page_readonly(&default_ldt[0]);
 
        /*
         * Should be a barrier for any external CPU state.
         */
        cpu_init();
+}
 
-       trap_init_hook();
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+       trap_info_t *t = trap_table;
+
+       for (t = trap_table; t->address; t++) {
+               trap_ctxt[t->vector].flags = t->flags;
+               trap_ctxt[t->vector].cs = t->cs;
+               trap_ctxt[t->vector].address = t->address;
+       }
 }
 
 static int __init kstack_setup(char *s)
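
Two things worth calling out in the traps.c hunk. First, the IDT is gone:
the guest registers trap_table[] with Xen via HYPERVISOR_set_trap_table()
and Xen reflects exceptions back to it as virtual traps. Second, every
(regs->xcs & 3) test becomes (regs->xcs & 2) because the kernel now runs
in ring 1, so only bit 1 of the CS RPL separates kernel (rings 0/1) from
user (rings 2/3). A sketch of the predicate, under that assumption:

    /* user context iff the saved CS has RPL 2 or 3 */
    static inline int xen_user_mode(struct pt_regs *regs)
    {
            return (regs->xcs & 2) != 0;
    }
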
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/kernel/vsyscall.S linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S
--- pristine-linux-2.6.12/arch/i386/kernel/vsyscall.S   2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S    2005-07-28 13:17:07.000000000 -0700
@@ -4,12 +4,12 @@ __INITDATA
 
        .globl vsyscall_int80_start, vsyscall_int80_end
 vsyscall_int80_start:
-       .incbin "arch/i386/kernel/vsyscall-int80.so"
+       .incbin "arch/xen/i386/kernel/vsyscall-int80.so"
 vsyscall_int80_end:
 
        .globl vsyscall_sysenter_start, vsyscall_sysenter_end
 vsyscall_sysenter_start:
-       .incbin "arch/i386/kernel/vsyscall-sysenter.so"
+       .incbin "arch/xen/i386/kernel/vsyscall-sysenter.so"
 vsyscall_sysenter_end:
 
 __FINIT
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mach-default/Makefile linux-2.6-xen-sparse/arch/i386/mach-default/Makefile
--- pristine-linux-2.6.12/arch/i386/mach-default/Makefile       2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mach-default/Makefile        2005-07-28 13:17:07.000000000 -0700
@@ -2,4 +2,11 @@
 # Makefile for the linux kernel.
 #
 
-obj-y                          := setup.o topology.o
+c-obj-y                                := topology.o
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y)):
+       @ln -fsn $(srctree)/arch/i386/mach-default/$(notdir $@) $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-))
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/Makefile linux-2.6-xen-sparse/arch/i386/Makefile
--- pristine-linux-2.6.12/arch/i386/Makefile    2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/Makefile     2005-07-28 13:17:07.000000000 -0700
@@ -17,15 +17,19 @@
 # 20050320  Kianusch Sayah Karadji <kianusch@xxxxxxxxxxx>
 #           Added support for GEODE CPU
 
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
+
 LDFLAGS                := -m elf_i386
-OBJCOPYFLAGS   := -O binary -R .note -R .comment -S
 LDFLAGS_vmlinux :=
-CHECKFLAGS     += -D__i386__
+CHECK          := $(CHECK) -D__i386__=1
+
+CFLAGS += -m32
+AFLAGS += -m32
 
 CFLAGS += -pipe -msoft-float
 
 # prevent gcc from keeping the stack 16 byte aligned
-CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
+CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2,)
 
 align := $(cc-option-align)
 cflags-$(CONFIG_M386)          += -march=i386
@@ -59,116 +63,46 @@ cflags-$(CONFIG_MGEODEGX1)         += $(call cc
 
 # -mregparm=3 works ok on gcc-3.0 and later
 #
-GCC_VERSION                    := $(call cc-version)
+GCC_VERSION                    := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 cflags-$(CONFIG_REGPARM)       += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
 
 # Disable unit-at-a-time mode, it makes gcc use a lot more stack
 # due to the lack of sharing of stacklots.
-CFLAGS += $(call cc-option,-fno-unit-at-a-time)
+CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
 
 CFLAGS += $(cflags-y)
 
-# Default subarch .c files
-mcore-y  := mach-default
-
-# Voyager subarch support
-mflags-$(CONFIG_X86_VOYAGER)   := -Iinclude/asm-i386/mach-voyager
-mcore-$(CONFIG_X86_VOYAGER)    := mach-voyager
-
-# VISWS subarch support
-mflags-$(CONFIG_X86_VISWS)     := -Iinclude/asm-i386/mach-visws
-mcore-$(CONFIG_X86_VISWS)      := mach-visws
-
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ)     := -Iinclude/asm-i386/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ)      := mach-default
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP)    := -Iinclude/asm-i386/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP)     := mach-default
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
-mcore-$(CONFIG_X86_SUMMIT)  := mach-default
-
-# generic subarchitecture
-mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
-mcore-$(CONFIG_X86_GENERICARCH) := mach-default
-core-$(CONFIG_X86_GENERICARCH) += arch/i386/mach-generic/
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000)    := -Iinclude/asm-i386/mach-es7000
-mcore-$(CONFIG_X86_ES7000)     := mach-default
-core-$(CONFIG_X86_ES7000)      := arch/i386/mach-es7000/
-
-# default subarch .h files
-mflags-y += -Iinclude/asm-i386/mach-default
-
-head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o
+head-y := arch/xen/i386/kernel/head.o arch/xen/i386/kernel/init_task.o
 
 libs-y                                         += arch/i386/lib/
-core-y                                 += arch/i386/kernel/ \
-                                          arch/i386/mm/ \
-                                          arch/i386/$(mcore-y)/ \
+core-y                                 += arch/xen/i386/kernel/ \
+                                          arch/xen/i386/mm/ \
+                                          arch/xen/i386/mach-default/ \
                                           arch/i386/crypto/
+# \
+#                                         arch/xen/$(mcore-y)/
 drivers-$(CONFIG_MATH_EMULATION)       += arch/i386/math-emu/
-drivers-$(CONFIG_PCI)                  += arch/i386/pci/
+drivers-$(CONFIG_PCI)                  += arch/xen/i386/pci/
 # must be linked after kernel/
 drivers-$(CONFIG_OPROFILE)             += arch/i386/oprofile/
 drivers-$(CONFIG_PM)                   += arch/i386/power/
 
-CFLAGS += $(mflags-y)
-AFLAGS += $(mflags-y)
-
-boot := arch/i386/boot
-
-.PHONY: zImage bzImage compressed zlilo bzlilo \
-       zdisk bzdisk fdimage fdimage144 fdimage288 install kernel_install
-
-all: bzImage
-
-# KBUILD_IMAGE specify target image being built
-                    KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := arch/i386/boot/zImage
+# for clean
+obj-   += kernel/ mm/ pci/
+#obj-  += ../../i386/lib/ ../../i386/mm/ 
+#../../i386/$(mcore-y)/
+#obj-  += ../../i386/pci/ ../../i386/oprofile/ ../../i386/power/
+
+xenflags-y += -Iinclude/asm-xen/asm-i386/mach-xen \
+               -Iinclude/asm-i386/mach-default
+CFLAGS += $(xenflags-y)
+AFLAGS += $(xenflags-y)
 
-zImage bzImage: vmlinux
-       $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+prepare: include/asm-$(XENARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(XENARCH)/asm_offsets.h
 
-compressed: zImage
+arch/$(XENARCH)/kernel/asm-offsets.s: include/asm include/.asm-ignore \
+       include/linux/version.h include/config/MARKER
 
-zlilo bzlilo: vmlinux
-       $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
-
-zdisk bzdisk: vmlinux
-       $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-
-fdimage fdimage144 fdimage288: vmlinux
-       $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-
-install: vmlinux
-install kernel_install:
-       $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
-
-prepare: include/asm-$(ARCH)/asm_offsets.h
-CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
-
-arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
-                                  include/config/MARKER
-
-include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+include/asm-$(XENARCH)/asm_offsets.h: arch/$(XENARCH)/kernel/asm-offsets.s
        $(call filechk,gen-asm-offsets)
-
-archclean:
-       $(Q)$(MAKE) $(clean)=arch/i386/boot
-
-define archhelp
-  echo  '* bzImage     - Compressed kernel image (arch/$(ARCH)/boot/bzImage)'
-  echo  '  install     - Install kernel using'
-  echo  '                 (your) ~/bin/installkernel or'
-  echo  '                 (distribution) /sbin/installkernel or'
-  echo  '                 install to $$(INSTALL_PATH) and run lilo'
-  echo  '  bzdisk       - Create a boot floppy in /dev/fd0'
-  echo  '  fdimage      - Create a boot floppy image'
-endef
-
-CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/fault.c linux-2.6-xen-sparse/arch/i386/mm/fault.c
--- pristine-linux-2.6.12/arch/i386/mm/fault.c  2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/fault.c   2005-07-28 13:17:07.000000000 -0700
@@ -21,6 +21,7 @@
 #include <linux/vt_kern.h>             /* For unblank_screen() */
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -29,6 +30,8 @@
 
 extern void die(const char *,struct pt_regs *,long);
 
+DEFINE_PER_CPU(pgd_t *, cur_pgd);
+
 /*
  * Unlock any spinlocks which will prevent us from getting the
  * message out 
@@ -77,7 +80,7 @@ static inline unsigned long get_segment_
        u32 seg_ar, seg_limit, base, *desc;
 
        /* The standard kernel/user address space limit. */
-       *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
+       *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
 
        /* Unlikely, but must come before segment checks. */
        if (unlikely((regs->eflags & VM_MASK) != 0))
@@ -107,7 +110,7 @@ static inline unsigned long get_segment_
                desc = (void *)desc + (seg & ~7);
        } else {
                /* Must disable preemption while reading the GDT. */
-               desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu());
+               desc = (u32 *)get_cpu_gdt_table(get_cpu());
                desc = (void *)desc + (seg & ~7);
        }
 
@@ -211,25 +214,30 @@ fastcall void do_invalid_op(struct pt_re
  *     bit 1 == 0 means read, 1 means write
  *     bit 2 == 0 means kernel, 1 means user-mode
  */
-fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code,
+                             unsigned long address)
 {
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct * vma;
-       unsigned long address;
        unsigned long page;
        int write;
        siginfo_t info;
 
-       /* get the address */
-       __asm__("movl %%cr2,%0":"=r" (address));
+       /* Set the "privileged fault" bit to something sane. */
+       error_code &= 3;
+       error_code |= (regs->xcs & 2) << 1;
+       if (regs->eflags & X86_EFLAGS_VM)
+               error_code |= 4;
 
        if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
                                        SIGSEGV) == NOTIFY_STOP)
                return;
+#if 0
        /* It's safe to allow irq's after cr2 has been saved */
        if (regs->eflags & (X86_EFLAGS_IF|VM_MASK))
                local_irq_enable();
+#endif
 
        tsk = current;
 
@@ -446,9 +454,10 @@ no_context:
        printk(" at virtual address %08lx\n",address);
        printk(KERN_ALERT " printing eip:\n");
        printk("%08lx\n", regs->eip);
-       asm("movl %%cr3,%0":"=r" (page));
-       page = ((unsigned long *) __va(page))[address >> 22];
-       printk(KERN_ALERT "*pde = %08lx\n", page);
+       page = ((unsigned long *) per_cpu(cur_pgd, smp_processor_id()))
+           [address >> 22];
+       printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
+              machine_to_phys(page));
        /*
         * We must not directly access the pte in the highpte
         * case, the page table might be allocated in highmem.
@@ -459,8 +468,10 @@ no_context:
        if (page & 1) {
                page &= PAGE_MASK;
                address &= 0x003ff000;
+               page = machine_to_phys(page);
                page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-               printk(KERN_ALERT "*pte = %08lx\n", page);
+               printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
+                      machine_to_phys(page));
        }
 #endif
        die("Oops", regs, error_code);
@@ -514,14 +525,12 @@ vmalloc_fault:
                 * an interrupt in the middle of a task switch..
                 */
                int index = pgd_index(address);
-               unsigned long pgd_paddr;
                pgd_t *pgd, *pgd_k;
                pud_t *pud, *pud_k;
                pmd_t *pmd, *pmd_k;
                pte_t *pte_k;
 
-               asm("movl %%cr3,%0":"=r" (pgd_paddr));
-               pgd = index + (pgd_t *)__va(pgd_paddr);
+               pgd = index + per_cpu(cur_pgd, smp_processor_id());
                pgd_k = init_mm.pgd + index;
 
                if (!pgd_present(*pgd_k))
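
The fault.c changes follow from two Xen constraints: the guest cannot
execute "movl %%cr2/%%cr3", so Xen passes the faulting address to
do_page_fault() as a third argument, and the current page-directory base is
tracked in the new cur_pgd per-CPU variable. A sketch of the bookkeeping
cur_pgd relies on (xen_switch_pgd is a hypothetical name for illustration;
the real update lives in the context-switch path of the sparse tree):

    static inline void xen_switch_pgd(pgd_t *pgd)
    {
            /* software shadow of %cr3, readable without a hypercall */
            per_cpu(cur_pgd, smp_processor_id()) = pgd;
            /* the actual base switch is performed via hypercall */
    }
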
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/highmem.c linux-2.6-xen-sparse/arch/i386/mm/highmem.c
--- pristine-linux-2.6.12/arch/i386/mm/highmem.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/highmem.c 2005-07-28 13:17:07.000000000 -0700
@@ -25,7 +25,7 @@ void kunmap(struct page *page)
  * However when holding an atomic kmap is is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
-void *kmap_atomic(struct page *page, enum km_type type)
+static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
 {
        enum fixed_addresses idx;
        unsigned long vaddr;
@@ -41,12 +41,23 @@ void *kmap_atomic(struct page *page, enu
        if (!pte_none(*(kmap_pte-idx)))
                BUG();
 #endif
-       set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+       set_pte(kmap_pte-idx, mk_pte(page, prot));
        __flush_tlb_one(vaddr);
 
        return (void*) vaddr;
 }
 
+void *kmap_atomic(struct page *page, enum km_type type)
+{
+       return __kmap_atomic(page, type, kmap_prot);
+}
+
+/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
+void *kmap_atomic_pte(struct page *page, enum km_type type)
+{
+       return __kmap_atomic(page, type, PAGE_KERNEL_RO);
+}
+
 void kunmap_atomic(void *kvaddr, enum km_type type)
 {
 #ifdef CONFIG_DEBUG_HIGHMEM
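
The refactor above exists so that page-table pages can be mapped read-only
even when they live in highmem: Xen rejects any writable mapping of an
active page table, so a PTE page must be kmapped with PAGE_KERNEL_RO before
its contents are touched.  A hypothetical caller, sketched on the assumption
that a standard 2.6 km_type slot such as KM_PTE0 is used:

#include <linux/highmem.h>
#include <asm/pgtable.h>

/* Read one entry out of a (possibly highmem) page-table page via a
 * read-only atomic mapping.  Illustrative only; the unsigned long
 * return loses the upper half of a PAE entry. */
static unsigned long read_pte_slot(struct page *pt_page, int idx)
{
        pte_t *tab = kmap_atomic_pte(pt_page, KM_PTE0);  /* PAGE_KERNEL_RO */
        unsigned long val = pte_val(tab[idx]);           /* read-only use  */

        kunmap_atomic(tab, KM_PTE0);
        return val;
}
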
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/hypervisor.c linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- pristine-linux-2.6.12/arch/i386/mm/hypervisor.c     1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c      2005-07-28 13:17:07.000000000 -0700
@@ -0,0 +1,363 @@
+/******************************************************************************
+ * mm/hypervisor.c
+ * 
+ * Update page tables via the hypervisor.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/balloon.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include <linux/percpu.h>
+#include <asm/tlbflush.h>
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#define pte_offset_kernel pte_offset
+#define pud_t pgd_t
+#define pud_offset(d, va) d
+#elif defined(CONFIG_X86_64)
+#define pmd_val_ma(v) (v).pmd
+#else
+#ifdef CONFIG_X86_PAE
+# define pmd_val_ma(v) ((v).pmd)
+# define pud_val_ma(v) ((v).pgd.pgd)
+#else
+# define pmd_val_ma(v) ((v).pud.pgd.pgd)
+#endif
+#endif
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void xen_l1_entry_update(pte_t *ptr, pte_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = pte_val_ma(val);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = pmd_val_ma(val);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+#ifdef CONFIG_X86_PAE
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = pud_val_ma(val);
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = val.pud;
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
+{
+    mmu_update_t u;
+    u.ptr = virt_to_machine(ptr);
+    u.val = val.pgd;
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_XEN_SHADOW_MODE */
+
+void xen_machphys_update(unsigned long mfn, unsigned long pfn)
+{
+    mmu_update_t u;
+    u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+    u.val = pfn;
+    BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pt_switch(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_NEW_BASEPTR;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_new_user_pt(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_NEW_USER_BASEPTR;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_tlb_flush(void)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_invlpg(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_INVLPG_LOCAL;
+    op.linear_addr = ptr & PAGE_MASK;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+#ifdef CONFIG_SMP
+
+void xen_tlb_flush_all(void)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_TLB_FLUSH_ALL;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_tlb_flush_mask(cpumask_t *mask)
+{
+    struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
+    op.cmd = MMUEXT_TLB_FLUSH_MULTI;
+    op.vcpumask = mask->bits;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_invlpg_all(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_INVLPG_ALL;
+    op.linear_addr = ptr & PAGE_MASK;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
+{
+    struct mmuext_op op;
+    if ( cpus_empty(*mask) )
+        return;
+    op.cmd = MMUEXT_INVLPG_MULTI;
+    op.vcpumask = mask->bits;
+    op.linear_addr = ptr & PAGE_MASK;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+#endif /* CONFIG_SMP */
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void xen_pgd_pin(unsigned long ptr)
+{
+    struct mmuext_op op;
+#ifdef CONFIG_X86_64
+    op.cmd = MMUEXT_PIN_L4_TABLE;
+#elif defined(CONFIG_X86_PAE)
+    op.cmd = MMUEXT_PIN_L3_TABLE;
+#else
+    op.cmd = MMUEXT_PIN_L2_TABLE;
+#endif
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pgd_unpin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pte_pin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_PIN_L1_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pte_unpin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+#ifdef CONFIG_X86_64
+void xen_pud_pin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_PIN_L3_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pud_unpin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pmd_pin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_PIN_L2_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_pmd_unpin(unsigned long ptr)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_UNPIN_TABLE;
+    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_XEN_SHADOW_MODE */
+
+void xen_set_ldt(unsigned long ptr, unsigned long len)
+{
+    struct mmuext_op op;
+    op.cmd = MMUEXT_SET_LDT;
+    op.linear_addr = ptr;
+    op.nr_ents = len;
+    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+}
+
+void xen_contig_memory(unsigned long vstart, unsigned int order)
+{
+    /*
+     * Ensure multi-page extents are contiguous in machine memory. This code 
+     * could be cleaned up some, and the number of hypercalls reduced.
+     */
+    pgd_t         *pgd; 
+    pud_t         *pud; 
+    pmd_t         *pmd;
+    pte_t         *pte;
+    unsigned long  mfn, i, flags;
+
+    scrub_pages(vstart, 1 << order);
+
+    balloon_lock(flags);
+
+    /* 1. Zap current PTEs, giving away the underlying pages. */
+    for (i = 0; i < (1<<order); i++) {
+        pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
+        pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+        pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+        mfn = pte_mfn(*pte);
+        HYPERVISOR_update_va_mapping(
+            vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
+        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+            INVALID_P2M_ENTRY;
+        BUG_ON(HYPERVISOR_dom_mem_op(
+            MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
+    }
+
+    /* 2. Get a new contiguous memory extent. */
+    BUG_ON(HYPERVISOR_dom_mem_op(
+        MEMOP_increase_reservation, &mfn, 1, order) != 1);
+
+    /* 3. Map the new extent in place of old pages. */
+    for (i = 0; i < (1<<order); i++) {
+        HYPERVISOR_update_va_mapping(
+            vstart + (i*PAGE_SIZE),
+            __pte_ma(((mfn+i)<<PAGE_SHIFT)|__PAGE_KERNEL), 0);
+        xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
+        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn+i;
+    }
+
+    flush_tlb_all();
+
+    balloon_unlock(flags);
+}
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+unsigned long allocate_empty_lowmem_region(unsigned long pages)
+{
+    pgd_t         *pgd;
+    pud_t         *pud; 
+    pmd_t         *pmd;
+    pte_t         *pte;
+    unsigned long *pfn_array;
+    unsigned long  vstart;
+    unsigned long  i;
+    unsigned int   order = get_order(pages*PAGE_SIZE);
+
+    vstart = __get_free_pages(GFP_KERNEL, order);
+    if ( vstart == 0 )
+        return 0UL;
+
+    scrub_pages(vstart, 1 << order);
+
+    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
+    if ( pfn_array == NULL )
+        BUG();
+
+    for ( i = 0; i < (1<<order); i++ )
+    {
+        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
+        pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
+        pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
+        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 
+        pfn_array[i] = pte_mfn(*pte);
+#ifdef CONFIG_X86_64
+        xen_l1_entry_update(pte, __pte(0));
+#else
+        HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), __pte_ma(0), 0);
+#endif
+        phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] =
+            INVALID_P2M_ENTRY;
+    }
+
+    flush_tlb_all();
+
+    balloon_put_pages(pfn_array, 1 << order);
+
+    vfree(pfn_array);
+
+    return vstart;
+}
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
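
Every wrapper in this new file has the same shape: fill in an mmu_update_t
(for page-table writes) or an mmuext_op (for pins, flushes and base-pointer
switches), then trap into Xen once per call.  Both hypercalls take an array
and a count, which is what the "number of hypercalls reduced" remark in
xen_contig_memory() above alludes to: callers touching many entries can
batch them.  A sketch of such batching, assuming only the hypercall
signature already used in this file (the helper and its size limit are
illustrative):

#include <asm-xen/hypervisor.h>

#define NR_BATCHED 16

/* Apply n PTE writes with a single trap into the hypervisor. */
static void batched_l1_update(pte_t *ptep[], pte_t val[], int n)
{
    mmu_update_t u[NR_BATCHED];
    int i;

    BUG_ON(n > NR_BATCHED);
    for (i = 0; i < n; i++) {
        u[i].ptr = virt_to_machine(ptep[i]); /* machine address of the slot */
        u[i].val = pte_val_ma(val[i]);       /* new entry, machine frame    */
    }
    BUG_ON(HYPERVISOR_mmu_update(u, n, NULL, DOMID_SELF) < 0);
}
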
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/init.c linux-2.6-xen-sparse/arch/i386/mm/init.c
--- pristine-linux-2.6.12/arch/i386/mm/init.c   2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/init.c    2005-07-28 13:17:07.000000000 -0700
@@ -39,6 +39,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm-xen/hypervisor.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -56,9 +57,10 @@ static pmd_t * __init one_md_table_init(
 {
        pud_t *pud;
        pmd_t *pmd_table;
-               
+
 #ifdef CONFIG_X86_PAE
        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+       make_page_readonly(pmd_table);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        if (pmd_table != pmd_offset(pud, 0)) 
@@ -79,6 +81,7 @@ static pte_t * __init one_page_table_ini
 {
        if (pmd_none(*pmd)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+               make_page_readonly(page_table);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                if (page_table != pte_offset_kernel(pmd, 0))
                        BUG();  
@@ -119,7 +122,7 @@ static void __init page_table_range_init
                pud = pud_offset(pgd, vaddr);
                pmd = pmd_offset(pud, vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-                       if (pmd_none(*pmd)) 
+                       if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd)) 
                                one_page_table_init(pmd);
 
                        vaddr += PMD_SIZE;
@@ -148,16 +151,36 @@ static void __init kernel_physical_mappi
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;
 
+       unsigned long max_ram_pfn = xen_start_info.nr_pages;
+       if (max_ram_pfn > max_low_pfn)
+               max_ram_pfn = max_low_pfn;
+
        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;
+       pmd_idx = pmd_index(PAGE_OFFSET);
+       pte_ofs = pte_index(PAGE_OFFSET);
 
        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+#ifdef CONFIG_XEN
+               /*
+                * Native Linux doesn't have PAE paging enabled yet at
+                * this point.  When running as a Xen domain we are in
+                * PAE mode already, thus we can't simply hook an empty
+                * pmd.  That would kill the mappings we are currently
+                * using ...
+                */
+               pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
+#else
                pmd = one_md_table_init(pgd);
+#endif
                if (pfn >= max_low_pfn)
                        continue;
-               for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+               pmd += pmd_idx;
+               for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+                       if (address >= HYPERVISOR_VIRT_START)
+                               continue;
 
                        /* Map with big pages if possible, otherwise create normal page tables. */
                        if (cpu_has_pse) {
@@ -171,14 +194,20 @@ static void __init kernel_physical_mappi
                        } else {
                                pte = one_page_table_init(pmd);
 
-                               for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+                               pte += pte_ofs;
+                               for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+                                               /* XEN: Only map initial RAM allocation. */
+                                               if ((pfn >= max_ram_pfn) || pte_present(*pte))
+                                                       continue;
                                                if (is_kernel_text(address))
                                                        set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                                else
                                                        set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
+                               pte_ofs = 0;
                        }
                }
+               pmd_idx = 0;
        }
 }
 
@@ -271,7 +300,8 @@ void __init one_highpage_init(struct pag
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                set_page_count(page, 1);
-               __free_page(page);
+               if (pfn < xen_start_info.nr_pages)
+                       __free_page(page);
                totalhigh_pages++;
        } else
                SetPageReserved(page);
@@ -308,6 +338,7 @@ static void __init pagetable_init (void)
 {
        unsigned long vaddr;
        pgd_t *pgd_base = swapper_pg_dir;
+       pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
 
 #ifdef CONFIG_X86_PAE
        int i;
@@ -328,6 +359,45 @@ static void __init pagetable_init (void)
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }
 
+       /*
+        * Switch to proper mm_init page directory. Initialise from the current
+        * page directory, write-protect the new page directory, then switch to
+        * it. We clean up by write-enabling and then freeing the old page dir.
+        */
+#ifndef CONFIG_X86_PAE
+       memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+       make_page_readonly(pgd_base);
+       xen_pgd_pin(__pa(pgd_base));
+       load_cr3(pgd_base);
+       xen_pgd_unpin(__pa(old_pgd));
+       make_page_writable(old_pgd);
+       __flush_tlb_all();
+       free_bootmem(__pa(old_pgd), PAGE_SIZE);
+#else
+       {
+               pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
+               pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
+               pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
+
+               memcpy(new_pmd,  old_pmd, PAGE_SIZE);
+               memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+               set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
+
+               make_page_readonly(new_pmd);
+               make_page_readonly(pgd_base);
+               xen_pgd_pin(__pa(pgd_base));
+               load_cr3(pgd_base);
+               xen_pgd_unpin(__pa(old_pgd));
+               make_page_writable(old_pgd);
+               make_page_writable(old_pmd);
+               __flush_tlb_all();
+
+               free_bootmem(__pa(old_pgd), PAGE_SIZE);
+               free_bootmem(__pa(old_pmd), PAGE_SIZE);
+       }
+#endif
+
+       init_mm.context.pinned = 1;
        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();
 
@@ -340,7 +410,7 @@ static void __init pagetable_init (void)
 
        permanent_kmaps_init(pgd_base);
 
-#ifdef CONFIG_X86_PAE
+#if 0 /* def CONFIG_X86_PAE */
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
@@ -348,7 +418,7 @@ static void __init pagetable_init (void)
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
-       pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+       set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
 #endif
 }
 
@@ -383,7 +453,7 @@ void zap_low_mappings (void)
         * us, because pgd_clear() is a no-op on i386.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
                set_pgd(swapper_pg_dir+i, __pgd(0));
@@ -470,6 +540,10 @@ out:
  */
 void __init paging_init(void)
 {
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+       int i;
+#endif
+
 #ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
@@ -478,12 +552,12 @@ void __init paging_init(void)
 
        pagetable_init();
 
-       load_cr3(swapper_pg_dir);
-
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.
+        * When running as a Xen domain we are already in PAE mode at
+        * this point.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
@@ -491,6 +565,22 @@ void __init paging_init(void)
        __flush_tlb_all();
 
        kmap_init();
+
+       /* Switch to the real shared_info page, and clear the dummy page. */
+       set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
+       HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+       memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+       /* Setup mapping of lower 1st MB */
+       for (i = 0; i < NR_FIX_ISAMAPS; i++)
+               if (xen_start_info.flags & SIF_PRIVILEGED)
+                       set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+               else
+                       __set_fixmap(FIX_ISAMAP_BEGIN - i,
+                                    virt_to_machine(empty_zero_page),
+                                    PAGE_KERNEL_RO);
+#endif
 }
 
 /*
@@ -539,6 +629,7 @@ void __init mem_init(void)
        int codesize, reservedpages, datasize, initsize;
        int tmp;
        int bad_ppro;
+       unsigned long pfn;
 
 #ifndef CONFIG_DISCONTIGMEM
        if (!mem_map)
@@ -564,9 +655,18 @@ void __init mem_init(void)
 #else
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
-
+       printk("vmalloc area: %lx-%lx, maxmem %lx\n",
+              VMALLOC_START,VMALLOC_END,MAXMEM);
+       BUG_ON(VMALLOC_START > VMALLOC_END);
+       
        /* this will put all low memory onto the freelists */
        totalram_pages += free_all_bootmem();
+       /* XEN: init and count low-mem pages outside initial allocation. */
+       for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
+               ClearPageReserved(&mem_map[pfn]);
+               set_page_count(&mem_map[pfn], 1);
+               totalram_pages++;
+       }
 
        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
@@ -630,11 +730,16 @@ void __init pgtable_cache_init(void)
                        panic("pgtable_cache_init(): cannot create pmd cache");
        }
        pgd_cache = kmem_cache_create("pgd",
+#if 0 /* How the heck _this_ works in native linux ??? */
                                PTRS_PER_PGD*sizeof(pgd_t),
                                PTRS_PER_PGD*sizeof(pgd_t),
+#else
+                               PAGE_SIZE,
+                               PAGE_SIZE,
+#endif
                                0,
                                pgd_ctor,
-                               PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+                               pgd_dtor);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
 }
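
The pagetable_init() rework above encodes a strict ordering: Xen will only
pin a page directory once no writable mapping of it remains, and will only
let it become writable again after it is unpinned.  Distilled into a sketch,
assuming the helpers defined in this patch and omitting the copy of the
boot-time entries:

/* Replace the live page directory, in the only order Xen accepts. */
static void switch_to_new_pgd(pgd_t *new_pgd, pgd_t *old_pgd)
{
        make_page_readonly(new_pgd);   /* no writable mapping may remain */
        xen_pgd_pin(__pa(new_pgd));    /* hypervisor validates and pins  */
        load_cr3(new_pgd);             /* switch the base pointer        */
        xen_pgd_unpin(__pa(old_pgd));  /* old directory no longer live   */
        make_page_writable(old_pgd);   /* now safe to modify and free    */
        __flush_tlb_all();
}
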
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/ioremap.c linux-2.6-xen-sparse/arch/i386/mm/ioremap.c
--- pristine-linux-2.6.12/arch/i386/mm/ioremap.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/ioremap.c 2005-07-28 13:17:07.000000000 -0700
@@ -11,91 +11,54 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 
-#define ISA_START_ADDRESS      0xa0000
-#define ISA_END_ADDRESS                0x100000
+#ifndef CONFIG_XEN_PHYSDEV_ACCESS
 
-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-               unsigned long end, unsigned long phys_addr, unsigned long flags)
+void * __ioremap(unsigned long phys_addr, unsigned long size,
+                unsigned long flags)
 {
-       pte_t *pte;
-       unsigned long pfn;
-
-       pfn = phys_addr >> PAGE_SHIFT;
-       pte = pte_alloc_kernel(&init_mm, pmd, addr);
-       if (!pte)
-               return -ENOMEM;
-       do {
-               BUG_ON(!pte_none(*pte));
-               set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
-                                       _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
-               pfn++;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-       return 0;
+       return NULL;
 }
 
-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-               unsigned long end, unsigned long phys_addr, unsigned long flags)
+void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-       pmd_t *pmd;
-       unsigned long next;
-
-       phys_addr -= addr;
-       pmd = pmd_alloc(&init_mm, pud, addr);
-       if (!pmd)
-               return -ENOMEM;
-       do {
-               next = pmd_addr_end(addr, end);
-               if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags))
-                       return -ENOMEM;
-       } while (pmd++, addr = next, addr != end);
-       return 0;
+       return NULL;
 }
 
-static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-               unsigned long end, unsigned long phys_addr, unsigned long flags)
+void iounmap(volatile void __iomem *addr)
 {
-       pud_t *pud;
-       unsigned long next;
+}
 
-       phys_addr -= addr;
-       pud = pud_alloc(&init_mm, pgd, addr);
-       if (!pud)
-               return -ENOMEM;
-       do {
-               next = pud_addr_end(addr, end);
-               if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags))
-                       return -ENOMEM;
-       } while (pud++, addr = next, addr != end);
-       return 0;
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+       return NULL;
 }
 
-static int ioremap_page_range(unsigned long addr,
-               unsigned long end, unsigned long phys_addr, unsigned long flags)
+void __init bt_iounmap(void *addr, unsigned long size)
 {
-       pgd_t *pgd;
-       unsigned long next;
-       int err;
+}
 
-       BUG_ON(addr >= end);
-       flush_cache_all();
-       phys_addr -= addr;
-       pgd = pgd_offset_k(addr);
-       spin_lock(&init_mm.page_table_lock);
-       do {
-               next = pgd_addr_end(addr, end);
-               err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
-               if (err)
-                       break;
-       } while (pgd++, addr = next, addr != end);
-       spin_unlock(&init_mm.page_table_lock);
-       flush_tlb_all();
-       return err;
+#else
+
+/*
+ * Does @address reside within a non-highmem page that is local to this virtual
+ * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
+ * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
+ * why this works.
+ */
+static inline int is_local_lowmem(unsigned long address)
+{
+       extern unsigned long max_low_pfn;
+       unsigned long mfn = address >> PAGE_SHIFT;
+       unsigned long pfn = mfn_to_pfn(mfn);
+       return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
 }
 
 /*
@@ -116,31 +79,36 @@ void __iomem * __ioremap(unsigned long p
        void __iomem * addr;
        struct vm_struct * area;
        unsigned long offset, last_addr;
+       domid_t domid = DOMID_IO;
 
        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr)
                return NULL;
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        /*
         * Don't remap the low PCI/ISA area, it's always mapped..
         */
-       if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
-               return (void __iomem *) phys_to_virt(phys_addr);
+       if (phys_addr >= 0x0 && last_addr < 0x100000)
+               return isa_bus_to_virt(phys_addr);
+#endif
 
        /*
         * Don't allow anybody to remap normal RAM that we're using..
         */
-       if (phys_addr <= virt_to_phys(high_memory - 1)) {
+       if (is_local_lowmem(phys_addr)) {
                char *t_addr, *t_end;
                struct page *page;
 
-               t_addr = __va(phys_addr);
+               t_addr = bus_to_virt(phys_addr);
                t_end = t_addr + (size - 1);
           
                for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
                        if(!PageReserved(page))
                                return NULL;
+
+               domid = DOMID_SELF;
        }
 
        /*
@@ -158,8 +126,10 @@ void __iomem * __ioremap(unsigned long p
                return NULL;
        area->phys_addr = phys_addr;
        addr = (void __iomem *) area->addr;
-       if (ioremap_page_range((unsigned long) addr,
-                       (unsigned long) addr + size, phys_addr, flags)) {
+       if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr,
+                                   size, __pgprot(_PAGE_PRESENT | _PAGE_RW |
+                                                  _PAGE_DIRTY | _PAGE_ACCESSED
+                                                  | flags), domid)) {
                vunmap((void __force *) addr);
                return NULL;
        }
@@ -199,8 +169,8 @@ void __iomem *ioremap_nocache (unsigned 
        /* Guaranteed to be > phys_addr, as per __ioremap() */
        last_addr = phys_addr + size - 1;
 
-       if (last_addr < virt_to_phys(high_memory) - 1) {
-               struct page *ppage = virt_to_page(__va(phys_addr));             
+       if (is_local_lowmem(last_addr)) { 
+               struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
                unsigned long npages;
 
                phys_addr &= PAGE_MASK;
@@ -227,32 +197,24 @@ void iounmap(volatile void __iomem *addr
 {
        struct vm_struct *p;
        if ((void __force *) addr <= high_memory) 
+               return; 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
                return;
-
-       /*
-        * __ioremap special-cases the PCI/ISA range by not instantiating a
-        * vm_area and by simply returning an address into the kernel mapping
-        * of ISA space.   So handle that here.
-        */
-       if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
-                       addr < phys_to_virt(ISA_END_ADDRESS))
-               return;
-
-       write_lock(&vmlist_lock);
-       p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+#endif
+       p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
        if (!p) { 
-               printk("iounmap: bad address %p\n", addr);
-               goto out_unlock;
+               printk("__iounmap: bad address %p\n", addr);
+               return;
        }
 
-       if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
-               change_page_attr(virt_to_page(__va(p->phys_addr)),
-                                p->size >> PAGE_SHIFT,
-                                PAGE_KERNEL);
+       if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
+               /* p->size includes the guard page, but cpa doesn't like that */
+               change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
+                                (p->size - PAGE_SIZE) >> PAGE_SHIFT,
+                                PAGE_KERNEL);                           
                global_flush_tlb();
        } 
-out_unlock:
-       write_unlock(&vmlist_lock);
        kfree(p); 
 }
 
@@ -267,11 +229,13 @@ void __init *bt_ioremap(unsigned long ph
        if (!size || last_addr < phys_addr)
                return NULL;
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
        /*
         * Don't remap the low PCI/ISA area, it's always mapped..
         */
-       if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
-               return phys_to_virt(phys_addr);
+       if (phys_addr >= 0x0 && last_addr < 0x100000)
+               return isa_bus_to_virt(phys_addr);
+#endif
 
        /*
         * Mappings have to be page-aligned
@@ -310,6 +274,10 @@ void __init bt_iounmap(void *addr, unsig
        virt_addr = (unsigned long)addr;
        if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
                return;
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
+               return;
+#endif
        offset = virt_addr & ~PAGE_MASK;
        nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
 
@@ -320,3 +288,155 @@ void __init bt_iounmap(void *addr, unsig
                --nrpages;
        }
 }
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
+/* These hacky macros avoid phys->machine translations. */
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline void direct_remap_area_pte(pte_t *pte, 
+                                        unsigned long address, 
+                                        unsigned long size,
+                                        mmu_update_t **v)
+{
+       unsigned long end;
+
+       address &= ~PMD_MASK;
+       end = address + size;
+       if (end > PMD_SIZE)
+               end = PMD_SIZE;
+       if (address >= end)
+               BUG();
+
+       do {
+               (*v)->ptr = virt_to_machine(pte);
+               (*v)++;
+               address += PAGE_SIZE;
+               pte++;
+       } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+                                       pmd_t *pmd, 
+                                       unsigned long address, 
+                                       unsigned long size,
+                                       mmu_update_t **v)
+{
+       unsigned long end;
+
+       address &= ~PGDIR_MASK;
+       end = address + size;
+       if (end > PGDIR_SIZE)
+               end = PGDIR_SIZE;
+       if (address >= end)
+               BUG();
+       do {
+               pte_t *pte = (mm == &init_mm) ? 
+                       pte_alloc_kernel(mm, pmd, address) :
+                       pte_alloc_map(mm, pmd, address);
+               if (!pte)
+                       return -ENOMEM;
+               direct_remap_area_pte(pte, address, end - address, v);
+               pte_unmap(pte);
+               address = (address + PMD_SIZE) & PMD_MASK;
+               pmd++;
+       } while (address && (address < end));
+       return 0;
+}
+ 
+int __direct_remap_area_pages(struct mm_struct *mm,
+                             unsigned long address, 
+                             unsigned long size, 
+                             mmu_update_t *v)
+{
+       pgd_t * dir;
+       unsigned long end = address + size;
+       int error;
+
+       dir = pgd_offset(mm, address);
+       if (address >= end)
+               BUG();
+       spin_lock(&mm->page_table_lock);
+       do {
+               pud_t *pud;
+               pmd_t *pmd;
+
+               error = -ENOMEM;
+               pud = pud_alloc(mm, dir, address);
+               if (!pud)
+                       break;
+               pmd = pmd_alloc(mm, pud, address);
+               if (!pmd)
+                       break;
+               error = 0;
+               direct_remap_area_pmd(mm, pmd, address, end - address, &v);
+               address = (address + PGDIR_SIZE) & PGDIR_MASK;
+               dir++;
+
+       } while (address && (address < end));
+       spin_unlock(&mm->page_table_lock);
+       return error;
+}
+
+
+int direct_remap_area_pages(struct mm_struct *mm,
+                           unsigned long address, 
+                           unsigned long machine_addr,
+                           unsigned long size, 
+                           pgprot_t prot,
+                           domid_t  domid)
+{
+       int i;
+       unsigned long start_address;
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+       mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
+
+       start_address = address;
+
+       flush_cache_all();
+
+       for (i = 0; i < size; i += PAGE_SIZE) {
+               if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
+                       /* Fill in the PTE pointers. */
+                       __direct_remap_area_pages(mm,
+                                                 start_address, 
+                                                 address-start_address, 
+                                                 u);
+ 
+                       if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
+                               return -EFAULT;
+                       v = u;
+                       start_address = address;
+               }
+
+               /*
+                * Fill in the machine address: PTE ptr is done later by
+                * __direct_remap_area_pages(). 
+                */
+               v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+
+               machine_addr += PAGE_SIZE;
+               address += PAGE_SIZE; 
+               v++;
+       }
+
+       if (v != u) {
+               /* get the ptep's filled in */
+               __direct_remap_area_pages(mm,
+                                         start_address, 
+                                         address-start_address, 
+                                         u);
+               if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
+                       return -EFAULT;
+       }
+
+       flush_tlb_all();
+
+       return 0;
+}
+
+EXPORT_SYMBOL(direct_remap_area_pages);
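
direct_remap_area_pages() replaces the pristine tree's PTE walk: it batches
up to MAX_DIRECTMAP_MMU_QUEUE updates, lets __direct_remap_area_pages() fill
in the machine addresses of the PTE slots, and tags the request with the
domain that owns the frames (DOMID_IO for device memory, DOMID_SELF for
local RAM).  A hypothetical caller, sketched only to show the parameter
roles:

#include <linux/vmalloc.h>
#include <asm-xen/hypervisor.h>

/* Map one machine page of device memory; illustrative, not in the patch. */
static void __iomem *map_device_page(unsigned long machine_addr)
{
        struct vm_struct *area = get_vm_area(PAGE_SIZE, VM_IOREMAP);

        if (!area)
                return NULL;
        if (direct_remap_area_pages(&init_mm, (unsigned long)area->addr,
                                    machine_addr & PAGE_MASK, PAGE_SIZE,
                                    __pgprot(_PAGE_PRESENT | _PAGE_RW |
                                             _PAGE_DIRTY | _PAGE_ACCESSED),
                                    DOMID_IO)) {
                vunmap(area->addr);
                return NULL;
        }
        return (void __iomem *)area->addr;
}
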
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/Makefile linux-2.6-xen-sparse/arch/i386/mm/Makefile
--- pristine-linux-2.6.12/arch/i386/mm/Makefile 2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/Makefile  2005-07-28 13:17:07.000000000 -0700
@@ -2,9 +2,23 @@
 # Makefile for the linux i386-specific parts of the memory manager.
 #
 
-obj-y  := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
-obj-$(CONFIG_DISCONTIGMEM)     += discontig.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+CFLAGS += -Iarch/$(XENARCH)/mm
+
+obj-y  := init.o pgtable.o fault.o ioremap.o hypervisor.o
+c-obj-y        := extable.o mmap.o pageattr.o
+
+c-obj-$(CONFIG_DISCONTIGMEM)   += discontig.o
+c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
-obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
+c-obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/mm/$(notdir $@) $@
+
+obj-y  += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/mm/pgtable.c linux-2.6-xen-sparse/arch/i386/mm/pgtable.c
--- pristine-linux-2.6.12/arch/i386/mm/pgtable.c        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/mm/pgtable.c 2005-07-28 13:17:07.000000000 -0700
@@ -21,6 +21,10 @@
 #include <asm/e820.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+
+#include <asm-xen/foreign_page.h>
 
 void show_mem(void)
 {
@@ -93,6 +97,44 @@ static void set_pte_pfn(unsigned long va
 }
 
 /*
+ * Associate a virtual page frame with a given physical page frame 
+ * and protection flags for that frame.
+ */ 
+static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn,
+                          pgprot_t flags)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pgd = swapper_pg_dir + pgd_index(vaddr);
+       if (pgd_none(*pgd)) {
+               BUG();
+               return;
+       }
+       pud = pud_offset(pgd, vaddr);
+       if (pud_none(*pud)) {
+               BUG();
+               return;
+       }
+       pmd = pmd_offset(pud, vaddr);
+       if (pmd_none(*pmd)) {
+               BUG();
+               return;
+       }
+       pte = pte_offset_kernel(pmd, vaddr);
+       /* <pfn,flags> stored as-is, to permit clearing entries */
+       set_pte(pte, pfn_pte_ma(pfn, flags));
+
+       /*
+        * It's enough to flush this one mapping.
+        * (PGE mappings get flushed as well)
+        */
+       __flush_tlb_one(vaddr);
+}
+
+/*
  * Associate a large virtual page frame with a given physical page frame 
  * and protection flags for that frame. pfn is for the base of the page,
  * vaddr is what the page gets mapped to - both must be properly aligned. 
@@ -135,12 +177,26 @@ void __set_fixmap (enum fixed_addresses 
                BUG();
                return;
        }
-       set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+       switch (idx) {
+       case FIX_WP_TEST:
+       case FIX_VSYSCALL:
+#ifdef CONFIG_X86_F00F_BUG
+       case FIX_F00F_IDT:
+#endif
+               set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+               break;
+       default:
+               set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
+               break;
+       }
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-       return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+       pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+       if (pte)
+               make_page_readonly(pte);
+       return pte;
 }
 
 struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
@@ -151,10 +207,29 @@ struct page *pte_alloc_one(struct mm_str
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
 #else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+       if (pte) {
+               SetPageForeign(pte, pte_free);
+               set_page_count(pte, 1);
+       }
 #endif
+
        return pte;
 }
 
+void pte_free(struct page *pte)
+{
+       unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+       if (!pte_write(*virt_to_ptep(va)))
+               HYPERVISOR_update_va_mapping(
+                       va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+
+       ClearPageForeign(pte);
+       set_page_count(pte, 1);
+
+       __free_page(pte);
+}
+
 void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
 {
        memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
@@ -199,14 +274,14 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
 {
        unsigned long flags;
 
-       if (PTRS_PER_PMD == 1)
+       if (!HAVE_SHARED_KERNEL_PMD)
                spin_lock_irqsave(&pgd_lock, flags);
 
        memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
                        swapper_pg_dir + USER_PTRS_PER_PGD,
                        (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 
-       if (PTRS_PER_PMD > 1)
+       if (HAVE_SHARED_KERNEL_PMD)
                return;
 
        pgd_list_add(pgd);
@@ -214,11 +289,13 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 }
 
-/* never called when PTRS_PER_PMD > 1 */
 void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
        unsigned long flags; /* can be called from interrupt context */
 
+       if (HAVE_SHARED_KERNEL_PMD)
+               return;
+
        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
@@ -226,12 +303,30 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-       int i;
+       int i = 0;
        pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
 
        if (PTRS_PER_PMD == 1 || !pgd)
                return pgd;
 
+       if (!HAVE_SHARED_KERNEL_PMD) {
+               /* alloc and copy kernel pmd */
+               unsigned long flags;
+               pgd_t *copy_pgd = pgd_offset_k(PAGE_OFFSET);
+               pud_t *copy_pud = pud_offset(copy_pgd, PAGE_OFFSET);
+               pmd_t *copy_pmd = pmd_offset(copy_pud, PAGE_OFFSET);
+               pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+               if (0 == pmd)
+                       goto out_oom;
+
+               spin_lock_irqsave(&pgd_lock, flags);
+               memcpy(pmd, copy_pmd, PAGE_SIZE);
+               spin_unlock_irqrestore(&pgd_lock, flags);
+               make_page_readonly(pmd);
+               set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
+       }
+
+       /* alloc user pmds */
        for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
                if (!pmd)
@@ -250,11 +345,207 @@ out_oom:
 void pgd_free(pgd_t *pgd)
 {
        int i;
+       pte_t *ptep = virt_to_ptep(pgd);
+
+       if (!pte_write(*ptep)) {
+               xen_pgd_unpin(__pa(pgd));
+               HYPERVISOR_update_va_mapping(
+                       (unsigned long)pgd,
+                       pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+                       0);
+       }
 
        /* in the PAE case user pgd entries are overwritten before usage */
-       if (PTRS_PER_PMD > 1)
-               for (i = 0; i < USER_PTRS_PER_PGD; ++i)
-                       kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+       if (PTRS_PER_PMD > 1) {
+               for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+                       pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
+                       kmem_cache_free(pmd_cache, pmd);
+               }
+               if (!HAVE_SHARED_KERNEL_PMD) {
+                       pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
+                       make_page_writable(pmd);
+                       memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+                       kmem_cache_free(pmd_cache, pmd);
+               }
+       }
        /* in the non-PAE case, free_pgtables() clears user pgd entries */
        kmem_cache_free(pgd_cache, pgd);
 }
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void make_lowmem_page_readonly(void *va)
+{
+       pte_t *pte = virt_to_ptep(va);
+       set_pte(pte, pte_wrprotect(*pte));
+}
+
+void make_lowmem_page_writable(void *va)
+{
+       pte_t *pte = virt_to_ptep(va);
+       set_pte(pte, pte_mkwrite(*pte));
+}
+
+void make_page_readonly(void *va)
+{
+       pte_t *pte = virt_to_ptep(va);
+       set_pte(pte, pte_wrprotect(*pte));
+       if ( (unsigned long)va >= (unsigned long)high_memory )
+       {
+               unsigned long phys;
+               phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
+#ifdef CONFIG_HIGHMEM
+               if ( (phys >> PAGE_SHIFT) < highstart_pfn )
+#endif
+                       make_lowmem_page_readonly(phys_to_virt(phys));
+       }
+}
+
+void make_page_writable(void *va)
+{
+       pte_t *pte = virt_to_ptep(va);
+       set_pte(pte, pte_mkwrite(*pte));
+       if ( (unsigned long)va >= (unsigned long)high_memory )
+       {
+               unsigned long phys;
+               phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
+#ifdef CONFIG_HIGHMEM
+               if ( (phys >> PAGE_SHIFT) < highstart_pfn )
+#endif
+                       make_lowmem_page_writable(phys_to_virt(phys));
+       }
+}
+
+void make_pages_readonly(void *va, unsigned int nr)
+{
+       while ( nr-- != 0 )
+       {
+               make_page_readonly(va);
+               va = (void *)((unsigned long)va + PAGE_SIZE);
+       }
+}
+
+void make_pages_writable(void *va, unsigned int nr)
+{
+       while ( nr-- != 0 )
+       {
+               make_page_writable(va);
+               va = (void *)((unsigned long)va + PAGE_SIZE);
+       }
+}
+#endif /* CONFIG_XEN_SHADOW_MODE */
+
+LIST_HEAD(mm_unpinned);
+DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+{
+       struct page *page = virt_to_page(pt);
+       unsigned long pfn = page_to_pfn(page);
+
+       if (PageHighMem(page))
+               return;
+       HYPERVISOR_update_va_mapping(
+               (unsigned long)__va(pfn << PAGE_SHIFT),
+               pfn_pte(pfn, flags), 0);
+}
+
+static void mm_walk(struct mm_struct *mm, pgprot_t flags)
+{
+       pgd_t       *pgd;
+       pud_t       *pud;
+       pmd_t       *pmd;
+       pte_t       *pte;
+       int          g,u,m;
+
+       pgd = mm->pgd;
+       for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+               if (pgd_none(*pgd))
+                       continue;
+               pud = pud_offset(pgd, 0);
+               if (PTRS_PER_PUD > 1) /* not folded */
+                       mm_walk_set_prot(pud,flags);
+               for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+                       if (pud_none(*pud))
+                               continue;
+                       pmd = pmd_offset(pud, 0);
+                       if (PTRS_PER_PMD > 1) /* not folded */
+                               mm_walk_set_prot(pmd,flags);
+                       for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+                               if (pmd_none(*pmd))
+                                       continue;
+                               pte = pte_offset_kernel(pmd,0);
+                               mm_walk_set_prot(pte,flags);
+                       }
+               }
+       }
+}
+
+void mm_pin(struct mm_struct *mm)
+{
+    spin_lock(&mm->page_table_lock);
+
+    mm_walk(mm, PAGE_KERNEL_RO);
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+        UVMF_TLB_FLUSH);
+    xen_pgd_pin(__pa(mm->pgd));
+    mm->context.pinned = 1;
+    spin_lock(&mm_unpinned_lock);
+    list_del(&mm->context.unpinned);
+    spin_unlock(&mm_unpinned_lock);
+
+    spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+    spin_lock(&mm->page_table_lock);
+
+    xen_pgd_unpin(__pa(mm->pgd));
+    HYPERVISOR_update_va_mapping(
+        (unsigned long)mm->pgd,
+        pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+    mm_walk(mm, PAGE_KERNEL);
+    xen_tlb_flush();
+    mm->context.pinned = 0;
+    spin_lock(&mm_unpinned_lock);
+    list_add(&mm->context.unpinned, &mm_unpinned);
+    spin_unlock(&mm_unpinned_lock);
+
+    spin_unlock(&mm->page_table_lock);
+}
+
+void mm_pin_all(void)
+{
+    while (!list_empty(&mm_unpinned))  
+       mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
+                         context.unpinned));
+}
+
+void _arch_exit_mmap(struct mm_struct *mm)
+{
+    struct task_struct *tsk = current;
+
+    task_lock(tsk);
+
+    /*
+     * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+     * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+     */
+    if ( tsk->active_mm == mm )
+    {
+        tsk->active_mm = &init_mm;
+        atomic_inc(&init_mm.mm_count);
+
+        switch_mm(mm, &init_mm, tsk);
+
+        atomic_dec(&mm->mm_count);
+        BUG_ON(atomic_read(&mm->mm_count) == 0);
+    }
+
+    task_unlock(tsk);
+
+    if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) )
+        mm_unpin(mm);
+}
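
mm_pin()/mm_unpin() above give the port lazy pinning: a process's page-table
tree is write-protected and pinned only when it is first needed, and every
unpinned mm is kept on the mm_unpinned list so mm_pin_all() can sweep them
in one go.  The expected trigger is the context-switch path; the call site
below is an assumption for illustration, since the switch_mm() side is not
part of this hunk:

/* Assumed call site: pin a page-table tree the first time it is used. */
static inline void xen_activate_mm(struct mm_struct *next)
{
    if (!next->context.pinned)
        mm_pin(next);   /* mm_walk() write-protects, then MMUEXT pin */
    /* ...followed by the usual switch to next->pgd... */
}
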
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/pci/irq.c linux-2.6-xen-sparse/arch/i386/pci/irq.c
--- pristine-linux-2.6.12/arch/i386/pci/irq.c   2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/pci/irq.c    2005-07-28 13:17:07.000000000 -0700
@@ -68,7 +68,8 @@ static struct irq_routing_table * __init
        int i;
        u8 sum;
 
-       for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       for(addr = (u8 *) isa_bus_to_virt(0xf0000); addr < (u8 *) isa_bus_to_virt(0x100000); addr += 16) {
                rt = (struct irq_routing_table *) addr;
                if (rt->signature != PIRQ_SIGNATURE ||
                    rt->version != PIRQ_VERSION ||
@@ -83,6 +84,8 @@ static struct irq_routing_table * __init
                        return rt;
                }
        }
+#endif
+       
        return NULL;
 }
 
diff -x mkbuildtree -x include -x xen -x SCCS -urPp pristine-linux-2.6.12/arch/i386/pci/Makefile linux-2.6-xen-sparse/arch/i386/pci/Makefile
--- pristine-linux-2.6.12/arch/i386/pci/Makefile        2005-06-17 12:48:29.000000000 -0700
+++ linux-2.6-xen-sparse/arch/i386/pci/Makefile 2005-07-28 13:17:07.000000000 -0700
@@ -1,14 +1,32 @@
-obj-y                          := i386.o
+XENARCH        := $(subst ",,$(CONFIG_XENARCH))
 
-obj-$(CONFIG_PCI_BIOS)         += pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o
-obj-$(CONFIG_PCI_DIRECT)       += direct.o
+CFLAGS += -Iarch/$(XENARCH)/pci
 
-pci-y                          := fixup.o
-pci-$(CONFIG_ACPI_PCI)         += acpi.o
-pci-y                          += legacy.o irq.o
+c-obj-y                                := i386.o
 
-pci-$(CONFIG_X86_VISWS)                := visws.o fixup.o
-pci-$(CONFIG_X86_NUMAQ)                := numa.o irq.o
+c-obj-$(CONFIG_PCI_BIOS)               += pcbios.o
+c-obj-$(CONFIG_PCI_MMCONFIG)   += mmconfig.o
+c-obj-$(CONFIG_PCI_DIRECT)     += direct.o
 
-obj-y                          += $(pci-y) common.o
+c-pci-y                                := fixup.o
+c-pci-$(CONFIG_ACPI_PCI)       += acpi.o
+c-pci-y                                += legacy.o
+# Make sure irq.o gets linked in after legacy.o
+l-pci-y                                += irq.o
+
+c-pci-$(CONFIG_X86_VISWS)      := visws.o fixup.o
+pci-$(CONFIG_X86_VISWS)                :=
+c-pci-$(CONFIG_X86_NUMAQ)      := numa.o
+pci-$(CONFIG_X86_NUMAQ)                := irq.o
+
+obj-y                          += $(pci-y)
+c-obj-y                                += $(c-pci-y) common.o
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+       @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@
+
+obj-y  += $(c-obj-y) $(l-pci-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))

_______________________________________________
Xen-merge mailing list
Xen-merge@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-merge
