WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/

[Xen-changelog] [linux-2.6.18-xen] Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [linux-2.6.18-xen] Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec
From: "Xen patchbot-linux-2.6.18-xen" <patchbot-linux-2.6.18-xen@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 11 Jun 2007 02:23:05 -0700
Delivery-date: Tue, 12 Jun 2007 05:08:57 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Ian Campbell <ian.campbell@xxxxxxxxxxxxx>
# Date 1180947928 -3600
# Node ID a533be77c572209cd133734bab614a102a61ab75
# Parent  42f6970e18c9d339a78789e1a3e50b4cb948d99a
Imported linux-2.6-xen-sparse from xen-unstable.hg 15200:bd3d6b4c52ec
---
 arch/i386/Kconfig                                   |   85 
 arch/i386/Kconfig.cpu                               |    6 
 arch/i386/Kconfig.debug                             |    1 
 arch/i386/Makefile                                  |   24 
 arch/i386/boot-xen/Makefile                         |   19 
 arch/i386/kernel/Makefile                           |   22 
 arch/i386/kernel/acpi/Makefile                      |    4 
 arch/i386/kernel/acpi/boot-xen.c                    | 1168 ++++++++
 arch/i386/kernel/apic-xen.c                         |  155 +
 arch/i386/kernel/asm-offsets.c                      |    7 
 arch/i386/kernel/cpu/Makefile                       |    5 
 arch/i386/kernel/cpu/common-xen.c                   |  743 +++++
 arch/i386/kernel/cpu/mtrr/Makefile                  |    7 
 arch/i386/kernel/cpu/mtrr/main-xen.c                |  197 +
 arch/i386/kernel/crash.c                            |    4 
 arch/i386/kernel/early_printk-xen.c                 |    2 
 arch/i386/kernel/entry-xen.S                        | 1216 ++++++++
 arch/i386/kernel/fixup.c                            |   88 
 arch/i386/kernel/head-xen.S                         |  207 +
 arch/i386/kernel/init_task-xen.c                    |   51 
 arch/i386/kernel/io_apic-xen.c                      | 2777 ++++++++++++++++++++
 arch/i386/kernel/ioport-xen.c                       |  122 
 arch/i386/kernel/irq-xen.c                          |  324 ++
 arch/i386/kernel/ldt-xen.c                          |  270 +
 arch/i386/kernel/machine_kexec.c                    |   40 
 arch/i386/kernel/microcode-xen.c                    |  144 +
 arch/i386/kernel/mpparse-xen.c                      | 1185 ++++++++
 arch/i386/kernel/pci-dma-xen.c                      |  378 ++
 arch/i386/kernel/process-xen.c                      |  853 ++++++
 arch/i386/kernel/quirks-xen.c                       |   47 
 arch/i386/kernel/setup-xen.c                        | 1898 +++++++++++++
 arch/i386/kernel/smp-xen.c                          |  624 ++++
 arch/i386/kernel/swiotlb.c                          |  726 +++++
 arch/i386/kernel/sysenter.c                         |   18 
 arch/i386/kernel/time-xen.c                         | 1143 ++++++++
 arch/i386/kernel/traps-xen.c                        | 1186 ++++++++
 arch/i386/kernel/vm86.c                             |   12 
 arch/i386/kernel/vsyscall-note-xen.S                |   32 
 arch/i386/mach-xen/Makefile                         |    5 
 arch/i386/mach-xen/setup.c                          |  147 +
 arch/i386/mm/Makefile                               |    8 
 arch/i386/mm/fault-xen.c                            |  769 +++++
 arch/i386/mm/highmem-xen.c                          |  136 
 arch/i386/mm/hypervisor.c                           |  432 +++
 arch/i386/mm/init-xen.c                             |  850 ++++++
 arch/i386/mm/ioremap-xen.c                          |  443 +++
 arch/i386/mm/pgtable-xen.c                          |  727 +++++
 arch/i386/oprofile/Makefile                         |    7 
 arch/i386/oprofile/xenoprof.c                       |  179 +
 arch/i386/pci/Makefile                              |    9 
 arch/i386/pci/irq-xen.c                             | 1205 ++++++++
 arch/i386/pci/pcifront.c                            |   55 
 arch/i386/power/Makefile                            |    4 
 arch/ia64/Kconfig                                   |   57 
 arch/ia64/Makefile                                  |   12 
 arch/ia64/hp/common/sba_iommu.c                     |   61 
 arch/ia64/kernel/acpi.c                             |    6 
 arch/ia64/kernel/asm-offsets.c                      |   25 
 arch/ia64/kernel/entry.S                            |   36 
 arch/ia64/kernel/fsys.S                             |   41 
 arch/ia64/kernel/gate.S                             |  103 
 arch/ia64/kernel/gate.lds.S                         |   14 
 arch/ia64/kernel/head.S                             |    6 
 arch/ia64/kernel/iosapic.c                          |   75 
 arch/ia64/kernel/irq_ia64.c                         |  356 ++
 arch/ia64/kernel/pal.S                              |    5 
 arch/ia64/kernel/patch.c                            |   67 
 arch/ia64/kernel/perfmon.c                          |   89 
 arch/ia64/kernel/setup.c                            |  135 
 arch/ia64/kernel/time.c                             |  194 +
 arch/ia64/mm/ioremap.c                              |    3 
 arch/ia64/oprofile/Makefile                         |    4 
 arch/ia64/oprofile/init.c                           |   14 
 arch/ia64/oprofile/oprofile_perfmon.h               |   28 
 arch/ia64/oprofile/perfmon.c                        |   35 
 arch/ia64/oprofile/xenoprof.c                       |  142 +
 arch/ia64/pci/pci.c                                 |   16 
 arch/ia64/xen/Makefile                              |    9 
 arch/ia64/xen/hypercall.S                           |  170 +
 arch/ia64/xen/hypervisor.c                          | 1264 +++++++++
 arch/ia64/xen/machvec.c                             |    4 
 arch/ia64/xen/mem.c                                 |   75 
 arch/ia64/xen/swiotlb.c                             |  882 ++++++
 arch/ia64/xen/util.c                                |  105 
 arch/ia64/xen/xcom_hcall.c                          |  397 ++
 arch/ia64/xen/xcom_mini.c                           |  469 +++
 arch/ia64/xen/xcom_privcmd.c                        |  673 ++++
 arch/ia64/xen/xen_dma.c                             |  145 +
 arch/ia64/xen/xencomm.c                             |  263 +
 arch/ia64/xen/xenentry.S                            |  931 ++++++
 arch/ia64/xen/xenhpski.c                            |   19 
 arch/ia64/xen/xenivt.S                              | 2177 +++++++++++++++
 arch/ia64/xen/xenminstate.h                         |  358 ++
 arch/ia64/xen/xenpal.S                              |   85 
 arch/ia64/xen/xensetup.S                            |   52 
 arch/um/kernel/physmem.c                            |    4 
 arch/x86_64/Kconfig                                 |   68 
 arch/x86_64/Makefile                                |   20 
 arch/x86_64/ia32/Makefile                           |   20 
 arch/x86_64/ia32/ia32entry-xen.S                    |  743 +++++
 arch/x86_64/ia32/syscall32-xen.c                    |  128 
 arch/x86_64/ia32/syscall32_syscall-xen.S            |   28 
 arch/x86_64/ia32/vsyscall-int80.S                   |   58 
 arch/x86_64/ia32/vsyscall-sigreturn.S               |    2 
 arch/x86_64/kernel/Makefile                         |   19 
 arch/x86_64/kernel/acpi/Makefile                    |    1 
 arch/x86_64/kernel/apic-xen.c                       |  197 +
 arch/x86_64/kernel/asm-offsets.c                    |    2 
 arch/x86_64/kernel/crash.c                          |    6 
 arch/x86_64/kernel/e820-xen.c                       |  774 +++++
 arch/x86_64/kernel/early_printk-xen.c               |  302 ++
 arch/x86_64/kernel/entry-xen.S                      | 1325 +++++++++
 arch/x86_64/kernel/genapic-xen.c                    |  143 +
 arch/x86_64/kernel/genapic_xen.c                    |  161 +
 arch/x86_64/kernel/head-xen.S                       |  203 +
 arch/x86_64/kernel/head64-xen.c                     |  162 +
 arch/x86_64/kernel/init_task.c                      |    3 
 arch/x86_64/kernel/io_apic-xen.c                    | 2269 ++++++++++++++++
 arch/x86_64/kernel/ioport-xen.c                     |   99 
 arch/x86_64/kernel/irq-xen.c                        |  197 +
 arch/x86_64/kernel/ldt-xen.c                        |  282 ++
 arch/x86_64/kernel/machine_kexec.c                  |  118 
 arch/x86_64/kernel/mpparse-xen.c                    | 1011 +++++++
 arch/x86_64/kernel/pci-swiotlb-xen.c                |   55 
 arch/x86_64/kernel/process-xen.c                    |  829 +++++
 arch/x86_64/kernel/setup-xen.c                      | 1677 ++++++++++++
 arch/x86_64/kernel/setup64-xen.c                    |  361 ++
 arch/x86_64/kernel/smp-xen.c                        |  600 ++++
 arch/x86_64/kernel/traps-xen.c                      | 1175 ++++++++
 arch/x86_64/kernel/vsyscall-xen.c                   |  239 +
 arch/x86_64/kernel/xen_entry.S                      |   40 
 arch/x86_64/mm/Makefile                             |   10 
 arch/x86_64/mm/fault-xen.c                          |  724 +++++
 arch/x86_64/mm/init-xen.c                           | 1238 ++++++++
 arch/x86_64/mm/pageattr-xen.c                       |  433 +++
 arch/x86_64/oprofile/Makefile                       |   10 
 arch/x86_64/pci/Makefile                            |   12 
 drivers/Makefile                                    |    1 
 drivers/acpi/Kconfig                                |    3 
 drivers/char/mem.c                                  |    6 
 drivers/char/tpm/Kconfig                            |   10 
 drivers/char/tpm/Makefile                           |    2 
 drivers/char/tpm/tpm.h                              |   15 
 drivers/char/tpm/tpm_vtpm.c                         |  542 +++
 drivers/char/tpm/tpm_vtpm.h                         |   55 
 drivers/char/tpm/tpm_xen.c                          |  720 +++++
 drivers/char/tty_io.c                               |    7 
 drivers/firmware/Kconfig                            |    1 
 drivers/pci/Kconfig                                 |    1 
 drivers/serial/Kconfig                              |    1 
 drivers/video/console/Kconfig                       |    1 
 drivers/xen/Kconfig                                 |  260 +
 drivers/xen/Makefile                                |   20 
 drivers/xen/balloon/Makefile                        |    2 
 drivers/xen/balloon/balloon.c                       |  663 ++++
 drivers/xen/balloon/common.h                        |   58 
 drivers/xen/balloon/sysfs.c                         |  170 +
 drivers/xen/blkback/Makefile                        |    3 
 drivers/xen/blkback/blkback.c                       |  614 ++++
 drivers/xen/blkback/common.h                        |  139 +
 drivers/xen/blkback/interface.c                     |  181 +
 drivers/xen/blkback/vbd.c                           |  118 
 drivers/xen/blkback/xenbus.c                        |  533 +++
 drivers/xen/blkfront/Makefile                       |    5 
 drivers/xen/blkfront/blkfront.c                     |  902 ++++++
 drivers/xen/blkfront/block.h                        |  142 +
 drivers/xen/blkfront/vbd.c                          |  372 ++
 drivers/xen/blktap/Makefile                         |    5 
 drivers/xen/blktap/blktap.c                         | 1641 +++++++++++
 drivers/xen/blktap/common.h                         |  121 
 drivers/xen/blktap/interface.c                      |  174 +
 drivers/xen/blktap/xenbus.c                         |  477 +++
 drivers/xen/char/Makefile                           |    2 
 drivers/xen/char/mem.c                              |  203 +
 drivers/xen/console/Makefile                        |    2 
 drivers/xen/console/console.c                       |  721 +++++
 drivers/xen/console/xencons_ring.c                  |  143 +
 drivers/xen/core/Makefile                           |   12 
 drivers/xen/core/cpu_hotplug.c                      |  172 +
 drivers/xen/core/evtchn.c                           | 1015 +++++++
 drivers/xen/core/features.c                         |   34 
 drivers/xen/core/gnttab.c                           |  753 +++++
 drivers/xen/core/hypervisor_sysfs.c                 |   59 
 drivers/xen/core/machine_kexec.c                    |  189 +
 drivers/xen/core/machine_reboot.c                   |  241 +
 drivers/xen/core/reboot.c                           |  249 +
 drivers/xen/core/smpboot.c                          |  452 +++
 drivers/xen/core/xen_proc.c                         |   23 
 drivers/xen/core/xen_sysfs.c                        |  378 ++
 drivers/xen/evtchn/Makefile                         |    2 
 drivers/xen/evtchn/evtchn.c                         |  469 +++
 drivers/xen/fbfront/Makefile                        |    2 
 drivers/xen/fbfront/xenfb.c                         |  752 +++++
 drivers/xen/fbfront/xenkbd.c                        |  333 ++
 drivers/xen/gntdev/Makefile                         |    1 
 drivers/xen/gntdev/gntdev.c                         |  973 +++++++
 drivers/xen/netback/Makefile                        |    5 
 drivers/xen/netback/common.h                        |  165 +
 drivers/xen/netback/interface.c                     |  336 ++
 drivers/xen/netback/loopback.c                      |  320 ++
 drivers/xen/netback/netback.c                       | 1606 +++++++++++
 drivers/xen/netback/xenbus.c                        |  453 +++
 drivers/xen/netfront/Makefile                       |    4 
 drivers/xen/netfront/netfront.c                     | 2133 +++++++++++++++
 drivers/xen/pciback/Makefile                        |   15 
 drivers/xen/pciback/conf_space.c                    |  426 +++
 drivers/xen/pciback/conf_space.h                    |  126 
 drivers/xen/pciback/conf_space_capability.c         |   71 
 drivers/xen/pciback/conf_space_capability.h         |   23 
 drivers/xen/pciback/conf_space_capability_pm.c      |  128 
 drivers/xen/pciback/conf_space_capability_vpd.c     |   42 
 drivers/xen/pciback/conf_space_header.c             |  309 ++
 drivers/xen/pciback/conf_space_quirks.c             |  126 
 drivers/xen/pciback/conf_space_quirks.h             |   35 
 drivers/xen/pciback/passthrough.c                   |  157 +
 drivers/xen/pciback/pci_stub.c                      |  929 ++++++
 drivers/xen/pciback/pciback.h                       |   93 
 drivers/xen/pciback/pciback_ops.c                   |   95 
 drivers/xen/pciback/slot.c                          |  151 +
 drivers/xen/pciback/vpci.c                          |  204 +
 drivers/xen/pciback/xenbus.c                        |  454 +++
 drivers/xen/pcifront/Makefile                       |    7 
 drivers/xen/pcifront/pci.c                          |   46 
 drivers/xen/pcifront/pci_op.c                       |  268 +
 drivers/xen/pcifront/pcifront.h                     |   40 
 drivers/xen/pcifront/xenbus.c                       |  295 ++
 drivers/xen/privcmd/Makefile                        |    2 
 drivers/xen/privcmd/privcmd.c                       |  284 ++
 drivers/xen/tpmback/Makefile                        |    4 
 drivers/xen/tpmback/common.h                        |   85 
 drivers/xen/tpmback/interface.c                     |  167 +
 drivers/xen/tpmback/tpmback.c                       |  944 ++++++
 drivers/xen/tpmback/xenbus.c                        |  289 ++
 drivers/xen/util.c                                  |   70 
 drivers/xen/xenbus/Makefile                         |    9 
 drivers/xen/xenbus/xenbus_backend_client.c          |  147 +
 drivers/xen/xenbus/xenbus_client.c                  |  283 ++
 drivers/xen/xenbus/xenbus_comms.c                   |  232 +
 drivers/xen/xenbus/xenbus_comms.h                   |   46 
 drivers/xen/xenbus/xenbus_dev.c                     |  404 ++
 drivers/xen/xenbus/xenbus_probe.c                   | 1087 +++++++
 drivers/xen/xenbus/xenbus_probe.h                   |   75 
 drivers/xen/xenbus/xenbus_probe_backend.c           |  287 ++
 drivers/xen/xenbus/xenbus_xs.c                      |  880 ++++++
 drivers/xen/xenoprof/xenoprofile.c                  |  500 +++
 fs/Kconfig                                          |    1 
 include/asm-i386/apic.h                             |    2 
 include/asm-i386/kexec.h                            |   14 
 include/asm-i386/mach-xen/asm/agp.h                 |   37 
 include/asm-i386/mach-xen/asm/desc.h                |  164 +
 include/asm-i386/mach-xen/asm/dma-mapping.h         |  157 +
 include/asm-i386/mach-xen/asm/fixmap.h              |  155 +
 include/asm-i386/mach-xen/asm/floppy.h              |  147 +
 include/asm-i386/mach-xen/asm/highmem.h             |   80 
 include/asm-i386/mach-xen/asm/hw_irq.h              |   72 
 include/asm-i386/mach-xen/asm/hypercall.h           |  407 ++
 include/asm-i386/mach-xen/asm/hypervisor.h          |  258 +
 include/asm-i386/mach-xen/asm/io.h                  |  390 ++
 include/asm-i386/mach-xen/asm/irqflags.h            |  127 
 include/asm-i386/mach-xen/asm/maddr.h               |  193 +
 include/asm-i386/mach-xen/asm/mmu.h                 |   29 
 include/asm-i386/mach-xen/asm/mmu_context.h         |  108 
 include/asm-i386/mach-xen/asm/page.h                |  235 +
 include/asm-i386/mach-xen/asm/param.h               |   23 
 include/asm-i386/mach-xen/asm/pci.h                 |  146 +
 include/asm-i386/mach-xen/asm/pgalloc.h             |   59 
 include/asm-i386/mach-xen/asm/pgtable-2level-defs.h |   20 
 include/asm-i386/mach-xen/asm/pgtable-2level.h      |  120 
 include/asm-i386/mach-xen/asm/pgtable-3level-defs.h |   24 
 include/asm-i386/mach-xen/asm/pgtable-3level.h      |  209 +
 include/asm-i386/mach-xen/asm/pgtable.h             |  531 +++
 include/asm-i386/mach-xen/asm/processor.h           |  741 +++++
 include/asm-i386/mach-xen/asm/ptrace.h              |   90 
 include/asm-i386/mach-xen/asm/scatterlist.h         |   22 
 include/asm-i386/mach-xen/asm/segment.h             |  117 
 include/asm-i386/mach-xen/asm/setup.h               |   81 
 include/asm-i386/mach-xen/asm/smp.h                 |  103 
 include/asm-i386/mach-xen/asm/spinlock.h            |  202 +
 include/asm-i386/mach-xen/asm/swiotlb.h             |   43 
 include/asm-i386/mach-xen/asm/synch_bitops.h        |  145 +
 include/asm-i386/mach-xen/asm/system.h              |  488 +++
 include/asm-i386/mach-xen/asm/tlbflush.h            |  101 
 include/asm-i386/mach-xen/asm/vga.h                 |   20 
 include/asm-i386/mach-xen/asm/xenoprof.h            |   48 
 include/asm-i386/mach-xen/irq_vectors.h             |  125 
 include/asm-i386/mach-xen/mach_traps.h              |   33 
 include/asm-i386/mach-xen/setup_arch.h              |    5 
 include/asm-ia64/agp.h                              |   31 
 include/asm-ia64/dma-mapping.h                      |   23 
 include/asm-ia64/gcc_intrin.h                       |   68 
 include/asm-ia64/hw_irq.h                           |   10 
 include/asm-ia64/hypercall.h                        |  416 ++
 include/asm-ia64/hypervisor.h                       |  226 +
 include/asm-ia64/intel_intrin.h                     |   66 
 include/asm-ia64/io.h                               |   37 
 include/asm-ia64/iosapic.h                          |    2 
 include/asm-ia64/irq.h                              |   33 
 include/asm-ia64/machvec.h                          |    2 
 include/asm-ia64/machvec_xen.h                      |   43 
 include/asm-ia64/maddr.h                            |  107 
 include/asm-ia64/meminit.h                          |    5 
 include/asm-ia64/page.h                             |   23 
 include/asm-ia64/pal.h                              |    1 
 include/asm-ia64/pgalloc.h                          |    4 
 include/asm-ia64/privop.h                           |   60 
 include/asm-ia64/processor.h                        |    1 
 include/asm-ia64/sal.h                              |   21 
 include/asm-ia64/swiotlb.h                          |   41 
 include/asm-ia64/synch_bitops.h                     |   61 
 include/asm-ia64/system.h                           |    4 
 include/asm-ia64/uaccess.h                          |   20 
 include/asm-ia64/xen/privop.h                       |  310 ++
 include/asm-ia64/xen/xcom_hcall.h                   |   88 
 include/asm-ia64/xen/xencomm.h                      |   60 
 include/asm-ia64/xenoprof.h                         |   48 
 include/asm-um/page.h                               |    2 
 include/asm-x86_64/apic.h                           |    2 
 include/asm-x86_64/kexec.h                          |   13 
 include/asm-x86_64/mach-xen/asm/agp.h               |   35 
 include/asm-x86_64/mach-xen/asm/arch_hooks.h        |   27 
 include/asm-x86_64/mach-xen/asm/bootsetup.h         |   42 
 include/asm-x86_64/mach-xen/asm/desc.h              |  263 +
 include/asm-x86_64/mach-xen/asm/dma-mapping.h       |  207 +
 include/asm-x86_64/mach-xen/asm/e820.h              |   66 
 include/asm-x86_64/mach-xen/asm/fixmap.h            |  112 
 include/asm-x86_64/mach-xen/asm/floppy.h            |  206 +
 include/asm-x86_64/mach-xen/asm/hw_irq.h            |  136 
 include/asm-x86_64/mach-xen/asm/hypercall.h         |  406 ++
 include/asm-x86_64/mach-xen/asm/hypervisor.h        |    2 
 include/asm-x86_64/mach-xen/asm/io.h                |  330 ++
 include/asm-x86_64/mach-xen/asm/irq.h               |   38 
 include/asm-x86_64/mach-xen/asm/irqflags.h          |  139 +
 include/asm-x86_64/mach-xen/asm/maddr.h             |  161 +
 include/asm-x86_64/mach-xen/asm/mmu.h               |   38 
 include/asm-x86_64/mach-xen/asm/mmu_context.h       |  136 
 include/asm-x86_64/mach-xen/asm/msr.h               |  399 ++
 include/asm-x86_64/mach-xen/asm/nmi.h               |   93 
 include/asm-x86_64/mach-xen/asm/page.h              |  217 +
 include/asm-x86_64/mach-xen/asm/pci.h               |  166 +
 include/asm-x86_64/mach-xen/asm/pgalloc.h           |  204 +
 include/asm-x86_64/mach-xen/asm/pgtable.h           |  575 ++++
 include/asm-x86_64/mach-xen/asm/processor.h         |  506 +++
 include/asm-x86_64/mach-xen/asm/ptrace.h            |  127 
 include/asm-x86_64/mach-xen/asm/smp.h               |  150 +
 include/asm-x86_64/mach-xen/asm/synch_bitops.h      |    2 
 include/asm-x86_64/mach-xen/asm/system.h            |  262 +
 include/asm-x86_64/mach-xen/asm/timer.h             |   67 
 include/asm-x86_64/mach-xen/asm/tlbflush.h          |  103 
 include/asm-x86_64/mach-xen/asm/vga.h               |   20 
 include/asm-x86_64/mach-xen/asm/xenoprof.h          |    1 
 include/asm-x86_64/mach-xen/asm/xor.h               |  328 ++
 include/asm-x86_64/mach-xen/irq_vectors.h           |  123 
 include/asm-x86_64/mach-xen/mach_time.h             |  111 
 include/asm-x86_64/mach-xen/mach_timer.h            |   50 
 include/asm-x86_64/mach-xen/setup_arch_post.h       |   63 
 include/asm-x86_64/mach-xen/setup_arch_pre.h        |    5 
 include/linux/gfp.h                                 |    6 
 include/linux/highmem.h                             |    6 
 include/linux/interrupt.h                           |    6 
 include/linux/kexec.h                               |   13 
 include/linux/mm.h                                  |   14 
 include/linux/page-flags.h                          |   14 
 include/linux/skbuff.h                              |    8 
 include/xen/balloon.h                               |   57 
 include/xen/blkif.h                                 |   97 
 include/xen/cpu_hotplug.h                           |   44 
 include/xen/driver_util.h                           |   14 
 include/xen/evtchn.h                                |  126 
 include/xen/features.h                              |   20 
 include/xen/gnttab.h                                |  167 +
 include/xen/hvm.h                                   |   23 
 include/xen/hypercall.h                             |   24 
 include/xen/hypervisor_sysfs.h                      |   32 
 include/xen/interface/COPYING                       |   38 
 include/xen/interface/acm.h                         |  228 +
 include/xen/interface/acm_ops.h                     |  159 +
 include/xen/interface/arch-ia64.h                   |  628 ++++
 include/xen/interface/arch-powerpc.h                |  125 
 include/xen/interface/arch-x86/xen-x86_32.h         |  168 +
 include/xen/interface/arch-x86/xen-x86_64.h         |  211 +
 include/xen/interface/arch-x86/xen.h                |  204 +
 include/xen/interface/arch-x86_32.h                 |   27 
 include/xen/interface/arch-x86_64.h                 |   27 
 include/xen/interface/callback.h                    |   92 
 include/xen/interface/dom0_ops.h                    |  120 
 include/xen/interface/domctl.h                      |  478 +++
 include/xen/interface/elfnote.h                     |  233 +
 include/xen/interface/elfstructs.h                  |  527 +++
 include/xen/interface/event_channel.h               |  264 +
 include/xen/interface/features.h                    |   71 
 include/xen/interface/foreign/Makefile              |   37 
 include/xen/interface/foreign/mkchecker.py          |   58 
 include/xen/interface/foreign/mkheader.py           |  167 +
 include/xen/interface/foreign/reference.size        |   18 
 include/xen/interface/foreign/structs.py            |   58 
 include/xen/interface/grant_table.h                 |  422 +++
 include/xen/interface/hvm/e820.h                    |   47 
 include/xen/interface/hvm/hvm_info_table.h          |   41 
 include/xen/interface/hvm/hvm_op.h                  |   73 
 include/xen/interface/hvm/ioreq.h                   |  122 
 include/xen/interface/hvm/params.h                  |   60 
 include/xen/interface/hvm/save.h                    |  462 +++
 include/xen/interface/hvm/vmx_assist.h              |  116 
 include/xen/interface/io/blkif.h                    |  128 
 include/xen/interface/io/console.h                  |   51 
 include/xen/interface/io/fbif.h                     |  138 
 include/xen/interface/io/kbdif.h                    |  130 
 include/xen/interface/io/netif.h                    |  184 +
 include/xen/interface/io/pciif.h                    |   83 
 include/xen/interface/io/protocols.h                |   21 
 include/xen/interface/io/ring.h                     |  299 ++
 include/xen/interface/io/tpmif.h                    |   77 
 include/xen/interface/io/xenbus.h                   |   73 
 include/xen/interface/io/xs_wire.h                  |  117 
 include/xen/interface/kexec.h                       |  137 
 include/xen/interface/libelf.h                      |  241 +
 include/xen/interface/memory.h                      |  281 ++
 include/xen/interface/nmi.h                         |   78 
 include/xen/interface/physdev.h                     |  169 +
 include/xen/interface/platform.h                    |  143 +
 include/xen/interface/sched.h                       |  121 
 include/xen/interface/sysctl.h                      |  182 +
 include/xen/interface/trace.h                       |  119 
 include/xen/interface/vcpu.h                        |  192 +
 include/xen/interface/version.h                     |   91 
 include/xen/interface/xen-compat.h                  |   51 
 include/xen/interface/xen.h                         |  610 ++++
 include/xen/interface/xencomm.h                     |   41 
 include/xen/interface/xenoprof.h                    |  132 
 include/xen/pcifront.h                              |   77 
 include/xen/public/evtchn.h                         |   88 
 include/xen/public/gntdev.h                         |  105 
 include/xen/public/privcmd.h                        |   79 
 include/xen/xen_proc.h                              |   12 
 include/xen/xenbus.h                                |  302 ++
 include/xen/xencons.h                               |   19 
 include/xen/xenoprof.h                              |   42 
 kernel/Kconfig.preempt                              |    1 
 kernel/fork.c                                       |    3 
 kernel/irq/spurious.c                               |    3 
 kernel/kexec.c                                      |   73 
 lib/Makefile                                        |    6 
 mm/Kconfig                                          |    3 
 mm/highmem.c                                        |   11 
 mm/memory.c                                         |  129 
 mm/mmap.c                                           |    4 
 mm/page_alloc.c                                     |   24 
 net/core/dev.c                                      |   66 
 net/core/skbuff.c                                   |    5 
 scripts/Makefile.xen                                |   14 
 450 files changed, 100652 insertions(+), 189 deletions(-)

diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig
--- a/arch/i386/Kconfig Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig Mon Jun 04 10:05:28 2007 +0100
@@ -16,6 +16,7 @@ config X86_32
 
 config GENERIC_TIME
        bool
+       depends on !X86_XEN
        default y
 
 config LOCKDEP_SUPPORT
@@ -102,6 +103,15 @@ config X86_PC
        bool "PC-compatible"
        help
          Choose this option if your computer is a standard PC or compatible.
+
+config X86_XEN
+       bool "Xen-compatible"
+       select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
+       select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
+       select SWIOTLB
+       help
+         Choose this option if you plan to run this kernel on top of the
+         Xen Hypervisor.
 
 config X86_ELAN
        bool "AMD Elan"
@@ -213,6 +223,7 @@ source "arch/i386/Kconfig.cpu"
 
 config HPET_TIMER
        bool "HPET Timer Support"
+       depends on !X86_XEN
        help
          This enables the use of the HPET for the kernel's internal timer.
          HPET is the next generation timer replacing legacy 8254s.
@@ -263,7 +274,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
        bool "Local APIC support on uniprocessors"
-       depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on !SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
        help
          A local APIC (Advanced Programmable Interrupt Controller) is an
          integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +299,12 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
        bool
-       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_IO_APIC
        bool
-       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_VISWS_APIC
@@ -303,7 +314,7 @@ config X86_VISWS_APIC
 
 config X86_MCE
        bool "Machine Check Exception"
-       depends on !X86_VOYAGER
+       depends on !(X86_VOYAGER || X86_XEN)
        ---help---
          Machine Check Exception support allows the processor to notify the
          kernel if it detects a problem (e.g. overheating, component failure).
@@ -402,6 +413,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+       depends on !XEN_UNPRIVILEGED_GUEST
        ---help---
          If you say Y here and also to "/dev file system support" in the
          'File systems' section, you will be able to update the microcode on
@@ -419,6 +431,7 @@ config MICROCODE
 
 config X86_MSR
        tristate "/dev/cpu/*/msr - Model-specific register support"
+       depends on !X86_XEN
        help
          This device gives privileged processes access to the x86
          Model-Specific Registers (MSRs).  It is a character device with
@@ -433,6 +446,10 @@ config X86_CPUID
          be executed on a specific processor.  It is a character device
          with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
          /dev/cpu/31/cpuid.
+
+config SWIOTLB
+       bool
+       default n
 
 source "drivers/firmware/Kconfig"
 
@@ -616,6 +633,7 @@ config HIGHPTE
 
 config MATH_EMULATION
        bool "Math emulation"
+       depends on !X86_XEN
        ---help---
          Linux can emulate a math coprocessor (used for floating point
          operations) if you don't have one. 486DX and Pentium processors have
@@ -641,6 +659,8 @@ config MATH_EMULATION
 
 config MTRR
        bool "MTRR (Memory Type Range Register) support"
+       depends on !XEN_UNPRIVILEGED_GUEST
+       default y if X86_XEN
        ---help---
          On Intel P6 family processors (Pentium Pro, Pentium II and later)
          the Memory Type Range Registers (MTRRs) may be used to control
@@ -675,7 +695,7 @@ config MTRR
 
 config EFI
        bool "Boot from EFI support"
-       depends on ACPI
+       depends on ACPI && !X86_XEN
        default n
        ---help---
        This enables the the kernel to boot on EFI platforms using
@@ -693,7 +713,7 @@ config EFI
 
 config IRQBALANCE
        bool "Enable kernel irq balancing"
-       depends on SMP && X86_IO_APIC
+       depends on SMP && X86_IO_APIC && !X86_XEN
        default y
        help
          The default yes will allow the kernel to do irq load balancing.
@@ -741,7 +761,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
@@ -793,6 +813,7 @@ config HOTPLUG_CPU
 
 config COMPAT_VDSO
        bool "Compat VDSO support"
+       depends on !X86_XEN
        default y
        help
          Map the VDSO to the predictable old-style address too.
@@ -810,18 +831,20 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
        depends on HIGHMEM
 
 menu "Power management options (ACPI, APM)"
-       depends on !X86_VOYAGER
-
+       depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
+
+if !X86_XEN
 source kernel/power/Kconfig
+endif
 
 source "drivers/acpi/Kconfig"
 
 menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
+depends on PM && !(X86_VISWS || X86_XEN)
 
 config APM
        tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+       depends on PM && PM_LEGACY
        ---help---
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
@@ -1006,6 +1029,7 @@ choice
 
 config PCI_GOBIOS
        bool "BIOS"
+       depends on !X86_XEN
 
 config PCI_GOMMCONFIG
        bool "MMConfig"
@@ -1013,6 +1037,13 @@ config PCI_GODIRECT
 config PCI_GODIRECT
        bool "Direct"
 
+config PCI_GOXEN_FE
+       bool "Xen PCI Frontend"
+       depends on X86_XEN
+       help
+         The PCI device frontend driver allows the kernel to import arbitrary
+         PCI devices from a PCI backend to support PCI driver domains.
+
 config PCI_GOANY
        bool "Any"
 
@@ -1020,7 +1051,7 @@ endchoice
 
 config PCI_BIOS
        bool
-       depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+       depends on !(X86_VISWS || X86_XEN) && PCI && (PCI_GOBIOS || PCI_GOANY)
        default y
 
 config PCI_DIRECT
@@ -1032,6 +1063,18 @@ config PCI_MMCONFIG
        bool
        depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
        default y
+
+config XEN_PCIDEV_FRONTEND
+       bool
+       depends on PCI && X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)
+       default y
+
+config XEN_PCIDEV_FE_DEBUG
+       bool "Xen PCI Frontend Debugging"
+       depends on XEN_PCIDEV_FRONTEND
+       default n
+       help
+         Enables some debug statements within the PCI Frontend.
 
 source "drivers/pci/pcie/Kconfig"
 
@@ -1043,7 +1086,7 @@ config ISA_DMA_API
 
 config ISA
        bool "ISA support"
-       depends on !(X86_VOYAGER || X86_VISWS)
+       depends on !(X86_VOYAGER || X86_VISWS || X86_XEN)
        help
          Find out whether you have ISA slots on your motherboard.  ISA is the
          name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1070,7 +1113,7 @@ source "drivers/eisa/Kconfig"
 source "drivers/eisa/Kconfig"
 
 config MCA
-       bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+       bool "MCA support" if !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y if X86_VOYAGER
        help
          MicroChannel Architecture is found in some IBM PS/2 machines and
@@ -1146,6 +1189,8 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "drivers/xen/Kconfig"
+
 source "lib/Kconfig"
 
 #
@@ -1171,7 +1216,7 @@ config X86_SMP
 
 config X86_HT
        bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on SMP && !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y
 
 config X86_BIOS_REBOOT
@@ -1184,6 +1229,16 @@ config X86_TRAMPOLINE
        depends on X86_SMP || (X86_VOYAGER && SMP)
        default y
 
+config X86_NO_TSS
+       bool
+       depends on X86_XEN
+       default y
+
+config X86_NO_IDT
+       bool
+       depends on X86_XEN
+       default y
+
 config KTIME_SCALAR
        bool
        default y
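
For orientation, the net effect of the Kconfig changes above on a paravirtualized i386 guest is roughly the following .config fragment. This is hand-written for illustration and not part of the patch; the option states simply follow the dependencies, selects and defaults added in this hunk:

CONFIG_X86_XEN=y
# SWIOTLB is selected automatically by X86_XEN
CONFIG_SWIOTLB=y
# MTRR defaults to y under X86_XEN; X86_NO_TSS/X86_NO_IDT are forced on
CONFIG_MTRR=y
CONFIG_X86_NO_TSS=y
CONFIG_X86_NO_IDT=y
# Forced off by the new !X86_XEN dependencies
# CONFIG_HPET_TIMER is not set
# CONFIG_X86_MSR is not set
# CONFIG_MATH_EMULATION is not set
# CONFIG_COMPAT_VDSO is not set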
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.cpu
--- a/arch/i386/Kconfig.cpu     Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.cpu     Mon Jun 04 10:05:28 2007 +0100
@@ -251,7 +251,7 @@ config X86_PPRO_FENCE
 
 config X86_F00F_BUG
        bool
-       depends on M586MMX || M586TSC || M586 || M486 || M386
+       depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
        default y
 
 config X86_WP_WORKS_OK
@@ -311,5 +311,5 @@ config X86_OOSTORE
 
 config X86_TSC
        bool
-       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
-       default y
+       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
+       default y
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Kconfig.debug
--- a/arch/i386/Kconfig.debug   Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Kconfig.debug   Mon Jun 04 10:05:28 2007 +0100
@@ -79,6 +79,7 @@ config DOUBLEFAULT
 config DOUBLEFAULT
        default y
        bool "Enable doublefault exception handler" if EMBEDDED
+       depends on !X86_NO_TSS
        help
           This option allows trapping of rare doublefault exceptions that
           would otherwise cause a system to silently reboot. Disabling this
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/Makefile
--- a/arch/i386/Makefile        Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/Makefile        Mon Jun 04 10:05:28 2007 +0100
@@ -48,6 +48,11 @@ CFLAGS                               += $(shell if [ $(call cc-vers
 
 CFLAGS += $(cflags-y)
 
+cppflags-$(CONFIG_XEN) += \
+       -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
+
+CPPFLAGS += $(cppflags-y)
+
 # Default subarch .c files
 mcore-y  := mach-default
 
@@ -70,6 +75,10 @@ mcore-$(CONFIG_X86_BIGSMP)   := mach-defau
 #Summit subarch support
 mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
 mcore-$(CONFIG_X86_SUMMIT)  := mach-default
+
+# Xen subarch support
+mflags-$(CONFIG_X86_XEN)       := -Iinclude/asm-i386/mach-xen
+mcore-$(CONFIG_X86_XEN)                := mach-xen
 
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
@@ -105,6 +114,19 @@ PHONY += zImage bzImage compressed zlilo
 PHONY += zImage bzImage compressed zlilo bzlilo \
          zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
 
+ifdef CONFIG_XEN
+CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
+boot := arch/i386/boot-xen
+.PHONY: vmlinuz
+all: vmlinuz
+
+vmlinuz: vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $@
+
+install:
+       $(Q)$(MAKE) $(build)=$(boot) $@
+else
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
@@ -127,6 +149,7 @@ fdimage fdimage144 fdimage288 isoimage: 
 
 install:
        $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+endif
 
 archclean:
        $(Q)$(MAKE) $(clean)=arch/i386/boot
@@ -145,3 +168,4 @@ CLEAN_FILES += arch/$(ARCH)/boot/fdimage
 CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
               arch/$(ARCH)/boot/image.iso \
               arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += vmlinuz vmlinux-stripped
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/boot-xen/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/boot-xen/Makefile       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,19 @@
+
+OBJCOPYFLAGS := -g --strip-unneeded
+
+vmlinuz: vmlinux-stripped FORCE
+       $(call if_changed,gzip)
+
+vmlinux-stripped: vmlinux FORCE
+       $(call if_changed,objcopy)
+
+INSTALL_ROOT := $(patsubst %/boot,%,$(INSTALL_PATH))
+
+XINSTALL_NAME ?= $(KERNELRELEASE)
+install:
+       mkdir -p $(INSTALL_ROOT)/boot
+       install -m0644 vmlinuz $(INSTALL_ROOT)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0644 vmlinux $(INSTALL_ROOT)/boot/vmlinux-syms-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0664 .config $(INSTALL_ROOT)/boot/config-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       install -m0664 System.map $(INSTALL_ROOT)/boot/System.map-$(XINSTALL_NAME)$(INSTALL_SUFFIX)
+       ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_ROOT)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL)$(shell [ -e $(objtree)/localversion-xen ] && cat $(objtree)/localversion-xen)$(INSTALL_SUFFIX)
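
Spelled out, the two if_changed rules above reduce to the following standalone sketch, assuming the stock kbuild command definitions (cmd_objcopy applies $(OBJCOPYFLAGS) to its input; cmd_gzip compresses with gzip -9):

OBJCOPY ?= objcopy

# Strip debug info and unneeded symbols, then compress the result.
vmlinuz: vmlinux-stripped
	gzip -f -9 < $< > $@

vmlinux-stripped: vmlinux
	$(OBJCOPY) -g --strip-unneeded $< $@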
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/Makefile Mon Jun 04 10:05:28 2007 +0100
@@ -44,6 +44,12 @@ EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)           += scx200.o
 
+ifdef CONFIG_XEN
+vsyscall_note := vsyscall-note-xen.o
+else
+vsyscall_note := vsyscall-note.o
+endif
+
 # vsyscall.o contains the vsyscall DSO images as __initdata.
 # We must build both images before we can assemble it.
 # Note: kbuild does not track this dependency due to usage of .incbin
@@ -65,7 +71,7 @@ SYSCFLAGS_vsyscall-int80.so   = $(vsyscall
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-                     $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
+                     $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -77,8 +83,20 @@ extra-y += vsyscall-syms.o
 
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-                       $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
+                       $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
 
 k8-y                      += ../../x86_64/kernel/k8.o
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+extra-y := $(call cherrypickxen, $(extra-y))
+%/head-xen.o %/head-xen.s: EXTRA_AFLAGS :=
+endif
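
The filterxen and cherrypickxen helpers are pulled in from scripts/Makefile.xen (added elsewhere in this patch). As a hypothetical sketch inferred only from the call sites above (filterxen drops the objects named in n-obj-xen; cherrypickxen swaps foo.o for foo-xen.o whenever a -xen variant of the source exists), they could look like:

# Inferred sketch; not the actual contents of scripts/Makefile.xen.
filterxen = $(filter-out $(2),$(1))
cherrypickxen = $(foreach obj,$(1), \
	$(if $(wildcard $(srctree)/$(src)/$(patsubst %.o,%-xen.c,$(obj)) \
	                $(srctree)/$(src)/$(patsubst %.o,%-xen.S,$(obj))), \
		$(patsubst %.o,%-xen.o,$(obj)),$(obj)))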
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/Makefile
--- a/arch/i386/kernel/acpi/Makefile    Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/acpi/Makefile    Mon Jun 04 10:05:28 2007 +0100
@@ -6,3 +6,7 @@ obj-y                           += cstate.o processor.o
 obj-y                          += cstate.o processor.o
 endif
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/acpi/boot-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/acpi/boot-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1168 @@
+/*
+ *  boot.c - Architecture-Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2001 Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/irq.h>
+
+#include <asm/pgtable.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#include <asm/io.h>
+#include <asm/mpspec.h>
+
+#ifdef CONFIG_X86_64
+
+extern void __init clustered_apic_check(void);
+
+extern int gsi_irq_sharing(int gsi);
+#include <asm/proto.h>
+
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
+#else                          /* X86 */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#endif                         /* CONFIG_X86_LOCAL_APIC */
+
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
+#endif                         /* X86 */
+
+#define BAD_MADT_ENTRY(entry, end) (                                       \
+               (!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+               ((acpi_table_entry_header *)entry)->length < sizeof(*entry))
+
+#define PREFIX                 "ACPI: "
+
+int acpi_noirq __initdata;     /* skip ACPI IRQ initialization */
+int acpi_pci_disabled __initdata;      /* skip ACPI PCI scan and IRQ initialization */
+int acpi_ht __initdata = 1;    /* enable HT */
+
+int acpi_lapic;
+int acpi_ioapic;
+int acpi_strict;
+EXPORT_SYMBOL(acpi_strict);
+
+acpi_interrupt_flags acpi_sci_flags __initdata;
+int acpi_sci_override_gsi __initdata;
+int acpi_skip_timer_override __initdata;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+#endif
+
+#ifndef __HAVE_ARCH_CMPXCHG
+#warning ACPI uses CMPXCHG, i486 and later hardware
+#endif
+
+#define MAX_MADT_ENTRIES       256
+u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
+    {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
+EXPORT_SYMBOL(x86_acpiid_to_apicid);
+
+/* --------------------------------------------------------------------------
+                              Boot-time Configuration
+   -------------------------------------------------------------------------- */
+
+/*
+ * The default interrupt routing model is PIC (8259).  This gets
+ * overriden if IOAPICs are enumerated (below).
+ */
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
+
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+
+/* rely on all ACPI tables being in the direct mapping */
+char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+{
+       if (!phys_addr || !size)
+               return NULL;
+
+       if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
+               return __va(phys_addr);
+
+       return NULL;
+}
+
+#else
+
+/*
+ * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
+ * to map the target physical address. The problem is that set_fixmap()
+ * provides a single page, and it is possible that the page is not
+ * sufficient.
+ * By using this area, we can map up to MAX_IO_APICS pages temporarily,
+ * i.e. until the next __va_range() call.
+ *
+ * Important Safety Note:  The fixed I/O APIC page numbers are *subtracted*
+ * from the fixed base.  That's why we start at FIX_IO_APIC_BASE_END and
+ * count idx down while incrementing the phys address.
+ */
+char *__acpi_map_table(unsigned long phys, unsigned long size)
+{
+       unsigned long base, offset, mapped_size;
+       int idx;
+
+#ifndef CONFIG_XEN
+       if (phys + size < 8 * 1024 * 1024)
+               return __va(phys);
+#endif
+
+       offset = phys & (PAGE_SIZE - 1);
+       mapped_size = PAGE_SIZE - offset;
+       set_fixmap(FIX_ACPI_END, phys);
+       base = fix_to_virt(FIX_ACPI_END);
+
+       /*
+        * Most cases can be covered by the below.
+        */
+       idx = FIX_ACPI_END;
+       while (mapped_size < size) {
+               if (--idx < FIX_ACPI_BEGIN)
+                       return NULL;    /* cannot handle this */
+               phys += PAGE_SIZE;
+               set_fixmap(idx, phys);
+               mapped_size += PAGE_SIZE;
+       }
+
+       return ((unsigned char *)base + offset);
+}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+/* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
+struct acpi_table_mcfg_config *pci_mmcfg_config;
+int pci_mmcfg_config_num;
+
+int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_mcfg *mcfg;
+       unsigned long i;
+       int config_size;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
+       if (!mcfg) {
+               printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+               return -ENODEV;
+       }
+
+       /* how many config structures do we have */
+       pci_mmcfg_config_num = 0;
+       i = size - sizeof(struct acpi_table_mcfg);
+       while (i >= sizeof(struct acpi_table_mcfg_config)) {
+               ++pci_mmcfg_config_num;
+               i -= sizeof(struct acpi_table_mcfg_config);
+       };
+       if (pci_mmcfg_config_num == 0) {
+               printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
+               return -ENODEV;
+       }
+
+       config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
+       pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
+       if (!pci_mmcfg_config) {
+               printk(KERN_WARNING PREFIX
+                      "No memory for MCFG config tables\n");
+               return -ENOMEM;
+       }
+
+       memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+       for (i = 0; i < pci_mmcfg_config_num; ++i) {
+               if (mcfg->config[i].base_reserved) {
+                       printk(KERN_ERR PREFIX
+                              "MMCONFIG not in low 4GB of memory\n");
+                       kfree(pci_mmcfg_config);
+                       pci_mmcfg_config_num = 0;
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+#endif                         /* CONFIG_PCI_MMCONFIG */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_madt *madt = NULL;
+
+       if (!phys_addr || !size || !cpu_has_apic)
+               return -EINVAL;
+
+       madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
+       if (!madt) {
+               printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+               return -ENODEV;
+       }
+
+       if (madt->lapic_address) {
+               acpi_lapic_addr = (u64) madt->lapic_address;
+
+               printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+                      madt->lapic_address);
+       }
+
+       acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_lapic *processor = NULL;
+
+       processor = (struct acpi_table_lapic *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* Record local apic id only when enabled */
+       if (processor->flags.enabled)
+               x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
+
+       /*
+        * We need to register disabled CPU as well to permit
+        * counting disabled CPUs. This allows us to size
+        * cpus_possible_map more accurately, to permit
+        * to not preallocating memory for all NR_CPUS
+        * when we use CPU hotplug.
+        */
+       mp_register_lapic(processor->id,        /* APIC ID */
+                         processor->flags.enabled);    /* Enabled? */
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+                         const unsigned long end)
+{
+       struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+
+       lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
+
+       if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
+               return -EINVAL;
+
+       acpi_lapic_addr = lapic_addr_ovr->address;
+
+       return 0;
+}
+
+static int __init
+acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+
+       lapic_nmi = (struct acpi_table_lapic_nmi *)header;
+
+       if (BAD_MADT_ENTRY(lapic_nmi, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (lapic_nmi->lint != 1)
+               printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+       return 0;
+}
+
+#endif                         /*CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+
+static int __init
+acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_ioapic *ioapic = NULL;
+
+       ioapic = (struct acpi_table_ioapic *)header;
+
+       if (BAD_MADT_ENTRY(ioapic, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       mp_register_ioapic(ioapic->id,
+                          ioapic->address, ioapic->global_irq_base);
+
+       return 0;
+}
+
+/*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+{
+       if (trigger == 0)       /* compatible SCI trigger is level */
+               trigger = 3;
+
+       if (polarity == 0)      /* compatible SCI polarity is low */
+               polarity = 3;
+
+       /* Command-line over-ride via acpi_sci= */
+       if (acpi_sci_flags.trigger)
+               trigger = acpi_sci_flags.trigger;
+
+       if (acpi_sci_flags.polarity)
+               polarity = acpi_sci_flags.polarity;
+
+       /*
+        * mp_config_acpi_legacy_irqs() already setup IRQs < 16
+        * If GSI is < 16, this will update its flags,
+        * else it will create a new mp_irqs[] entry.
+        */
+       mp_override_legacy_irq(gsi, polarity, trigger, gsi);
+
+       /*
+        * stash over-ride to indicate we've been here
+        * and for later update of acpi_fadt
+        */
+       acpi_sci_override_gsi = gsi;
+       return;
+}
+
+static int __init
+acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+                      const unsigned long end)
+{
+       struct acpi_table_int_src_ovr *intsrc = NULL;
+
+       intsrc = (struct acpi_table_int_src_ovr *)header;
+
+       if (BAD_MADT_ENTRY(intsrc, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (intsrc->bus_irq == acpi_fadt.sci_int) {
+               acpi_sci_ioapic_setup(intsrc->global_irq,
+                                     intsrc->flags.polarity,
+                                     intsrc->flags.trigger);
+               return 0;
+       }
+
+       if (acpi_skip_timer_override &&
+           intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+               printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+               return 0;
+       }
+
+       mp_override_legacy_irq(intsrc->bus_irq,
+                              intsrc->flags.polarity,
+                              intsrc->flags.trigger, intsrc->global_irq);
+
+       return 0;
+}
+
+static int __init
+acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
+{
+       struct acpi_table_nmi_src *nmi_src = NULL;
+
+       nmi_src = (struct acpi_table_nmi_src *)header;
+
+       if (BAD_MADT_ENTRY(nmi_src, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* TBD: Support nmi_src entries? */
+
+       return 0;
+}
+
+#endif                         /* CONFIG_X86_IO_APIC */
+
+/*
+ * acpi_pic_sci_set_trigger()
+ *
+ * use ELCR to set PIC-mode trigger type for SCI
+ *
+ * If a PIC-mode SCI is not recognized or gives spurious IRQ7s,
+ * it may require Edge Trigger -- use "acpi_sci=edge"
+ *
+ * Ports 0x4d0-0x4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
+ * for the 8259 PIC.  bit[n] = 1 means irq[n] is Level, otherwise Edge.
+ * ELCR1 covers IRQs 0-7 (IRQs 0, 1, 2 must be 0)
+ * ELCR2 covers IRQs 8-15 (IRQs 8, 13 must be 0)
+ */
+
+void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
+{
+       unsigned int mask = 1 << irq;
+       unsigned int old, new;
+
+       /* Real old ELCR mask */
+       old = inb(0x4d0) | (inb(0x4d1) << 8);
+
+       /*
+        * If we use ACPI to set PCI IRQs, then we should clear ELCR
+        * since we will set it correctly as we enable the PCI irq
+        * routing.
+        */
+       new = acpi_noirq ? old : 0;
+
+       /*
+        * Update SCI information in the ELCR, it isn't in the PCI
+        * routing tables..
+        */
+       switch (trigger) {
+       case 1:         /* Edge - clear */
+               new &= ~mask;
+               break;
+       case 3:         /* Level - set */
+               new |= mask;
+               break;
+       }
+
+       if (old == new)
+               return;
+
+       printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
+       outb(new, 0x4d0);
+       outb(new >> 8, 0x4d1);
+}
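+
+/*
+ * Worked example: for irq 10 with trigger == 3 (level), mask is
+ * 1 << 10 == 0x0400. With acpi_noirq clear, new starts at 0 and becomes
+ * 0x0400; if the old ELCR read 0x0800, the function logs
+ * "setting ELCR to 0400 (from 0800)" and then writes 0x00 to port 0x4d0
+ * and 0x04 to port 0x4d1.
+ */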
+
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (use_pci_vector() && !platform_legacy_irq(gsi))
+               *irq = IO_APIC_VECTOR(gsi);
+       else
+#endif
+               *irq = gsi_irq_sharing(gsi);
+       return 0;
+}
+
+/*
+ * success: return IRQ number (>=0)
+ * failure: return < 0
+ */
+int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+{
+       unsigned int irq;
+       unsigned int plat_gsi = gsi;
+
+#ifdef CONFIG_PCI
+       /*
+        * Make sure all (legacy) PCI IRQs are set as level-triggered.
+        */
+       if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+               extern void eisa_set_level_irq(unsigned int irq);
+
+               if (triggering == ACPI_LEVEL_SENSITIVE)
+                       eisa_set_level_irq(gsi);
+       }
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+       if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
+               plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+       }
+#endif
+       acpi_gsi_to_irq(plat_gsi, &irq);
+       return irq;
+}
+
+EXPORT_SYMBOL(acpi_register_gsi);
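+
+/*
+ * A minimal usage sketch (hypothetical caller, not part of this patch):
+ * a driver with an ACPI-described, level-triggered, active-low interrupt
+ * would map the GSI to a Linux IRQ before requesting it. my_handler,
+ * "mydev" and dev are placeholders:
+ *
+ *     int irq = acpi_register_gsi(gsi, ACPI_LEVEL_SENSITIVE,
+ *                                 ACPI_ACTIVE_LOW);
+ *     if (irq >= 0)
+ *             error = request_irq(irq, my_handler, IRQF_SHARED,
+ *                                 "mydev", dev);
+ */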
+
+/*
+ *  ACPI based hotplug support for CPU
+ */
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_map_lsapic);
+
+int acpi_unmap_lsapic(int cpu)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unmap_lsapic);
+#endif                         /* CONFIG_ACPI_HOTPLUG_CPU */
+
+int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_register_ioapic);
+
+int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
+{
+       /* TBD */
+       return -EINVAL;
+}
+
+EXPORT_SYMBOL(acpi_unregister_ioapic);
+
+static unsigned long __init
+acpi_scan_rsdp(unsigned long start, unsigned long length)
+{
+       unsigned long offset = 0;
+       unsigned long sig_len = sizeof("RSD PTR ") - 1;
+       unsigned long vstart = (unsigned long)isa_bus_to_virt(start);
+
+       /*
+        * Scan all 16-byte boundaries of the physical memory region for the
+        * RSDP signature.
+        */
+       for (offset = 0; offset < length; offset += 16) {
+               if (strncmp((char *)(vstart + offset), "RSD PTR ", sig_len))
+                       continue;
+               return (start + offset);
+       }
+
+       return 0;
+}
+
+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_sbf *sb;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
+       if (!sb) {
+               printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+               return -ENODEV;
+       }
+
+       sbf_port = sb->sbf_cmos;        /* Save CMOS port */
+
+       return 0;
+}
+
+#ifdef CONFIG_HPET_TIMER
+
+static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+{
+       struct acpi_table_hpet *hpet_tbl;
+
+       if (!phys || !size)
+               return -EINVAL;
+
+       hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
+       if (!hpet_tbl) {
+               printk(KERN_WARNING PREFIX "Unable to map HPET\n");
+               return -ENODEV;
+       }
+
+       if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+               printk(KERN_WARNING PREFIX "HPET timers must be located in "
+                      "memory.\n");
+               return -1;
+       }
+#ifdef CONFIG_X86_64
+       vxtime.hpet_address = hpet_tbl->addr.addrl |
+           ((long)hpet_tbl->addr.addrh << 32);
+
+       printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+              hpet_tbl->id, vxtime.hpet_address);
+#else                          /* X86 */
+       {
+               extern unsigned long hpet_address;
+
+               hpet_address = hpet_tbl->addr.addrl;
+               printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+                      hpet_tbl->id, hpet_address);
+       }
+#endif                         /* X86 */
+
+       return 0;
+}
+#else
+#define        acpi_parse_hpet NULL
+#endif
+
+#ifdef CONFIG_X86_PM_TIMER
+extern u32 pmtmr_ioport;
+#endif
+
+static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+{
+       struct fadt_descriptor *fadt = NULL;
+
+       fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
+       if (!fadt) {
+               printk(KERN_WARNING PREFIX "Unable to map FADT\n");
+               return 0;
+       }
+       /* initialize sci_int early for INT_SRC_OVR MADT parsing */
+       acpi_fadt.sci_int = fadt->sci_int;
+
+       /* initialize rev and apic_phys_dest_mode for x86_64 genapic */
+       acpi_fadt.revision = fadt->revision;
+       acpi_fadt.force_apic_physical_destination_mode =
+           fadt->force_apic_physical_destination_mode;
+
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
+       /* detect the location of the ACPI PM Timer */
+       if (fadt->revision >= FADT2_REVISION_ID) {
+               /* FADT rev. 2 */
+               if (fadt->xpm_tmr_blk.address_space_id !=
+                   ACPI_ADR_SPACE_SYSTEM_IO)
+                       return 0;
+
+               pmtmr_ioport = fadt->xpm_tmr_blk.address;
+               /*
+                * "X" fields are optional extensions to the original V1.0
+                * fields, so we must selectively expand V1.0 fields if the
+                * corresponding X field is zero.
+                */
+               if (!pmtmr_ioport)
+                       pmtmr_ioport = fadt->V1_pm_tmr_blk;
+       } else {
+               /* FADT rev. 1 */
+               pmtmr_ioport = fadt->V1_pm_tmr_blk;
+       }
+       if (pmtmr_ioport)
+               printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
+                      pmtmr_ioport);
+#endif
+       return 0;
+}
+
+unsigned long __init acpi_find_rsdp(void)
+{
+       unsigned long rsdp_phys = 0;
+
+       if (efi_enabled) {
+               if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+                       return efi.acpi20;
+               else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+                       return efi.acpi;
+       }
+       /*
+        * Scan memory looking for the RSDP signature. First search EBDA (low
+        * memory) paragraphs and then search upper memory (E0000-FFFFF).
+        */
+       rsdp_phys = acpi_scan_rsdp(0, 0x400);
+       if (!rsdp_phys)
+               rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
+
+       return rsdp_phys;
+}
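+
+/*
+ * Note on the scan above: sig_len in acpi_scan_rsdp() is
+ * sizeof("RSD PTR ") - 1, i.e. 8 bytes including the trailing space but
+ * not the NUL -- exactly the signature length the ACPI specification
+ * defines. Per the comment above, the low-memory range 0-0x400 stands in
+ * for the EBDA search and 0xE0000-0xFFFFF covers the BIOS read-only
+ * space -- the two areas where the specification allows the RSDP.
+ */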
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Parse LAPIC entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_lapic_entries(void)
+{
+       int count;
+
+       if (!cpu_has_apic)
+               return -ENODEV;
+
+       /*
+        * Note that the LAPIC address is obtained from the MADT (32-bit value)
+        * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
+        */
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
+                                 acpi_parse_lapic_addr_ovr, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX
+                      "Error parsing LAPIC address override entry\n");
+               return count;
+       }
+
+       mp_register_lapic_address(acpi_lapic_addr);
+
+       count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+                                     MAX_APICS);
+       if (!count) {
+               printk(KERN_ERR PREFIX "No LAPIC entries present\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return -ENODEV;
+       } else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+       return 0;
+}
+#endif                         /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * Parse IOAPIC related entries in MADT
+ * returns 0 on success, < 0 on error
+ */
+static int __init acpi_parse_madt_ioapic_entries(void)
+{
+       int count;
+
+       /*
+        * ACPI interpreter is required to complete interrupt setup,
+        * so if it is off, don't enumerate the io-apics with ACPI.
+        * If MPS is present, it will handle them,
+        * otherwise the system will stay in PIC mode
+        */
+       if (acpi_disabled || acpi_noirq) {
+               return -ENODEV;
+       }
+
+       if (!cpu_has_apic) 
+               return -ENODEV;
+
+       /*
+        * if "noapic" boot option, don't look for IO-APICs
+        */
+       if (skip_ioapic_setup) {
+               printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
+                      "due to 'noapic' option.\n");
+               return -ENODEV;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
+                                 MAX_IO_APICS);
+       if (!count) {
+               printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
+               return -ENODEV;
+       } else if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
+               return count;
+       }
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
+                                 NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX
+                      "Error parsing interrupt source overrides entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       /*
+        * If BIOS did not supply an INT_SRC_OVR for the SCI
+        * pretend we got one so we can set the SCI flags.
+        */
+       if (!acpi_sci_override_gsi)
+               acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+
+       /* Fill in identity legacy mappings where no override */
+       mp_config_acpi_legacy_irqs();
+
+       count =
+           acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
+                                 NR_IRQ_VECTORS);
+       if (count < 0) {
+               printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+               /* TBD: Cleanup to allow fallback to MPS */
+               return count;
+       }
+
+       return 0;
+}
+#else
+static inline int acpi_parse_madt_ioapic_entries(void)
+{
+       return -1;
+}
+#endif /* !CONFIG_X86_IO_APIC */
+
+static void __init acpi_process_madt(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       int count, error;
+
+       count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
+       if (count >= 1) {
+
+               /*
+                * Parse MADT LAPIC entries
+                */
+               error = acpi_parse_madt_lapic_entries();
+               if (!error) {
+                       acpi_lapic = 1;
+
+#ifdef CONFIG_X86_GENERICARCH
+                       generic_bigsmp_probe();
+#endif
+                       /*
+                        * Parse MADT IO-APIC entries
+                        */
+                       error = acpi_parse_madt_ioapic_entries();
+                       if (!error) {
+                               acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
+                               acpi_irq_balance_set(NULL);
+                               acpi_ioapic = 1;
+
+                               smp_found_config = 1;
+                               clustered_apic_check();
+                       }
+               }
+               if (error == -EINVAL) {
+                       /*
+                        * Dell Precision Workstation 410, 610 come here.
+                        */
+                       printk(KERN_ERR PREFIX
+                              "Invalid BIOS MADT, disabling ACPI\n");
+                       disable_acpi();
+               }
+       }
+#endif
+       return;
+}
+
+extern int acpi_force;
+
+#ifdef __i386__
+
+static int __init disable_acpi_irq(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
+                      d->ident);
+               acpi_noirq_set();
+       }
+       return 0;
+}
+
+static int __init disable_acpi_pci(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
+                      d->ident);
+               acpi_disable_pci();
+       }
+       return 0;
+}
+
+static int __init dmi_disable_acpi(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
+               disable_acpi();
+       } else {
+               printk(KERN_NOTICE
+                      "Warning: DMI blacklist says broken, but acpi forced\n");
+       }
+       return 0;
+}
+
+/*
+ * Limit ACPI to CPU enumeration for HT
+ */
+static int __init force_acpi_ht(struct dmi_system_id *d)
+{
+       if (!acpi_force) {
+               printk(KERN_NOTICE "%s detected: force use of acpi=ht\n",
+                      d->ident);
+               disable_acpi();
+               acpi_ht = 1;
+       } else {
+               printk(KERN_NOTICE
+                      "Warning: acpi=force overrules DMI blacklist: 
acpi=ht\n");
+       }
+       return 0;
+}
+
+/*
+ * If your system is blacklisted here, but you find that acpi=force
+ * works for you, please contact acpi-devel@xxxxxxxxxxxxxxx
+ */
+static struct dmi_system_id __initdata acpi_dmi_table[] = {
+       /*
+        * Boxes that need ACPI disabled
+        */
+       {
+        .callback = dmi_disable_acpi,
+        .ident = "IBM Thinkpad",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
+                    },
+        },
+
+       /*
+        * Boxes that need acpi=ht
+        */
+       {
+        .callback = force_acpi_ht,
+        .ident = "FSC Primergy T850",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "DELL GX240",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+                    DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "HP VISUALIZE NT Workstation",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "Compaq Workstation W8000",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS P4B266",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "P4B266"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS P2B-DS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ASUS CUR-DLS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "ABIT i440BX-W83977",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"),
+                    DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM Bladecenter",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eServer xSeries 360",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eserver xSeries 330",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"),
+                    },
+        },
+       {
+        .callback = force_acpi_ht,
+        .ident = "IBM eserver xSeries 440",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"),
+                    },
+        },
+
+       /*
+        * Boxes that need ACPI PCI IRQ routing disabled
+        */
+       {
+        .callback = disable_acpi_irq,
+        .ident = "ASUS A7V",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
+                    DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
+                    /* newer BIOS, Revision 1011, does work */
+                    DMI_MATCH(DMI_BIOS_VERSION,
+                              "ASUS A7V ACPI BIOS Revision 1007"),
+                    },
+        },
+
+       /*
+        * Boxes that need ACPI PCI IRQ routing and PCI scan disabled
+        */
+       {                       /* _BBN 0 bug */
+        .callback = disable_acpi_pci,
+        .ident = "ASUS PR-DLS",
+        .matches = {
+                    DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+                    DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
+                    DMI_MATCH(DMI_BIOS_VERSION,
+                              "ASUS PR-DLS ACPI BIOS Revision 1010"),
+                    DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
+                    },
+        },
+       {
+        .callback = disable_acpi_pci,
+        .ident = "Acer TravelMate 36x Laptop",
+        .matches = {
+                    DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                    DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
+                    },
+        },
+       {}
+};
+
+#endif                         /* __i386__ */
+
+/*
+ * acpi_boot_table_init() and acpi_boot_init()
+ *  called from setup_arch(), always.
+ *     1. checksums all tables
+ *     2. enumerates lapics
+ *     3. enumerates io-apics
+ *
+ * acpi_table_init() is separate to allow reading SRAT without
+ * other side effects.
+ *
+ * side effects of acpi_boot_init:
+ *     acpi_lapic = 1 if LAPIC found
+ *     acpi_ioapic = 1 if IOAPIC found
+ *     if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
+ *     if acpi_blacklisted() acpi_disabled = 1;
+ *     acpi_irq_model=...
+ *     ...
+ *
+ * return value: (currently ignored)
+ *     0: success
+ *     !0: failure
+ */
+
+int __init acpi_boot_table_init(void)
+{
+       int error;
+
+#ifdef __i386__
+       dmi_check_system(acpi_dmi_table);
+#endif
+
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+               return 1;
+
+       /* 
+        * Initialize the ACPI boot-time table parser.
+        */
+       error = acpi_table_init();
+       if (error) {
+               disable_acpi();
+               return error;
+       }
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * blacklist may disable ACPI entirely
+        */
+       error = acpi_blacklisted();
+       if (error) {
+               if (acpi_force) {
+                       printk(KERN_WARNING PREFIX "acpi=force override\n");
+               } else {
+                       printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
+                       disable_acpi();
+                       return error;
+               }
+       }
+
+       return 0;
+}
+
+int __init acpi_boot_init(void)
+{
+       /*
+        * If acpi_disabled, bail out
+        * One exception: acpi=ht continues far enough to enumerate LAPICs
+        */
+       if (acpi_disabled && !acpi_ht)
+               return 1;
+
+       acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
+       /*
+        * set sci_int and PM timer address
+        */
+       acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+
+       /*
+        * Process the Multiple APIC Description Table (MADT), if present
+        */
+       acpi_process_madt();
+
+       acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+
+       return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/apic-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/apic-xen.c       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,155 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ *     Pavel Machek and
+ *     Mikael Pettersson       :       PM converted to driver model.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+#include <asm/hpet.h>
+#include <asm/i8253.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+#include <mach_apicdef.h>
+#include <mach_ipi.h>
+
+#include "io_ports.h"
+
+#ifndef CONFIG_XEN
+/*
+ * cpu_mask that denotes the CPUs that need the timer interrupt coming in as
+ * IPIs in place of local APIC timers
+ */
+static cpumask_t timer_bcast_ipi;
+#endif
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ */
+int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+/*
+ * Debug level
+ */
+int apic_verbosity;
+
+#ifndef CONFIG_XEN
+static int modern_apic(void)
+{
+       unsigned int lvr, version;
+       /* AMD systems use old APIC versions, so check the CPU */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+               boot_cpu_data.x86 >= 0xf)
+               return 1;
+       lvr = apic_read(APIC_LVR);
+       version = GET_APIC_VERSION(lvr);
+       return version >= 0x14;
+}
+#endif /* !CONFIG_XEN */
+
+/*
+ * 'What should we do if we get a hw irq event on an illegal vector?'
+ * Each architecture has to answer this itself.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        * But only ack when the APIC is enabled -AK
+        */
+       if (cpu_has_apic)
+               ack_APIC_irq();
+}
+
+int get_physical_broadcast(void)
+{
+        return 0xff;
+}
+
+#ifndef CONFIG_XEN
+#ifndef CONFIG_SMP
+static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+{
+       int cpu = smp_processor_id();
+
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+       per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+       smp_local_timer_interrupt(regs);
+}
+#endif
+
+void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+{
+       cpumask_t mask;
+
+       cpus_and(mask, cpu_online_map, timer_bcast_ipi);
+       if (!cpus_empty(mask)) {
+#ifdef CONFIG_SMP
+               send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#else
+               /*
+                * We can directly call the apic timer interrupt handler
+                * in UP case. Minus all irq related functions
+                */
+               up_apic_timer_interrupt_call(regs);
+#endif
+       }
+}
+#endif
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+       return -EINVAL;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config)
+               if (!skip_ioapic_setup && nr_ioapics)
+                       setup_IO_APIC();
+#endif
+
+       return 0;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/asm-offsets.c
--- a/arch/i386/kernel/asm-offsets.c    Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/asm-offsets.c    Mon Jun 04 10:05:28 2007 +0100
@@ -66,9 +66,14 @@ void foo(void)
        OFFSET(pbe_orig_address, pbe, orig_address);
        OFFSET(pbe_next, pbe, next);
 
+#ifndef CONFIG_X86_NO_TSS
        /* Offset from the sysenter stack to tss.esp0 */
-       DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+       DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) -
                 sizeof(struct tss_struct));
+#else
+       /* sysenter stack points directly to esp0 */
+       DEFINE(SYSENTER_stack_esp0, 0);
+#endif
 
        DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
        DEFINE(VDSO_PRELINK, VDSO_PRELINK);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/Makefile
--- a/arch/i386/kernel/cpu/Makefile     Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/Makefile     Mon Jun 04 10:05:28 2007 +0100
@@ -17,3 +17,8 @@ obj-$(CONFIG_X86_MCE) +=      mcheck/
 
 obj-$(CONFIG_MTRR)     +=      mtrr/
 obj-$(CONFIG_CPU_FREQ) +=      cpufreq/
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/common-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/common-xen.c Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,743 @@
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <mach_apic.h>
+#else
+#ifdef CONFIG_XEN
+#define phys_pkg_id(a,b) a
+#endif
+#endif
+#include <asm/hypervisor.h>
+
+#include "cpu.h"
+
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
+#ifndef CONFIG_XEN
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+#endif
+
+static int cachesize_override __cpuinitdata = -1;
+static int disable_x86_fxsr __cpuinitdata;
+static int disable_x86_serial_nr __cpuinitdata = 1;
+static int disable_x86_sep __cpuinitdata;
+
+struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+extern int disable_pse;
+
+static void default_init(struct cpuinfo_x86 * c)
+{
+       /* Not much we can do here... */
+       /* Check if at least it has cpuid */
+       if (c->cpuid_level == -1) {
+               /* No cpuid. It must be an ancient CPU */
+               if (c->x86 == 4)
+                       strcpy(c->x86_model_id, "486");
+               else if (c->x86 == 3)
+                       strcpy(c->x86_model_id, "386");
+       }
+}
+
+static struct cpu_dev default_cpu = {
+       .c_init = default_init,
+       .c_vendor = "Unknown",
+};
+static struct cpu_dev * this_cpu = &default_cpu;
+
+static int __init cachesize_setup(char *str)
+{
+       get_option (&str, &cachesize_override);
+       return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+{
+       unsigned int *v;
+       char *p, *q;
+
+       if (cpuid_eax(0x80000000) < 0x80000004)
+               return 0;
+
+       v = (unsigned int *) c->x86_model_id;
+       cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+       cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+       cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+       c->x86_model_id[48] = 0;
+
+       /* Intel chips right-justify this string for some dumb reason;
+          undo that brain damage */
+       p = q = &c->x86_model_id[0];
+       while ( *p == ' ' )
+            p++;
+       if ( p != q ) {
+            while ( *p )
+                 *q++ = *p++;
+            while ( q <= &c->x86_model_id[48] )
+                 *q++ = '\0';  /* Zero-pad the rest */
+       }
+
+       return 1;
+}
+
+
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+{
+       unsigned int n, dummy, ecx, edx, l2size;
+
+       n = cpuid_eax(0x80000000);
+
+       if (n >= 0x80000005) {
+               cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+               printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache 
%dK (%d bytes/line)\n",
+                       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+               c->x86_cache_size=(ecx>>24)+(edx>>24);  
+       }
+
+       if (n < 0x80000006)     /* Some chips just have a large L1. */
+               return;
+
+       ecx = cpuid_ecx(0x80000006);
+       l2size = ecx >> 16;
+       
+       /* do processor-specific cache resizing */
+       if (this_cpu->c_size_cache)
+               l2size = this_cpu->c_size_cache(c,l2size);
+
+       /* Allow user to override all this if necessary. */
+       if (cachesize_override != -1)
+               l2size = cachesize_override;
+
+       if ( l2size == 0 )
+               return;         /* Again, no L2 cache is possible */
+
+       c->x86_cache_size = l2size;
+
+       printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+              l2size, ecx & 0xFF);
+}
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table is only used if init_<vendor>() below doesn't set the model name; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+
+/* Look up CPU names by table lookup. */
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+{
+       struct cpu_model_info *info;
+
+       if ( c->x86_model >= 16 )
+               return NULL;    /* Range check */
+
+       if (!this_cpu)
+               return NULL;
+
+       info = this_cpu->c_models;
+
+       while (info && info->family) {
+               if (info->family == c->x86)
+                       return info->model_names[c->x86_model];
+               info++;
+       }
+       return NULL;            /* Not found */
+}
+
+
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+{
+       char *v = c->x86_vendor_id;
+       int i;
+       static int printed;
+
+       for (i = 0; i < X86_VENDOR_NUM; i++) {
+               if (cpu_devs[i]) {
+                       if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+                           (cpu_devs[i]->c_ident[1] && 
+                            !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+                               c->x86_vendor = i;
+                               if (!early)
+                                       this_cpu = cpu_devs[i];
+                               return;
+                       }
+               }
+       }
+       if (!printed) {
+               printed++;
+               printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+               printk(KERN_ERR "CPU: Your system may be unstable.\n");
+       }
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       this_cpu = &default_cpu;
+}
+
+
+static int __init x86_fxsr_setup(char * s)
+{
+       disable_x86_fxsr = 1;
+       return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+static int __init x86_sep_setup(char * s)
+{
+       disable_x86_sep = 1;
+       return 1;
+}
+__setup("nosep", x86_sep_setup);
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+       u32 f1, f2;
+
+       asm("pushfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "movl %0,%1\n\t"
+           "xorl %2,%0\n\t"
+           "pushl %0\n\t"
+           "popfl\n\t"
+           "pushfl\n\t"
+           "popl %0\n\t"
+           "popfl\n\t"
+           : "=&r" (f1), "=&r" (f2)
+           : "ir" (flag));
+
+       return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __cpuinit have_cpuid_p(void)
+{
+       return flag_is_changeable_p(X86_EFLAGS_ID);
+}
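+
+/*
+ * Worked example of the probe above: X86_EFLAGS_ID is bit 21
+ * (0x00200000). flag_is_changeable_p() saves EFLAGS, writes it back with
+ * the bit flipped, then re-reads EFLAGS; on any CPU with CPUID the
+ * flipped bit sticks, so f1 ^ f2 has bit 21 set and the function returns
+ * 1. On a CPU without CPUID the bit snaps back and it returns 0. The
+ * same trick with X86_EFLAGS_AC (bit 18) distinguishes a 386 from a 486
+ * in identify_cpu() below.
+ */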
+
+/* Do minimum CPU detection early.
+   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+   The others are not touched to avoid unwanted side effects.
+
+   WARNING: this function is only called on the BP.  Don't add code here
+   that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       c->x86_cache_alignment = 32;
+
+       if (!have_cpuid_p())
+               return;
+
+       /* Get vendor name */
+       cpuid(0x00000000, &c->cpuid_level,
+             (int *)&c->x86_vendor_id[0],
+             (int *)&c->x86_vendor_id[8],
+             (int *)&c->x86_vendor_id[4]);
+
+       get_cpu_vendor(c, 1);
+
+       c->x86 = 4;
+       if (c->cpuid_level >= 0x00000001) {
+               u32 junk, tfms, cap0, misc;
+               cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
+               c->x86 = (tfms >> 8) & 15;
+               c->x86_model = (tfms >> 4) & 15;
+               if (c->x86 == 0xf)
+                       c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
+                       c->x86_model += ((tfms >> 16) & 0xF) << 4;
+               c->x86_mask = tfms & 15;
+               if (cap0 & (1<<19))
+                       c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
+       }
+}
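+
+/*
+ * Worked example of the signature decoding above: for a CPU reporting
+ * tfms == 0x000006d8 (e.g. a Pentium M), family = (tfms >> 8) & 15 = 6,
+ * model = (tfms >> 4) & 15 = 13 and mask (stepping) = tfms & 15 = 8.
+ * Because family >= 6, the extended model bits (tfms >> 16) & 0xF are
+ * folded in as the high nibble of the model; they are zero here, so the
+ * model stays 13.
+ */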
+
+void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+{
+       u32 tfms, xlvl;
+       int ebx;
+
+       if (have_cpuid_p()) {
+               /* Get vendor name */
+               cpuid(0x00000000, &c->cpuid_level,
+                     (int *)&c->x86_vendor_id[0],
+                     (int *)&c->x86_vendor_id[8],
+                     (int *)&c->x86_vendor_id[4]);
+               
+               get_cpu_vendor(c, 0);
+               /* Initialize the standard set of capabilities */
+               /* Note that the vendor-specific code below might override */
+       
+               /* Intel-defined flags: level 0x00000001 */
+               if ( c->cpuid_level >= 0x00000001 ) {
+                       u32 capability, excap;
+                       cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+                       c->x86_capability[0] = capability;
+                       c->x86_capability[4] = excap;
+                       c->x86 = (tfms >> 8) & 15;
+                       c->x86_model = (tfms >> 4) & 15;
+                       if (c->x86 == 0xf)
+                               c->x86 += (tfms >> 20) & 0xff;
+                       if (c->x86 >= 0x6)
+                               c->x86_model += ((tfms >> 16) & 0xF) << 4;
+                       c->x86_mask = tfms & 15;
+#ifdef CONFIG_X86_HT
+                       c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+#else
+                       c->apicid = (ebx >> 24) & 0xFF;
+#endif
+               } else {
+                       /* Have CPUID level 0 only - unheard of */
+                       c->x86 = 4;
+               }
+
+               /* AMD-defined flags: level 0x80000001 */
+               xlvl = cpuid_eax(0x80000000);
+               if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+                       if ( xlvl >= 0x80000001 ) {
+                               c->x86_capability[1] = cpuid_edx(0x80000001);
+                               c->x86_capability[6] = cpuid_ecx(0x80000001);
+                       }
+                       if ( xlvl >= 0x80000004 )
+                               get_model_name(c); /* Default name */
+               }
+       }
+
+       early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+       c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
+}
+
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+{
+       if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+               /* Disable processor serial number */
+               unsigned long lo,hi;
+               rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+               lo |= 0x200000;
+               wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+               printk(KERN_NOTICE "CPU serial number disabled.\n");
+               clear_bit(X86_FEATURE_PN, c->x86_capability);
+
+               /* Disabling the serial number may affect the cpuid level */
+               c->cpuid_level = cpuid_eax(0);
+       }
+}
+
+static int __init x86_serial_nr_setup(char *s)
+{
+       disable_x86_serial_nr = 0;
+       return 1;
+}
+__setup("serialnumber", x86_serial_nr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+{
+       int i;
+
+       c->loops_per_jiffy = loops_per_jiffy;
+       c->x86_cache_size = -1;
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       c->cpuid_level = -1;    /* CPUID not detected */
+       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_vendor_id[0] = '\0'; /* Unset */
+       c->x86_model_id[0] = '\0';  /* Unset */
+       c->x86_max_cores = 1;
+       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+       if (!have_cpuid_p()) {
+               /* First of all, decide if this is a 486 or higher */
+               /* It's a 486 if we can modify the AC flag */
+               if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+                       c->x86 = 4;
+               else
+                       c->x86 = 3;
+       }
+
+       generic_identify(c);
+
+       printk(KERN_DEBUG "CPU: After generic identify, caps:");
+       for (i = 0; i < NCAPINTS; i++)
+               printk(" %08lx", c->x86_capability[i]);
+       printk("\n");
+
+       if (this_cpu->c_identify) {
+               this_cpu->c_identify(c);
+
+               printk(KERN_DEBUG "CPU: After vendor identify, caps:");
+               for (i = 0; i < NCAPINTS; i++)
+                       printk(" %08lx", c->x86_capability[i]);
+               printk("\n");
+       }
+
+       /*
+        * Vendor-specific initialization.  In this section we
+        * canonicalize the feature flags, meaning if there are
+        * features a certain CPU supports which CPUID doesn't
+        * tell us, CPUID claiming incorrect flags, or other bugs,
+        * we handle them here.
+        *
+        * At the end of this section, c->x86_capability better
+        * indicate the features this CPU genuinely supports!
+        */
+       if (this_cpu->c_init)
+               this_cpu->c_init(c);
+
+       /* Disable the PN if appropriate */
+       squash_the_stupid_serial_number(c);
+
+       /*
+        * The vendor-specific functions might have changed features.  Now
+        * we do "generic changes."
+        */
+
+       /* TSC disabled? */
+       if ( tsc_disable )
+               clear_bit(X86_FEATURE_TSC, c->x86_capability);
+
+       /* FXSR disabled? */
+       if (disable_x86_fxsr) {
+               clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+               clear_bit(X86_FEATURE_XMM, c->x86_capability);
+       }
+
+       /* SEP disabled? */
+       if (disable_x86_sep)
+               clear_bit(X86_FEATURE_SEP, c->x86_capability);
+
+       if (disable_pse)
+               clear_bit(X86_FEATURE_PSE, c->x86_capability);
+
+       /* If the model name is still unset, do table lookup. */
+       if ( !c->x86_model_id[0] ) {
+               char *p;
+               p = table_lookup_model(c);
+               if ( p )
+                       strcpy(c->x86_model_id, p);
+               else
+                       /* Last resort... */
+                       sprintf(c->x86_model_id, "%02x/%02x",
+                               c->x86, c->x86_model);
+       }
+
+       /* Now the feature flags better reflect actual CPU features! */
+
+       printk(KERN_DEBUG "CPU: After all inits, caps:");
+       for (i = 0; i < NCAPINTS; i++)
+               printk(" %08lx", c->x86_capability[i]);
+       printk("\n");
+
+       /*
+        * On SMP, boot_cpu_data holds the common feature set between
+        * all CPUs; so make sure that we indicate which features are
+        * common between the CPUs.  The first time this routine gets
+        * executed, c == &boot_cpu_data.
+        */
+       if ( c != &boot_cpu_data ) {
+               /* AND the already accumulated flags with these */
+               for ( i = 0 ; i < NCAPINTS ; i++ )
+                       boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+       }
+
+       /* Init Machine Check Exception if available. */
+       mcheck_init(c);
+
+       if (c == &boot_cpu_data)
+               sysenter_setup();
+       enable_sep_cpu();
+
+       if (c == &boot_cpu_data)
+               mtrr_bp_init();
+       else
+               mtrr_ap_init();
+}
+
+#ifdef CONFIG_X86_HT
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+{
+       u32     eax, ebx, ecx, edx;
+       int     index_msb, core_bits;
+
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+
+       if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+               return;
+
+       smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+       if (smp_num_siblings == 1) {
+               printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
+       } else if (smp_num_siblings > 1 ) {
+
+               if (smp_num_siblings > NR_CPUS) {
+                       printk(KERN_WARNING "CPU: Unsupported number of the "
+                                       "siblings %d", smp_num_siblings);
+                       smp_num_siblings = 1;
+                       return;
+               }
+
+               index_msb = get_count_order(smp_num_siblings);
+               c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+
+               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+                      c->phys_proc_id);
+
+               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(smp_num_siblings) ;
+
+               core_bits = get_count_order(c->x86_max_cores);
+
+               c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+                                              ((1 << core_bits) - 1);
+
+               if (c->x86_max_cores > 1)
+                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                              c->cpu_core_id);
+       }
+}
+#endif
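+
+/*
+ * Worked example for detect_ht(), assuming the default (flat)
+ * phys_pkg_id(), which shifts the initial APIC ID right by index_msb:
+ * on a package with 2 cores of 2 threads each, CPUID leaf 1 reports
+ * smp_num_siblings = 4, so index_msb = 2 and phys_proc_id = apicid >> 2.
+ * Dividing by x86_max_cores (2) leaves 2 threads per core, hence
+ * index_msb = 1, core_bits = 1 and cpu_core_id = (apicid >> 1) & 1 --
+ * the APIC ID decomposes into {package, core, thread} fields from high
+ * bits to low.
+ */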
+
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+{
+       char *vendor = NULL;
+
+       if (c->x86_vendor < X86_VENDOR_NUM)
+               vendor = this_cpu->c_vendor;
+       else if (c->cpuid_level >= 0)
+               vendor = c->x86_vendor_id;
+
+       if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+               printk("%s ", vendor);
+
+       if (!c->x86_model_id[0])
+               printk("%d86", c->x86);
+       else
+               printk("%s", c->x86_model_id);
+
+       if (c->x86_mask || c->cpuid_level >= 0) 
+               printk(" stepping %02x\n", c->x86_mask);
+       else
+               printk("\n");
+}
+
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+/* This is hacky. :)
+ * We're emulating future behavior.
+ * In the future, the cpu-specific init functions will be called implicitly
+ * via the magic of initcalls.
+ * They will insert themselves into the cpu_devs structure.
+ * Then, when cpu_init() is called, we can just iterate over that array.
+ */
+
+extern int intel_cpu_init(void);
+extern int cyrix_init_cpu(void);
+extern int nsc_init_cpu(void);
+extern int amd_init_cpu(void);
+extern int centaur_init_cpu(void);
+extern int transmeta_init_cpu(void);
+extern int rise_init_cpu(void);
+extern int nexgen_init_cpu(void);
+extern int umc_init_cpu(void);
+
+void __init early_cpu_init(void)
+{
+       intel_cpu_init();
+       cyrix_init_cpu();
+       nsc_init_cpu();
+       amd_init_cpu();
+       centaur_init_cpu();
+       transmeta_init_cpu();
+       rise_init_cpu();
+       nexgen_init_cpu();
+       umc_init_cpu();
+       early_cpu_detect();
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+       /* pse is not compatible with on-the-fly unmapping,
+        * disable it even if the cpus claim to support it.
+        */
+       clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+       disable_pse = 1;
+#endif
+}
+
+void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr)
+{
+       unsigned long frames[16];
+       unsigned long va;
+       int f;
+
+       for (va = gdt_descr->address, f = 0;
+            va < gdt_descr->address + gdt_descr->size;
+            va += PAGE_SIZE, f++) {
+               frames[f] = virt_to_mfn(va);
+               make_lowmem_page_readonly(
+                       (void *)va, XENFEAT_writable_descriptor_tables);
+       }
+       if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
+               BUG();
+}
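+
+/*
+ * Note on the hypercall above: a Xen guest may not write live descriptor
+ * tables directly, so each page backing the GDT is handed to the
+ * hypervisor as a machine frame number after being remapped read-only
+ * (skipped when the hypervisor advertises
+ * XENFEAT_writable_descriptor_tables). The second argument to
+ * HYPERVISOR_set_gdt() is the number of 8-byte descriptor entries,
+ * gdt_descr->size / 8, not a byte count.
+ */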
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless; this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+       int cpu = smp_processor_id();
+#ifndef CONFIG_X86_NO_TSS
+       struct tss_struct * t = &per_cpu(init_tss, cpu);
+#endif
+       struct thread_struct *thread = &current->thread;
+       struct desc_struct *gdt;
+       struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+
+       if (cpu_test_and_set(cpu, cpu_initialized)) {
+               printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+               for (;;) local_irq_enable();
+       }
+       printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+       if (cpu_has_vme || cpu_has_de)
+               clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+       if (tsc_disable && cpu_has_tsc) {
+               printk(KERN_NOTICE "Disabling TSC...\n");
+               /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+               clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+               set_in_cr4(X86_CR4_TSD);
+       }
+
+#ifndef CONFIG_XEN
+       /* The CPU hotplug case */
+       if (cpu_gdt_descr->address) {
+               gdt = (struct desc_struct *)cpu_gdt_descr->address;
+               memset(gdt, 0, PAGE_SIZE);
+               goto old_gdt;
+       }
+       /*
+        * This is a horrible hack to allocate the GDT.  The problem
+        * is that cpu_init() is called really early for the boot CPU
+        * (and hence needs bootmem) but much later for the secondary
+        * CPUs, when bootmem will have gone away
+        */
+       if (NODE_DATA(0)->bdata->node_bootmem_map) {
+               gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+               /* alloc_bootmem_pages panics on failure, so no check */
+               memset(gdt, 0, PAGE_SIZE);
+       } else {
+               gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+               if (unlikely(!gdt)) {
+                       printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+                       for (;;)
+                               local_irq_enable();
+               }
+       }
+old_gdt:
+       /*
+        * Initialize the per-CPU GDT with the boot GDT,
+        * and set up the GDT descriptor:
+        */
+       memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+
+       /* Set up GDT entry for 16bit stack */
+       *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
+               ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+               ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+               (CPU_16BIT_STACK_SIZE - 1);
+
+       cpu_gdt_descr->size = GDT_SIZE - 1;
+       cpu_gdt_descr->address = (unsigned long)gdt;
+#else
+       if (cpu == 0 && cpu_gdt_descr->address == 0) {
+               gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+               /* alloc_bootmem_pages panics on failure, so no check */
+               memset(gdt, 0, PAGE_SIZE);
+
+               memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+               
+               cpu_gdt_descr->size = GDT_SIZE;
+               cpu_gdt_descr->address = (unsigned long)gdt;
+       }
+#endif
+
+       cpu_gdt_init(cpu_gdt_descr);
+
+       /*
+        * Set up and load the per-CPU TSS and LDT
+        */
+       atomic_inc(&init_mm.mm_count);
+       current->active_mm = &init_mm;
+       if (current->mm)
+               BUG();
+       enter_lazy_tlb(&init_mm, current);
+
+       load_esp0(t, thread);
+
+       load_LDT(&init_mm.context);
+
+#ifdef CONFIG_DOUBLEFAULT
+       /* Set up doublefault TSS pointer in the GDT */
+       __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+
+       /* Clear %fs and %gs. */
+       asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+
+       /* Clear all 6 debug registers: */
+       set_debugreg(0, 0);
+       set_debugreg(0, 1);
+       set_debugreg(0, 2);
+       set_debugreg(0, 3);
+       set_debugreg(0, 6);
+       set_debugreg(0, 7);
+
+       /*
+        * Force FPU initialization:
+        */
+       current_thread_info()->status = 0;
+       clear_used_math();
+       mxcsr_feature_mask_init();
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __cpuinit cpu_uninit(void)
+{
+       int cpu = raw_smp_processor_id();
+       cpu_clear(cpu, cpu_initialized);
+
+       /* lazy TLB state */
+       per_cpu(cpu_tlbstate, cpu).state = 0;
+       per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
+}
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/Makefile
--- a/arch/i386/kernel/cpu/mtrr/Makefile        Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/cpu/mtrr/Makefile        Mon Jun 04 10:05:28 2007 +0100
@@ -3,3 +3,10 @@ obj-y          += cyrix.o
 obj-y          += cyrix.o
 obj-y          += centaur.o
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/cpu/mtrr/main-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/cpu/mtrr/main-xen.c      Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,197 @@
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+
+#include <asm/mtrr.h>
+#include "mtrr.h"
+
+static DEFINE_MUTEX(mtrr_mutex);
+
+void generic_get_mtrr(unsigned int reg, unsigned long *base,
+                     unsigned int *size, mtrr_type * type)
+{
+       struct xen_platform_op op;
+
+       op.cmd = XENPF_read_memtype;
+       op.u.read_memtype.reg = reg;
+       (void)HYPERVISOR_platform_op(&op);
+
+       *size = op.u.read_memtype.nr_mfns;
+       *base = op.u.read_memtype.mfn;
+       *type = op.u.read_memtype.type;
+}
+
+struct mtrr_ops generic_mtrr_ops = {
+       .use_intel_if      = 1,
+       .get               = generic_get_mtrr,
+};
+
+struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
+unsigned int num_var_ranges;
+unsigned int *usage_table;
+
+static void __init set_num_var_ranges(void)
+{
+       struct xen_platform_op op;
+
+       for (num_var_ranges = 0; ; num_var_ranges++) {
+               op.cmd = XENPF_read_memtype;
+               op.u.read_memtype.reg = num_var_ranges;
+               if (HYPERVISOR_platform_op(&op) != 0)
+                       break;
+       }
+}
+
+static void __init init_table(void)
+{
+       int i, max;
+
+       max = num_var_ranges;
+       if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL))
+           == NULL) {
+               printk(KERN_ERR "mtrr: could not allocate\n");
+               return;
+       }
+       for (i = 0; i < max; i++)
+               usage_table[i] = 0;
+}
+
+int mtrr_add_page(unsigned long base, unsigned long size, 
+                 unsigned int type, char increment)
+{
+       int error;
+       struct xen_platform_op op;
+
+       mutex_lock(&mtrr_mutex);
+
+       op.cmd = XENPF_add_memtype;
+       op.u.add_memtype.mfn     = base;
+       op.u.add_memtype.nr_mfns = size;
+       op.u.add_memtype.type    = type;
+       error = HYPERVISOR_platform_op(&op);
+       if (error) {
+               mutex_unlock(&mtrr_mutex);
+               BUG_ON(error > 0);
+               return error;
+       }
+
+       if (increment)
+               ++usage_table[op.u.add_memtype.reg];
+
+       mutex_unlock(&mtrr_mutex);
+
+       return op.u.add_memtype.reg;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+       if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+               printk(KERN_WARNING
+                       "mtrr: size and base must be multiples of 4 kiB\n");
+               printk(KERN_DEBUG
+                       "mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+               dump_stack();
+               return -1;
+       }
+       return 0;
+}
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+        char increment)
+{
+       if (mtrr_check(base, size))
+               return -EINVAL;
+       return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+                            increment);
+}
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+       unsigned i;
+       mtrr_type ltype;
+       unsigned long lbase;
+       unsigned int lsize;
+       int error = -EINVAL;
+       struct xen_platform_op op;
+
+       mutex_lock(&mtrr_mutex);
+
+       if (reg < 0) {
+               /*  Search for existing MTRR  */
+               for (i = 0; i < num_var_ranges; ++i) {
+                       mtrr_if->get(i, &lbase, &lsize, &ltype);
+                       if (lbase == base && lsize == size) {
+                               reg = i;
+                               break;
+                       }
+               }
+               if (reg < 0) {
+                       printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
+                              size);
+                       goto out;
+               }
+       }
+       if (usage_table[reg] < 1) {
+               printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+               goto out;
+       }
+       if (--usage_table[reg] < 1) {
+               op.cmd = XENPF_del_memtype;
+               op.u.del_memtype.handle = 0;
+               op.u.del_memtype.reg    = reg;
+               error = HYPERVISOR_platform_op(&op);
+               if (error) {
+                       BUG_ON(error > 0);
+                       goto out;
+               }
+       }
+       error = reg;
+ out:
+       mutex_unlock(&mtrr_mutex);
+       return error;
+}
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+       if (mtrr_check(base, size))
+               return -EINVAL;
+       return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+void __init mtrr_bp_init(void)
+{
+}
+
+void mtrr_ap_init(void)
+{
+}
+
+static int __init mtrr_init(void)
+{
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       if (!is_initial_xendomain())
+               return -ENODEV;
+
+       if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_K6_MTRR)) &&
+           (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) &&
+           (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
+               return -ENODEV;
+
+       set_num_var_ranges();
+       init_table();
+
+       return 0;
+}
+
+subsys_initcall(mtrr_init);
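
As a usage sketch (not part of the patch): callers pair mtrr_add() and
mtrr_del() exactly as on native hardware; the register index simply comes
back from the XENPF_add_memtype hypercall instead of a local allocator.
fb_base and fb_size below are hypothetical values, and both must be 4 KiB
aligned or mtrr_check() rejects the request with -EINVAL.

    #include <asm/mtrr.h>    /* MTRR_TYPE_WRCOMB */

    static int fb_reg = -1;

    static int fb_enable_wc(unsigned long fb_base, unsigned long fb_size)
    {
            fb_reg = mtrr_add(fb_base, fb_size, MTRR_TYPE_WRCOMB, 1);
            return fb_reg < 0 ? fb_reg : 0;  /* register index or -errno */
    }

    static void fb_disable_wc(unsigned long fb_base, unsigned long fb_size)
    {
            if (fb_reg >= 0)
                    mtrr_del(fb_reg, fb_base, fb_size);  /* drops the refcount */
    }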
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/crash.c
--- a/arch/i386/kernel/crash.c  Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/crash.c  Mon Jun 04 10:05:28 2007 +0100
@@ -90,6 +90,7 @@ static void crash_save_self(struct pt_re
        crash_save_this_cpu(regs, cpu);
 }
 
+#ifndef CONFIG_XEN
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 
@@ -154,6 +155,7 @@ static void nmi_shootdown_cpus(void)
        /* There are no cpus to shootdown */
 }
 #endif
+#endif /* CONFIG_XEN */
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -170,10 +172,12 @@ void machine_crash_shutdown(struct pt_re
 
        /* Make a note of crashing cpu. Will be used in NMI callback.*/
        crashing_cpu = smp_processor_id();
+#ifndef CONFIG_XEN
        nmi_shootdown_cpus();
        lapic_shutdown();
 #if defined(CONFIG_X86_IO_APIC)
        disable_IO_APIC();
 #endif
+#endif /* CONFIG_XEN */
        crash_save_self(regs);
 }
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/early_printk-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/early_printk-xen.c       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2 @@
+
+#include "../../x86_64/kernel/early_printk-xen.c"
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/entry-xen.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/entry-xen.S      Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1216 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ *     ptrace needs to have all regs on the stack.
+ *     if the order here is changed, it needs to be
+ *     updated in fork.c:copy_process, signal.c:do_signal,
+ *     ptrace.c and ptrace.h
+ *
+ *      0(%esp) - %ebx
+ *      4(%esp) - %ecx
+ *      8(%esp) - %edx
+ *       C(%esp) - %esi
+ *     10(%esp) - %edi
+ *     14(%esp) - %ebp
+ *     18(%esp) - %eax
+ *     1C(%esp) - %ds
+ *     20(%esp) - %es
+ *     24(%esp) - orig_eax
+ *     28(%esp) - %eip
+ *     2C(%esp) - %cs
+ *     30(%esp) - %eflags
+ *     34(%esp) - %oldesp
+ *     38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/irqflags.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include <asm/desc.h>
+#include <asm/dwarf2.h>
+#include "irq_vectors.h"
+#include <xen/interface/xen.h>
+
+#define nr_syscalls ((syscall_table_size)/4)
+
+EBX            = 0x00
+ECX            = 0x04
+EDX            = 0x08
+ESI            = 0x0C
+EDI            = 0x10
+EBP            = 0x14
+EAX            = 0x18
+DS             = 0x1C
+ES             = 0x20
+ORIG_EAX       = 0x24
+EIP            = 0x28
+CS             = 0x2C
+EFLAGS         = 0x30
+OLDESP         = 0x34
+OLDSS          = 0x38
+
+CF_MASK                = 0x00000001
+TF_MASK                = 0x00000100
+IF_MASK                = 0x00000200
+DF_MASK                = 0x00000400 
+NT_MASK                = 0x00004000
+VM_MASK                = 0x00020000
+/* Pseudo-eflags. */
+NMI_MASK       = 0x80000000
+
+#ifndef CONFIG_XEN
+#define DISABLE_INTERRUPTS     cli
+#define ENABLE_INTERRUPTS      sti
+#else
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending          /* 0 */
+#define evtchn_upcall_mask             1
+
+#define sizeof_vcpu_shift              6
+
+#ifdef CONFIG_SMP
+#define GET_VCPU_INFO          movl TI_cpu(%ebp),%esi                  ; \
+                               shl  $sizeof_vcpu_shift,%esi            ; \
+                               addl HYPERVISOR_shared_info,%esi
+#else
+#define GET_VCPU_INFO          movl HYPERVISOR_shared_info,%esi
+#endif
+
+#define __DISABLE_INTERRUPTS   movb $1,evtchn_upcall_mask(%esi)
+#define __ENABLE_INTERRUPTS    movb $0,evtchn_upcall_mask(%esi)
+#define DISABLE_INTERRUPTS     GET_VCPU_INFO                           ; \
+                               __DISABLE_INTERRUPTS
+#define ENABLE_INTERRUPTS      GET_VCPU_INFO                           ; \
+                               __ENABLE_INTERRUPTS
+#define __TEST_PENDING         testb $0xFF,evtchn_upcall_pending(%esi)
+#endif
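
In C terms these macros implement a per-VCPU software interrupt flag. A
rough model (not from the patch; the real vcpu_info_t carries more state,
but sizeof_vcpu_shift == 6 pins each slot at 64 bytes, with the two flag
bytes at offsets 0 and 1 as the #defines above say):

    #include <stdint.h>

    struct vcpu_info {
            uint8_t evtchn_upcall_pending;   /* offset 0, __TEST_PENDING   */
            uint8_t evtchn_upcall_mask;      /* offset 1, the virtual "IF" */
            uint8_t pad[62];                 /* 1 << sizeof_vcpu_shift     */
    };

    /* DISABLE_INTERRUPTS / ENABLE_INTERRUPTS on the current VCPU's slot. */
    static inline void xen_cli(struct vcpu_info *v) { v->evtchn_upcall_mask = 1; }
    static inline void xen_sti(struct vcpu_info *v) { v->evtchn_upcall_mask = 0; }

    /* __TEST_PENDING: did an event arrive while we were masked? */
    static inline int xen_event_pending(const struct vcpu_info *v)
    {
            return v->evtchn_upcall_pending != 0;
    }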
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop           cli; TRACE_IRQS_OFF
+#else
+#define preempt_stop
+#define resume_kernel          restore_nocheck
+#endif
+
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+       testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
+       jz 1f
+       TRACE_IRQS_ON
+1:
+#endif
+.endm
+
+#ifdef CONFIG_VM86
+#define resume_userspace_sig   check_userspace
+#else
+#define resume_userspace_sig   resume_userspace
+#endif
+
+#define SAVE_ALL \
+       cld; \
+       pushl %es; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET es, 0;*/\
+       pushl %ds; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET ds, 0;*/\
+       pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET eax, 0;\
+       pushl %ebp; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebp, 0;\
+       pushl %edi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edi, 0;\
+       pushl %esi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET esi, 0;\
+       pushl %edx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edx, 0;\
+       pushl %ecx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ecx, 0;\
+       pushl %ebx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebx, 0;\
+       movl $(__USER_DS), %edx; \
+       movl %edx, %ds; \
+       movl %edx, %es;
+
+#define RESTORE_INT_REGS \
+       popl %ebx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebx;\
+       popl %ecx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ecx;\
+       popl %edx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edx;\
+       popl %esi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE esi;\
+       popl %edi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edi;\
+       popl %ebp;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebp;\
+       popl %eax;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE eax
+
+#define RESTORE_REGS   \
+       RESTORE_INT_REGS; \
+1:     popl %ds;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE ds;*/\
+2:     popl %es;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE es;*/\
+.section .fixup,"ax";  \
+3:     movl $0,(%esp); \
+       jmp 1b;         \
+4:     movl $0,(%esp); \
+       jmp 2b;         \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,3b;    \
+       .long 2b,4b;    \
+.previous
+
+#define RING0_INT_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 3*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 4*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, OLDESP-EBX;\
+       /*CFI_OFFSET cs, CS-OLDESP;*/\
+       CFI_OFFSET eip, EIP-OLDESP;\
+       /*CFI_OFFSET es, ES-OLDESP;*/\
+       /*CFI_OFFSET ds, DS-OLDESP;*/\
+       CFI_OFFSET eax, EAX-OLDESP;\
+       CFI_OFFSET ebp, EBP-OLDESP;\
+       CFI_OFFSET edi, EDI-OLDESP;\
+       CFI_OFFSET esi, ESI-OLDESP;\
+       CFI_OFFSET edx, EDX-OLDESP;\
+       CFI_OFFSET ecx, ECX-OLDESP;\
+       CFI_OFFSET ebx, EBX-OLDESP
+
+ENTRY(ret_from_fork)
+       CFI_STARTPROC
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       call schedule_tail
+       GET_THREAD_INFO(%ebp)
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       pushl $0x0202                   # Reset kernel eflags
+       CFI_ADJUST_CFA_OFFSET 4
+       popfl
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp syscall_exit
+       CFI_ENDPROC
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible, which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+       # userspace resumption stub bypassing syscall exit tracing
+       ALIGN
+       RING0_PTREGS_FRAME
+ret_from_exception:
+       preempt_stop
+ret_from_intr:
+       GET_THREAD_INFO(%ebp)
+check_userspace:
+       movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
+       movb CS(%esp), %al
+       testl $(VM_MASK | 2), %eax
+       jz resume_kernel
+ENTRY(resume_userspace)
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       movl TI_flags(%ebp), %ecx
+       andl $_TIF_WORK_MASK, %ecx      # is there any work to be done on
+                                       # int/exception return?
+       jne work_pending
+       jmp restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+       cli
+       cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
+       jnz restore_nocheck
+need_resched:
+       movl TI_flags(%ebp), %ecx       # need_resched set ?
+       testb $_TIF_NEED_RESCHED, %cl
+       jz restore_all
+       testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
+       jz restore_all
+       call preempt_schedule_irq
+       jmp need_resched
+#endif
+       CFI_ENDPROC
+
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol.  */
+
+       # sysenter call handler stub
+ENTRY(sysenter_entry)
+       CFI_STARTPROC simple
+       CFI_DEF_CFA esp, 0
+       CFI_REGISTER esp, ebp
+       movl SYSENTER_stack_esp0(%esp),%esp
+sysenter_past_esp:
+       /*
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable it straight after entry:
+        */
+       sti
+       pushl $(__USER_DS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ss, 0*/
+       pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esp, 0
+       pushfl
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $(__USER_CS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET cs, 0*/
+       /*
+        * Push current_thread_info()->sysenter_return to the stack.
+        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
+        * pushed above; +8 corresponds to copy_thread's esp0 setting.
+        */
+       pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eip, 0
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+       cmpl $__PAGE_OFFSET-3,%ebp
+       jae syscall_fault
+1:     movl (%ebp),%ebp
+.section __ex_table,"a"
+       .align 4
+       .long 1b,syscall_fault
+.previous
+
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
+       testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       jnz syscall_trace_entry
+       cmpl $(nr_syscalls), %eax
+       jae syscall_badsys
+       call *sys_call_table(,%eax,4)
+       movl %eax,EAX(%esp)
+       DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       testw $_TIF_ALLWORK_MASK, %cx
+       jne syscall_exit_work
+/* if something modifies registers it must also disable sysexit */
+       movl EIP(%esp), %edx
+       movl OLDESP(%esp), %ecx
+       xorl %ebp,%ebp
+#ifdef CONFIG_XEN
+       TRACE_IRQS_ON
+       __ENABLE_INTERRUPTS
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
+       __TEST_PENDING
+       jnz  14f                        # process more events if necessary...
+       movl ESI(%esp), %esi
+       sysexit
+14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
+       push %esp
+       call evtchn_do_upcall
+       add  $4,%esp
+       jmp  ret_from_intr
+#else
+       TRACE_IRQS_ON
+       sti
+       sysexit
+#endif /* !CONFIG_XEN */
+       CFI_ENDPROC
+
+
+       # system call handler stub
+ENTRY(system_call)
+       RING0_INT_FRAME                 # can't unwind into user space anyway
+       pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+       testl $TF_MASK,EFLAGS(%esp)
+       jz no_singlestep
+       orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+no_singlestep:
+                                       # system call tracing in operation / emulation
+       /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
+       testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+       jnz syscall_trace_entry
+       cmpl $(nr_syscalls), %eax
+       jae syscall_badsys
+syscall_call:
+       call *sys_call_table(,%eax,4)
+       movl %eax,EAX(%esp)             # store the return value
+syscall_exit:
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       testw $_TIF_ALLWORK_MASK, %cx   # current->work
+       jne syscall_exit_work
+
+restore_all:
+#ifndef CONFIG_XEN
+       movl EFLAGS(%esp), %eax         # mix EFLAGS, SS and CS
+       # Warning: OLDSS(%esp) contains the wrong/random values if we
+       # are returning to the kernel.
+       # See comments in process.c:copy_thread() for details.
+       movb OLDSS(%esp), %ah
+       movb CS(%esp), %al
+       andl $(VM_MASK | (4 << 8) | 3), %eax
+       cmpl $((4 << 8) | 3), %eax
+       CFI_REMEMBER_STATE
+       je ldt_ss                       # returning to user-space with LDT SS
+restore_nocheck:
+#else
+restore_nocheck:
+       movl EFLAGS(%esp), %eax
+       testl $(VM_MASK|NMI_MASK), %eax
+       CFI_REMEMBER_STATE
+       jnz hypervisor_iret
+       shr $9, %eax                    # EAX[0] == IRET_EFLAGS.IF
+       GET_VCPU_INFO
+       andb evtchn_upcall_mask(%esi),%al
+       andb $1,%al                     # EAX[0] == IRET_EFLAGS.IF & event_mask
+       CFI_REMEMBER_STATE
+       jnz restore_all_enable_events   #        != 0 => enable event delivery
+#endif
+       TRACE_IRQS_IRET
+restore_nocheck_notrace:
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+1:     iret
+.section .fixup,"ax"
+iret_exc:
+#ifndef CONFIG_XEN
+       TRACE_IRQS_ON
+       sti
+#endif
+       pushl $0                        # no error code
+       pushl $do_iret_error
+       jmp error_code
+.previous
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+
+       CFI_RESTORE_STATE
+#ifndef CONFIG_XEN
+ldt_ss:
+       larl OLDSS(%esp), %eax
+       jnz restore_nocheck
+       testl $0x00400000, %eax         # returning to 32bit stack?
+       jnz restore_nocheck             # all right, normal return
+       /* If returning to userspace with 16bit stack,
+        * try to fix the higher word of ESP, as the CPU
+        * won't restore it.
+        * This is an "official" bug of all the x86-compatible
+        * CPUs, which we can try to work around to make
+        * dosemu and wine happy. */
+       subl $8, %esp           # reserve space for switch16 pointer
+       CFI_ADJUST_CFA_OFFSET 8
+       cli
+       TRACE_IRQS_OFF
+       movl %esp, %eax
+       /* Set up the 16bit stack frame with switch32 pointer on top,
+        * and a switch16 pointer on top of the current frame. */
+       call setup_x86_bogus_stack
+       CFI_ADJUST_CFA_OFFSET -8        # frame has moved
+       TRACE_IRQS_IRET
+       RESTORE_REGS
+       lss 20+4(%esp), %esp    # switch to 16bit stack
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+#else
+        ALIGN
+restore_all_enable_events:
+       TRACE_IRQS_ON
+       __ENABLE_INTERRUPTS
+scrit: /**** START OF CRITICAL REGION ****/
+       __TEST_PENDING
+       jnz  14f                        # process more events if necessary...
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+1:     iret
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+14:    __DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       jmp  11f
+ecrit:  /**** END OF CRITICAL REGION ****/
+
+       CFI_RESTORE_STATE
+hypervisor_iret:
+       andl $~NMI_MASK, EFLAGS(%esp)
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp  hypercall_page + (__HYPERVISOR_iret * 32)
+#endif
+       CFI_ENDPROC
+
+       # perform work that needs to be done immediately before resumption
+       ALIGN
+       RING0_PTREGS_FRAME              # can't unwind into user space anyway
+work_pending:
+       testb $_TIF_NEED_RESCHED, %cl
+       jz work_notifysig
+work_resched:
+       call schedule
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+                                       # between sampling and the iret
+       TRACE_IRQS_OFF
+       movl TI_flags(%ebp), %ecx
+       andl $_TIF_WORK_MASK, %ecx      # is there any work to be done other
+                                       # than syscall tracing?
+       jz restore_all
+       testb $_TIF_NEED_RESCHED, %cl
+       jnz work_resched
+
+work_notifysig:                                # deal with pending signals and
+                                       # notify-resume requests
+       testl $VM_MASK, EFLAGS(%esp)
+       movl %esp, %eax
+       jne work_notifysig_v86          # returning to kernel-space or
+                                       # vm86-space
+       xorl %edx, %edx
+       call do_notify_resume
+       jmp resume_userspace_sig
+
+       ALIGN
+work_notifysig_v86:
+#ifdef CONFIG_VM86
+       pushl %ecx                      # save ti_flags for do_notify_resume
+       CFI_ADJUST_CFA_OFFSET 4
+       call save_v86_state             # %eax contains pt_regs pointer
+       popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
+       movl %eax, %esp
+       xorl %edx, %edx
+       call do_notify_resume
+       jmp resume_userspace_sig
+#endif
+
+       # perform syscall exit tracing
+       ALIGN
+syscall_trace_entry:
+       movl $-ENOSYS,EAX(%esp)
+       movl %esp, %eax
+       xorl %edx,%edx
+       call do_syscall_trace
+       cmpl $0, %eax
+       jne resume_userspace            # ret != 0 -> running under PTRACE_SYSEMU,
+                                       # so must skip actual syscall
+       movl ORIG_EAX(%esp), %eax
+       cmpl $(nr_syscalls), %eax
+       jnae syscall_call
+       jmp syscall_exit
+
+       # perform syscall exit tracing
+       ALIGN
+syscall_exit_work:
+       testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+       jz work_pending
+       TRACE_IRQS_ON
+       ENABLE_INTERRUPTS               # could let do_syscall_trace() call
+                                       # schedule() instead
+       movl %esp, %eax
+       movl $1, %edx
+       call do_syscall_trace
+       jmp resume_userspace
+       CFI_ENDPROC
+
+       RING0_INT_FRAME                 # can't unwind into user space anyway
+syscall_fault:
+       pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       GET_THREAD_INFO(%ebp)
+       movl $-EFAULT,EAX(%esp)
+       jmp resume_userspace
+
+syscall_badsys:
+       movl $-ENOSYS,EAX(%esp)
+       jmp resume_userspace
+       CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+#define FIXUP_ESPFIX_STACK \
+       movl %esp, %eax; \
+       /* switch to 32bit stack using the pointer on top of 16bit stack */ \
+       lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+       /* copy data from 16bit stack to 32bit stack */ \
+       call fixup_x86_bogus_stack; \
+       /* put ESP to the proper location */ \
+       movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+       pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4; \
+       movl %ss, %eax; \
+       /* see if on 16bit stack */ \
+       cmpw $__ESPFIX_SS, %ax; \
+       je 28f; \
+27:    popl %eax; \
+       CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28:    movl $__KERNEL_DS, %eax; \
+       movl %eax, %ds; \
+       movl %eax, %es; \
+       /* switch to 32bit stack */ \
+       FIXUP_ESPFIX_STACK; \
+       jmp 27b; \
+.previous
+
+/*
+ * Build the entry stubs and pointer table with
+ * some assembler magic.
+ */
+.data
+ENTRY(interrupt)
+.text
+
+vector=0
+ENTRY(irq_entries_start)
+       RING0_INT_FRAME
+.rept NR_IRQS
+       ALIGN
+ .if vector
+       CFI_ADJUST_CFA_OFFSET -4
+ .endif
+1:     pushl $~(vector)
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp common_interrupt
+.data
+       .long 1b
+.text
+vector=vector+1
+.endr
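
The pushl $~(vector) is deliberate: it leaves a negative value in the
orig_eax slot, which distinguishes interrupt frames from system calls, and
the handler undoes the complement. A one-line C model (the native do_IRQ
recovers the vector the same way):

    /* Sketch: invert the stub's one's-complement encoding. */
    static inline unsigned int stub_vector(long orig_eax)
    {
            return (unsigned int)~orig_eax;
    }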
+
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
+       ALIGN
+common_interrupt:
+       SAVE_ALL
+       TRACE_IRQS_OFF
+       movl %esp,%eax
+       call do_IRQ
+       jmp ret_from_intr
+       CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr)      \
+ENTRY(name)                            \
+       RING0_INT_FRAME;                \
+       pushl $~(nr);                   \
+       CFI_ADJUST_CFA_OFFSET 4;        \
+       SAVE_ALL;                       \
+       TRACE_IRQS_OFF                  \
+       movl %esp,%eax;                 \
+       call smp_/**/name;              \
+       jmp ret_from_intr;              \
+       CFI_ENDPROC
+
+/* The include is where all of the SMP etc. interrupts come from */
+#include "entry_arch.h"
+#else
+#define UNWIND_ESPFIX_STACK
+#endif
+
+ENTRY(divide_error)
+       RING0_INT_FRAME
+       pushl $0                        # no error code
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_divide_error
+       CFI_ADJUST_CFA_OFFSET 4
+       ALIGN
+error_code:
+       pushl %ds
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ds, 0*/
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eax, 0
+       xorl %eax, %eax
+       pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebp, 0
+       pushl %edi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edi, 0
+       pushl %esi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esi, 0
+       pushl %edx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edx, 0
+       decl %eax                       # eax = -1
+       pushl %ecx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ecx, 0
+       pushl %ebx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebx, 0
+       cld
+       pushl %es
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET es, 0*/
+       UNWIND_ESPFIX_STACK
+       popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
+       /*CFI_REGISTER es, ecx*/
+       movl ES(%esp), %edi             # get the function address
+       movl ORIG_EAX(%esp), %edx       # get the error code
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       /*CFI_REL_OFFSET es, ES*/
+       movl $(__USER_DS), %ecx
+       movl %ecx, %ds
+       movl %ecx, %es
+       movl %esp,%eax                  # pt_regs pointer
+       call *%edi
+       jmp ret_from_exception
+       CFI_ENDPROC
+
+#ifdef CONFIG_XEN
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+#
+# The sysexit critical region is slightly different. sysexit
+# atomically removes the entire stack frame. If we interrupt in the
+# critical region we know that the entire frame is present and correct
+# so we can simply throw away the new one.
+ENTRY(hypervisor_callback)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       movl EIP(%esp),%eax
+       cmpl $scrit,%eax
+       jb   11f
+       cmpl $ecrit,%eax
+       jb   critical_region_fixup
+       cmpl $sysexit_scrit,%eax
+       jb   11f
+       cmpl $sysexit_ecrit,%eax
+       ja   11f
+       addl $OLDESP,%esp               # Remove eflags...ebx from stack frame.
+11:    push %esp
+       CFI_ADJUST_CFA_OFFSET 4
+       call evtchn_do_upcall
+       add  $4,%esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp  ret_from_intr
+       CFI_ENDPROC
+
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+       movzbl critical_fixup_table-scrit(%eax),%ecx # %eax contains num bytes popped
+       cmpb $0xff,%cl                  # 0xff => vcpu_info critical region
+       jne  15f
+       xorl %ecx,%ecx
+15:    leal (%esp,%ecx),%esi           # %esi points at end of src region
+       leal OLDESP(%esp),%edi          # %edi points at end of dst region
+       shrl $2,%ecx                    # convert byte count to dwords
+       je   17f                        # skip loop if nothing to copy
+16:    subl $4,%esi                    # pre-decrementing copy loop
+       subl $4,%edi
+       movl (%esi),%eax
+       movl %eax,(%edi)
+       loop 16b
+17:    movl %edi,%esp                  # final %edi is top of merged stack
+       jmp  11b
+
+.section .rodata,"a"
+critical_fixup_table:
+       .byte 0xff,0xff,0xff            # testb $0xff,(%esi) = __TEST_PENDING
+       .byte 0xff,0xff                 # jnz  14f
+       .byte 0x00                      # pop  %ebx
+       .byte 0x04                      # pop  %ecx
+       .byte 0x08                      # pop  %edx
+       .byte 0x0c                      # pop  %esi
+       .byte 0x10                      # pop  %edi
+       .byte 0x14                      # pop  %ebp
+       .byte 0x18                      # pop  %eax
+       .byte 0x1c                      # pop  %ds
+       .byte 0x20                      # pop  %es
+       .byte 0x24,0x24,0x24            # add  $4,%esp
+       .byte 0x28                      # iret
+       .byte 0xff,0xff,0xff,0xff       # movb $1,1(%esi)
+       .byte 0x00,0x00                 # jmp  11b
+.previous
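
In C, the table lookup in critical_region_fixup amounts to the following
model (not kernel code; scrit and ecrit are the labels above):

    /* One table byte per code byte in [scrit, ecrit): how many bytes of
     * the interrupted frame had already been popped at that point.
     * 0xff marks the vcpu_info accesses and means "treat as zero". */
    static unsigned int bytes_already_popped(const unsigned char *table,
                                             unsigned long eip,
                                             unsigned long scrit)
    {
            unsigned int v = table[eip - scrit];
            return v == 0xff ? 0 : v;
    }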
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(failsafe_callback)
+       pushl %eax
+       movl $1,%eax
+1:     mov 4(%esp),%ds
+2:     mov 8(%esp),%es
+3:     mov 12(%esp),%fs
+4:     mov 16(%esp),%gs
+       testl %eax,%eax
+       popl %eax
+       jz 5f
+       addl $16,%esp           # EAX != 0 => Category 2 (Bad IRET)
+       jmp iret_exc
+5:     addl $16,%esp           # EAX == 0 => Category 1 (Bad segment)
+       RING0_INT_FRAME
+       pushl $0
+       SAVE_ALL
+       jmp ret_from_exception
+.section .fixup,"ax";          \
+6:     xorl %eax,%eax;         \
+       movl %eax,4(%esp);      \
+       jmp 1b;                 \
+7:     xorl %eax,%eax;         \
+       movl %eax,8(%esp);      \
+       jmp 2b;                 \
+8:     xorl %eax,%eax;         \
+       movl %eax,12(%esp);     \
+       jmp 3b;                 \
+9:     xorl %eax,%eax;         \
+       movl %eax,16(%esp);     \
+       jmp 4b;                 \
+.previous;                     \
+.section __ex_table,"a";       \
+       .align 4;               \
+       .long 1b,6b;            \
+       .long 2b,7b;            \
+       .long 3b,8b;            \
+       .long 4b,9b;            \
+.previous
+#endif
+       CFI_ENDPROC
+
+ENTRY(coprocessor_error)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(simd_coprocessor_error)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_simd_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(device_not_available)
+       RING0_INT_FRAME
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+#ifndef CONFIG_XEN
+       movl %cr0, %eax
+       testl $0x4, %eax                # EM (math emulation bit)
+       je device_available_emulate
+       pushl $0                        # temporary storage for ORIG_EIP
+       CFI_ADJUST_CFA_OFFSET 4
+       call math_emulate
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+       jmp ret_from_exception
+device_available_emulate:
+#endif
+       preempt_stop
+       call math_state_restore
+       jmp ret_from_exception
+       CFI_ENDPROC
+
+#ifndef CONFIG_XEN
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label)           \
+       cmpw $__KERNEL_CS,4(%esp);              \
+       jne ok;                                 \
+label:                                         \
+       movl SYSENTER_stack_esp0+offset(%esp),%esp;     \
+       pushfl;                                 \
+       pushl $__KERNEL_CS;                     \
+       pushl $sysenter_past_esp
+#endif /* CONFIG_XEN */
+
+KPROBE_ENTRY(debug)
+       RING0_INT_FRAME
+#ifndef CONFIG_XEN
+       cmpl $sysenter_entry,(%esp)
+       jne debug_stack_correct
+       FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+#endif /* !CONFIG_XEN */
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx                  # error code 0
+       movl %esp,%eax                  # pt_regs pointer
+       call do_debug
+       jmp ret_from_exception
+       CFI_ENDPROC
+       .previous .text
+#ifndef CONFIG_XEN
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       movl %ss, %eax
+       cmpw $__ESPFIX_SS, %ax
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       je nmi_16bit_stack
+       cmpl $sysenter_entry,(%esp)
+       je nmi_stack_fixup
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       movl %esp,%eax
+       /* Do not access memory above the end of our stack page,
+        * it might not exist.
+        */
+       andl $(THREAD_SIZE-1),%eax
+       cmpl $(THREAD_SIZE-20),%eax
+       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
+       jae nmi_stack_correct
+       cmpl $sysenter_entry,12(%esp)
+       je nmi_debug_stack_check
+nmi_stack_correct:
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_nmi
+       jmp restore_nocheck_notrace
+       CFI_ENDPROC
+
+nmi_stack_fixup:
+       FIX_STACK(12,nmi_stack_correct, 1)
+       jmp nmi_stack_correct
+nmi_debug_stack_check:
+       cmpw $__KERNEL_CS,16(%esp)
+       jne nmi_stack_correct
+       cmpl $debug,(%esp)
+       jb nmi_stack_correct
+       cmpl $debug_esp_fix_insn,(%esp)
+       ja nmi_stack_correct
+       FIX_STACK(24,nmi_stack_correct, 1)
+       jmp nmi_stack_correct
+
+nmi_16bit_stack:
+       RING0_INT_FRAME
+       /* create the pointer to lss back */
+       pushl %ss
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl %esp
+       CFI_ADJUST_CFA_OFFSET 4
+       movzwl %sp, %esp
+       addw $4, (%esp)
+       /* copy the iret frame of 12 bytes */
+       .rept 3
+       pushl 16(%esp)
+       CFI_ADJUST_CFA_OFFSET 4
+       .endr
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       FIXUP_ESPFIX_STACK              # %eax == %esp
+       CFI_ADJUST_CFA_OFFSET -20       # the frame has now moved
+       xorl %edx,%edx                  # zero error code
+       call do_nmi
+       RESTORE_REGS
+       lss 12+4(%esp), %esp            # back to 16bit stack
+1:     iret
+       CFI_ENDPROC
+.section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+.previous
+#else
+ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_nmi
+       orl  $NMI_MASK, EFLAGS(%esp)
+       jmp restore_all
+       CFI_ENDPROC
+#endif
+
+KPROBE_ENTRY(int3)
+       RING0_INT_FRAME
+       pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+       xorl %edx,%edx          # zero error code
+       movl %esp,%eax          # pt_regs pointer
+       call do_int3
+       jmp ret_from_exception
+       CFI_ENDPROC
+       .previous .text
+
+ENTRY(overflow)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_overflow
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(bounds)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_bounds
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(invalid_op)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_invalid_op
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(coprocessor_segment_overrun)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_coprocessor_segment_overrun
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(invalid_TSS)
+       RING0_EC_FRAME
+       pushl $do_invalid_TSS
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(segment_not_present)
+       RING0_EC_FRAME
+       pushl $do_segment_not_present
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+ENTRY(stack_segment)
+       RING0_EC_FRAME
+       pushl $do_stack_segment
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+KPROBE_ENTRY(general_protection)
+       RING0_EC_FRAME
+       pushl $do_general_protection
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+       .previous .text
+
+ENTRY(alignment_check)
+       RING0_EC_FRAME
+       pushl $do_alignment_check
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+KPROBE_ENTRY(page_fault)
+       RING0_EC_FRAME
+       pushl $do_page_fault
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+       .previous .text
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl machine_check_vector
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+#endif
+
+#ifndef CONFIG_XEN
+ENTRY(spurious_interrupt_bug)
+       RING0_INT_FRAME
+       pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl $do_spurious_interrupt_bug
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+#endif /* !CONFIG_XEN */
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+       CFI_STARTPROC
+       movl    4(%esp), %edx
+       movl    (%esp), %ecx
+       leal    4(%esp), %eax
+       movl    %ebx, EBX(%edx)
+       xorl    %ebx, %ebx
+       movl    %ebx, ECX(%edx)
+       movl    %ebx, EDX(%edx)
+       movl    %esi, ESI(%edx)
+       movl    %edi, EDI(%edx)
+       movl    %ebp, EBP(%edx)
+       movl    %ebx, EAX(%edx)
+       movl    $__USER_DS, DS(%edx)
+       movl    $__USER_DS, ES(%edx)
+       movl    %ebx, ORIG_EAX(%edx)
+       movl    %ecx, EIP(%edx)
+       movl    12(%esp), %ecx
+       movl    $__KERNEL_CS, CS(%edx)
+       movl    %ebx, EFLAGS(%edx)
+       movl    %eax, OLDESP(%edx)
+       movl    8(%esp), %eax
+       movl    %ecx, 8(%esp)
+       movl    EBX(%edx), %ebx
+       movl    $__KERNEL_DS, OLDSS(%edx)
+       jmpl    *%eax
+       CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
+
+ENTRY(fixup_4gb_segment)
+       RING0_EC_FRAME
+       pushl $do_fixup_4gb_segment
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+
+.section .rodata,"a"
+.align 4
+#include "syscall_table.S"
+
+syscall_table_size=(.-sys_call_table)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/fixup.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/fixup.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * fixup.c
+ * 
+ * Binary-rewriting of certain IA32 instructions, on notification by Xen.
+ * Used to avoid repeated slow emulation of common instructions used by the
+ * user-space TLS (Thread-Local Storage) libraries.
+ * 
+ * **** NOTE ****
+ *  Issues with the binary rewriting have caused it to be removed. Instead
+ *  we rely on Xen's emulator to boot the kernel, and then print a banner
+ *  message recommending that the user disable /lib/tls.
+ * 
+ * Copyright (c) 2004, K A Fraser
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+
+#define DP(_f, _args...) printk(KERN_ALERT "  " _f "\n" , ## _args )
+
+fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
+{
+       static unsigned long printed = 0;
+       char info[100];
+       int i;
+
+       /* Ignore statically-linked init. */
+       if (current->tgid == 1)
+               return;
+            
+       HYPERVISOR_vm_assist(
+               VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
+
+       if (test_and_set_bit(0, &printed))
+               return;
+
+       sprintf(info, "%s (pid=%d)", current->comm, current->tgid);
+
+       DP("");
+       DP("***************************************************************");
+       DP("***************************************************************");
+       DP("** WARNING: Currently emulating unsupported memory accesses  **");
+       DP("**          in /lib/tls glibc libraries. The emulation is    **");
+       DP("**          slow. To ensure full performance you should      **");
+       DP("**          install a 'xen-friendly' (nosegneg) version of   **");
+       DP("**          the library, or disable tls support by executing **");
+       DP("**          the following as root:                           **");
+       DP("**          mv /lib/tls /lib/tls.disabled                    **");
+       DP("** Offending process: %-38.38s **", info);
+       DP("***************************************************************");
+       DP("***************************************************************");
+       DP("");
+
+       for (i = 5; i > 0; i--) {
+               touch_softlockup_watchdog();
+               printk("Pausing... %d", i);
+               mdelay(1000);
+               printk("\b\b\b\b\b\b\b\b\b\b\b\b");
+       }
+
+       printk("Continuing...\n\n");
+}
+
+static int __init fixup_init(void)
+{
+       HYPERVISOR_vm_assist(
+               VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
+       return 0;
+}
+__initcall(fixup_init);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/head-xen.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/head-xen.S       Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,207 @@
+
+
+.text
+#include <linux/elfnote.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/elfnote.h>
+
+/*
+ * References to members of the new_cpu_data structure.
+ */
+
+#define X86            new_cpu_data+CPUINFO_x86
+#define X86_VENDOR     new_cpu_data+CPUINFO_x86_vendor
+#define X86_MODEL      new_cpu_data+CPUINFO_x86_model
+#define X86_MASK       new_cpu_data+CPUINFO_x86_mask
+#define X86_HARD_MATH  new_cpu_data+CPUINFO_hard_math
+#define X86_CPUID      new_cpu_data+CPUINFO_cpuid_level
+#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
+#define X86_VENDOR_ID  new_cpu_data+CPUINFO_x86_vendor_id
+
+#define VIRT_ENTRY_OFFSET 0x0
+.org VIRT_ENTRY_OFFSET
+ENTRY(startup_32)
+       movl %esi,xen_start_info
+       cld
+
+       /* Set up the stack pointer */
+       movl $(init_thread_union+THREAD_SIZE),%esp
+
+       /* get vendor info */
+       xorl %eax,%eax                  # call CPUID with 0 -> return vendor ID
+       XEN_CPUID
+       movl %eax,X86_CPUID             # save CPUID level
+       movl %ebx,X86_VENDOR_ID         # lo 4 chars
+       movl %edx,X86_VENDOR_ID+4       # next 4 chars
+       movl %ecx,X86_VENDOR_ID+8       # last 4 chars
+
+       movl $1,%eax            # Use the CPUID instruction to get CPU type
+       XEN_CPUID
+       movb %al,%cl            # save reg for future use
+       andb $0x0f,%ah          # mask processor family
+       movb %ah,X86
+       andb $0xf0,%al          # mask model
+       shrb $4,%al
+       movb %al,X86_MODEL
+       andb $0x0f,%cl          # mask mask revision
+       movb %cl,X86_MASK
+       movl %edx,X86_CAPABILITY
+
+       movb $1,X86_HARD_MATH
+
+       xorl %eax,%eax                  # Clear FS/GS and LDT
+       movl %eax,%fs
+       movl %eax,%gs
+       cld                     # gcc2 wants the direction flag cleared at all times
+
+       pushl %eax              # fake return address
+       jmp start_kernel
+
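The family/model/stepping unpacking above follows the standard CPUID
leaf-1 EAX layout; as a C cross-check (a sketch, not part of the patch):

    #include <stdint.h>

    struct cpu_sig { uint8_t family, model, mask; };

    static struct cpu_sig decode_cpuid_leaf1(uint32_t eax)
    {
            struct cpu_sig s;
            s.family = (eax >> 8) & 0x0f;  /* andb $0x0f,%ah              */
            s.model  = (eax >> 4) & 0x0f;  /* andb $0xf0,%al; shrb $4,%al */
            s.mask   = eax & 0x0f;         /* andb $0x0f,%cl (stepping)   */
            return s;
    }
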
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+       CFI_STARTPROC
+.skip 0x1000
+       CFI_ENDPROC
+
+/*
+ * Real beginning of normal "text" segment
+ */
+ENTRY(stext)
+ENTRY(_stext)
+
+/*
+ * BSS section
+ */
+.section ".bss.page_aligned","w"
+ENTRY(empty_zero_page)
+       .fill 4096,1,0
+
+/*
+ * This starts the data section.
+ */
+.data
+
+/*
+ * The Global Descriptor Table contains 32 quadwords, per-CPU.
+ */
+       .align L1_CACHE_BYTES
+ENTRY(cpu_gdt_table)
+       .quad 0x0000000000000000        /* NULL descriptor */
+       .quad 0x0000000000000000        /* 0x0b reserved */
+       .quad 0x0000000000000000        /* 0x13 reserved */
+       .quad 0x0000000000000000        /* 0x1b reserved */
+       .quad 0x0000000000000000        /* 0x20 unused */
+       .quad 0x0000000000000000        /* 0x28 unused */
+       .quad 0x0000000000000000        /* 0x33 TLS entry 1 */
+       .quad 0x0000000000000000        /* 0x3b TLS entry 2 */
+       .quad 0x0000000000000000        /* 0x43 TLS entry 3 */
+       .quad 0x0000000000000000        /* 0x4b reserved */
+       .quad 0x0000000000000000        /* 0x53 reserved */
+       .quad 0x0000000000000000        /* 0x5b reserved */
+
+       .quad 0x00cf9a000000ffff        /* 0x60 kernel 4GB code at 0x00000000 */
+       .quad 0x00cf92000000ffff        /* 0x68 kernel 4GB data at 0x00000000 */
+       .quad 0x00cffa000000ffff        /* 0x73 user 4GB code at 0x00000000 */
+       .quad 0x00cff2000000ffff        /* 0x7b user 4GB data at 0x00000000 */
+
+       .quad 0x0000000000000000        /* 0x80 TSS descriptor */
+       .quad 0x0000000000000000        /* 0x88 LDT descriptor */
+
+       /*
+        * Segments used for calling PnP BIOS have byte granularity.
+        * The code and data segments have fixed 64k limits,
+        * the transfer segment sizes are set at run time.
+        */
+       .quad 0x0000000000000000        /* 0x90 32-bit code */
+       .quad 0x0000000000000000        /* 0x98 16-bit code */
+       .quad 0x0000000000000000        /* 0xa0 16-bit data */
+       .quad 0x0000000000000000        /* 0xa8 16-bit data */
+       .quad 0x0000000000000000        /* 0xb0 16-bit data */
+
+       /*
+        * The APM segments have byte granularity and their bases
+        * are set at run time.  All have 64k limits.
+        */
+       .quad 0x0000000000000000        /* 0xb8 APM CS    code */
+       .quad 0x0000000000000000        /* 0xc0 APM CS 16 code (16 bit) */
+       .quad 0x0000000000000000        /* 0xc8 APM DS    data */
+
+       .quad 0x0000000000000000        /* 0xd0 - ESPFIX 16-bit SS */
+       .quad 0x0000000000000000        /* 0xd8 - unused */
+       .quad 0x0000000000000000        /* 0xe0 - unused */
+       .quad 0x0000000000000000        /* 0xe8 - unused */
+       .quad 0x0000000000000000        /* 0xf0 - unused */
+       .quad 0x0000000000000000        /* 0xf8 - GDT entry 31: double-fault TSS */
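
Each .quad above packs base, limit and flags in the standard descriptor
layout; decoding one in C (a sketch) shows why 0x00cf9a000000ffff is the
flat 4 GB kernel code segment:

    #include <stdint.h>
    #include <stdio.h>

    static void decode_descriptor(uint64_t d)
    {
            uint32_t base   = ((d >> 16) & 0xffffffu) | (((d >> 56) & 0xffu) << 24);
            uint32_t limit  = (d & 0xffffu) | (((d >> 48) & 0xfu) << 16);
            unsigned gran4k = (d >> 55) & 1;     /* limit counted in 4 KiB units */
            unsigned access = (d >> 40) & 0xffu; /* 0x9a: present, DPL0, code, readable */
            printf("base=%#x limit=%#x%s access=%#x\n",
                   base, limit, gran4k ? " (4K units)" : "", access);
    }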
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+/*
+ * __xen_guest information
+ */
+.macro utoa value
+ .if (\value) < 0 || (\value) >= 0x10
+       utoa (((\value)>>4)&0x0fffffff)
+ .endif
+ .if ((\value) & 0xf) < 10
+  .byte '0' + ((\value) & 0xf)
+ .else
+  .byte 'A' + ((\value) & 0xf) - 10
+ .endif
+.endm
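
The recursive macro is just hex formatting at assembly time; its C
equivalent (a sketch -- the macro's 0x0fffffff mask only guards against
sign-extension, which an unsigned type avoids):

    static void utoa_c(unsigned long v, char **out)
    {
            if (v >= 0x10)
                    utoa_c(v >> 4, out);
            *(*out)++ = "0123456789ABCDEF"[v & 0xf];
    }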
+
+.section __xen_guest
+       .ascii  "GUEST_OS=linux,GUEST_VER=2.6"
+       .ascii  ",XEN_VER=xen-3.0"
+       .ascii  ",VIRT_BASE=0x"
+               utoa __PAGE_OFFSET
+       .ascii  ",ELF_PADDR_OFFSET=0x"
+               utoa __PAGE_OFFSET
+       .ascii  ",VIRT_ENTRY=0x"
+               utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
+       .ascii  ",HYPERCALL_PAGE=0x"
+               utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
+       .ascii  ",FEATURES=writable_page_tables"
+       .ascii           "|writable_descriptor_tables"
+       .ascii           "|auto_translated_physmap"
+       .ascii           "|pae_pgdir_above_4gb"
+       .ascii           "|supervisor_mode_kernel"
+#ifdef CONFIG_X86_PAE
+       .ascii  ",PAE=yes[extended-cr3]"
+#else
+       .ascii  ",PAE=no"
+#endif
+       .ascii  ",LOADER=generic"
+       .byte   0
+#endif /* CONFIG_XEN_COMPAT <= 0x030002 */
+
+
+       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "linux")       
+       ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "2.6")
+       ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
+       ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  __PAGE_OFFSET)
+#if CONFIG_XEN_COMPAT <= 0x030002
+       ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  __PAGE_OFFSET)
+#else
+       ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
+#endif
+       ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  startup_32)
+       ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
+       ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
+       ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
+#ifdef CONFIG_X86_PAE
+       ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
+       ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .quad,  _PAGE_PRESENT,_PAGE_PRESENT)
+#else
+       ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "no")
+       ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  _PAGE_PRESENT,_PAGE_PRESENT)
+#endif
+       ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
+       ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  1)
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/init_task-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/init_task-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,51 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+#define swapper_pg_dir ((pgd_t *)NULL)
+struct mm_struct init_mm = INIT_MM(init_mm);
+#undef swapper_pg_dir
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is THREAD_SIZE aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+#ifndef CONFIG_X86_NO_TSS
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's.
+ */ 
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/io_apic-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/io_apic-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,2777 @@
+/*
+ *     Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *     Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *     Many thanks to Stig Venaas for trying out countless experimental
+ *     patches and reporting/debugging problems patiently!
+ *
+ *     (c) 1999, Multiple IO-APIC support, developed by
+ *     Ken-ichi Yaku <yaku@xxxxxxxxxxxxxxxxxxxx> and
+ *      Hidemi Kishimoto <kisimoto@xxxxxxxxxxxxxxxxxxxx>,
+ *     further tested and cleaned up by Zach Brown <zab@xxxxxxxxxx>
+ *     and Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively
+ *     Paul Diefenbaugh        :       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+
+#include <mach_apic.h>
+
+#include "io_ports.h"
+
+#ifdef CONFIG_XEN
+
+#include <xen/interface/xen.h>
+#include <xen/interface/physdev.h>
+
+/* Fake i8259 */
+#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
+#define disable_8259A_irq(_irq)  ((void)0)
+#define i8259A_irq_pending(_irq) (0)
+
+unsigned long io_apic_irqs;
+
+static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+{
+       struct physdev_apic apic_op;
+       int ret;
+
+       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+       apic_op.reg = reg;
+       ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+       if (ret)
+               return ret;
+       return apic_op.value;
+}
+
+static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct physdev_apic apic_op;
+
+       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+       apic_op.reg = reg;
+       apic_op.value = value;
+       HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+}
+
+#define io_apic_read(a,r)    xen_io_apic_read(a,r)
+#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+
+#endif /* CONFIG_XEN */
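+
+/*
+ * A minimal usage sketch (illustrative only; example_print_rte_low and its
+ * arguments are hypothetical, not part of this changeset): with the wrappers
+ * above in place, generic IO-APIC code keeps calling io_apic_read() and
+ * io_apic_write(), but under CONFIG_XEN each access becomes a PHYSDEVOP
+ * hypercall instead of an MMIO access, since the hypervisor owns the
+ * physical IO-APICs.
+ */
+#if 0
+static void example_print_rte_low(unsigned int apic, unsigned int pin)
+{
+       /* Low 32 bits of the redirection table entry for this pin. */
+       unsigned int lo = io_apic_read(apic, 0x10 + 2 * pin);
+
+       printk(KERN_DEBUG "apic %u pin %u RTE lo: %08x\n", apic, pin, lo);
+}
+#endif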
+
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
+
+/* Where, if anywhere, the i8259 is connected in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int timer_over_8254 __initdata = 1;
+
+/*
+ *     Is the SiS APIC rmw bug present?
+ *     -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+int disable_timer_pin_1 __initdata;
+
+/*
+ * Rough estimate of how many shared IRQs there are; this can
+ * be changed at any time.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical; we want to do it in O(1).
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+       int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
+#define vector_to_irq(vector)  \
+       (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#else
+#define vector_to_irq(vector)  (vector)
+#endif
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+       static int first_free_entry = NR_IRQS;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (entry->next)
+               entry = irq_2_pin + entry->next;
+
+       if (entry->pin != -1) {
+               entry->next = first_free_entry;
+               entry = irq_2_pin + entry->next;
+               if (++first_free_entry >= PIN_MAP_SIZE)
+                       panic("io_apic.c: whoops");
+       }
+       entry->apic = apic;
+       entry->pin = pin;
+}
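+
+/*
+ * Layout sketch for irq_2_pin[] (an illustration, not part of the original
+ * code): slots 0..NR_IRQS-1 are chain heads indexed by IRQ number, and
+ * slots NR_IRQS..PIN_MAP_SIZE-1 are overflow entries handed out through
+ * first_free_entry when an IRQ is shared by more than one pin.  Walking a
+ * chain therefore looks like this (example_walk_pins is hypothetical):
+ */
+#if 0
+static void example_walk_pins(unsigned int irq)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       for (;;) {
+               if (entry->pin != -1)
+                       printk(KERN_DEBUG "irq %u -> apic %d pin %d\n",
+                              irq, entry->apic, entry->pin);
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+#endif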
+
+#ifdef CONFIG_XEN
+#define clear_IO_APIC() ((void)0)
+#else
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq(unsigned int irq,
+                                     int oldapic, int oldpin,
+                                     int newapic, int newpin)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (1) {
+               if (entry->apic == oldapic && entry->pin == oldpin) {
+                       entry->apic = newapic;
+                       entry->pin = newpin;
+               }
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
+{
+       struct irq_pin_list *entry = irq_2_pin + irq;
+       unsigned int pin, reg;
+
+       for (;;) {
+               pin = entry->pin;
+               if (pin == -1)
+                       break;
+               reg = io_apic_read(entry->apic, 0x10 + pin*2);
+               reg &= ~disable;
+               reg |= enable;
+               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00010000, 0);
+}
+
+/* mask = 0 */
+static void __unmask_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0, 0x00010000);
+}
+
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
+}
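+
+/*
+ * The magic constants above index into the low dword of a redirection
+ * table entry: bit 16 (0x00010000) is the mask bit and bit 15 (0x00008000)
+ * is the trigger-mode bit (1 = level, 0 = edge), which is why
+ * __mask_and_edge_IO_APIC_irq() sets 0x00010000 and clears 0x00008000.
+ */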
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __mask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+       
+       /* Check delivery_mode to be sure we're not clearing an SMI pin */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+       *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       if (entry.delivery_mode == dest_SMI)
+               return;
+
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+       memset(&entry, 0, sizeof(entry));
+       entry.mask = 1;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
+{
+       unsigned long flags;
+       int pin;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+       unsigned int apicid_value;
+       cpumask_t tmp;
+       
+       cpus_and(tmp, cpumask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
+       apicid_value = cpu_mask_to_apicid(cpumask);
+       /* Prepare to do the io_apic_write */
+       apicid_value = apicid_value << 24;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       for (;;) {
+               pin = entry->pin;
+               if (pin == -1)
+                       break;
+               io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
+               if (!entry->next)
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+       set_irq_info(irq, cpumask);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if defined(CONFIG_IRQBALANCE)
+# include <asm/processor.h>    /* kernel_thread() */
+# include <linux/kernel_stat.h>        /* kstat */
+# include <linux/slab.h>               /* kmalloc() */
+# include <linux/timer.h>      /* time_after() */
+ 
+#ifdef CONFIG_BALANCED_IRQ_DEBUG
+#  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
+#  define Dprintk(x...) do { TDprintk(x); } while (0)
+# else
+#  define TDprintk(x...) 
+#  define Dprintk(x...) 
+# endif
+
+#define IRQBALANCE_CHECK_ARCH -999
+#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA                (HZ)
+
+static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
+static int physical_balance __read_mostly;
+static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
+
+static struct irq_cpu_info {
+       unsigned long * last_irq;
+       unsigned long * irq_delta;
+       unsigned long irq;
+} irq_cpu_data[NR_CPUS];
+
+#define CPU_IRQ(cpu)           (irq_cpu_data[cpu].irq)
+#define LAST_CPU_IRQ(cpu,irq)   (irq_cpu_data[cpu].last_irq[irq])
+#define IRQ_DELTA(cpu,irq)     (irq_cpu_data[cpu].irq_delta[irq])
+
+#define IDLE_ENOUGH(cpu,now) \
+       (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
+
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+       [0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
+
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       balance_irq_affinity[irq] = mask;
+}
+
+static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
+                       unsigned long now, int direction)
+{
+       int search_idle = 1;
+       int cpu = curr_cpu;
+
+       goto inside;
+
+       do {
+               if (unlikely(cpu == curr_cpu))
+                       search_idle = 0;
+inside:
+               if (direction == 1) {
+                       cpu++;
+                       if (cpu >= NR_CPUS)
+                               cpu = 0;
+               } else {
+                       cpu--;
+                       if (cpu == -1)
+                               cpu = NR_CPUS-1;
+               }
+       } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
+                       (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+       return cpu;
+}
+
+static inline void balance_irq(int cpu, int irq)
+{
+       unsigned long now = jiffies;
+       cpumask_t allowed_mask;
+       unsigned int new_cpu;
+               
+       if (irqbalance_disabled)
+               return; 
+
+       cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
+       new_cpu = move(cpu, allowed_mask, now, 1);
+       if (cpu != new_cpu) {
+               set_pending_irq(irq, cpumask_of_cpu(new_cpu));
+       }
+}
+
+static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
+{
+       int i, j;
+       Dprintk("Rotating IRQs among CPUs.\n");
+       for_each_online_cpu(i) {
+               for (j = 0; j < NR_IRQS; j++) {
+                       if (!irq_desc[j].action)
+                               continue;
+                       /* Is it a significant load? */
+                       if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
+                                               useful_load_threshold)
+                               continue;
+                       balance_irq(i, j);
+               }
+       }
+       balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+       return;
+}
+
+static void do_irq_balance(void)
+{
+       int i, j;
+       unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
+       unsigned long move_this_load = 0;
+       int max_loaded = 0, min_loaded = 0;
+       int load;
+       unsigned long useful_load_threshold = balanced_irq_interval + 10;
+       int selected_irq;
+       int tmp_loaded, first_attempt = 1;
+       unsigned long tmp_cpu_irq;
+       unsigned long imbalance = 0;
+       cpumask_t allowed_mask, target_cpu_mask, tmp;
+
+       for_each_possible_cpu(i) {
+               int package_index;
+               CPU_IRQ(i) = 0;
+               if (!cpu_online(i))
+                       continue;
+               package_index = CPU_TO_PACKAGEINDEX(i);
+               for (j = 0; j < NR_IRQS; j++) {
+                       unsigned long value_now, delta;
+                       /* Is this an active IRQ? */
+                       if (!irq_desc[j].action)
+                               continue;
+                       if ( package_index == i )
+                               IRQ_DELTA(package_index,j) = 0;
+                       /* Determine the total count per processor per IRQ */
+                       value_now = (unsigned long) kstat_cpu(i).irqs[j];
+
+                       /* Determine the activity per processor per IRQ */
+                       delta = value_now - LAST_CPU_IRQ(i,j);
+
+                       /* Update last_cpu_irq[][] for the next time */
+                       LAST_CPU_IRQ(i,j) = value_now;
+
+                       /* Ignore IRQs whose rate is less than the clock */
+                       if (delta < useful_load_threshold)
+                               continue;
+                       /* update the load for the processor or package total */
+                       IRQ_DELTA(package_index,j) += delta;
+
+                       /* Keep track of the higher numbered sibling as well */
+                       if (i != package_index)
+                               CPU_IRQ(i) += delta;
+                       /*
+                        * We have sibling A and sibling B in the package
+                        *
+                        * cpu_irq[A] = load for cpu A + load for cpu B
+                        * cpu_irq[B] = load for cpu B
+                        */
+                       CPU_IRQ(package_index) += delta;
+               }
+       }
+       /* Find the least loaded processor package */
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (min_cpu_irq > CPU_IRQ(i)) {
+                       min_cpu_irq = CPU_IRQ(i);
+                       min_loaded = i;
+               }
+       }
+       max_cpu_irq = ULONG_MAX;
+
+tryanothercpu:
+       /* Look for heaviest loaded processor.
+        * We may come back to get the next heaviest loaded processor.
+        * Skip processors with trivial loads.
+        */
+       tmp_cpu_irq = 0;
+       tmp_loaded = -1;
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
+                       continue;
+               if (max_cpu_irq <= CPU_IRQ(i)) 
+                       continue;
+               if (tmp_cpu_irq < CPU_IRQ(i)) {
+                       tmp_cpu_irq = CPU_IRQ(i);
+                       tmp_loaded = i;
+               }
+       }
+
+       if (tmp_loaded == -1) {
+               /* With a small number of heavy interrupt sources, a few
+                * CPUs can end up loaded much more than the rest.  We use
+                * Ingo's original approach to rotate them around.
+                */
+               if (!first_attempt && imbalance >= useful_load_threshold) {
+                       rotate_irqs_among_cpus(useful_load_threshold);
+                       return;
+               }
+               goto not_worth_the_effort;
+       }
+       
+       first_attempt = 0;              /* heaviest search */
+       max_cpu_irq = tmp_cpu_irq;      /* load */
+       max_loaded = tmp_loaded;        /* processor */
+       imbalance = (max_cpu_irq - min_cpu_irq) / 2;
+       
+       Dprintk("max_loaded cpu = %d\n", max_loaded);
+       Dprintk("min_loaded cpu = %d\n", min_loaded);
+       Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
+       Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
+       Dprintk("load imbalance = %lu\n", imbalance);
+
+       /* If the imbalance is less than an eighth (~12.5%) of the max
+        * load, we hit diminishing returns - quit.
+        */
+       if (imbalance < (max_cpu_irq >> 3)) {
+               Dprintk("Imbalance too trivial\n");
+               goto not_worth_the_effort;
+       }
+
+tryanotherirq:
+       /* if we select an IRQ to move that can't go where we want, then
+        * see if there is another one to try.
+        */
+       move_this_load = 0;
+       selected_irq = -1;
+       for (j = 0; j < NR_IRQS; j++) {
+               /* Is this an active IRQ? */
+               if (!irq_desc[j].action)
+                       continue;
+               if (imbalance <= IRQ_DELTA(max_loaded,j))
+                       continue;
+               /* Try to find the IRQ that is closest to the imbalance
+                * without going over.
+                */
+               if (move_this_load < IRQ_DELTA(max_loaded,j)) {
+                       move_this_load = IRQ_DELTA(max_loaded,j);
+                       selected_irq = j;
+               }
+       }
+       if (selected_irq == -1) {
+               goto tryanothercpu;
+       }
+
+       imbalance = move_this_load;
+       
+       /* For the physical_balance case, we accumulated both load
+        * values in one of the siblings' cpu_irq[] slots, so that the
+        * same code can handle physical and logical processors
+        * as much as possible.
+        *
+        * NOTE: the cpu_irq[] array holds the sum of the load for
+        * sibling A and sibling B in the slot for the lowest numbered
+        * sibling (A), _AND_ the load for sibling B in the slot for
+        * the higher numbered sibling.
+        *
+        * We seek the least loaded sibling by making the comparison
+        * (A+B)/2 vs B
+        */
+       load = CPU_IRQ(min_loaded) >> 1;
+       for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+               if (load > CPU_IRQ(j)) {
+                       /* This won't change cpu_sibling_map[min_loaded] */
+                       load = CPU_IRQ(j);
+                       min_loaded = j;
+               }
+       }
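+
+       /* Worked example (hypothetical numbers): if cpu_irq[A] = 100
+        * (i.e. A+B combined) and cpu_irq[B] = 30, then load = 100/2 = 50,
+        * and 50 > 30, so sibling B is the less loaded of the pair and
+        * becomes min_loaded.
+        */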
+
+       cpus_and(allowed_mask,
+               cpu_online_map,
+               balance_irq_affinity[selected_irq]);
+       target_cpu_mask = cpumask_of_cpu(min_loaded);
+       cpus_and(tmp, target_cpu_mask, allowed_mask);
+
+       if (!cpus_empty(tmp)) {
+
+               Dprintk("irq = %d moved to cpu = %d\n",
+                               selected_irq, min_loaded);
+               /* mark for change destination */
+               set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
+               /* Since we made a change, come back sooner to 
+                * check for more variation.
+                */
+               balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
+                       balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);       
+               return;
+       }
+       goto tryanotherirq;
+
+not_worth_the_effort:
+       /*
+        * if we did not find an IRQ to move, then adjust the time interval
+        * upward
+        */
+       balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
+               balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);       
+       Dprintk("IRQ worth rotating not found\n");
+       return;
+}
+
+static int balanced_irq(void *unused)
+{
+       int i;
+       unsigned long prev_balance_time = jiffies;
+       long time_remaining = balanced_irq_interval;
+
+       daemonize("kirqd");
+       
+       /* push everything to CPU 0 to give us a starting point.  */
+       for (i = 0 ; i < NR_IRQS ; i++) {
+               irq_desc[i].pending_mask = cpumask_of_cpu(0);
+               set_pending_irq(i, cpumask_of_cpu(0));
+       }
+
+       for ( ; ; ) {
+               time_remaining = schedule_timeout_interruptible(time_remaining);
+               try_to_freeze();
+               if (time_after(jiffies,
+                               prev_balance_time+balanced_irq_interval)) {
+                       preempt_disable();
+                       do_irq_balance();
+                       prev_balance_time = jiffies;
+                       time_remaining = balanced_irq_interval;
+                       preempt_enable();
+               }
+       }
+       return 0;
+}
+
+static int __init balanced_irq_init(void)
+{
+       int i;
+       struct cpuinfo_x86 *c;
+       cpumask_t tmp;
+
+       cpus_shift_right(tmp, cpu_online_map, 2);
+       c = &boot_cpu_data;
+       /* If not overridden on the command line, ask the subarchitecture. */
+       if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
+               irqbalance_disabled = NO_BALANCE_IRQ;
+       if (irqbalance_disabled)
+               return 0;
+       
+       /* disable irqbalance completely if there is only one processor online */
+       if (num_online_cpus() < 2) {
+               irqbalance_disabled = 1;
+               return 0;
+       }
+       /*
+        * Enable physical balance only if more than 1 physical processor
+        * is present
+        */
+       if (smp_num_siblings > 1 && !cpus_empty(tmp))
+               physical_balance = 1;
+
+       for_each_online_cpu(i) {
+               irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+               irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
+               if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
+                       printk(KERN_ERR "balanced_irq_init: out of memory");
+                       goto failed;
+               }
+               memset(irq_cpu_data[i].irq_delta, 0, sizeof(unsigned long) * NR_IRQS);
+               memset(irq_cpu_data[i].last_irq, 0, sizeof(unsigned long) * NR_IRQS);
+       }
+       
+       printk(KERN_INFO "Starting balanced_irq\n");
+       if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) 
+               return 0;
+       else 
+               printk(KERN_ERR "balanced_irq_init: failed to spawn 
balanced_irq");
+failed:
+       for_each_possible_cpu(i) {
+               kfree(irq_cpu_data[i].irq_delta);
+               irq_cpu_data[i].irq_delta = NULL;
+               kfree(irq_cpu_data[i].last_irq);
+               irq_cpu_data[i].last_irq = NULL;
+       }
+       return 0;
+}
+
+int __init irqbalance_disable(char *str)
+{
+       irqbalance_disabled = 1;
+       return 1;
+}
+
+__setup("noirqbalance", irqbalance_disable);
+
+late_initcall(balanced_irq_init);
+#endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef CONFIG_SMP
+void fastcall send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+       unsigned int cfg;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write_around(APIC_ICR, cfg);
+#endif
+}
+#endif /* !CONFIG_SMP */
+
+
+/*
+ * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+int skip_ioapic_setup;
+
+static int __init ioapic_setup(char *str)
+{
+       skip_ioapic_setup = 1;
+       return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       for (i = 0; i < MAX_PIRQS; i++)
+               pirq_entries[i] = -1;
+
+       pirqs_enabled = 1;
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int apic, int pin, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].mpc_irqtype == type &&
+                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mpc_dstirq == pin)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+
+                       return mp_irqs[i].mpc_dstirq;
+       }
+       return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {
+               int apic;
+               for(apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                               return apic;
+               }
+       }
+
+       return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+               "slot:%d, pin:%d.\n", bus, slot, pin);
+       if (mp_bus_id_to_pci_bus[bus] == -1) {
+               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus 
%d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                               break;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+                   !mp_irqs[i].mpc_irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                               return irq;
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0)
+                               best_guess = irq;
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+/*
+ * This function is currently only a helper for the i386 SMP boot process,
+ * where we need to reprogram the ioredtbls to cater for the CPUs which
+ * have come online, so the mask in all cases should simply be TARGET_CPUS.
+ */
+#ifdef CONFIG_SMP
+#ifndef CONFIG_XEN
+void __init setup_ioapic_dest(void)
+{
+       int pin, ioapic, irq, irq_entry;
+
+       if (skip_ioapic_setup == 1)
+               return;
+
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+                       if (irq_entry == -1)
+                               continue;
+                       irq = pin_2_irq(irq_entry, ioapic, pin);
+                       set_ioapic_affinity_irq(irq, TARGET_CPUS);
+               }
+
+       }
+}
+#endif /* !CONFIG_XEN */
+#endif
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
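+
+/*
+ * Worked example: for irq 9, EISA_ELCR() reads port 0x4d0 + (9 >> 3) =
+ * 0x4d1 and tests bit (9 & 7) = 1 of the high ELCR byte
+ * (1 = level triggered, 0 = edge triggered).
+ */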
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx)     (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)       (0)
+#define default_ISA_polarity(idx)      (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)       (1)
+#define default_PCI_polarity(idx)      (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      (0)
+
+/* NEC98 interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_NEC98_trigger(idx)     (0)
+#define default_NEC98_polarity(idx)    (0)
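+
+/*
+ * Summary of the mpc_irqflag encoding decoded below: bits 1:0 give the
+ * polarity (00 = conforms to bus, 01 = active high, 10 = reserved,
+ * 11 = active low) and bits 3:2 give the trigger mode (00 = conforms
+ * to bus, 01 = edge, 10 = reserved, 11 = level).
+ */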
+
+static int __init MPBIOS_polarity(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int polarity;
+
+       /*
+        * Determine IRQ line polarity (high active or low active):
+        */
+       switch (mp_irqs[idx].mpc_irqflag & 3)
+       {
+               case 0: /* conforms, i.e. bus-type dependent polarity */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       polarity = default_ISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       polarity = default_EISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       polarity = default_PCI_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       polarity = default_MCA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_NEC98: /* NEC 98 pin */
+                               {
+                                       polarity = default_NEC98_polarity(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       polarity = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* high active */
+               {
+                       polarity = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+               case 3: /* low active */
+               {
+                       polarity = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+       }
+       return polarity;
+}
+
+static int MPBIOS_trigger(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int trigger;
+
+       /*
+        * Determine IRQ trigger mode (edge or level sensitive):
+        */
+       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       {
+               case 0: /* conforms, i.e. bus-type dependent */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       trigger = default_ISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       trigger = default_PCI_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_NEC98: /* NEC 98 pin */
+                               {
+                                       trigger = default_NEC98_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* edge */
+               {
+                       trigger = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 1;
+                       break;
+               }
+               case 3: /* level */
+               {
+                       trigger = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 0;
+                       break;
+               }
+       }
+       return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+       return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+       return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+       int irq, i;
+       int bus = mp_irqs[idx].mpc_srcbus;
+
+       /*
+        * Debugging check; we are in big trouble if this message pops up!
+        */
+       if (mp_irqs[idx].mpc_dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+       switch (mp_bus_id_to_type[bus])
+       {
+               case MP_BUS_ISA: /* ISA pin */
+               case MP_BUS_EISA:
+               case MP_BUS_MCA:
+               case MP_BUS_NEC98:
+               {
+                       irq = mp_irqs[idx].mpc_srcbusirq;
+                       break;
+               }
+               case MP_BUS_PCI: /* PCI pin */
+               {
+                       /*
+                        * PCI IRQs are mapped in order
+                        */
+                       i = irq = 0;
+                       while (i < apic)
+                               irq += nr_ioapic_registers[i++];
+                       irq += pin;
+
+                       /*
+                        * For MPS mode, so far only needed by ES7000 platform
+                        */
+                       if (ioapic_renumber_irq)
+                               irq = ioapic_renumber_irq(apic, irq);
+
+                       break;
+               }
+               default:
+               {
+                       printk(KERN_ERR "unknown bus type %d.\n",bus); 
+                       irq = 0;
+                       break;
+               }
+       }
+
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
+       }
+       return irq;
+}
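+
+/*
+ * Worked example for the PCI branch above (hypothetical sizes): with two
+ * IO-APICs and nr_ioapic_registers[0] == 24, a PCI interrupt on apic 1,
+ * pin 5 yields irq = 24 + 5 = 29, i.e. pins are numbered sequentially
+ * across all IO-APICs.
+ */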
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic,pin,mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+        * nonexistent IRQs default to edge trigger
+        */
+       return 0;
+}
+
+/* irq_vector is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
+
+int assign_irq_vector(int irq)
+{
+       unsigned long flags;
+       int vector;
+       struct physdev_irq irq_op;
+
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+
+       spin_lock_irqsave(&vector_lock, flags);
+
+       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+               spin_unlock_irqrestore(&vector_lock, flags);
+               return IO_APIC_VECTOR(irq);
+       }
+
+       irq_op.irq = irq;
+       if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+               spin_unlock_irqrestore(&vector_lock, flags);
+               return -ENOSPC;
+       }
+
+       vector = irq_op.vector;
+       vector_irq[vector] = irq;
+       if (irq != AUTO_ASSIGN)
+               IO_APIC_VECTOR(irq) = vector;
+
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return vector;
+}
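+
+/*
+ * Note: unlike the native implementation, the vector here is not chosen
+ * by the kernel; PHYSDEVOP_alloc_irq_vector asks the hypervisor for one,
+ * and vector_irq[] records the reverse mapping consumed by the
+ * vector_to_irq() macro above (when CONFIG_PCI_MSI is enabled).
+ */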
+
+#ifndef CONFIG_XEN
+static struct hw_interrupt_type ioapic_level_type;
+static struct hw_interrupt_type ioapic_edge_type;
+
+#define IOAPIC_AUTO    -1
+#define IOAPIC_EDGE    0
+#define IOAPIC_LEVEL   1
+
+static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+{
+       unsigned idx;
+
+       idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               irq_desc[idx].chip = &ioapic_level_type;
+       else
+               irq_desc[idx].chip = &ioapic_edge_type;
+       set_intr_gate(vector, interrupt[idx]);
+}
+#else
+#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+#endif
+
+static void __init setup_IO_APIC_irqs(void)
+{
+       struct IO_APIC_route_entry entry;
+       int apic, pin, idx, irq, first_notcon = 1, vector;
+       unsigned long flags;
+
+       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+               /*
+                * add it to the IO-APIC irq-routing table:
+                */
+               memset(&entry,0,sizeof(entry));
+
+               entry.delivery_mode = INT_DELIVERY_MODE;
+               entry.dest_mode = INT_DEST_MODE;
+               entry.mask = 0;                         /* enable IRQ */
+               entry.dest.logical.logical_dest = 
+                                       cpu_mask_to_apicid(TARGET_CPUS);
+
+               idx = find_irq_entry(apic,pin,mp_INT);
+               if (idx == -1) {
+                       if (first_notcon) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               " IO-APIC (apicid-pin) %d-%d",
+                                               mp_ioapics[apic].mpc_apicid,
+                                               pin);
+                               first_notcon = 0;
+                       } else
+                               apic_printk(APIC_VERBOSE, ", %d-%d",
+                                       mp_ioapics[apic].mpc_apicid, pin);
+                       continue;
+               }
+
+               entry.trigger = irq_trigger(idx);
+               entry.polarity = irq_polarity(idx);
+
+               if (irq_trigger(idx)) {
+                       entry.trigger = 1;
+                       entry.mask = 1;
+               }
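+               /* Level-triggered entries are programmed masked here and
+                * unmasked later via unmask_IO_APIC_irq(), so an asserted
+                * level line cannot scream before a handler is installed. */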
+
+               irq = pin_2_irq(idx, apic, pin);
+               /*
+                * skip adding the timer int on secondary nodes, which causes
+                * a small but painful rift in the time-space continuum
+                */
+               if (multi_timer_check(apic, irq))
+                       continue;
+               else
+                       add_pin_to_irq(irq, apic, pin);
+
+               if (/*!apic &&*/ !IO_APIC_IRQ(irq))
+                       continue;
+
+               if (IO_APIC_IRQ(irq)) {
+                       vector = assign_irq_vector(irq);
+                       entry.vector = vector;
+                       ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+               
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               set_native_irq_info(irq, TARGET_CPUS);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       }
+
+       if (!first_notcon)
+               apic_printk(APIC_VERBOSE, " not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin:
+ */
+#ifndef CONFIG_XEN
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       memset(&entry,0,sizeof(entry));
+
+       disable_8259A_irq(0);
+
+       /* mask LVT0 */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+       /*
+        * We use logical delivery to get the timer IRQ
+        * to the first CPU.
+        */
+       entry.dest_mode = INT_DEST_MODE;
+       entry.mask = 0;                                 /* unmask IRQ now */
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.polarity = 0;
+       entry.trigger = 0;
+       entry.vector = vector;
+
+       /*
+        * The timer IRQ doesn't have to know that behind the
+        * scene we have a 8259A-master in AEOI mode ...
+        */
+       irq_desc[0].chip = &ioapic_edge_type;
+
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       enable_8259A_irq(0);
+}
+
+static inline void UNEXPECTED_IO_APIC(void)
+{
+}
+
+void __init print_IO_APIC(void)
+{
+       int apic, i;
+       union IO_APIC_reg_00 reg_00;
+       union IO_APIC_reg_01 reg_01;
+       union IO_APIC_reg_02 reg_02;
+       union IO_APIC_reg_03 reg_03;
+       unsigned long flags;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       for (i = 0; i < nr_ioapics; i++)
+               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+       /*
+        * We are a bit conservative about what we expect.  We have to
+        * know about every hardware change ASAP.
+        */
+       printk(KERN_INFO "testing the IO APIC.......................\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(apic, 0);
+       reg_01.raw = io_apic_read(apic, 1);
+       if (reg_01.bits.version >= 0x10)
+               reg_02.raw = io_apic_read(apic, 2);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", 
reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", 
reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+       if (reg_00.bits.ID >= get_physical_broadcast())
+               UNEXPECTED_IO_APIC();
+       if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
+       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", 
reg_01.bits.entries);
+       if (    (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
+               (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
+               (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
+               (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
+               (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
+               (reg_01.bits.entries != 0x2E) &&
+               (reg_01.bits.entries != 0x3F)
+       )
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", 
reg_01.bits.PRQ);
+       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", 
reg_01.bits.version);
+       if (    (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
+               (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
+               (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
+               (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
+               (reg_01.bits.version != 0x20)    /* Intel P64H (82806 AA) */
+       )
+               UNEXPECTED_IO_APIC();
+       if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+        * but the value of reg_02 is read as the previous read register
+        * value, so ignore it if reg_02 == reg_01.
+        */
+       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+               printk(KERN_DEBUG ".......     : arbitration: %02X\n", 
reg_02.bits.arbitration);
+               if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+        * or reg_03, but the value of reg_0[23] is read as the previous read
+        * register value, so ignore it if reg_03 == reg_0[12].
+        */
+       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+           reg_03.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", 
reg_03.bits.boot_DT);
+               if (reg_03.bits.__reserved_1)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+                         " Stat Dest Deli Vect:   \n");
+
+       for (i = 0; i <= reg_01.bits.entries; i++) {
+               struct IO_APIC_route_entry entry;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+                       entry.dest.logical.logical_dest,
+                       entry.dest.physical.physical_dest
+               );
+
+               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+                       entry.mask,
+                       entry.trigger,
+                       entry.irr,
+                       entry.polarity,
+                       entry.delivery_status,
+                       entry.dest_mode,
+                       entry.delivery_mode,
+                       entry.vector
+               );
+       }
+       }
+       if (use_pci_vector())
+               printk(KERN_INFO "Using vector-based indexing\n");
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+               if (use_pci_vector() && !platform_legacy_irq(i))
+                       printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+               else
+                       printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+               printk("\n");
+       }
+
+       printk(KERN_INFO ".................................... done.\n");
+
+       return;
+}
+
+#if 0
+
+static void print_APIC_bitfield (int base)
+{
+       unsigned int v;
+       int i, j;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+       for (i = 0; i < 8; i++) {
+               v = apic_read(base + i*0x10);
+               for (j = 0; j < 32; j++) {
+                       if (v & (1<<j))
+                               printk("1");
+                       else
+                               printk("0");
+               }
+               printk("\n");
+       }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+       unsigned int v, ver, maxlvt;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+               smp_processor_id(), hard_smp_processor_id());
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+       v = apic_read(APIC_LVR);
+       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+       ver = GET_APIC_VERSION(v);
+       maxlvt = get_maxlvt();
+
+       v = apic_read(APIC_TASKPRI);
+       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & 
APIC_TPRI_MASK);
+
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               v = apic_read(APIC_ARBPRI);
+               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                       v & APIC_ARBPRI_MASK);
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       v = apic_read(APIC_EOI);
+       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+       v = apic_read(APIC_RRR);
+       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       v = apic_read(APIC_LDR);
+       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+       v = apic_read(APIC_DFR);
+       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       v = apic_read(APIC_SPIV);
+       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+       printk(KERN_DEBUG "... APIC ISR field:\n");
+       print_APIC_bitfield(APIC_ISR);
+       printk(KERN_DEBUG "... APIC TMR field:\n");
+       print_APIC_bitfield(APIC_TMR);
+       printk(KERN_DEBUG "... APIC IRR field:\n");
+       print_APIC_bitfield(APIC_IRR);
+
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_ICR);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+       v = apic_read(APIC_ICR2);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+       v = apic_read(APIC_LVTT);
+       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+       if (maxlvt > 3) {                       /* PC is LVT#4. */
+               v = apic_read(APIC_LVTPC);
+               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+       }
+       v = apic_read(APIC_LVT0);
+       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+       v = apic_read(APIC_LVT1);
+       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+       if (maxlvt > 2) {                       /* ERR is LVT#3. */
+               v = apic_read(APIC_LVTERR);
+               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_TMICT);
+       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+       v = apic_read(APIC_TMCCT);
+       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+       v = apic_read(APIC_TDCR);
+       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+       printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+       on_each_cpu(print_local_APIC, NULL, 1, 1);
+}
+
+void /*__init*/ print_PIC(void)
+{
+       unsigned int v;
+       unsigned long flags;
+
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       v = inb(0xa1) << 8 | inb(0x21);
+       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+       v = inb(0xa0) << 8 | inb(0x20);
+       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+       outb(0x0b,0xa0);
+       outb(0x0b,0x20);
+       v = inb(0xa0) << 8 | inb(0x20);
+       outb(0x0a,0xa0);
+       outb(0x0a,0x20);
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+       v = inb(0x4d1) << 8 | inb(0x4d0);
+       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+#endif  /*  0  */
+
+#else
+void __init print_IO_APIC(void) { }
+#endif /* !CONFIG_XEN */
+
+static void __init enable_IO_APIC(void)
+{
+       union IO_APIC_reg_01 reg_01;
+       int i8259_apic, i8259_pin;
+       int i, apic;
+       unsigned long flags;
+
+       for (i = 0; i < PIN_MAP_SIZE; i++) {
+               irq_2_pin[i].pin = -1;
+               irq_2_pin[i].next = 0;
+       }
+       if (!pirqs_enabled)
+               for (i = 0; i < MAX_PIRQS; i++)
+                       pirq_entries[i] = -1;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_01.raw = io_apic_read(apic, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       spin_lock_irqsave(&ioapic_lock, flags);
+                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+                       /* If the interrupt line is enabled and in ExtInt mode,
+                        * we have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic,
+        * the i8259 is probably not connected to the ioapic, but give the
+        * mptable a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is setup in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+       }
+
+       /*
+        * Do not trust the IO-APIC being empty at bootup
+        */
+       clear_IO_APIC();
+}
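+
+/*
+ * For reference: each IO-APIC redirection table entry is 64 bits wide
+ * and is accessed as two 32-bit registers, 0x10 + 2*pin for the low
+ * word (vector, delivery mode, mask) and 0x11 + 2*pin for the high
+ * word (destination).  That is why struct IO_APIC_route_entry is read
+ * and written above as two ints:
+ *
+ *     low  = io_apic_read(apic, 0x10 + 2 * pin);
+ *     high = io_apic_read(apic, 0x11 + 2 * pin);
+ */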
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+       /*
+        * Clear the IO-APIC before rebooting:
+        */
+       clear_IO_APIC();
+
+#ifndef CONFIG_XEN
+       /*
+        * If the i8259 is routed through an IOAPIC,
+        * put that IOAPIC in virtual wire mode
+        * so legacy interrupts can be delivered.
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+               unsigned long flags;
+
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+               entry.trigger         = 0; /* Edge */
+               entry.irr             = 0;
+               entry.polarity        = 0; /* High */
+               entry.delivery_status = 0;
+               entry.dest_mode       = 0; /* Physical */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+               entry.vector          = 0;
+               entry.dest.physical.physical_dest =
+                                       GET_APIC_ID(apic_read(APIC_ID));
+
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+1));
+               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+0));
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+#endif
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@xxxxxxxx>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+       union IO_APIC_reg_00 reg_00;
+       physid_mask_t phys_id_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       /*
+        * This is broken; anything with a real cpu count has to
+        * circumvent this idiocy regardless.
+        */
+       phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               
+               old_id = mp_ioapics[apic].mpc_apicid;
+
+               if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.bits.ID);
+                       mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (check_apicid_used(phys_id_present_map,
+                                       mp_ioapics[apic].mpc_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       for (i = 0; i < get_physical_broadcast(); i++)
+                               if (!physid_isset(i, phys_id_present_map))
+                                       break;
+                       if (i >= get_physical_broadcast())
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       physid_set(i, phys_id_present_map);
+                       mp_ioapics[apic].mpc_apicid = i;
+               } else {
+                       physid_mask_t tmp;
+                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic].mpc_apicid);
+                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mpc_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mpc_dstapic == old_id)
+                                       mp_irqs[i].mpc_dstapic
+                                               = mp_ioapics[apic].mpc_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic].mpc_apicid);
+
+               reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+                       printk("could not set ID!\n");
+               else
+                       apic_printk(APIC_VERBOSE, " ok.\n");
+       }
+}
+#else
+static void __init setup_ioapic_ids_from_mpc(void) { }
+#endif
+
+#ifndef CONFIG_XEN
+/*
+ * There is a nasty bug in some older SMP boards: their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *     - timer IRQ defaults to IO-APIC IRQ
+ *     - if this function detects that timer IRQs are defunct, then we fall
+ *       back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+       unsigned long t1 = jiffies;
+
+       local_irq_enable();
+       /* Let ten ticks pass... */
+       mdelay((10 * 1000) / HZ);
+
+       /*
+        * Expect a few ticks at least, to be sure some possible
+        * glue logic does not lock up after one or two first
+        * ticks in a non-ExtINT mode.  Also the local APIC
+        * might have cached one ExtINT interrupt.  Finally, at
+        * least one tick may be lost due to delays.
+        */
+       if (jiffies - t1 > 4)
+               return 1;
+
+       return 0;
+}
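+
+/*
+ * Worked example: with HZ=100 the mdelay() above waits
+ * (10 * 1000) / 100 = 100ms, i.e. ten tick periods, so a working timer
+ * should advance jiffies by roughly 10; accepting jiffies - t1 > 4
+ * tolerates the lost and cached ticks described above.
+ */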
+
+/*
+ * In the SMP+IOAPIC case it might happen that an unspecified number
+ * of pending IRQ events are left unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs to the same CPU. It's much
+ * better to do it this way, as we then do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * An edge-triggered source needs to resend any interrupt
+ * that was delayed, but this is now handled in the
+ * device-independent code.
+ */
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * to return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < 16) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+/*
+ * Once we have recorded IRQ_PENDING, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+       move_irq(irq);
+       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+                                       == (IRQ_PENDING | IRQ_DISABLED))
+               mask_IO_APIC_irq(irq);
+       ack_APIC_irq();
+}
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+       unmask_IO_APIC_irq(irq);
+
+       return 0; /* don't check for pending */
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+       unsigned long v;
+       int i;
+
+       move_irq(irq);
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as an edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+       i = IO_APIC_VECTOR(irq);
+
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+       ack_APIC_irq();
+
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+}
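+
+/*
+ * The TMR lookup above, worked through: the eight 32-bit TMR words are
+ * spaced 0x10 apart, so (i & ~0x1f) >> 1 selects the word for vector i
+ * and i & 0x1f the bit within it.  For example, vector i = 0x31 reads
+ * APIC_TMR + 0x10 (the second word) and tests bit 0x11.
+ */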
+
+#ifdef CONFIG_PCI_MSI
+static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       return startup_edge_ioapic_irq(irq);
+}
+
+static void ack_edge_ioapic_vector(unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       move_native_irq(vector);
+       ack_edge_ioapic_irq(irq);
+}
+
+static unsigned int startup_level_ioapic_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       return startup_level_ioapic_irq (irq);
+}
+
+static void end_level_ioapic_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       move_native_irq(vector);
+       end_level_ioapic_irq(irq);
+}
+
+static void mask_IO_APIC_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       mask_IO_APIC_irq(irq);
+}
+
+static void unmask_IO_APIC_vector (unsigned int vector)
+{
+       int irq = vector_to_irq(vector);
+
+       unmask_IO_APIC_irq(irq);
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_vector (unsigned int vector,
+                                       cpumask_t cpu_mask)
+{
+       int irq = vector_to_irq(vector);
+
+       set_native_irq_info(vector, cpu_mask);
+       set_ioapic_affinity_irq(irq, cpu_mask);
+}
+#endif
+#endif
+
+static int ioapic_retrigger(unsigned int irq)
+{
+       send_IPI_self(IO_APIC_VECTOR(irq));
+
+       return 1;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+       .typename       = "IO-APIC-edge",
+       .startup        = startup_edge_ioapic,
+       .shutdown       = shutdown_edge_ioapic,
+       .enable         = enable_edge_ioapic,
+       .disable        = disable_edge_ioapic,
+       .ack            = ack_edge_ioapic,
+       .end            = end_edge_ioapic,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
+};
+
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+       .typename       = "IO-APIC-level",
+       .startup        = startup_level_ioapic,
+       .shutdown       = shutdown_level_ioapic,
+       .enable         = enable_level_ioapic,
+       .disable        = disable_level_ioapic,
+       .ack            = mask_and_ack_level_ioapic,
+       .end            = end_level_ioapic,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
+};
+#endif /* !CONFIG_XEN */
+
+static inline void init_IO_APIC_traps(void)
+{
+       int irq;
+
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               int tmp = irq;
+               if (use_pci_vector()) {
+                       if (!platform_legacy_irq(tmp))
+                               if ((tmp = vector_to_irq(tmp)) == -1)
+                                       continue;
+               }
+               if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+                        * interrupt if we can..
+                        */
+                       if (irq < 16)
+                               make_8259A_irq(irq);
+#ifndef CONFIG_XEN
+                       else
+                               /* Strange. Oh, well.. */
+                               irq_desc[irq].chip = &no_irq_type;
+#endif
+               }
+       }
+}
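+
+/*
+ * Example of the interrupt-level note above: with the level taken as
+ * vector >> 4, vectors 0x31 and 0x39 both land on level 3, which is
+ * exactly the clustering the spreading of vectors tries to avoid.
+ */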
+
+#ifndef CONFIG_XEN
+static void enable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+       .typename       = "local-APIC-edge",
+       .startup        = NULL, /* startup_irq() not used for IRQ0 */
+       .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
+       .enable         = enable_lapic_irq,
+       .disable        = disable_lapic_irq,
+       .ack            = ack_lapic_irq,
+       .end            = end_lapic_irq
+};
+
+static void setup_nmi (void)
+{
+       /*
+        * Dirty trick to enable the NMI watchdog ...
+        * We put the 8259A master into AEOI mode and
+        * unmask on all local APICs LVT0 as NMI.
+        *
+        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+        * is from Maciej W. Rozycki - so we do not have to EOI from
+        * the NMI handler or the timer interrupt.
+        */ 
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
+
+       on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
+
+       apic_printk(APIC_VERBOSE, " done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+       int apic, pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+       unsigned long flags;
+
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
+       if (pin == -1)
+               return;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       clear_IO_APIC_pin(apic, pin);
+
+       memset(&entry1, 0, sizeof(entry1));
+
+       entry1.dest_mode = 0;                   /* physical delivery */
+       entry1.mask = 0;                        /* unmask IRQ now */
+       entry1.dest.physical.physical_dest = hard_smp_processor_id();
+       entry1.delivery_mode = dest_ExtINT;
+       entry1.polarity = entry0.polarity;
+       entry1.trigger = 0;
+       entry1.vector = 0;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+                  RTC_FREQ_SELECT);
+       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+       i = 100;
+       while (i-- > 0) {
+               mdelay(10);
+               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+                       i -= 10;
+       }
+
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(apic, pin);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
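+
+/*
+ * The RTC programming above: rate select 0x6 in RTC_FREQ_SELECT yields
+ * a 1024 Hz periodic interrupt and RTC_PIE enables it; the loop then
+ * watches RTC_PF for up to roughly one second (100 iterations of
+ * mdelay(10), shortened by 10 for each periodic interrupt seen) before
+ * the saved CMOS state is restored.
+ */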
+
+int timer_uses_ioapic_pin_0;
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+       int apic1, pin1, apic2, pin2;
+       int vector;
+
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+       vector = assign_irq_vector(0);
+       set_intr_gate(vector, interrupt[0]);
+
+       /*
+        * Subtle, code in do_timer_interrupt() expects an AEOI
+        * mode for the 8259A whenever interrupts are routed
+        * through I/O APICs.  Also IRQ0 has to be enabled in
+        * the 8259A which implies the virtual wire has to be
+        * disabled in the local APIC.
+        */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       init_8259A(1);
+       timer_ack = 1;
+       if (timer_over_8254 > 0)
+               enable_8259A_irq(0);
+
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
+
+       if (pin1 == 0)
+               timer_uses_ioapic_pin_0 = 1;
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
+
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               unmask_IO_APIC_irq(0);
+               if (timer_irq_works()) {
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                       }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
+                       return;
+               }
+               clear_IO_APIC_pin(apic1, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+                               "IO-APIC\n");
+       }
+
+       printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+       if (pin2 != -1) {
+               printk("\n..... (found pin %d) ...", pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
+               if (timer_irq_works()) {
+                       printk("works.\n");
+                       if (pin1 != -1)
+                               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+                       else
+                               add_pin_to_irq(0, apic2, pin2);
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               setup_nmi();
+                       }
+                       return;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               clear_IO_APIC_pin(apic2, pin2);
+       }
+       printk(" failed.\n");
+
+       if (nmi_watchdog == NMI_IO_APIC) {
+               printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+               nmi_watchdog = 0;
+       }
+
+       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+       disable_8259A_irq(0);
+       irq_desc[0].chip = &lapic_irq_type;
+       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       enable_8259A_irq(0);
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+       printk(" failed.\n");
+
+       printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+       timer_ack = 0;
+       init_8259A(0);
+       make_8259A_irq(0);
+       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+       unlock_ExtINT_logic();
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       printk(" failed :(.\n");
+       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+               "report.  Then try booting with the 'noapic' option");
+}
+#else
+int timer_uses_ioapic_pin_0 = 0;
+#define check_timer() ((void)0)
+#endif
+
+/*
+ * IRQs that are handled by the PIC in the MPS IOAPIC case.
+ * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ */
+#define PIC_IRQS       (1 << PIC_CASCADE_IR)
+
+void __init setup_IO_APIC(void)
+{
+       enable_IO_APIC();
+
+       if (acpi_ioapic)
+               io_apic_irqs = ~0;      /* all IRQs go through IOAPIC */
+       else
+               io_apic_irqs = ~PIC_IRQS;
+
+       printk("ENABLING IO-APIC IRQs\n");
+
+       /*
+        * Set up IO-APIC IRQ routing.
+        */
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
+#ifndef CONFIG_XEN
+       sync_Arb_IDs();
+#endif
+       setup_IO_APIC_irqs();
+       init_IO_APIC_traps();
+       check_timer();
+       if (!acpi_ioapic)
+               print_IO_APIC();
+}
+
+static int __init setup_disable_8254_timer(char *s)
+{
+       timer_over_8254 = -1;
+       return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{
+       timer_over_8254 = 2;
+       return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
+/*
+ *     Called after all the initialization is done. If we didn't find any
+ *     APIC bugs then we can allow the modify fast path.
+ */
+ 
+static int __init io_apic_bug_finalize(void)
+{
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       if (is_initial_xendomain()) {
+               struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
+               op.u.platform_quirk.quirk_id = sis_apic_bug ?
+                       QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
+               HYPERVISOR_platform_op(&op);
+       }
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+struct sysfs_ioapic_data {
+       struct sys_device dev;
+       struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+               *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       union IO_APIC_reg_00 reg_00;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(dev->id, 0);
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+               io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+       set_kset_name("ioapic"),
+       .suspend = ioapic_suspend,
+       .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+       struct sys_device * dev;
+       int i, size, error = 0;
+
+       error = sysdev_class_register(&ioapic_sysdev_class);
+       if (error)
+               return error;
+
+       for (i = 0; i < nr_ioapics; i++ ) {
+               size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
+                       * sizeof(struct IO_APIC_route_entry);
+               mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+               if (!mp_ioapic_data[i]) {
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+               memset(mp_ioapic_data[i], 0, size);
+               dev = &mp_ioapic_data[i]->dev;
+               dev->id = i; 
+               dev->cls = &ioapic_sysdev_class;
+               error = sysdev_register(dev);
+               if (error) {
+                       kfree(mp_ioapic_data[i]);
+                       mp_ioapic_data[i] = NULL;
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+       }
+
+       return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI
+
+int __init io_apic_get_unique_id (int ioapic, int apic_id)
+{
+#ifndef CONFIG_XEN
+       union IO_APIC_reg_00 reg_00;
+       static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+       physid_mask_t tmp;
+       unsigned long flags;
+       int i = 0;
+
+       /*
+        * The P4 platform supports up to 256 APIC IDs on two separate APIC 
+        * buses (one for LAPICs, one for IOAPICs), where predecessors only
+        * support up to 16 on one shared APIC bus.
+        * 
+        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+        *      advantage of new APIC bus architecture.
+        */
+
+       if (physids_empty(apic_id_map))
+               apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(ioapic, 0);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       if (apic_id >= get_physical_broadcast()) {
+               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
+               apic_id = reg_00.bits.ID;
+       }
+
+       /*
+        * Every APIC in a system must have a unique ID or we get lots of nice 
+        * 'stuck on smp_invalidate_needed IPI wait' messages.
+        */
+       if (check_apicid_used(apic_id_map, apic_id)) {
+
+               for (i = 0; i < get_physical_broadcast(); i++) {
+                       if (!check_apicid_used(apic_id_map, i))
+                               break;
+               }
+
+               if (i == get_physical_broadcast())
+                       panic("Max apic_id exceeded!\n");
+
+               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+                       "trying %d\n", ioapic, apic_id, i);
+
+               apic_id = i;
+       } 
+
+       tmp = apicid_to_cpu_present(apic_id);
+       physids_or(apic_id_map, apic_id_map, tmp);
+
+       if (reg_00.bits.ID != apic_id) {
+               reg_00.bits.ID = apic_id;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic, 0, reg_00.raw);
+               reg_00.raw = io_apic_read(ioapic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /* Sanity check */
+               if (reg_00.bits.ID != apic_id) {
+                       printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+                       return -1;
+               }
+       }
+
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+#endif /* !CONFIG_XEN */
+
+       return apic_id;
+}
+
+
+int __init io_apic_get_version (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.version;
+}
+
+
+int __init io_apic_get_redir_entries (int ioapic)
+{
+       union IO_APIC_reg_01    reg_01;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_01.raw = io_apic_read(ioapic, 1);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return reg_01.bits.entries;
+}
+
+
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       if (!IO_APIC_IRQ(irq)) {
+               printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       /*
+        * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+        * Note that we mask (disable) IRQs now -- these get enabled when the
+        * corresponding device driver registers for this IRQ.
+        */
+
+       memset(&entry,0,sizeof(entry));
+
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.dest_mode = INT_DEST_MODE;
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.trigger = edge_level;
+       entry.polarity = active_high_low;
+       entry.mask  = 1;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= 16)
+               add_pin_to_irq(irq, ioapic, pin);
+
+       entry.vector = assign_irq_vector(irq);
+
+       apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+               "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+               edge_level, active_high_low);
+
+       ioapic_register_intr(irq, entry.vector, edge_level);
+
+       if (!ioapic && (irq < 16))
+               disable_8259A_irq(irq);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+       set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return 0;
+}
+
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ioport-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ioport-xen.c     Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,122 @@
+/*
+ *     linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <xen/interface/physdev.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value NEW_VALUE. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+{
+       unsigned long mask;
+       unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+       unsigned int low_index = base & (BITS_PER_LONG-1);
+       int length = low_index + extent;
+
+       if (low_index != 0) {
+               mask = (~0UL << low_index);
+               if (length < BITS_PER_LONG)
+                       mask &= ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+               length -= BITS_PER_LONG;
+       }
+
+       mask = (new_value ? ~0UL : 0UL);
+       while (length >= BITS_PER_LONG) {
+               *bitmap_base++ = mask;
+               length -= BITS_PER_LONG;
+       }
+
+       if (length > 0) {
+               mask = ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+       }
+}
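+
+/*
+ * Illustrative call: set_bitmap(map, 30, 5, 1) sets bits 30..34.  On a
+ * 32-bit kernel (BITS_PER_LONG == 32) that spans two words: the first
+ * branch handles the partial head (bits 30..31 of word 0) and the
+ * final branch the tail (bits 0..2 of word 1), with the middle loop
+ * skipped because fewer than 32 bits remain.
+ */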
+
+
+/*
+ * This changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+       struct thread_struct * t = &current->thread;
+       unsigned long *bitmap;
+       struct physdev_set_iobitmap set_iobitmap;
+
+       if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+               return -EINVAL;
+       if (turn_on && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       /*
+        * If it's the first ioperm() call in this thread's lifetime, set the
+        * IO bitmap up. ioperm() is much less timing critical than clone(),
+        * this is why we delay this operation until now:
+        */
+       if (!t->io_bitmap_ptr) {
+               bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+               if (!bitmap)
+                       return -ENOMEM;
+
+               memset(bitmap, 0xff, IO_BITMAP_BYTES);
+               t->io_bitmap_ptr = bitmap;
+               set_thread_flag(TIF_IO_BITMAP);
+
+               set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
+               set_iobitmap.nr_ports = IO_BITMAP_BITS;
+               HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &set_iobitmap);
+       }
+
+       set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+       return 0;
+}
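+
+/*
+ * Note the inversion in the set_bitmap() call above: in the IO bitmap
+ * a set bit means "access denied", so turning ports on clears their
+ * bits.  Typical usage from a privileged process (illustrative):
+ *
+ *     ioperm(0x378, 3, 1);    grant access to ports 0x378..0x37a
+ */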
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+
+asmlinkage long sys_iopl(unsigned long unused)
+{
+       volatile struct pt_regs * regs = (struct pt_regs *) &unused;
+       unsigned int level = regs->ebx;
+       struct thread_struct *t = &current->thread;
+       unsigned int old = (t->iopl >> 12) & 3;
+
+       if (level > 3)
+               return -EINVAL;
+       /* Trying to gain more privileges? */
+       if (level > old) {
+               if (!capable(CAP_SYS_RAWIO))
+                       return -EPERM;
+       }
+       t->iopl = level << 12;
+       set_iopl_mask(t->iopl);
+       return 0;
+}
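+
+/*
+ * Illustrative usage: iopl(3) makes all 65536 ports accessible to the
+ * caller.  In this Xen build the level is not patched into eflags
+ * directly; it is recorded in t->iopl and propagated through
+ * set_iopl_mask() above.
+ */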
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/irq-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/irq-xen.c        Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,324 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * Each architecture has to answer this itself.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+       struct thread_info      tinfo;
+       u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQs (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{      
+       /* high bit used in ret_from_ code */
+       int irq = ~regs->orig_eax;
+#ifdef CONFIG_4KSTACKS
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+#endif
+
+       if (unlikely((unsigned)irq >= NR_IRQS)) {
+               printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+                                       __FUNCTION__, irq);
+               BUG();
+       }
+
+       irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+       {
+               long esp;
+
+               __asm__ __volatile__("andl %%esp,%0" :
+                                       "=r" (esp) : "0" (THREAD_SIZE - 1));
+               if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+                       printk("do_IRQ: stack overflow: %ld\n",
+                               esp - sizeof(struct thread_info));
+                       dump_stack();
+               }
+       }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+       curctx = (union irq_ctx *) current_thread_info();
+       irqctx = hardirq_ctx[smp_processor_id()];
+
+       /*
+        * this is where we switch to the IRQ stack. However, if we are
+        * already using the IRQ stack (because we interrupted a hardirq
+        * handler) we can't do that and just have to keep using the
+        * current stack (which is the irq stack already after all)
+        */
+       if (curctx != irqctx) {
+               int arg1, arg2, ebx;
+
+               /* build the stack frame on the IRQ stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+               irqctx->tinfo.task = curctx->tinfo.task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /*
+                * Copy the softirq bits in preempt_count so that the
+                * softirq checks work in the hardirq context.
+                */
+               irqctx->tinfo.preempt_count =
+                       (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+                       (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp      \n"
+                       "       call    __do_IRQ         \n"
+                       "       movl   %%ebx,%%esp      \n"
+                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+                       :  "0" (irq),   "1" (regs),  "2" (isp)
+                       : "memory", "cc", "ecx"
+               );
+       } else
+#endif
+               __do_IRQ(irq, regs);
+
+       irq_exit();
+
+       return 1;
+}
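+
+/*
+ * About the asm above: %ebx carries isp, so the xchgl switches %esp to
+ * the top of the IRQ stack (leaving the old stack pointer in %ebx),
+ * __do_IRQ is called with irq and regs already in %eax and %edx per
+ * the regparm(3) convention, and the final movl restores the original
+ * stack pointer.
+ */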
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+       union irq_ctx *irqctx;
+
+       if (hardirq_ctx[cpu])
+               return;
+
+       irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       hardirq_ctx[cpu] = irqctx;
+
+       irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = 0;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       softirq_ctx[cpu] = irqctx;
+
+       printk("CPU %u irqstacks, hard=%p soft=%p\n",
+               cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+       hardirq_ctx[cpu] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+       unsigned long flags;
+       struct thread_info *curctx;
+       union irq_ctx *irqctx;
+       u32 *isp;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       if (local_softirq_pending()) {
+               curctx = current_thread_info();
+               irqctx = softirq_ctx[smp_processor_id()];
+               irqctx->tinfo.task = curctx->task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /* build the stack frame on the softirq stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp     \n"
+                       "       call    __do_softirq    \n"
+                       "       movl    %%ebx,%%esp     \n"
+                       : "=b"(isp)
+                       : "0"(isp)
+                       : "memory", "cc", "edx", "ecx", "eax"
+               );
+               /*
+                * Shouldn't happen, we returned above if in_interrupt():
+                */
+               WARN_ON_ONCE(softirq_count());
+       }
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int i = *(loff_t *) v, j;
+       struct irqaction * action;
+       unsigned long flags;
+
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%-8d",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               spin_lock_irqsave(&irq_desc[i].lock, flags);
+               action = irq_desc[i].action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+               seq_printf(p, " %14s", irq_desc[i].chip->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
+               seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", nmi_count(j));
+               seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+               seq_printf(p, "LOC: ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ",
+                               per_cpu(irq_stat,j).apic_timer_irqs);
+               seq_putc(p, '\n');
+#endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+       }
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(cpumask_t map)
+{
+       unsigned int irq;
+       static int warned;
+
+       for (irq = 0; irq < NR_IRQS; irq++) {
+               cpumask_t mask;
+               if (irq == 2)
+                       continue;
+
+               cpus_and(mask, irq_desc[irq].affinity, map);
+               if (any_online_cpu(mask) == NR_CPUS) {
+                       /*printk("Breaking affinity for irq %i\n", irq);*/
+                       mask = map;
+               }
+               if (irq_desc[irq].chip->set_affinity)
+                       irq_desc[irq].chip->set_affinity(irq, mask);
+               else if (irq_desc[irq].action && !(warned++))
+                       printk("Cannot set affinity for irq %i\n", irq);
+       }
+
+#if 0
+       barrier();
+       /* Ingo Molnar says: "after the IO-APIC masks have been redirected
+          [note the nop - the interrupt-enable boundary on x86 is two
+          instructions from sti] - to flush out pending hardirqs and
+          IPIs. After this point nothing is supposed to reach this CPU." */
+       __asm__ __volatile__("sti; nop; cli");
+       barrier();
+#else
+       /* That doesn't seem sufficient.  Give it 1ms. */
+       local_irq_enable();
+       mdelay(1);
+       local_irq_disable();
+#endif
+}
+#endif
+
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/ldt-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/ldt-xen.c        Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,270 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@xxxxxxxxxx>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+static void flush_ldt(void *null)
+{
+       if (current->active_mm)
+               load_LDT(&current->active_mm->context);
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+       void *oldldt;
+       void *newldt;
+       int oldsize;
+
+       if (mincount <= pc->size)
+               return 0;
+       oldsize = pc->size;
+       mincount = (mincount+511)&(~511);
+       if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+               newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+       else
+               newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+       if (!newldt)
+               return -ENOMEM;
+
+       if (oldsize)
+               memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+       oldldt = pc->ldt;
+       memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+       pc->ldt = newldt;
+       wmb();
+       pc->size = mincount;
+       wmb();
+
+       if (reload) {
+#ifdef CONFIG_SMP
+               cpumask_t mask;
+               preempt_disable();
+#endif
+               make_pages_readonly(
+                       pc->ldt,
+                       (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               load_LDT(pc);
+#ifdef CONFIG_SMP
+               mask = cpumask_of_cpu(smp_processor_id());
+               if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+                       smp_call_function(flush_ldt, NULL, 1, 1);
+               preempt_enable();
+#endif
+       }
+       if (oldsize) {
+               make_pages_writable(
+                       oldldt,
+                       (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+                       vfree(oldldt);
+               else
+                       kfree(oldldt);
+       }
+       return 0;
+}
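+
+/*
+ * The rounding above, worked through: (mincount+511) & ~511 rounds the
+ * entry count up to a multiple of 512; at LDT_ENTRY_SIZE == 8 bytes
+ * per entry that is a multiple of 4096, so a request for one entry
+ * allocates exactly one page via kmalloc() and only multi-page tables
+ * take the vmalloc() path.
+ */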
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+       int err = alloc_ldt(new, old->size, 0);
+       if (err < 0)
+               return err;
+       memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+       make_pages_readonly(
+               new->ldt,
+               (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+               XENFEAT_writable_descriptor_tables);
+       return 0;
+}
+
+/*
+ * We do not have to muck with descriptors here; that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+       struct mm_struct * old_mm;
+       int retval = 0;
+
+       init_MUTEX(&mm->context.sem);
+       mm->context.size = 0;
+       mm->context.has_foreign_mappings = 0;
+       old_mm = current->mm;
+       if (old_mm && old_mm->context.size > 0) {
+               down(&old_mm->context.sem);
+               retval = copy_ldt(&mm->context, &old_mm->context);
+               up(&old_mm->context.sem);
+       }
+       return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void destroy_context(struct mm_struct *mm)
+{
+       if (mm->context.size) {
+               if (mm == current->active_mm)
+                       clear_LDT();
+               make_pages_writable(
+                       mm->context.ldt,
+                       (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
+                       XENFEAT_writable_descriptor_tables);
+               if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+                       vfree(mm->context.ldt);
+               else
+                       kfree(mm->context.ldt);
+               mm->context.size = 0;
+       }
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       struct mm_struct * mm = current->mm;
+
+       if (!mm->context.size)
+               return 0;
+       if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+               bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+       down(&mm->context.sem);
+       size = mm->context.size*LDT_ENTRY_SIZE;
+       if (size > bytecount)
+               size = bytecount;
+
+       err = 0;
+       if (copy_to_user(ptr, mm->context.ldt, size))
+               err = -EFAULT;
+       up(&mm->context.sem);
+       if (err < 0)
+               goto error_return;
+       if (size != bytecount) {
+               /* zero-fill the rest */
+               if (clear_user(ptr+size, bytecount-size) != 0) {
+                       err = -EFAULT;
+                       goto error_return;
+               }
+       }
+       return bytecount;
+error_return:
+       return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       void *address;
+
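+       /* default_ldt[] is assumed to contain five legacy entries, hence the 5 below. */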
+       err = 0;
+       address = &default_ldt[0];
+       size = 5*sizeof(struct desc_struct);
+       if (size > bytecount)
+               size = bytecount;
+
+       err = size;
+       if (copy_to_user(ptr, address, size))
+               err = -EFAULT;
+
+       return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+       struct mm_struct * mm = current->mm;
+       __u32 entry_1, entry_2;
+       int error;
+       struct user_desc ldt_info;
+
+       error = -EINVAL;
+       if (bytecount != sizeof(ldt_info))
+               goto out;
+       error = -EFAULT;        
+       if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+               goto out;
+
+       error = -EINVAL;
+       if (ldt_info.entry_number >= LDT_ENTRIES)
+               goto out;
+       if (ldt_info.contents == 3) {
+               if (oldmode)
+                       goto out;
+               if (ldt_info.seg_not_present == 0)
+                       goto out;
+       }
+
+       down(&mm->context.sem);
+       if (ldt_info.entry_number >= mm->context.size) {
+               error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+               if (error < 0)
+                       goto out_unlock;
+       }
+
+       /* Allow LDTs to be cleared by the user. */
+       if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+               if (oldmode || LDT_empty(&ldt_info)) {
+                       entry_1 = 0;
+                       entry_2 = 0;
+                       goto install;
+               }
+       }
+
+       entry_1 = LDT_entry_a(&ldt_info);
+       entry_2 = LDT_entry_b(&ldt_info);
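+       /* Old-mode calls predate the "useable" flag; AVL (bit 20 of the
+        * high word) is cleared below. */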
+       if (oldmode)
+               entry_2 &= ~(1 << 20);
+
+       /* Install the new entry ...  */
+install:
+       error = write_ldt_entry(mm->context.ldt, ldt_info.entry_number,
+                               entry_1, entry_2);
+
+out_unlock:
+       up(&mm->context.sem);
+out:
+       return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+       int ret = -ENOSYS;
+
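+       /* func selects the operation: 0 = read LDT, 1 = old-style write,
+        * 2 = read the default LDT, 0x11 = new-style write. */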
+       switch (func) {
+       case 0:
+               ret = read_ldt(ptr, bytecount);
+               break;
+       case 1:
+               ret = write_ldt(ptr, bytecount, 1);
+               break;
+       case 2:
+               ret = read_default_ldt(ptr, bytecount);
+               break;
+       case 0x11:
+               ret = write_ldt(ptr, bytecount, 0);
+               break;
+       }
+       return ret;
+}
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/machine_kexec.c
--- a/arch/i386/kernel/machine_kexec.c  Mon Jun 04 10:05:24 2007 +0100
+++ b/arch/i386/kernel/machine_kexec.c  Mon Jun 04 10:05:28 2007 +0100
@@ -19,6 +19,10 @@
 #include <asm/desc.h>
 #include <asm/system.h>
 
+#ifdef CONFIG_XEN
+#include <xen/interface/kexec.h>
+#endif
+
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 static u32 kexec_pgd[1024] PAGE_ALIGNED;
 #ifdef CONFIG_X86_PAE
@@ -27,6 +31,40 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED
 #endif
 static u32 kexec_pte0[1024] PAGE_ALIGNED;
 static u32 kexec_pte1[1024] PAGE_ALIGNED;
+
+#ifdef CONFIG_XEN
+
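+/*
+ * __ma(): kernel virtual address -> machine address, translating the
+ * pseudo-physical frame through pfn_to_mfn(). Kexec must hand Xen
+ * machine addresses, since the hypervisor rather than the guest
+ * performs the final relocation (note the #ifndef CONFIG_XEN around
+ * machine_kexec() below).
+ */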
+#define __ma(x) (pfn_to_mfn(__pa((x)) >> PAGE_SHIFT) << PAGE_SHIFT)
+
+#if PAGES_NR > KEXEC_XEN_NO_PAGES
+#error PAGES_NR is greater than KEXEC_XEN_NO_PAGES - Xen support will break
+#endif
+
+#if PA_CONTROL_PAGE != 0
+#error PA_CONTROL_PAGE is non zero - Xen support will break
+#endif
+
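+/*
+ * Assumed flow: rather than jumping to the relocation code itself,
+ * the guest records the machine addresses of the control page and
+ * transition page tables in xen_kexec_image_t and leaves the actual
+ * switch-over to the hypervisor.
+ */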
+void machine_kexec_setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+{
+       void *control_page;
+
+       memset(xki->page_list, 0, sizeof(xki->page_list));
+
+       control_page = page_address(image->control_code_page);
+       memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+       xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
+       xki->page_list[PA_PGD] = __ma(kexec_pgd);
+#ifdef CONFIG_X86_PAE
+       xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
+       xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
+#endif
+       xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
+       xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
+
+}
+
+#endif /* CONFIG_XEN */
 
 /*
  * An architecture hook called to validate the
@@ -54,6 +92,7 @@ void machine_kexec_cleanup(struct kimage
 {
 }
 
+#ifndef CONFIG_XEN
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
@@ -87,3 +126,4 @@ NORET_TYPE void machine_kexec(struct kim
        relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
                        image->start, cpu_has_pae);
 }
+#endif
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/microcode-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/microcode-xen.c  Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,144 @@
+/*
+ *     Intel CPU Microcode Update Driver for Linux
+ *
+ *     Copyright (C) 2000-2004 Tigran Aivazian
+ *
+ *     This driver allows updating the microcode on Intel processors
+ *     belonging to the IA-32 family - PentiumPro, Pentium II,
+ *     Pentium III, Xeon, Pentium 4, etc.
+ *
+ *     Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 
+ *     Order Number 245472 or free download from:
+ *             
+ *     http://developer.intel.com/design/pentium4/manuals/245472.htm
+ *
+ *     For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+//#define DEBUG /* pr_debug */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/syscalls.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@xxxxxxxxxxx>");
+MODULE_LICENSE("GPL");
+
+static int verbose;
+module_param(verbose, int, 0644);
+
+#define MICROCODE_VERSION      "1.14a-xen"
+
+#define DEFAULT_UCODE_DATASIZE         (2000)    /* 2000 bytes */
+#define MC_HEADER_SIZE         (sizeof (microcode_header_t))     /* 48 bytes */
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DEFINE_MUTEX(microcode_mutex);
+                               
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+       return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+
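+/*
+ * Unlike the native driver, which programs per-CPU MSRs, this variant
+ * hands the raw blob to the hypervisor: only Xen sees every physical
+ * CPU, so one XENPF_microcode_update platform hypercall replaces the
+ * per-CPU work.
+ */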
+static int do_microcode_update (const void __user *ubuf, size_t len)
+{
+       int err;
+       void *kbuf;
+
+       kbuf = vmalloc(len);
+       if (!kbuf)
+               return -ENOMEM;
+
+       if (copy_from_user(kbuf, ubuf, len) == 0) {
+               struct xen_platform_op op;
+
+               op.cmd = XENPF_microcode_update;
+               set_xen_guest_handle(op.u.microcode.data, kbuf);
+               op.u.microcode.length = len;
+               err = HYPERVISOR_platform_op(&op);
+       } else
+               err = -EFAULT;
+
+       vfree(kbuf);
+
+       return err;
+}
+
+static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+{
+       ssize_t ret;
+
+       if (len < MC_HEADER_SIZE) {
+               printk(KERN_ERR "microcode: not enough data\n"); 
+               return -EINVAL;
+       }
+
+       mutex_lock(&microcode_mutex);
+
+       ret = do_microcode_update(buf, len);
+       if (!ret)
+               ret = (ssize_t)len;
+
+       mutex_unlock(&microcode_mutex);
+
+       return ret;
+}
+
+static struct file_operations microcode_fops = {
+       .owner          = THIS_MODULE,
+       .write          = microcode_write,
+       .open           = microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+       .minor          = MICROCODE_MINOR,
+       .name           = "microcode",
+       .fops           = &microcode_fops,
+};
+
+static int __init microcode_init (void)
+{
+       int error;
+
+       error = misc_register(&microcode_dev);
+       if (error) {
+               printk(KERN_ERR
+                       "microcode: can't misc_register on minor=%d\n",
+                       MICROCODE_MINOR);
+               return error;
+       }
+
+       printk(KERN_INFO 
+               "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@xxxxxxxxxxx>\n");
+       return 0;
+}
+
+static void __exit microcode_exit (void)
+{
+       misc_deregister(&microcode_dev);
+}
+
+module_init(microcode_init)
+module_exit(microcode_exit)
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/mpparse-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/mpparse-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,1185 @@
+/*
+ *     Intel Multiprocessor Specification 1.1 and 1.4
+ *     compliant MP-table parsing routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@xxxxxxxxxx>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@xxxxxxxxxx>
+ *
+ *     Fixes
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *             Alan Cox        :       Added EBDA scanning
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Maciej W. Rozycki:      Bits for default MP configurations
+ *             Paul Diefenbaugh:       Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bitops.h>
+
+#include <asm/smp.h>
+#include <asm/acpi.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+
+#include <mach_apic.h>
+#include <mach_mpparse.h>
+#include <bios_ebda.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+unsigned int __initdata maxcpus = NR_CPUS;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_local [MAX_MP_BUSSES];
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+static int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+unsigned int def_to_bigsmp = 0;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+/* Internal processor count */
+static unsigned int __devinitdata num_processors;
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+       int sum = 0;
+
+       while (len--)
+               sum += *mp++;
+
+       return sum & 0xFF;
+}
+
+/*
+ * Have to match translation table entries to main table entries by counter
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record; 
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
+
+#ifndef CONFIG_XEN
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
+{
+       int ver, apicid;
+       physid_mask_t phys_cpu;
+       
+       if (!(m->mpc_cpuflag & CPU_ENABLED))
+               return;
+
+       apicid = mpc_apic_id(m, translation_table[mpc_record]);
+
+       if (m->mpc_featureflag&(1<<0))
+               Dprintk("    Floating point unit present.\n");
+       if (m->mpc_featureflag&(1<<7))
+               Dprintk("    Machine Exception supported.\n");
+       if (m->mpc_featureflag&(1<<8))
+               Dprintk("    64 bit compare & exchange supported.\n");
+       if (m->mpc_featureflag&(1<<9))
+               Dprintk("    Internal APIC present.\n");
+       if (m->mpc_featureflag&(1<<11))
+               Dprintk("    SEP present.\n");
+       if (m->mpc_featureflag&(1<<12))
+               Dprintk("    MTRR  present.\n");
+       if (m->mpc_featureflag&(1<<13))
+               Dprintk("    PGE  present.\n");
+       if (m->mpc_featureflag&(1<<14))
+               Dprintk("    MCA  present.\n");
+       if (m->mpc_featureflag&(1<<15))
+               Dprintk("    CMOV  present.\n");
+       if (m->mpc_featureflag&(1<<16))
+               Dprintk("    PAT  present.\n");
+       if (m->mpc_featureflag&(1<<17))
+               Dprintk("    PSE  present.\n");
+       if (m->mpc_featureflag&(1<<18))
+               Dprintk("    PSN  present.\n");
+       if (m->mpc_featureflag&(1<<19))
+               Dprintk("    Cache Line Flush Instruction present.\n");
+       /* 20 Reserved */
+       if (m->mpc_featureflag&(1<<21))
+               Dprintk("    Debug Trace and EMON Store present.\n");
+       if (m->mpc_featureflag&(1<<22))
+               Dprintk("    ACPI Thermal Throttle Registers  present.\n");
+       if (m->mpc_featureflag&(1<<23))
+               Dprintk("    MMX  present.\n");
+       if (m->mpc_featureflag&(1<<24))
+               Dprintk("    FXSR  present.\n");
+       if (m->mpc_featureflag&(1<<25))
+               Dprintk("    XMM  present.\n");
+       if (m->mpc_featureflag&(1<<26))
+               Dprintk("    Willamette New Instructions  present.\n");
+       if (m->mpc_featureflag&(1<<27))
+               Dprintk("    Self Snoop  present.\n");
+       if (m->mpc_featureflag&(1<<28))
+               Dprintk("    HT  present.\n");
+       if (m->mpc_featureflag&(1<<29))
+               Dprintk("    Thermal Monitor present.\n");
+       /* 30, 31 Reserved */
+
+
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+               Dprintk("    Bootup CPU\n");
+               boot_cpu_physical_apicid = m->mpc_apicid;
+       }
+
+       ver = m->mpc_apicver;
+
+       /*
+        * Validate version
+        */
+       if (ver == 0x0) {
+               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+                               "fixing up to 0x10. (tell your hw vendor)\n",
+                               m->mpc_apicid);
+               ver = 0x10;
+       }
+       apic_version[m->mpc_apicid] = ver;
+
+       phys_cpu = apicid_to_cpu_present(apicid);
+       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS);
+               return;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus);
+               return;
+       }
+
+       cpu_set(num_processors, cpu_possible_map);
+       num_processors++;
+
+       /*
+        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+        * but we need to work out other dependencies like SMP_SUSPEND etc
+        * before this can be done without some confusion.
+        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+        *       - Ashok Raj <ashok.raj@xxxxxxxxx>
+        */
+       if (num_processors > 8) {
+               switch (boot_cpu_data.x86_vendor) {
+               case X86_VENDOR_INTEL:
+                       if (!APIC_XAPIC(ver)) {
+                               def_to_bigsmp = 0;
+                               break;
+                       }
+                       /* If P4 and above fall through */
+               case X86_VENDOR_AMD:
+                       def_to_bigsmp = 1;
+               }
+       }
+       bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+}
+#else
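+/*
+ * Under Xen the domain's virtual CPUs are supplied by the hypervisor
+ * rather than the MP table, so only the processor count is recorded.
+ */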
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       num_processors++;
+}
+#endif /* CONFIG_XEN */
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+       char str[7];
+
+       memcpy(str, m->mpc_bustype, 6);
+       str[6] = 0;
+
+       mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
+       if (m->mpc_busid >= MAX_MP_BUSSES) {
+               printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+                       " is too large, max. supported is %d\n",
+                       m->mpc_busid, str, MAX_MP_BUSSES - 1);
+               return;
+       }
+
+       if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+       } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+       } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+               mpc_oem_pci_bus(m, translation_table[mpc_record]);
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+               mp_current_pci_id++;
+       } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+       } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
+       } else {
+               printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+       }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+       if (!(m->mpc_flags & MPC_APIC_USABLE))
+               return;
+
+       printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n",
+               m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+                       MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+       }
+       if (!m->mpc_apicaddr) {
+               printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+                       " found in MP table, skipping!\n");
+               return;
+       }
+       mp_ioapics[nr_ioapics] = *m;
+       nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+       mp_irqs [mp_irq_entries] = *m;
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+                       m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+       Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+       /*
+        * Well it seems all SMP boards in existence
+        * use ExtINT/LVT1 == LINT0 and
+        * NMI/LVT2 == LINT1 - the following check
+        * will show us if this assumption is false.
+        * Until then we do not have to add baggage.
+        */
+       if ((m->mpc_irqtype == mp_ExtINT) &&
+               (m->mpc_destapiclint != 0))
+                       BUG();
+       if ((m->mpc_irqtype == mp_NMI) &&
+               (m->mpc_destapiclint != 1))
+                       BUG();
+}
+
+#ifdef CONFIG_X86_NUMAQ
+static void __init MP_translation_info (struct mpc_config_translation *m)
+{
+       printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+
+       if (mpc_record >= MAX_MPC_ENTRY) 
+               printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+       else
+               translation_table[mpc_record] = m; /* stash this for later */
+       if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+               node_set_online(m->trans_quad);
+}
+
+/*
+ * Read/parse the MPC oem tables
+ */
+
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+       unsigned short oemsize)
+{
+       int count = sizeof (*oemtable); /* the header size */
+       unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+       
+       mpc_record = 0;
+       printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+       if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+       {
+               printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+                       oemtable->oem_signature[0],
+                       oemtable->oem_signature[1],
+                       oemtable->oem_signature[2],
+                       oemtable->oem_signature[3]);
+               return;
+       }
+       if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+       {
+               printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+               return;
+       }
+       while (count < oemtable->oem_length) {
+               switch (*oemptr) {
+                       case MP_TRANSLATION:
+                       {
+                               struct mpc_config_translation *m=
+                                       (struct mpc_config_translation *)oemptr;
+                               MP_translation_info(m);
+                               oemptr += sizeof(*m);
+                               count += sizeof(*m);
+                               ++mpc_record;
+                               break;
+                       }
+                       default:
+                       {
+                               printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+                               return;
+                       }
+               }
+       }
+}
+
+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+               char *productid)
+{
+       if (strncmp(oem, "IBM NUMA", 8))
+               printk("Warning!  May not be a NUMA-Q system!\n");
+       if (mpc->mpc_oemptr)
+               smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+                               mpc->mpc_oemsize);
+}
+#endif /* CONFIG_X86_NUMAQ */
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+       char str[16];
+       char oem[10];
+       int count=sizeof(*mpc);
+       unsigned char *mpt=((unsigned char *)mpc)+count;
+
+       if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+               printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+                       *(u32 *)mpc->mpc_signature);
+               return 0;
+       }
+       if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+               printk(KERN_ERR "SMP mptable: checksum error!\n");
+               return 0;
+       }
+       if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+               printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+                       mpc->mpc_spec);
+               return 0;
+       }
+       if (!mpc->mpc_lapic) {
+               printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+               return 0;
+       }
+       memcpy(oem,mpc->mpc_oem,8);
+       oem[8]=0;
+       printk(KERN_INFO "OEM ID: %s ",oem);
+
+       memcpy(str,mpc->mpc_productid,12);
+       str[12]=0;
+       printk("Product ID: %s ",str);
+
+       mps_oem_check(mpc, oem, str);
+
+       printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
+
+       /* 
+        * Save the local APIC address (it might be non-default) -- but only
+        * if we're not using ACPI.
+        */
+       if (!acpi_lapic)
+               mp_lapic_addr = mpc->mpc_lapic;
+
+       /*
+        *      Now process the configuration blocks.
+        */
+       mpc_record = 0;
+       while (count < mpc->mpc_length) {
+               switch(*mpt) {
+                       case MP_PROCESSOR:
+                       {
+                               struct mpc_config_processor *m=
+                                       (struct mpc_config_processor *)mpt;
+                               /* ACPI may have already provided this data */
+                               if (!acpi_lapic)
+                                       MP_processor_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_BUS:
+                       {
+                               struct mpc_config_bus *m=
+                                       (struct mpc_config_bus *)mpt;
+                               MP_bus_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_IOAPIC:
+                       {
+                               struct mpc_config_ioapic *m=
+                                       (struct mpc_config_ioapic *)mpt;
+                               MP_ioapic_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_INTSRC:
+                       {
+                               struct mpc_config_intsrc *m=
+                                       (struct mpc_config_intsrc *)mpt;
+
+                               MP_intsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_LINTSRC:
+                       {
+                               struct mpc_config_lintsrc *m=
+                                       (struct mpc_config_lintsrc *)mpt;
+                               MP_lintsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       default:
+                       {
+                               count = mpc->mpc_length;
+                               break;
+                       }
+               }
+               ++mpc_record;
+       }
+       clustered_apic_check();
+       if (!num_processors)
+               printk(KERN_ERR "SMP mptable: no processors registered!\n");
+       return num_processors;
+}
+
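+/*
+ * The ELCR (Edge/Level Control Register) at I/O ports 0x4d0/0x4d1
+ * holds one bit per ISA IRQ: 1 = level triggered, 0 = edge triggered.
+ */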
+static int __init ELCR_trigger(unsigned int irq)
+{
+       unsigned int port;
+
+       port = 0x4d0 + (irq >> 3);
+       return (inb(port) >> (irq & 7)) & 1;
+}
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+       struct mpc_config_intsrc intsrc;
+       int i;
+       int ELCR_fallback = 0;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                 /* conforming */
+       intsrc.mpc_srcbus = 0;
+       intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+       intsrc.mpc_irqtype = mp_INT;
+
+       /*
+        *  If true, we have an ISA/PCI system with no IRQ entries
+        *  in the MP table. To prevent the PCI interrupts from being set up
+        *  incorrectly, we try to use the ELCR. The sanity check to see if
+        *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+        *  never be level sensitive, so we simply see if the ELCR agrees.
+        *  If it does, we assume it's valid.
+        */
+       if (mpc_default_type == 5) {
+               printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+               if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+                       printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+               else {
+                       printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+                       ELCR_fallback = 1;
+               }
+       }
+
+       for (i = 0; i < 16; i++) {
+               switch (mpc_default_type) {
+               case 2:
+                       if (i == 0 || i == 13)
+                               continue;       /* IRQ0 & IRQ13 not connected */
+                       /* fall through */
+               default:
+                       if (i == 2)
+                               continue;       /* IRQ2 is never connected */
+               }
+
+               if (ELCR_fallback) {
+                       /*
+                        *  If the ELCR indicates a level-sensitive interrupt, we
+                        *  copy that information over to the MP table in the
+                        *  irqflag field (level sensitive, active high polarity).
+                        */
+                       if (ELCR_trigger(i))
+                               intsrc.mpc_irqflag = 13;
+                       else
+                               intsrc.mpc_irqflag = 0;
+               }
+
+               intsrc.mpc_srcbusirq = i;
+               intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+               MP_intsrc_info(&intsrc);
+       }
+
+       intsrc.mpc_irqtype = mp_ExtINT;
+       intsrc.mpc_srcbusirq = 0;
+       intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+       MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+       struct mpc_config_processor processor;
+       struct mpc_config_bus bus;
+       struct mpc_config_ioapic ioapic;
+       struct mpc_config_lintsrc lintsrc;
+       int linttypes[2] = { mp_ExtINT, mp_NMI };
+       int i;
+
+       /*
+        * local APIC has default address
+        */
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /*
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+       /* Either an integrated APIC or a discrete 82489DX. */
+       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       processor.mpc_cpuflag = CPU_ENABLED;
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+                                  (boot_cpu_data.x86_model << 4) |
+                                  boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+               processor.mpc_apicid = i;
+               MP_processor_info(&processor);
+       }
+
+       bus.mpc_type = MP_BUS;
+       bus.mpc_busid = 0;
+       switch (mpc_default_type) {
+               default:
+                       printk("???\n");
+                       printk(KERN_ERR "Unknown standard configuration %d\n",
+                               mpc_default_type);
+                       /* fall through */
+               case 1:
+               case 5:
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       break;
+               case 2:
+               case 6:
+               case 3:
+                       memcpy(bus.mpc_bustype, "EISA  ", 6);
+                       break;
+               case 4:
+               case 7:
+                       memcpy(bus.mpc_bustype, "MCA   ", 6);
+       }
+       MP_bus_info(&bus);
+       if (mpc_default_type > 4) {
+               bus.mpc_busid = 1;
+               memcpy(bus.mpc_bustype, "PCI   ", 6);
+               MP_bus_info(&bus);
+       }
+
+       ioapic.mpc_type = MP_IOAPIC;
+       ioapic.mpc_apicid = 2;
+       ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       ioapic.mpc_flags = MPC_APIC_USABLE;
+       ioapic.mpc_apicaddr = 0xFEC00000;
+       MP_ioapic_info(&ioapic);
+
+       /*
+        * We set up most of the low 16 IO-APIC pins according to MPS rules.
+        */
+       construct_default_ioirq_mptable(mpc_default_type);
+
+       lintsrc.mpc_type = MP_LINTSRC;
+       lintsrc.mpc_irqflag = 0;                /* conforming */
+       lintsrc.mpc_srcbusid = 0;
+       lintsrc.mpc_srcbusirq = 0;
+       lintsrc.mpc_destapic = MP_APIC_ALL;
+       for (i = 0; i < 2; i++) {
+               lintsrc.mpc_irqtype = linttypes[i];
+               lintsrc.mpc_destapiclint = i;
+               MP_lintsrc_info(&lintsrc);
+       }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+       struct intel_mp_floating *mpf = mpf_found;
+
+       /*
+        * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+        * processors, where MPS only supports physical.
+        */
+       if (acpi_lapic && acpi_ioapic) {
+               printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+               return;
+       }
+       else if (acpi_lapic)
+               printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+       printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+       if (mpf->mpf_feature2 & (1<<7)) {
+               printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+               pic_mode = 1;
+       } else {
+               printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+               pic_mode = 0;
+       }
+
+       /*
+        * Now see if we need to read further.
+        */
+       if (mpf->mpf_feature1 != 0) {
+
+               printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+               construct_default_ISA_mptable(mpf->mpf_feature1);
+
+       } else if (mpf->mpf_physptr) {
+
+               /*
+                * Read the physical hardware table.  Anything here will
+                * override the defaults.
+                */
+               if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+                       smp_found_config = 0;
+                       printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+                       printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+                       return;
+               }
+               /*
+                * If there are no explicit MP IRQ entries, then we are
+                * broken.  We set up most of the low 16 IO-APIC pins to
+                * ISA defaults and hope it will work.
+                */
+               if (!mp_irq_entries) {
+                       struct mpc_config_bus bus;
+
+                       printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+                       bus.mpc_type = MP_BUS;
+                       bus.mpc_busid = 0;
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       MP_bus_info(&bus);
+
+                       construct_default_ioirq_mptable(0);
+               }
+
+       } else
+               BUG();
+
+       printk(KERN_INFO "Processors: %d\n", num_processors);
+       /*
+        * Only use the first configuration found.
+        */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+       unsigned long *bp = isa_bus_to_virt(base);
+       struct intel_mp_floating *mpf;
+
+       Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+       if (sizeof(*mpf) != 16)
+               printk("Error: MPF size\n");
+
+       while (length > 0) {
+               mpf = (struct intel_mp_floating *)bp;
+               if ((*bp == SMP_MAGIC_IDENT) &&
+                       (mpf->mpf_length == 1) &&
+                       !mpf_checksum((unsigned char *)bp, 16) &&
+                       ((mpf->mpf_specification == 1)
+                               || (mpf->mpf_specification == 4)) ) {
+
+                       smp_found_config = 1;
+#ifndef CONFIG_XEN
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                                               virt_to_phys(mpf));
+                       reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+                       if (mpf->mpf_physptr) {
+                               /*
+                                * We cannot access the MPC table to compute
+                                * its size yet, as only a few megabytes from
+                                * the bottom are mapped now.
+                                * The PC-9800's MPC table sits at the very end
+                                * of physical memory, so simply reserving
+                                * PAGE_SIZE from mpf->mpf_physptr would BUG()
+                                * in reserve_bootmem.
+                                */
+                               unsigned long size = PAGE_SIZE;
+                               unsigned long end = max_low_pfn * PAGE_SIZE;
+                               if (mpf->mpf_physptr + size > end)
+                                       size = end - mpf->mpf_physptr;
+                               reserve_bootmem(mpf->mpf_physptr, size);
+                       }
+#else
+                       printk(KERN_INFO "found SMP MP-table at %08lx\n",
+                               ((unsigned long)bp - (unsigned long)isa_bus_to_virt(base)) + base);
+#endif
+
+                       mpf_found = mpf;
+                       return 1;
+               }
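+               /* bp is an unsigned long pointer, so bp += 4 advances one
+                * 16-byte floating-pointer slot per probe. */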
+               bp += 4;
+               length -= 16;
+       }
+       return 0;
+}
+
+void __init find_smp_config (void)
+{
+#ifndef CONFIG_XEN
+       unsigned int address;
+#endif
+
+       /*
+        * FIXME: Linux assumes you have 640K of base ram..
+        * this continues the error...
+        *
+        * 1) Scan the bottom 1K for a signature
+        * 2) Scan the top 1K of base RAM
+        * 3) Scan the 64K of bios
+        */
+       if (smp_scan_config(0x0,0x400) ||
+               smp_scan_config(639*0x400,0x400) ||
+                       smp_scan_config(0xF0000,0x10000))
+               return;
+       /*
+        * If it is an SMP machine we should know now, unless the
+        * configuration is in an EISA/MCA bus machine with an
+        * extended bios data area.
+        *
+        * there is a real-mode segmented pointer pointing to the
+        * 4K EBDA area at 0x40E, calculate and scan it here.
+        *
+        * NOTE! There are Linux loaders that will corrupt the EBDA
+        * area, and as such this kind of SMP config may be less
+        * trustworthy, simply because the SMP table may have been
+        * stomped on during early boot. These loaders are buggy and
+        * should be fixed.
+        *
+        * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+        */
+
+#ifndef CONFIG_XEN
+       address = get_bios_ebda();
+       if (address)
+               smp_scan_config(address, 0x400);
+#endif
+}
+
+int es7000_plat;
+
+/* --------------------------------------------------------------------------
+                            ACPI-based MP Configuration
+   -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI
+
+void __init mp_register_lapic_address (
+       u64                     address)
+{
+#ifndef CONFIG_XEN
+       mp_lapic_addr = (unsigned long) address;
+
+       set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+
+       if (boot_cpu_physical_apicid == -1U)
+               boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+       Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+#endif
+}
+
+
+void __devinit mp_register_lapic (
+       u8                      id, 
+       u8                      enabled)
+{
+       struct mpc_config_processor processor;
+       int                     boot_cpu = 0;
+       
+       if (MAX_APICS - id <= 0) {
+               printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+                       id, MAX_APICS);
+               return;
+       }
+
+       if (id == boot_cpu_physical_apicid)
+               boot_cpu = 1;
+
+#ifndef CONFIG_XEN
+       processor.mpc_type = MP_PROCESSOR;
+       processor.mpc_apicid = id;
+       processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+       processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+       processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+               (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+#endif
+
+       MP_processor_info(&processor);
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define MP_ISA_BUS             0
+#define MP_MAX_IOAPIC_PIN      127
+
+static struct mp_ioapic_routing {
+       int                     apic_id;
+       int                     gsi_base;
+       int                     gsi_end;
+       u32                     pin_programmed[4];
+} mp_ioapic_routing[MAX_IO_APICS];
+
+
+static int mp_find_ioapic (
+       int                     gsi)
+{
+       int                     i = 0;
+
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+               if ((gsi >= mp_ioapic_routing[i].gsi_base)
+                       && (gsi <= mp_ioapic_routing[i].gsi_end))
+                       return i;
+       }
+
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+
+       return -1;
+}
+       
+
+void __init mp_register_ioapic (
+       u8                      id, 
+       u32                     address,
+       u32                     gsi_base)
+{
+       int                     idx = 0;
+       int                     tmpid;
+
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+                       "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+       }
+       if (!address) {
+               printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+                       " found in MADT table, skipping!\n");
+               return;
+       }
+
+       idx = nr_ioapics++;
+
+       mp_ioapics[idx].mpc_type = MP_IOAPIC;
+       mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+       mp_ioapics[idx].mpc_apicaddr = address;
+
+#ifndef CONFIG_XEN
+       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               tmpid = io_apic_get_unique_id(idx, id);
+       else
+               tmpid = id;
+       if (tmpid == -1) {
+               nr_ioapics--;
+               return;
+       }
+       mp_ioapics[idx].mpc_apicid = tmpid;
+       mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+       
+       /* 
+        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+        */
+       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+       mp_ioapic_routing[idx].gsi_base = gsi_base;
+       mp_ioapic_routing[idx].gsi_end = gsi_base + 
+               io_apic_get_redir_entries(idx);
+
+       printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+               "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+               mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+               mp_ioapic_routing[idx].gsi_base,
+               mp_ioapic_routing[idx].gsi_end);
+
+       return;
+}
+
+
+void __init mp_override_legacy_irq (
+       u8                      bus_irq,
+       u8                      polarity, 
+       u8                      trigger, 
+       u32                     gsi)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     ioapic = -1;
+       int                     pin = -1;
+
+       /* 
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0)
+               return;
+       pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       /*
+        * TBD: This check is for faulty timer entries, where the override
+        *      erroneously sets the trigger to level, resulting in a HUGE 
+        *      increase of timer interrupts!
+        */
+       if ((bus_irq == 0) && (trigger == 3))
+               trigger = 1;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqtype = mp_INT;
+       intsrc.mpc_irqflag = (trigger << 2) | polarity;
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
+       intsrc.mpc_dstirq = pin;                                    /* INTIN# */
+
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+               intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+               (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+               intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+
+       mp_irqs[mp_irq_entries] = intsrc;
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!\n");
+
+       return;
+}
+
+void __init mp_config_acpi_legacy_irqs (void)
+{
+       struct mpc_config_intsrc intsrc;
+       int                     i = 0;
+       int                     ioapic = -1;
+
+       /* 
+        * Fabricate the legacy ISA bus (bus #31).
+        */
+       mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+       /*
+        * Older generations of ES7000 have no legacy identity mappings
+        */
+       if (es7000_plat == 1)
+               return;
+
+       /* 
+        * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+        */
+       ioapic = mp_find_ioapic(0);
+       if (ioapic < 0)
+               return;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                                 /* Conforming */
+       intsrc.mpc_srcbus = MP_ISA_BUS;
+       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+
+       /* 
+        * Use the default configuration for IRQs 0-15, unless
+        * overridden by (MADT) interrupt source override entries.
+        */
+       for (i = 0; i < 16; i++) {
+               int idx;
+
+               for (idx = 0; idx < mp_irq_entries; idx++) {
+                       struct mpc_config_intsrc *irq = mp_irqs + idx;
+
+                       /* Do we already have a mapping for this ISA IRQ? */
+                       if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+                               break;
+
+                       /* Do we already have a mapping for this IOAPIC pin */
+                       if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+                               (irq->mpc_dstirq == i))
+                               break;
+               }
+
+               if (idx != mp_irq_entries) {
+                       printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+                       continue;                       /* IRQ already used */
+               }
+
+               intsrc.mpc_irqtype = mp_INT;
+               intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
+               intsrc.mpc_dstirq = i;
+
+               Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+                       "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+                       (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+                       intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+                       intsrc.mpc_dstirq);
+
+               mp_irqs[mp_irq_entries] = intsrc;
+               if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                       panic("Max # of irq sources exceeded!\n");
+       }
+}
+
+#define MAX_GSI_NUM    4096
+
+int mp_register_gsi (u32 gsi, int triggering, int polarity)
+{
+       int                     ioapic = -1;
+       int                     ioapic_pin = 0;
+       int                     idx, bit = 0;
+       static int              pci_irq = 16;
+       /*
+        * Mapping between Global System Interrupts, which
+        * represent all possible interrupts, and IRQs
+        * assigned to actual devices.
+        */
+       static int              gsi_to_irq[MAX_GSI_NUM];
+
+       /* Don't set up the ACPI SCI because it's already set up */
+       if (acpi_fadt.sci_int == gsi)
+               return gsi;
+
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0) {
+               printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+               return gsi;
+       }
+
+       ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       if (ioapic_renumber_irq)
+               gsi = ioapic_renumber_irq(ioapic, gsi);
+
+       /* 
+        * Avoid pin reprogramming.  PRTs typically include entries  
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       bit = ioapic_pin % 32;
+       idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+       if (idx > 3) {
+               printk(KERN_ERR "Invalid reference to IOAPIC pin "
+                       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+                       ioapic_pin);
+               return gsi;
+       }
+       if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+               return gsi_to_irq[gsi];
+       }
+
+       mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+       if (triggering == ACPI_LEVEL_SENSITIVE) {
+               /*
+                * For PCI devices assign IRQs in order, avoiding gaps
+                * due to unused I/O APIC pins.
+                */
+               int irq = gsi;
+               if (gsi < MAX_GSI_NUM) {
+                       /*
+                        * Retain the VIA chipset work-around (gsi > 15), but
+                        * avoid a problem where the 8254 timer (IRQ0) is setup
+                        * via an override (so it's not on pin 0 of the ioapic),
+                        * and at the same time, the pin 0 interrupt is a PCI
+                        * type.  The gsi > 15 test could cause these two pins
+                        * to be shared as IRQ0, and they are not shareable.
+                        * So test for this condition, and if necessary, avoid
+                        * the pin collision.
+                        */
+                       if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+                               gsi = pci_irq++;
+                       /*
+                        * Don't assign IRQ used by ACPI SCI
+                        */
+                       if (gsi == acpi_fadt.sci_int)
+                               gsi = pci_irq++;
+                       gsi_to_irq[irq] = gsi;
+               } else {
+                       printk(KERN_ERR "GSI %u is too high\n", gsi);
+                       return gsi;
+               }
+       }
+
+       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+                   triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                   polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+       return gsi;
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+#endif /* CONFIG_ACPI */
diff -r 42f6970e18c9 -r a533be77c572 arch/i386/kernel/pci-dma-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/i386/kernel/pci-dma-xen.c    Mon Jun 04 10:05:28 2007 +0100
@@ -0,0 +1,378 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/io.h>
+#include <xen/balloon.h>
+#include <xen/gnttab.h>
+#include <asm/swiotlb.h>
+#include <asm/tlbflush.h>
+#include <asm-i386/mach-xen/asm/swiotlb.h>
+#include <asm/bug.h>
+
+#ifdef __x86_64__
+#include <asm/proto.h>
+
+int iommu_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_merge);
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+/* This tells the BIO block layer to assume merging. Default to off
+   because we cannot guarantee merging later. */
+int iommu_bio_merge __read_mostly = 0;
+EXPORT_SYMBOL(iommu_bio_merge);
+
+int force_iommu __read_mostly= 0;
+
+__init int iommu_setup(char *p)
+{
+    return 1;
+}
+
+void __init pci_iommu_alloc(void)
+{
+#ifdef CONFIG_SWIOTLB
+       pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+       no_iommu_init();
+       return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+#endif
+
+struct dma_coherent_mem {
+       void            *virt_base;
+       u32             device_base;
+       int             size;
+       int             flags;
+       unsigned long   *bitmap;
+};
+
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
+
+int
+dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+          enum dma_data_direction direction)
+{
+       int i, rc;
+
+       if (direction == DMA_NONE)
+               BUG();
+       WARN_ON(nents == 0 || sg[0].length == 0);
+
+       if (swiotlb) {
+               rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+       } else {
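+               /*
+                * No swiotlb: the bus address is the machine address of
+                * the page plus the in-page offset; gnttab_dma_map_page()
+                * presumably also resolves granted foreign pages to their
+                * machine frames.
+                */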
+               for (i = 0; i < nents; i++ ) {
+                       sg[i].dma_address =
+                               gnttab_dma_map_page(sg[i].page) + sg[i].offset;
+                       sg[i].dma_length  = sg[i].length;
+                       BUG_ON(!sg[i].page);
+                       IOMMU_BUG_ON(address_needs_mapping(
+                               hwdev, sg[i].dma_address));
+               }
+               rc = nents;
+       }
+
+       flush_write_buffers();
+       return rc;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+void
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+            enum dma_data_direction direction)
+{
+       int i;
+
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_sg(hwdev, sg, nents, direction);
+       else {
+               for (i = 0; i < nents; i++ )
+                       gnttab_dma_unmap_page(sg[i].dma_address);
+       }
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+#ifdef CONFIG_HIGHMEM
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+            size_t size, enum dma_data_direction direction)
+{
+       dma_addr_t dma_addr;
+
+       BUG_ON(direction == DMA_NONE);
+
+       if (swiotlb) {
+               dma_addr = swiotlb_map_page(
+                       dev, page, offset, size, direction);
+       } else {
+               dma_addr = gnttab_dma_map_page(page) + offset;
+               IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
+       }
+
+       return dma_addr;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(direction == DMA_NONE);
+       if (swiotlb)
+               swiotlb_unmap_page(dev, dma_address, size, direction);
+       else
+               gnttab_dma_unmap_page(dma_address);
+}
+EXPORT_SYMBOL(dma_unmap_page);
+#endif /* CONFIG_HIGHMEM */
+
+int
+dma_mapping_error(dma_addr_t dma_addr)
+{
+       if (swiotlb)
+               return swiotlb_dma_mapping_error(dma_addr);
+       return 0;
+}
+EXPORT_SYMBOL(dma_mapping_error);
+
+int
+dma_supported(struct device *dev, u64 mask)
+{
+       if (swiotlb)
+               return swiotlb_dma_supported(dev, mask);
+       /*
+        * By default we'll BUG when an infeasible DMA is requested, and
+        * request swiotlb=force (see IOMMU_BUG_ON).
+        */
+       return 1;
+}
+EXPORT_SYMBOL(dma_supported);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                          dma_addr_t *dma_handle, gfp_t gfp)
+{
+       void *ret;
+       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+       unsigned int order = get_order(size);
+       unsigned long vstart;
+       u64 mask;
+
+       /* ignore region specifiers */
+       gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+       if (mem) {
+               int page = bitmap_find_free_region(mem->bitmap, mem->size,
+                                                    order);
+               if (page >= 0) {
+                       *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+                       ret = mem->virt_base + (page << PAGE_SHIFT);
+                       memset(ret, 0, size);
+                       return ret;
+               }
+               if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+                       return NULL;
+       }
+
+       if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+               gfp |= GFP_DMA;
+
+       vstart = __get_free_pages(gfp, order);
+       ret = (void *)vstart;
+
+       if (dev != NULL && dev->coherent_dma_mask)
+               mask = dev->coherent_dma_mask;
+       else
+               mask = 0xffffffff;
+
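+       /*
+        * Exchange the freshly allocated pages for a machine-contiguous
+        * extent addressable below the mask; guest-physically contiguous
+        * pages need not be machine-contiguous under Xen.
+        */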
+       if (ret != NULL) {
+               if (xen_create_contiguous_region(vstart, order,
+                                                fls64(mask)) != 0) {
+                       free_pages(vstart, order);
+                       return NULL;
+               }
+               memset(ret, 0, size);
+               *dma_handle = virt_to_bus(ret);
+       }
+       return ret;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size,
+                        void *vaddr, dma_addr_t dma_handle)
+{
+       /*
+        * The archived message truncates at this point; the body below
+        * is a reconstruction that mirrors dma_alloc_coherent() above,
+        * not the verbatim patch text.
+        */
+       struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+       int order = get_order(size);
+
+       if (mem && vaddr >= mem->virt_base &&
+           vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+
+               bitmap_release_region(mem->bitmap, page, order);
+       } else {
+               xen_destroy_contiguous_region((unsigned long)vaddr, order);
+               free_pages((unsigned long)vaddr, order);
+       }
+}
+EXPORT_SYMBOL(dma_free_coherent);

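For reference, a minimal sketch of how a driver would exercise the entry
points this file adds. This is not part of the patch: the driver function,
device pointer and buffer are hypothetical, 'buf' is assumed to lie within
a single page, and error handling is abbreviated.

        /* Hypothetical 2.6.18-era driver snippet, not from the patch. */
        static int mydrv_dma_example(struct device *mydev, void *buf,
                                     size_t len)
        {
                struct scatterlist sg[1];
                dma_addr_t ring_bus;
                void *ring;
                int n;

                /* Coherent ring buffer: zeroed and machine-contiguous. */
                ring = dma_alloc_coherent(mydev, PAGE_SIZE, &ring_bus,
                                          GFP_KERNEL);
                if (ring == NULL)
                        return -ENOMEM;

                /* Streaming map of one buffer for a device read. */
                sg[0].page   = virt_to_page(buf);
                sg[0].offset = offset_in_page(buf);
                sg[0].length = len;
                n = dma_map_sg(mydev, sg, 1, DMA_TO_DEVICE);

                /* ... hand sg[0].dma_address to the device here ... */

                dma_unmap_sg(mydev, sg, 1, DMA_TO_DEVICE);
                dma_free_coherent(mydev, PAGE_SIZE, ring, ring_bus);
                return (n == 1) ? 0 : -EIO;
        }
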
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
