WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Final changes for linux 2.6.13 rebasing and some directory reorgs

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Final changes for linux 2.6.13 rebasing and some directory reorgs
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 15 Sep 2005 07:48:16 +0000
Delivery-date: Thu, 15 Sep 2005 07:52:15 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 3ca4ca7a9cc234d33c3981852fc37c73fcd72218
# Parent  d34925e4144bcdadb020ee2deef766a994bf7b04
Final changes for linux 2.6.13 rebasing and some directory reorgs

diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/Makefile    Thu Sep  1 18:46:28 2005
@@ -1,9 +1,6 @@
 include $(BASEDIR)/Rules.mk
 
-VPATH = linux linux-xen linux/lib
-#VPATH = linux-xen linux/lib
-
-# libs-y       += arch/ia64/lib/lib.a
+VPATH = xen vmx linux linux-xen
 
 OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
        xenmisc.o acpi.o hypercall.o \
@@ -15,8 +12,6 @@
        irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \
        grant_table.o sn_console.o
 
-#OBJS += idiv64.o idiv32.o                     \
-
 # TMP holder to contain *.0 moved out of CONFIG_VTI
 OBJS += vmx_init.o
 
@@ -27,7 +22,7 @@
        pal_emul.o vmx_irq_ia64.o
 endif
 
-# files from xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
+# lib files from xen/arch/ia64/linux/ (linux/arch/ia64/lib)
 OBJS +=        bitop.o clear_page.o flush.o copy_page_mck.o                    
\
        memset.o strlen.o memcpy_mck.o                                  \
        __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o                   \
@@ -86,9 +81,9 @@
         touch $@
 
 # I'm sure a Makefile wizard would know a better way to do this
-xen.lds.s: xen.lds.S
+xen.lds.s: xen/xen.lds.S
        $(CC) -E $(CPPFLAGS) -P -DXEN -D__ASSEMBLY__ \
-               -o xen.lds.s xen.lds.S
+               -o xen.lds.s xen/xen.lds.S
 
 # variants of divide/modulo
 # see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
@@ -111,7 +106,7 @@
 
 
 clean:
-       rm -f *.o *~ core  xen.lds.s 
$(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
+       rm -f *.o *~ core  xen.lds.s 
$(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s map.out
        rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
        rm -f linux/lib/*.o
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/README.origin
--- a/xen/arch/ia64/linux/README.origin Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/README.origin Thu Sep  1 18:46:28 2005
@@ -13,12 +13,13 @@
 machvec.c              -> linux/arch/ia64/kernel/machvec.c
 patch.c                        -> linux/arch/ia64/kernel/patch.c
 pcdp.h                 -> drivers/firmware/pcdp.h
-lib/bitop.c            -> linux/arch/ia64/lib/bitop.c
-lib/clear_page.S       -> linux/arch/ia64/lib/clear_page.S
-lib/copy_page_mck.S    -> linux/arch/ia64/lib/copy_page_mck.S
-lib/flush.S            -> linux/arch/ia64/lib/flush.S
-lib/idiv32.S           -> linux/arch/ia64/lib/idiv32.S
-lib/idiv64.S           -> linux/arch/ia64/lib/idiv64.S
-lib/memcpy_mck.S       -> linux/arch/ia64/lib/memcpy_mck.S
-lib/memset.S           -> linux/arch/ia64/lib/memset.S
-lib/strlen.S           -> linux/arch/ia64/lib/strlen.S
+
+bitop.c                        -> linux/arch/ia64/lib/bitop.c
+clear_page.S           -> linux/arch/ia64/lib/clear_page.S
+copy_page_mck.S                -> linux/arch/ia64/lib/copy_page_mck.S
+flush.S                        -> linux/arch/ia64/lib/flush.S
+idiv32.S               -> linux/arch/ia64/lib/idiv32.S
+idiv64.S               -> linux/arch/ia64/lib/idiv64.S
+memcpy_mck.S           -> linux/arch/ia64/lib/memcpy_mck.S
+memset.S               -> linux/arch/ia64/lib/memset.S
+strlen.S               -> linux/arch/ia64/lib/strlen.S
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h     Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/config.h     Thu Sep  1 18:46:28 2005
@@ -203,6 +203,7 @@
 #endif // CONFIG_VTI
 
 #define __attribute_used__     __attribute__ ((unused))
+#define __nocast
 
 // see include/asm-x86/atomic.h (different from standard linux)
 #define _atomic_set(v,i) (((v).counter) = (i))
@@ -262,9 +263,6 @@
 // these declarations got moved at some point, find a better place for them
 extern int ht_per_core;
 
-// needed for include/xen/smp.h
-#define __smp_processor_id()   0
-
 // xen/include/asm/config.h
 /******************************************************************************
  * config.h
@@ -297,6 +295,10 @@
 #endif /* __ASSEMBLY__ */
 #endif /* __XEN_IA64_CONFIG_H__ */
 
+// needed for include/xen/smp.h
+#define __smp_processor_id()   0
+
+
 // FOLLOWING ADDED FOR XEN POST-NGIO and/or LINUX 2.6.7
 
 // following derived from linux/include/linux/compiler-gcc3.h
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/bug.h
--- a/xen/include/asm-ia64/linux/asm-generic/bug.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/bug.h      Thu Sep  1 18:46:28 2005
@@ -4,17 +4,11 @@
 #include <linux/compiler.h>
 #include <linux/config.h>
 
+#ifdef CONFIG_BUG
 #ifndef HAVE_ARCH_BUG
 #define BUG() do { \
        printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
        panic("BUG!"); \
-} while (0)
-#endif
-
-#ifndef HAVE_ARCH_PAGE_BUG
-#define PAGE_BUG(page) do { \
-       printk("page BUG for page at %p\n", page); \
-       BUG(); \
 } while (0)
 #endif
 
@@ -31,4 +25,18 @@
 } while (0)
 #endif
 
+#else /* !CONFIG_BUG */
+#ifndef HAVE_ARCH_BUG
+#define BUG()
 #endif
+
+#ifndef HAVE_ARCH_BUG_ON
+#define BUG_ON(condition) do { if (condition) ; } while(0)
+#endif
+
+#ifndef HAVE_ARCH_WARN_ON
+#define WARN_ON(condition) do { if (condition) ; } while(0)
+#endif
+#endif
+
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/errno.h
--- a/xen/include/asm-ia64/linux/asm-generic/errno.h    Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/errno.h    Thu Sep  1 18:46:28 2005
@@ -102,4 +102,8 @@
 #define        EKEYREVOKED     128     /* Key has been revoked */
 #define        EKEYREJECTED    129     /* Key was rejected by service */
 
+/* for robust mutexes */
+#define        EOWNERDEAD      130     /* Owner died */
+#define        ENOTRECOVERABLE 131     /* State not recoverable */
+
 #endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/iomap.h
--- a/xen/include/asm-ia64/linux/asm-generic/iomap.h    Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/iomap.h    Thu Sep  1 18:46:28 2005
@@ -2,6 +2,7 @@
 #define __GENERIC_IO_H
 
 #include <linux/linkage.h>
+#include <asm/byteorder.h>
 
 /*
  * These are the "generic" interfaces for doing new-style
@@ -26,11 +27,15 @@
  */
 extern unsigned int fastcall ioread8(void __iomem *);
 extern unsigned int fastcall ioread16(void __iomem *);
+extern unsigned int fastcall ioread16be(void __iomem *);
 extern unsigned int fastcall ioread32(void __iomem *);
+extern unsigned int fastcall ioread32be(void __iomem *);
 
 extern void fastcall iowrite8(u8, void __iomem *);
 extern void fastcall iowrite16(u16, void __iomem *);
+extern void fastcall iowrite16be(u16, void __iomem *);
 extern void fastcall iowrite32(u32, void __iomem *);
+extern void fastcall iowrite32be(u32, void __iomem *);
 
 /*
  * "string" versions of the above. Note that they
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/pci.h
--- a/xen/include/asm-ia64/linux/asm-generic/pci.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pci.h      Thu Sep  1 18:46:28 2005
@@ -22,6 +22,14 @@
        region->end = res->end;
 }
 
+static inline void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+                       struct pci_bus_region *region)
+{
+       res->start = region->start;
+       res->end = region->end;
+}
+
 #define pcibios_scan_all_fns(a, b)     0
 
 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h
--- a/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h    Thu Sep  1 
17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h    Thu Sep  1 
18:46:28 2005
@@ -2,6 +2,8 @@
 #define _PGTABLE_NOPUD_H
 
 #ifndef __ASSEMBLY__
+
+#define __PAGETABLE_PUD_FOLDED
 
 /*
  * Having the pud type consist of a pgd gets the size right, and allows
@@ -52,5 +54,8 @@
 #define pud_free(x)                            do { } while (0)
 #define __pud_free_tlb(tlb, x)                 do { } while (0)
 
+#undef  pud_addr_end
+#define pud_addr_end(addr, end)                        (end)
+
 #endif /* __ASSEMBLY__ */
 #endif /* _PGTABLE_NOPUD_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/pgtable.h
--- a/xen/include/asm-ia64/linux/asm-generic/pgtable.h  Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pgtable.h  Thu Sep  1 18:46:28 2005
@@ -16,7 +16,7 @@
 #ifndef __HAVE_ARCH_SET_PTE_ATOMIC
 #define ptep_establish(__vma, __address, __ptep, __entry)              \
 do {                                                                   \
-       set_pte(__ptep, __entry);                                       \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);       \
        flush_tlb_page(__vma, __address);                               \
 } while (0)
 #else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,26 +37,30 @@
  */
 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
 do {                                                                     \
-       set_pte(__ptep, __entry);                                         \
+       set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);         \
        flush_tlb_page(__vma, __address);                                 \
 } while (0)
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_young(pte))
-               return 0;
-       set_pte(ptep, pte_mkold(pte));
-       return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       int r = 1;                                                      \
+       if (!pte_young(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address),                 \
+                          (__ptep), pte_mkold(__pte));                 \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __young = ptep_test_and_clear_young(__ptep);                \
+       int __young;                                                    \
+       __young = ptep_test_and_clear_young(__vma, __address, __ptep);  \
        if (__young)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __young;                                                        \
@@ -64,20 +68,24 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       if (!pte_dirty(pte))
-               return 0;
-       set_pte(ptep, pte_mkclean(pte));
-       return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep)            \
+({                                                                     \
+       pte_t __pte = *__ptep;                                          \
+       int r = 1;                                                      \
+       if (!pte_dirty(__pte))                                          \
+               r = 0;                                                  \
+       else                                                            \
+               set_pte_at((__vma)->vm_mm, (__address), (__ptep),       \
+                          pte_mkclean(__pte));                         \
+       r;                                                              \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(__vma, __address, __ptep)               \
 ({                                                                     \
-       int __dirty = ptep_test_and_clear_dirty(__ptep);                \
+       int __dirty;                                                    \
+       __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);  \
        if (__dirty)                                                    \
                flush_tlb_page(__vma, __address);                       \
        __dirty;                                                        \
@@ -85,36 +93,29 @@
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
-       pte_t pte = *ptep;
-       pte_clear(ptep);
-       return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep)                    \
+({                                                                     \
+       pte_t __pte = *(__ptep);                                        \
+       pte_clear((__mm), (__address), (__ptep));                       \
+       __pte;                                                          \
+})
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define ptep_clear_flush(__vma, __address, __ptep)                     \
 ({                                                                     \
-       pte_t __pte = ptep_get_and_clear(__ptep);                       \
+       pte_t __pte;                                                    \
+       __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);  \
        flush_tlb_page(__vma, __address);                               \
        __pte;                                                          \
 })
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long 
address, pte_t *ptep)
 {
        pte_t old_pte = *ptep;
-       set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
-       pte_t old_pte = *ptep;
-       set_pte(ptep, pte_mkdirty(old_pte));
+       set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
 }
 #endif
 
@@ -124,6 +125,9 @@
 
 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
 #define page_test_and_clear_dirty(page) (0)
+#define pte_maybe_dirty(pte)           pte_dirty(pte)
+#else
+#define pte_maybe_dirty(pte)           (1)
 #endif
 
 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
@@ -134,4 +138,77 @@
 #define pgd_offset_gate(mm, addr)      pgd_offset(mm, addr)
 #endif
 
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte)      do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier.  Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end)                                                
\
+({     unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end)                                                
\
+({     unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end)                                                
\
+({     unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
+       (__boundary - 1 < (end) - 1)? __boundary: (end);                \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+       if (pgd_none(*pgd))
+               return 1;
+       if (unlikely(pgd_bad(*pgd))) {
+               pgd_clear_bad(pgd);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+       if (pud_none(*pud))
+               return 1;
+       if (unlikely(pud_bad(*pud))) {
+               pud_clear_bad(pud);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+       if (pmd_none(*pmd))
+               return 1;
+       if (unlikely(pmd_bad(*pmd))) {
+               pmd_clear_bad(pmd);
+               return 1;
+       }
+       return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/sections.h
--- a/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep  1 18:46:28 2005
@@ -8,6 +8,9 @@
 extern char __bss_start[], __bss_stop[];
 extern char __init_begin[], __init_end[];
 extern char _sinittext[], _einittext[];
+extern char _sextratext[] __attribute__((weak));
+extern char _eextratext[] __attribute__((weak));
 extern char _end[];
+extern char __per_cpu_start[], __per_cpu_end[];
 
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/topology.h
--- a/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep  1 18:46:28 2005
@@ -41,8 +41,15 @@
 #ifndef node_to_first_cpu
 #define node_to_first_cpu(node)        (0)
 #endif
+#ifndef pcibus_to_node
+#define pcibus_to_node(node)   (-1)
+#endif
+
 #ifndef pcibus_to_cpumask
-#define pcibus_to_cpumask(bus) (cpu_online_map)
+#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
+                                       CPU_MASK_ALL : \
+                                       node_to_cpumask(pcibus_to_node(bus)) \
+                               )
 #endif
 
 #endif /* _ASM_GENERIC_TOPOLOGY_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h
--- a/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h      Thu Sep  1 
17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h      Thu Sep  1 
18:46:28 2005
@@ -73,7 +73,7 @@
        }
 
 #define SECURITY_INIT                                                  \
-       .security_initcall.init : {                                     \
+       .security_initcall.init : AT(ADDR(.security_initcall.init) - 
LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__security_initcall_start) = .;          \
                *(.security_initcall.init)                              \
                VMLINUX_SYMBOL(__security_initcall_end) = .;            \
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/acpi.h
--- a/xen/include/asm-ia64/linux/asm/acpi.h     Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/acpi.h     Thu Sep  1 18:46:28 2005
@@ -98,6 +98,15 @@
 int acpi_request_vector (u32 int_type);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
+/*
+ * Record the cpei override flag and current logical cpu. This is
+ * useful for CPU removal.
+ */
+extern unsigned int can_cpei_retarget(void);
+extern unsigned int is_cpu_cpei_target(unsigned int cpu);
+extern void set_cpei_target_cpu(unsigned int cpu);
+extern unsigned int get_cpei_target_cpu(void);
+
 #ifdef CONFIG_ACPI_NUMA
 /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/
 #define MAX_PXM_DOMAINS (256)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bitops.h
--- a/xen/include/asm-ia64/linux/asm/bitops.h   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/bitops.h   Thu Sep  1 18:46:28 2005
@@ -314,8 +314,8 @@
 #ifdef __KERNEL__
 
 /*
- * find_last_zero_bit - find the last zero bit in a 64 bit quantity
- * @x: The value to search
+ * Return bit number of last (most-significant) bit set.  Undefined
+ * for x==0.  Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
  */
 static inline unsigned long
 ia64_fls (unsigned long x)
@@ -327,10 +327,23 @@
        return exp - 0xffff;
 }
 
+/*
+ * Find the last (most significant) bit set.  Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
 static inline int
-fls (int x)
-{
-       return ia64_fls((unsigned int) x);
+fls (int t)
+{
+       unsigned long x = t & 0xffffffffu;
+
+       if (!x)
+               return 0;
+       x |= x >> 1;
+       x |= x >> 2;
+       x |= x >> 4;
+       x |= x >> 8;
+       x |= x >> 16;
+       return ia64_popcnt(x);
 }
 
 /*
@@ -353,9 +366,9 @@
        return result;
 }
 
-#define hweight32(x) hweight64 ((x) & 0xfffffffful)
-#define hweight16(x) hweight64 ((x) & 0xfffful)
-#define hweight8(x)  hweight64 ((x) & 0xfful)
+#define hweight32(x)   (unsigned int) hweight64((x) & 0xfffffffful)
+#define hweight16(x)   (unsigned int) hweight64((x) & 0xfffful)
+#define hweight8(x)    (unsigned int) hweight64((x) & 0xfful)
 
 #endif /* __KERNEL__ */
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/break.h
--- a/xen/include/asm-ia64/linux/asm/break.h    Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/break.h    Thu Sep  1 18:46:28 2005
@@ -12,6 +12,8 @@
  * OS-specific debug break numbers:
  */
 #define __IA64_BREAK_KDB               0x80100
+#define __IA64_BREAK_KPROBE            0x80200
+#define __IA64_BREAK_JPROBE            0x80300
 
 /*
  * OS-specific break numbers:
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bug.h
--- a/xen/include/asm-ia64/linux/asm/bug.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/bug.h      Thu Sep  1 18:46:28 2005
@@ -1,6 +1,7 @@
 #ifndef _ASM_IA64_BUG_H
 #define _ASM_IA64_BUG_H
 
+#ifdef CONFIG_BUG
 #if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
 # define ia64_abort()  __builtin_trap()
 #else
@@ -8,8 +9,10 @@
 #endif
 #define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); 
ia64_abort(); } while (0)
 
-/* should this BUG should be made generic? */
+/* should this BUG be made generic? */
 #define HAVE_ARCH_BUG
+#endif
+
 #include <asm-generic/bug.h>
 
 #endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/cacheflush.h
--- a/xen/include/asm-ia64/linux/asm/cacheflush.h       Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/cacheflush.h       Thu Sep  1 18:46:28 2005
@@ -19,7 +19,7 @@
 #define flush_cache_all()                      do { } while (0)
 #define flush_cache_mm(mm)                     do { } while (0)
 #define flush_cache_range(vma, start, end)     do { } while (0)
-#define flush_cache_page(vma, vmaddr)          do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)     do { } while (0)
 #define flush_icache_page(vma,page)            do { } while (0)
 #define flush_cache_vmap(start, end)           do { } while (0)
 #define flush_cache_vunmap(start, end)         do { } while (0)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/param.h
--- a/xen/include/asm-ia64/linux/asm/param.h    Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/param.h    Thu Sep  1 18:46:28 2005
@@ -27,7 +27,7 @@
    */
 #  define HZ     32
 # else
-#  define HZ   1024
+#  define HZ   CONFIG_HZ
 # endif
 # define USER_HZ       HZ
 # define CLOCKS_PER_SEC        HZ      /* frequency at which times() counts */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/pci.h
--- a/xen/include/asm-ia64/linux/asm/pci.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/pci.h      Thu Sep  1 18:46:28 2005
@@ -47,7 +47,7 @@
 }
 
 static inline void
-pcibios_penalize_isa_irq (int irq)
+pcibios_penalize_isa_irq (int irq, int active)
 {
        /* We don't do dynamic PCI IRQ allocation */
 }
@@ -82,6 +82,25 @@
 #define sg_dma_len(sg)         ((sg)->dma_length)
 #define sg_dma_address(sg)     ((sg)->dma_address)
 
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+                                       enum pci_dma_burst_strategy *strat,
+                                       unsigned long *strategy_parameter)
+{
+       unsigned long cacheline_size;
+       u8 byte;
+
+       pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+       if (byte == 0)
+               cacheline_size = 1024;
+       else
+               cacheline_size = (int) byte * 4;
+
+       *strat = PCI_DMA_BURST_MULTIPLE;
+       *strategy_parameter = cacheline_size;
+}
+#endif
+
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct 
*vma,
                                enum pci_mmap_state mmap_state, int 
write_combine);
@@ -109,6 +128,7 @@
        void *acpi_handle;
        void *iommu;
        int segment;
+       int node;               /* nearest node with memory or -1 for global 
allocation */
 
        unsigned int windows;
        struct pci_window *window;
@@ -121,14 +141,9 @@
 
 extern struct pci_ops pci_root_ops;
 
-static inline int pci_name_bus(char *name, struct pci_bus *bus)
+static inline int pci_proc_domain(struct pci_bus *bus)
 {
-       if (pci_domain_nr(bus) == 0) {
-               sprintf(name, "%02x", bus->number);
-       } else {
-               sprintf(name, "%04x:%02x", pci_domain_nr(bus), bus->number);
-       }
-       return 0;
+       return (pci_domain_nr(bus) != 0);
 }
 
 static inline void pcibios_add_platform_entries(struct pci_dev *dev)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/percpu.h
--- a/xen/include/asm-ia64/linux/asm/percpu.h   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/percpu.h   Thu Sep  1 18:46:28 2005
@@ -50,7 +50,7 @@
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)                      (*((void)cpu, &per_cpu__##var))
+#define per_cpu(var, cpu)                      (*((void)(cpu), 
&per_cpu__##var))
 #define __get_cpu_var(var)                     per_cpu__##var
 #define per_cpu_init()                         (__phys_per_cpu_start)
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/sections.h
--- a/xen/include/asm-ia64/linux/asm/sections.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/sections.h Thu Sep  1 18:46:28 2005
@@ -17,6 +17,7 @@
 extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[];
 extern char __start_gate_brl_fsys_bubble_down_patchlist[], 
__end_gate_brl_fsys_bubble_down_patchlist[];
 extern char __start_unwind[], __end_unwind[];
+extern char __start_ivt_text[], __end_ivt_text[];
 
 #endif /* _ASM_IA64_SECTIONS_H */
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/signal.h
--- a/xen/include/asm-ia64/linux/asm/signal.h   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/signal.h   Thu Sep  1 18:46:28 2005
@@ -114,27 +114,11 @@
 #define _NSIG_BPW      64
 #define _NSIG_WORDS    (_NSIG / _NSIG_BPW)
 
-/*
- * These values of sa_flags are used only by the kernel as part of the
- * irq handling routines.
- *
- * SA_INTERRUPT is also used by the irq handling routines.
- * SA_SHIRQ is for shared interrupt support on PCI and EISA.
- */
-#define SA_PROBE               SA_ONESHOT
-#define SA_SAMPLE_RANDOM       SA_RESTART
-#define SA_SHIRQ               0x04000000
 #define SA_PERCPU_IRQ          0x02000000
 
 #endif /* __KERNEL__ */
 
-#define SIG_BLOCK          0   /* for blocking signals */
-#define SIG_UNBLOCK        1   /* for unblocking signals */
-#define SIG_SETMASK        2   /* for setting the signal mask */
-
-#define SIG_DFL        ((__sighandler_t)0)     /* default signal handling */
-#define SIG_IGN        ((__sighandler_t)1)     /* ignore signal */
-#define SIG_ERR        ((__sighandler_t)-1)    /* error return from signal */
+#include <asm-generic/signal.h>
 
 # ifndef __ASSEMBLY__
 
@@ -142,9 +126,6 @@
 
 /* Avoid too many header ordering problems.  */
 struct siginfo;
-
-/* Type of a signal handler.  */
-typedef void __user (*__sighandler_t)(int);
 
 typedef struct sigaltstack {
        void __user *ss_sp;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/smp.h
--- a/xen/include/asm-ia64/linux/asm/smp.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/smp.h      Thu Sep  1 18:46:28 2005
@@ -3,16 +3,14 @@
  *
  * Copyright (C) 1999 VA Linux Systems
  * Copyright (C) 1999 Walt Drummond <drummond@xxxxxxxxxxx>
- * Copyright (C) 2001-2003 Hewlett-Packard Co
+ * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P.
  *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *     Bjorn Helgaas <bjorn.helgaas@xxxxxx>
  */
 #ifndef _ASM_IA64_SMP_H
 #define _ASM_IA64_SMP_H
 
 #include <linux/config.h>
-
-#ifdef CONFIG_SMP
-
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/kernel.h>
@@ -24,12 +22,31 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 
+static inline unsigned int
+ia64_get_lid (void)
+{
+       union {
+               struct {
+                       unsigned long reserved : 16;
+                       unsigned long eid : 8;
+                       unsigned long id : 8;
+                       unsigned long ignored : 32;
+               } f;
+               unsigned long bits;
+       } lid;
+
+       lid.bits = ia64_getreg(_IA64_REG_CR_LID);
+       return lid.f.id << 8 | lid.f.eid;
+}
+
+#ifdef CONFIG_SMP
+
 #define XTP_OFFSET             0x1e0008
 
 #define SMP_IRQ_REDIRECTION    (1 << 0)
 #define SMP_IPI_REDIRECTION    (1 << 1)
 
-#define smp_processor_id()     (current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern struct smp_boot_data {
        int cpu_count;
@@ -39,6 +56,10 @@
 extern char no_int_routing __devinitdata;
 
 extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern int smp_num_siblings;
+extern int smp_num_cpucores;
 extern void __iomem *ipi_base_addr;
 extern unsigned char smp_int_redirect;
 
@@ -90,22 +111,7 @@
                writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
 }
 
-static inline unsigned int
-hard_smp_processor_id (void)
-{
-       union {
-               struct {
-                       unsigned long reserved : 16;
-                       unsigned long eid : 8;
-                       unsigned long id : 8;
-                       unsigned long ignored : 32;
-               } f;
-               unsigned long bits;
-       } lid;
-
-       lid.bits = ia64_getreg(_IA64_REG_CR_LID);
-       return lid.f.id << 8 | lid.f.eid;
-}
+#define hard_smp_processor_id()                ia64_get_lid()
 
 /* Upping and downing of CPUs */
 extern int __cpu_disable (void);
@@ -122,10 +128,12 @@
 extern void smp_send_reschedule (int cpu);
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
+extern void identify_siblings (struct cpuinfo_ia64 *);
 
 #else
 
-#define cpu_logical_id(cpuid)          0
+#define cpu_logical_id(i)              0
+#define cpu_physical_id(i)             ia64_get_lid()
 
 #endif /* CONFIG_SMP */
 #endif /* _ASM_IA64_SMP_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux/asm/thread_info.h
--- a/xen/include/asm-ia64/linux/asm/thread_info.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/thread_info.h      Thu Sep  1 18:46:28 2005
@@ -25,7 +25,7 @@
        __u32 flags;                    /* thread_info flags (see TIF_*) */
        __u32 cpu;                      /* current CPU */
        mm_segment_t addr_limit;        /* user-level address space limit */
-       __s32 preempt_count;            /* 0=premptable, <0=BUG; will also 
serve as bh-counter */
+       int preempt_count;              /* 0=premptable, <0=BUG; will also 
serve as bh-counter */
        struct restart_block restart_block;
        struct {
                int signo;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/topology.h
--- a/xen/include/asm-ia64/linux/asm/topology.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/topology.h Thu Sep  1 18:46:28 2005
@@ -40,27 +40,61 @@
  */
 #define node_to_first_cpu(node) (__ffs(node_to_cpumask(node)))
 
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
 void build_cpu_to_node_map(void);
+
+#define SD_CPU_INIT (struct sched_domain) {            \
+       .span                   = CPU_MASK_NONE,        \
+       .parent                 = NULL,                 \
+       .groups                 = NULL,                 \
+       .min_interval           = 1,                    \
+       .max_interval           = 4,                    \
+       .busy_factor            = 64,                   \
+       .imbalance_pct          = 125,                  \
+       .cache_hot_time         = (10*1000000),         \
+       .per_cpu_gain           = 100,                  \
+       .cache_nice_tries       = 2,                    \
+       .busy_idx               = 2,                    \
+       .idle_idx               = 1,                    \
+       .newidle_idx            = 2,                    \
+       .wake_idx               = 1,                    \
+       .forkexec_idx           = 1,                    \
+       .flags                  = SD_LOAD_BALANCE       \
+                               | SD_BALANCE_NEWIDLE    \
+                               | SD_BALANCE_EXEC       \
+                               | SD_WAKE_AFFINE,       \
+       .last_balance           = jiffies,              \
+       .balance_interval       = 1,                    \
+       .nr_balance_failed      = 0,                    \
+}
 
 /* sched_domains SD_NODE_INIT for IA64 NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {           \
        .span                   = CPU_MASK_NONE,        \
        .parent                 = NULL,                 \
        .groups                 = NULL,                 \
-       .min_interval           = 80,                   \
-       .max_interval           = 320,                  \
-       .busy_factor            = 320,                  \
+       .min_interval           = 8,                    \
+       .max_interval           = 8*(min(num_online_cpus(), 32)), \
+       .busy_factor            = 64,                   \
        .imbalance_pct          = 125,                  \
        .cache_hot_time         = (10*1000000),         \
-       .cache_nice_tries       = 1,                    \
+       .cache_nice_tries       = 2,                    \
+       .busy_idx               = 3,                    \
+       .idle_idx               = 2,                    \
+       .newidle_idx            = 0, /* unused */       \
+       .wake_idx               = 1,                    \
+       .forkexec_idx           = 1,                    \
        .per_cpu_gain           = 100,                  \
        .flags                  = SD_LOAD_BALANCE       \
                                | SD_BALANCE_EXEC       \
-                               | SD_BALANCE_NEWIDLE    \
-                               | SD_WAKE_IDLE          \
+                               | SD_BALANCE_FORK       \
                                | SD_WAKE_BALANCE,      \
        .last_balance           = jiffies,              \
-       .balance_interval       = 1,                    \
+       .balance_interval       = 64,                   \
        .nr_balance_failed      = 0,                    \
 }
 
@@ -69,17 +103,21 @@
        .span                   = CPU_MASK_NONE,        \
        .parent                 = NULL,                 \
        .groups                 = NULL,                 \
-       .min_interval           = 80,                   \
-       .max_interval           = 320,                  \
-       .busy_factor            = 320,                  \
-       .imbalance_pct          = 125,                  \
+       .min_interval           = 64,                   \
+       .max_interval           = 64*num_online_cpus(), \
+       .busy_factor            = 128,                  \
+       .imbalance_pct          = 133,                  \
        .cache_hot_time         = (10*1000000),         \
        .cache_nice_tries       = 1,                    \
+       .busy_idx               = 3,                    \
+       .idle_idx               = 3,                    \
+       .newidle_idx            = 0, /* unused */       \
+       .wake_idx               = 0, /* unused */       \
+       .forkexec_idx           = 0, /* unused */       \
        .per_cpu_gain           = 100,                  \
-       .flags                  = SD_LOAD_BALANCE       \
-                               | SD_BALANCE_EXEC,      \
+       .flags                  = SD_LOAD_BALANCE,      \
        .last_balance           = jiffies,              \
-       .balance_interval       = 100*(63+num_online_cpus())/64,   \
+       .balance_interval       = 64,                   \
        .nr_balance_failed      = 0,                    \
 }
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unaligned.h
--- a/xen/include/asm-ia64/linux/asm/unaligned.h        Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/unaligned.h        Thu Sep  1 18:46:28 2005
@@ -1,121 +1,6 @@
 #ifndef _ASM_IA64_UNALIGNED_H
 #define _ASM_IA64_UNALIGNED_H
 
-#include <linux/types.h>
-
-/*
- * The main single-value unaligned transfer routines.
- *
- * Based on <asm-alpha/unaligned.h>.
- *
- * Copyright (C) 1998, 1999, 2003 Hewlett-Packard Co
- *     David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-#define get_unaligned(ptr) \
-       ((__typeof__(*(ptr)))ia64_get_unaligned((ptr), sizeof(*(ptr))))
-
-#define put_unaligned(x,ptr) \
-       ia64_put_unaligned((unsigned long)(x), (ptr), sizeof(*(ptr)))
-
-struct __una_u64 { __u64 x __attribute__((packed)); };
-struct __una_u32 { __u32 x __attribute__((packed)); };
-struct __una_u16 { __u16 x __attribute__((packed)); };
-
-static inline unsigned long
-__uld8 (const unsigned long * addr)
-{
-       const struct __una_u64 *ptr = (const struct __una_u64 *) addr;
-       return ptr->x;
-}
-
-static inline unsigned long
-__uld4 (const unsigned int * addr)
-{
-       const struct __una_u32 *ptr = (const struct __una_u32 *) addr;
-       return ptr->x;
-}
-
-static inline unsigned long
-__uld2 (const unsigned short * addr)
-{
-       const struct __una_u16 *ptr = (const struct __una_u16 *) addr;
-       return ptr->x;
-}
-
-static inline void
-__ust8 (unsigned long val, unsigned long * addr)
-{
-       struct __una_u64 *ptr = (struct __una_u64 *) addr;
-       ptr->x = val;
-}
-
-static inline void
-__ust4 (unsigned long val, unsigned int * addr)
-{
-       struct __una_u32 *ptr = (struct __una_u32 *) addr;
-       ptr->x = val;
-}
-
-static inline void
-__ust2 (unsigned long val, unsigned short * addr)
-{
-       struct __una_u16 *ptr = (struct __una_u16 *) addr;
-       ptr->x = val;
-}
-
-
-/*
- * This function doesn't actually exist.  The idea is that when someone uses 
the macros
- * below with an unsupported size (datatype), the linker will alert us to the 
problem via
- * an unresolved reference error.
- */
-extern unsigned long ia64_bad_unaligned_access_length (void);
-
-#define ia64_get_unaligned(_ptr,size)                                          
\
-({                                                                             
\
-       const void *__ia64_ptr = (_ptr);                                        
\
-       unsigned long __ia64_val;                                               
\
-                                                                               
\
-       switch (size) {                                                         
\
-             case 1:                                                           
\
-               __ia64_val = *(const unsigned char *) __ia64_ptr;               
\
-               break;                                                          
\
-             case 2:                                                           
\
-               __ia64_val = __uld2((const unsigned short *)__ia64_ptr);        
\
-               break;                                                          
\
-             case 4:                                                           
\
-               __ia64_val = __uld4((const unsigned int *)__ia64_ptr);          
\
-               break;                                                          
\
-             case 8:                                                           
\
-               __ia64_val = __uld8((const unsigned long *)__ia64_ptr);         
\
-               break;                                                          
\
-             default:                                                          
\
-               __ia64_val = ia64_bad_unaligned_access_length();                
\
-       }                                                                       
\
-       __ia64_val;                                                             
\
-})
-
-#define ia64_put_unaligned(_val,_ptr,size)                             \
-do {                                                                   \
-       const void *__ia64_ptr = (_ptr);                                \
-       unsigned long __ia64_val = (_val);                              \
-                                                                       \
-       switch (size) {                                                 \
-             case 1:                                                   \
-               *(unsigned char *)__ia64_ptr = (__ia64_val);            \
-               break;                                                  \
-             case 2:                                                   \
-               __ust2(__ia64_val, (unsigned short *)__ia64_ptr);       \
-               break;                                                  \
-             case 4:                                                   \
-               __ust4(__ia64_val, (unsigned int *)__ia64_ptr);         \
-               break;                                                  \
-             case 8:                                                   \
-               __ust8(__ia64_val, (unsigned long *)__ia64_ptr);        \
-               break;                                                  \
-             default:                                                  \
-               ia64_bad_unaligned_access_length();                     \
-       }                                                               \
-} while (0)
+#include <asm-generic/unaligned.h>
 
 #endif /* _ASM_IA64_UNALIGNED_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unistd.h
--- a/xen/include/asm-ia64/linux/asm/unistd.h   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/unistd.h   Thu Sep  1 18:46:28 2005
@@ -263,6 +263,12 @@
 #define __NR_add_key                   1271
 #define __NR_request_key               1272
 #define __NR_keyctl                    1273
+#define __NR_ioprio_set                        1274
+#define __NR_ioprio_get                        1275
+#define __NR_set_zone_reclaim          1276
+#define __NR_inotify_init              1277
+#define __NR_inotify_add_watch         1278
+#define __NR_inotify_rm_watch          1279
 
 #ifdef __KERNEL__
 
@@ -392,7 +398,7 @@
  * proper prototype, but we can't use __typeof__ either, because not all 
cond_syscall()
  * declarations have prototypes at the moment.
  */
-#define cond_syscall(x) asmlinkage long x (void) 
__attribute__((weak,alias("sys_ni_syscall")));
+#define cond_syscall(x) asmlinkage long x (void) 
__attribute__((weak,alias("sys_ni_syscall")))
 
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitmap.h
--- a/xen/include/asm-ia64/linux/bitmap.h       Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/bitmap.h       Thu Sep  1 18:46:28 2005
@@ -41,7 +41,9 @@
  * bitmap_shift_right(dst, src, n, nbits)      *dst = *src >> n
  * bitmap_shift_left(dst, src, n, nbits)       *dst = *src << n
  * bitmap_scnprintf(buf, len, src, nbits)      Print bitmap src to buf
- * bitmap_parse(ubuf, ulen, dst, nbits)                Parse bitmap dst from 
buf
+ * bitmap_parse(ubuf, ulen, dst, nbits)                Parse bitmap dst from 
user buf
+ * bitmap_scnlistprintf(buf, len, src, nbits)  Print bitmap src as list to buf
+ * bitmap_parselist(buf, dst, nbits)           Parse bitmap dst from list
  */
 
 /*
@@ -98,6 +100,10 @@
                        const unsigned long *src, int nbits);
 extern int bitmap_parse(const char __user *ubuf, unsigned int ulen,
                        unsigned long *dst, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+                       const unsigned long *src, int nbits);
+extern int bitmap_parselist(const char *buf, unsigned long *maskp,
+                       int nmaskbits);
 extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
 extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
 extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitops.h
--- a/xen/include/asm-ia64/linux/bitops.h       Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/bitops.h       Thu Sep  1 18:46:28 2005
@@ -134,4 +134,26 @@
        return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
 }
 
+/*
+ * rol32 - rotate a 32-bit value left
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+       return (word << shift) | (word >> (32 - shift));
+}
+
+/*
+ * ror32 - rotate a 32-bit value right
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 ror32(__u32 word, unsigned int shift)
+{
+       return (word >> shift) | (word << (32 - shift));
+}
+
 #endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/dma-mapping.h
--- a/xen/include/asm-ia64/linux/dma-mapping.h  Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/dma-mapping.h  Thu Sep  1 18:46:28 2005
@@ -14,7 +14,12 @@
 };
 
 #define DMA_64BIT_MASK 0xffffffffffffffffULL
+#define DMA_40BIT_MASK 0x000000ffffffffffULL
+#define DMA_39BIT_MASK 0x0000007fffffffffULL
 #define DMA_32BIT_MASK 0x00000000ffffffffULL
+#define DMA_31BIT_MASK 0x000000007fffffffULL
+#define DMA_30BIT_MASK 0x000000003fffffffULL
+#define DMA_29BIT_MASK 0x000000001fffffffULL
 
 #include <asm/dma-mapping.h>
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/efi.h
--- a/xen/include/asm-ia64/linux/efi.h  Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/efi.h  Thu Sep  1 18:46:28 2005
@@ -301,7 +301,6 @@
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
                                        struct resource *data_resource);
-extern efi_status_t phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc);
 extern unsigned long __init efi_get_time(void);
 extern int __init efi_set_rtc_mmss(unsigned long nowtime);
 extern struct efi_memory_map memmap;
@@ -316,7 +315,7 @@
  */
 static inline int efi_range_is_wc(unsigned long start, unsigned long len)
 {
-       int i;
+       unsigned long i;
 
        for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) {
                unsigned long paddr = __pa(start + i);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/err.h
--- a/xen/include/asm-ia64/linux/err.h  Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/err.h  Thu Sep  1 18:46:28 2005
@@ -13,6 +13,8 @@
  * This should be a per-architecture thing, to allow different
  * error and pointer decisions.
  */
+#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L)
+
 static inline void *ERR_PTR(long error)
 {
        return (void *) error;
@@ -25,7 +27,7 @@
 
 static inline long IS_ERR(const void *ptr)
 {
-       return unlikely((unsigned long)ptr > (unsigned long)-1000L);
+       return IS_ERR_VALUE((unsigned long)ptr);
 }
 
 #endif /* _LINUX_ERR_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/gfp.h
--- a/xen/include/asm-ia64/linux/gfp.h  Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/gfp.h  Thu Sep  1 18:46:28 2005
@@ -12,8 +12,8 @@
  * GFP bitmasks..
  */
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA      0x01
-#define __GFP_HIGHMEM  0x02
+#define __GFP_DMA      0x01u
+#define __GFP_HIGHMEM  0x02u
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -26,26 +26,29 @@
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT     0x10    /* Can wait and reschedule? */
-#define __GFP_HIGH     0x20    /* Should access emergency pools? */
-#define __GFP_IO       0x40    /* Can start physical IO? */
-#define __GFP_FS       0x80    /* Can call down to low-level FS? */
-#define __GFP_COLD     0x100   /* Cache-cold page required */
-#define __GFP_NOWARN   0x200   /* Suppress page allocation failure warning */
-#define __GFP_REPEAT   0x400   /* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL   0x800   /* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY  0x1000  /* Do not retry.  Might fail */
-#define __GFP_NO_GROW  0x2000  /* Slab internal usage */
-#define __GFP_COMP     0x4000  /* Add compound page metadata */
-#define __GFP_ZERO     0x8000  /* Return zeroed page on success */
+#define __GFP_WAIT     0x10u   /* Can wait and reschedule? */
+#define __GFP_HIGH     0x20u   /* Should access emergency pools? */
+#define __GFP_IO       0x40u   /* Can start physical IO? */
+#define __GFP_FS       0x80u   /* Can call down to low-level FS? */
+#define __GFP_COLD     0x100u  /* Cache-cold page required */
+#define __GFP_NOWARN   0x200u  /* Suppress page allocation failure warning */
+#define __GFP_REPEAT   0x400u  /* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL   0x800u  /* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY  0x1000u /* Do not retry.  Might fail */
+#define __GFP_NO_GROW  0x2000u /* Slab internal usage */
+#define __GFP_COMP     0x4000u /* Add compound page metadata */
+#define __GFP_ZERO     0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
 
-#define __GFP_BITS_SHIFT 16    /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20    /* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
                        __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
-                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+                       __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+                       __GFP_NOMEMALLOC|__GFP_NORECLAIM)
 
 #define GFP_ATOMIC     (__GFP_HIGH)
 #define GFP_NOIO       (__GFP_WAIT)
@@ -82,7 +85,7 @@
 extern struct page *
 FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
 
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast 
gfp_mask,
                                                unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
@@ -93,17 +96,17 @@
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, 
unsigned order);
 
 static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
                return NULL;
 
        return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
                        struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
@@ -112,8 +115,8 @@
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned 
int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, 
unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
 
 #define __get_free_page(gfp_mask) \
                __get_free_pages((gfp_mask),0)
@@ -130,5 +133,10 @@
 #define free_page(addr) free_pages((addr),0)
 
 void page_alloc_init(void);
+#ifdef CONFIG_NUMA
+void drain_remote_pages(void);
+#else
+static inline void drain_remote_pages(void) { };
+#endif
 
 #endif /* __LINUX_GFP_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/mmzone.h
--- a/xen/include/asm-ia64/linux/mmzone.h       Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/mmzone.h       Thu Sep  1 18:46:28 2005
@@ -11,6 +11,7 @@
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <linux/numa.h>
+#include <linux/init.h>
 #include <asm/atomic.h>
 
 /* Free memory management - zoned buddy allocator.  */
@@ -61,6 +62,12 @@
        unsigned long other_node;       /* allocation from other node */
 #endif
 } ____cacheline_aligned_in_smp;
+
+#ifdef CONFIG_NUMA
+#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
+#else
+#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
+#endif
 
 #define ZONE_DMA               0
 #define ZONE_NORMAL            1
@@ -121,8 +128,11 @@
         */
        unsigned long           lowmem_reserve[MAX_NR_ZONES];
 
+#ifdef CONFIG_NUMA
+       struct per_cpu_pageset  *pageset[NR_CPUS];
+#else
        struct per_cpu_pageset  pageset[NR_CPUS];
-
+#endif
        /*
         * free areas of different sizes
         */
@@ -144,6 +154,14 @@
        int                     all_unreclaimable; /* All pages pinned */
 
        /*
+        * Does the allocator try to reclaim pages from the zone as soon
+        * as it fails a watermark_ok() in __alloc_pages?
+        */
+       int                     reclaim_pages;
+       /* A count of how many reclaimers are scanning this zone */
+       atomic_t                reclaim_in_progress;
+
+       /*
         * prev_priority holds the scanning priority for this zone.  It is
         * defined as the scanning priority at which we achieved our reclaim
         * target at the previous try_to_free_pages() or balance_pgdat()
@@ -251,7 +269,9 @@
        struct zone node_zones[MAX_NR_ZONES];
        struct zonelist node_zonelists[GFP_ZONETYPES];
        int nr_zones;
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
        struct page *node_mem_map;
+#endif
        struct bootmem_data *bdata;
        unsigned long node_start_pfn;
        unsigned long node_present_pages; /* total number of physical pages */
@@ -266,6 +286,12 @@
 
 #define node_present_pages(nid)        (NODE_DATA(nid)->node_present_pages)
 #define node_spanned_pages(nid)        (NODE_DATA(nid)->node_spanned_pages)
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#define pgdat_page_nr(pgdat, pagenr)   ((pgdat)->node_mem_map + (pagenr))
+#else
+#define pgdat_page_nr(pgdat, pagenr)   pfn_to_page((pgdat)->node_start_pfn + 
(pagenr))
+#endif
+#define nid_page_nr(nid, pagenr)       pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
 extern struct pglist_data *pgdat_list;
 
@@ -278,6 +304,16 @@
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                int alloc_type, int can_try_harder, int gfp_high);
 
+#ifdef CONFIG_HAVE_MEMORY_PRESENT
+void memory_present(int nid, unsigned long start, unsigned long end);
+#else
+static inline void memory_present(int nid, unsigned long start, unsigned long 
end) {}
+#endif
+
+#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+#endif
+
 /*
  * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
  */
@@ -370,9 +406,9 @@
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
-#define numa_node_id()         (cpu_to_node(_smp_processor_id()))
-
-#ifndef CONFIG_DISCONTIGMEM
+#define numa_node_id()         (cpu_to_node(raw_smp_processor_id()))
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 
 extern struct pglist_data contig_page_data;
 #define NODE_DATA(nid)         (&contig_page_data)
@@ -380,35 +416,176 @@
 #define MAX_NODES_SHIFT                1
 #define pfn_to_nid(pfn)                (0)
 
-#else /* CONFIG_DISCONTIGMEM */
+#else /* CONFIG_NEED_MULTIPLE_NODES */
 
 #include <asm/mmzone.h>
+
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_SPARSEMEM
+#include <asm/sparsemem.h>
+#endif
 
 #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
 /*
  * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
  * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
  */
-#define MAX_NODES_SHIFT                6
+#define FLAGS_RESERVED         8
+
 #elif BITS_PER_LONG == 64
 /*
  * with 64 bit flags field, there's plenty of room.
  */
-#define MAX_NODES_SHIFT                10
-#endif
-
-#endif /* !CONFIG_DISCONTIGMEM */
-
-#if NODES_SHIFT > MAX_NODES_SHIFT
-#error NODES_SHIFT > MAX_NODES_SHIFT
-#endif
-
-/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */
-#define MAX_ZONES_SHIFT                2
-
-#if ZONES_SHIFT > MAX_ZONES_SHIFT
-#error ZONES_SHIFT > MAX_ZONES_SHIFT
-#endif
+#define FLAGS_RESERVED         32
+
+#else
+
+#error BITS_PER_LONG not defined
+
+#endif
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#define early_pfn_to_nid(nid)  (0UL)
+#endif
+
+#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+
+#ifdef CONFIG_SPARSEMEM
+
+/*
+ * SECTION_SHIFT               #bits space required to store a section #
+ *
+ * PA_SECTION_SHIFT            physical address to/from section number
+ * PFN_SECTION_SHIFT           pfn to/from section number
+ */
+#define SECTIONS_SHIFT         (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
+
+#define PA_SECTION_SHIFT       (SECTION_SIZE_BITS)
+#define PFN_SECTION_SHIFT      (SECTION_SIZE_BITS - PAGE_SHIFT)
+
+#define NR_MEM_SECTIONS                (1UL << SECTIONS_SHIFT)
+
+#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
+#define PAGE_SECTION_MASK      (~(PAGES_PER_SECTION-1))
+
+#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
+#error Allocator MAX_ORDER exceeds SECTION_SIZE
+#endif
+
+struct page;
+struct mem_section {
+       /*
+        * This is, logically, a pointer to an array of struct
+        * pages.  However, it is stored with some other magic.
+        * (see sparse.c::sparse_init_one_section())
+        *
+        * Making it a UL at least makes someone do a cast
+        * before using it wrong.
+        */
+       unsigned long section_mem_map;
+};
+
+extern struct mem_section mem_section[NR_MEM_SECTIONS];
+
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+       return &mem_section[nr];
+}
+
+/*
+ * We use the lower bits of the mem_map pointer to store
+ * a little bit of information.  There should be at least
+ * 3 bits here due to 32-bit alignment.
+ */
+#define        SECTION_MARKED_PRESENT  (1UL<<0)
+#define SECTION_HAS_MEM_MAP    (1UL<<1)
+#define SECTION_MAP_LAST_BIT   (1UL<<2)
+#define SECTION_MAP_MASK       (~(SECTION_MAP_LAST_BIT-1))
+
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+       unsigned long map = section->section_mem_map;
+       map &= SECTION_MAP_MASK;
+       return (struct page *)map;
+}
+
+static inline int valid_section(struct mem_section *section)
+{
+       return (section->section_mem_map & SECTION_MARKED_PRESENT);
+}
+
+static inline int section_has_mem_map(struct mem_section *section)
+{
+       return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+}
+
+static inline int valid_section_nr(unsigned long nr)
+{
+       return valid_section(__nr_to_section(nr));
+}
+
+/*
+ * Given a kernel address, find the home node of the underlying memory.
+ */
+#define kvaddr_to_nid(kaddr)   pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
+
+static inline struct mem_section *__pfn_to_section(unsigned long pfn)
+{
+       return __nr_to_section(pfn_to_section_nr(pfn));
+}
+
+#define pfn_to_page(pfn)                                               \
+({                                                                     \
+       unsigned long __pfn = (pfn);                                    \
+       __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn;        \
+})
+#define page_to_pfn(page)                                              \
+({                                                                     \
+       page - __section_mem_map_addr(__nr_to_section(                  \
+               page_to_section(page)));                                \
+})
+
+static inline int pfn_valid(unsigned long pfn)
+{
+       if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+               return 0;
+       return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+}
+
+/*
+ * These are _only_ used during initialisation, therefore they
+ * can use __initdata ...  They could have names to indicate
+ * this restriction.
+ */
+#ifdef CONFIG_NUMA
+#define pfn_to_nid             early_pfn_to_nid
+#endif
+
+#define pfn_to_pgdat(pfn)                                              \
+({                                                                     \
+       NODE_DATA(pfn_to_nid(pfn));                                     \
+})
+
+#define early_pfn_valid(pfn)   pfn_valid(pfn)
+void sparse_init(void);
+#else
+#define sparse_init()  do {} while (0)
+#endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+#define early_pfn_in_nid(pfn, nid)     (early_pfn_to_nid(pfn) == (nid))
+#else
+#define early_pfn_in_nid(pfn, nid)     (1)
+#endif
+
+#ifndef early_pfn_valid
+#define early_pfn_valid(pfn)   (1)
+#endif
+
+void memory_present(int nid, unsigned long start, unsigned long end);
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
 
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/numa.h
--- a/xen/include/asm-ia64/linux/numa.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/numa.h Thu Sep  1 18:46:28 2005
@@ -3,7 +3,7 @@
 
 #include <linux/config.h>
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_FLATMEM
 #include <asm/numnodes.h>
 #endif
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/page-flags.h
--- a/xen/include/asm-ia64/linux/page-flags.h   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/page-flags.h   Thu Sep  1 18:46:28 2005
@@ -61,21 +61,20 @@
 #define PG_active               6
 #define PG_slab                         7      /* slab debug (Suparna wants 
this) */
 
-#define PG_highmem              8
-#define PG_checked              9      /* kill me in 2.5.<early>. */
-#define PG_arch_1              10
-#define PG_reserved            11
-
-#define PG_private             12      /* Has something at ->private */
-#define PG_writeback           13      /* Page is under writeback */
-#define PG_nosave              14      /* Used for system suspend/resume */
-#define PG_compound            15      /* Part of a compound page */
-
-#define PG_swapcache           16      /* Swap page: swp_entry_t in private */
-#define PG_mappedtodisk                17      /* Has blocks allocated on-disk 
*/
-#define PG_reclaim             18      /* To be reclaimed asap */
-#define PG_nosave_free         19      /* Free, should not be written */
-
+#define PG_checked              8      /* kill me in 2.5.<early>. */
+#define PG_arch_1               9
+#define PG_reserved            10
+#define PG_private             11      /* Has something at ->private */
+
+#define PG_writeback           12      /* Page is under writeback */
+#define PG_nosave              13      /* Used for system suspend/resume */
+#define PG_compound            14      /* Part of a compound page */
+#define PG_swapcache           15      /* Swap page: swp_entry_t in private */
+
+#define PG_mappedtodisk                16      /* Has blocks allocated on-disk 
*/
+#define PG_reclaim             17      /* To be reclaimed asap */
+#define PG_nosave_free         18      /* Free, should not be written */
+#define PG_uncached            19      /* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -131,12 +130,13 @@
        unsigned long allocstall;       /* direct reclaim calls */
 
        unsigned long pgrotated;        /* pages rotated to tail of the LRU */
+       unsigned long nr_bounce;        /* pages for bounce buffers */
 };
 
 extern void get_page_state(struct page_state *ret);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned offset);
-extern void __mod_page_state(unsigned offset, unsigned long delta);
+extern unsigned long __read_page_state(unsigned long offset);
+extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
        __read_page_state(offsetof(struct page_state, member))
@@ -214,7 +214,7 @@
 #define TestSetPageSlab(page)  test_and_set_bit(PG_slab, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
-#define PageHighMem(page)      test_bit(PG_highmem, &(page)->flags)
+#define PageHighMem(page)      is_highmem(page_zone(page))
 #else
 #define PageHighMem(page)      0 /* needed to optimize away at compile time */
 #endif
@@ -301,10 +301,13 @@
 #define PageSwapCache(page)    0
 #endif
 
+#define PageUncached(page)     test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page)  set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page)        clear_bit(PG_uncached, &(page)->flags)
+
 struct page;   /* forward declaration */
 
 int test_clear_page_dirty(struct page *page);
-int __clear_page_dirty(struct page *page);
 int test_clear_page_writeback(struct page *page);
 int test_set_page_writeback(struct page *page);
 
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/slab.h
--- a/xen/include/asm-ia64/linux/slab.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/slab.h Thu Sep  1 18:46:28 2005
@@ -1,3 +1,137 @@
-#include <xen/xmalloc.h>
-#include <linux/gfp.h>
-#include <asm/delay.h>
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@xxxxxxxxxxxxxxxxx)
+ */
+
+#ifndef _LINUX_SLAB_H
+#define        _LINUX_SLAB_H
+
+#if    defined(__KERNEL__)
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include       <linux/config.h>        /* kmalloc_sizes.h needs CONFIG_ 
options */
+#include       <linux/gfp.h>
+#include       <linux/init.h>
+#include       <linux/types.h>
+#include       <asm/page.h>            /* kmalloc_sizes.h needs PAGE_SIZE */
+#include       <asm/cache.h>           /* kmalloc_sizes.h needs L1_CACHE_BYTES 
*/
+
+/* flags for kmem_cache_alloc() */
+#define        SLAB_NOFS               GFP_NOFS
+#define        SLAB_NOIO               GFP_NOIO
+#define        SLAB_ATOMIC             GFP_ATOMIC
+#define        SLAB_USER               GFP_USER
+#define        SLAB_KERNEL             GFP_KERNEL
+#define        SLAB_DMA                GFP_DMA
+
+#define SLAB_LEVEL_MASK                GFP_LEVEL_MASK
+
+#define        SLAB_NO_GROW            __GFP_NO_GROW   /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator as been build
+ * SLAB_DEBUG_SUPPORT.
+ */
+#define        SLAB_DEBUG_FREE         0x00000100UL    /* Peform (expensive) 
checks on free */
+#define        SLAB_DEBUG_INITIAL      0x00000200UL    /* Call constructor (as 
verifier) */
+#define        SLAB_RED_ZONE           0x00000400UL    /* Red zone objs in a 
cache */
+#define        SLAB_POISON             0x00000800UL    /* Poison objects */
+#define        SLAB_NO_REAP            0x00001000UL    /* never reap from the 
cache */
+#define        SLAB_HWCACHE_ALIGN      0x00002000UL    /* align objs on a h/w 
cache lines */
+#define SLAB_CACHE_DMA         0x00004000UL    /* use GFP_DMA memory */
+#define SLAB_MUST_HWCACHE_ALIGN        0x00008000UL    /* force alignment */
+#define SLAB_STORE_USER                0x00010000UL    /* store the last owner 
for bug hunting */
+#define SLAB_RECLAIM_ACCOUNT   0x00020000UL    /* track pages allocated to 
indicate
+                                                  what is reclaimable later*/
+#define SLAB_PANIC             0x00040000UL    /* panic if kmem_cache_create() 
fails */
+#define SLAB_DESTROY_BY_RCU    0x00080000UL    /* defer freeing pages to RCU */
+
+/* flags passed to a constructor func */
+#define        SLAB_CTOR_CONSTRUCTOR   0x001UL         /* if not set, then 
deconstructor */
+#define SLAB_CTOR_ATOMIC       0x002UL         /* tell constructor it can't 
sleep */
+#define        SLAB_CTOR_VERIFY        0x004UL         /* tell constructor 
it's a verify call */
+
+/* prototypes */
+extern void __init kmem_cache_init(void);
+
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned 
long,
+                                      void (*)(void *, kmem_cache_t *, 
unsigned long),
+                                      void (*)(void *, kmem_cache_t *, 
unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+extern unsigned int kmem_cache_size(kmem_cache_t *);
+extern const char *kmem_cache_name(kmem_cache_t *);
+extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int 
__nocast gfpflags);
+
+/* Size description struct for general caches. */
+struct cache_sizes {
+       size_t           cs_size;
+       kmem_cache_t    *cs_cachep;
+       kmem_cache_t    *cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, unsigned int __nocast);
+
+static inline void *kmalloc(size_t size, unsigned int __nocast flags)
+{
+       if (__builtin_constant_p(size)) {
+               int i = 0;
+#define CACHE(x) \
+               if (size <= x) \
+                       goto found; \
+               else \
+                       i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+               {
+                       extern void __you_cannot_kmalloc_that_much(void);
+                       __you_cannot_kmalloc_that_much();
+               }
+found:
+               return kmem_cache_alloc((flags & GFP_DMA) ?
+                       malloc_sizes[i].cs_dmacachep :
+                       malloc_sizes[i].cs_cachep, flags);
+       }
+       return __kmalloc(size, flags);
+}
+
+extern void *kcalloc(size_t, size_t, unsigned int __nocast);
+extern void kfree(const void *);
+extern unsigned int ksize(const void *);
+
+#ifdef CONFIG_NUMA
+extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node);
+extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node);
+#else
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int 
node)
+{
+       return kmem_cache_alloc(cachep, flags);
+}
+static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int 
node)
+{
+       return kmalloc(size, flags);
+}
+#endif
+
+extern int FASTCALL(kmem_cache_reap(int));
+extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
+
+/* System wide caches */
+extern kmem_cache_t    *vm_area_cachep;
+extern kmem_cache_t    *names_cachep;
+extern kmem_cache_t    *files_cachep;
+extern kmem_cache_t    *filp_cachep;
+extern kmem_cache_t    *fs_cachep;
+extern kmem_cache_t    *signal_cachep;
+extern kmem_cache_t    *sighand_cachep;
+extern kmem_cache_t    *bio_cachep;
+
+extern atomic_t slab_reclaim_pages;
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SLAB_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/threads.h
--- a/xen/include/asm-ia64/linux/threads.h      Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/threads.h      Thu Sep  1 18:46:28 2005
@@ -7,7 +7,7 @@
  * The default limit for the nr of threads is now in
  * /proc/sys/kernel/threads-max.
  */
- 
+
 /*
  * Maximum supported processors that can run under SMP.  This value is
  * set via configure setting.  The maximum is equal to the size of the
@@ -25,11 +25,12 @@
 /*
  * This controls the default maximum pid allocated to a process
  */
-#define PID_MAX_DEFAULT 0x8000
+#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
 
 /*
  * A maximum of 4 million PIDs should be enough for a while:
  */
-#define PID_MAX_LIMIT (sizeof(long) > 4 ? 4*1024*1024 : PID_MAX_DEFAULT)
+#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
+       (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
 
 #endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/timex.h
--- a/xen/include/asm-ia64/linux/timex.h        Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/timex.h        Thu Sep  1 18:46:28 2005
@@ -240,9 +240,7 @@
 extern long time_maxerror;     /* maximum error */
 extern long time_esterror;     /* estimated error */
 
-extern long time_phase;                /* phase offset (scaled us) */
 extern long time_freq;         /* frequency offset (scaled ppm) */
-extern long time_adj;          /* tick adjust (scaled 1 / HZ) */
 extern long time_reftime;      /* time at last adjustment (s) */
 
 extern long time_adjust;       /* The amount of adjtime left */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/topology.h
--- a/xen/include/asm-ia64/linux/topology.h     Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/topology.h     Thu Sep  1 18:46:28 2005
@@ -31,8 +31,11 @@
 #include <linux/bitops.h>
 #include <linux/mmzone.h>
 #include <linux/smp.h>
+#include <asm/topology.h>
 
-#include <asm/topology.h>
+#ifndef node_has_online_mem
+#define node_has_online_mem(nid) (1)
+#endif
 
 #ifndef nr_cpus_node
 #define nr_cpus_node(node)                                                     
\
@@ -86,6 +89,11 @@
        .cache_hot_time         = 0,                    \
        .cache_nice_tries       = 0,                    \
        .per_cpu_gain           = 25,                   \
+       .busy_idx               = 0,                    \
+       .idle_idx               = 0,                    \
+       .newidle_idx            = 1,                    \
+       .wake_idx               = 0,                    \
+       .forkexec_idx           = 0,                    \
        .flags                  = SD_LOAD_BALANCE       \
                                | SD_BALANCE_NEWIDLE    \
                                | SD_BALANCE_EXEC       \
@@ -112,12 +120,15 @@
        .cache_hot_time         = (5*1000000/2),        \
        .cache_nice_tries       = 1,                    \
        .per_cpu_gain           = 100,                  \
+       .busy_idx               = 2,                    \
+       .idle_idx               = 1,                    \
+       .newidle_idx            = 2,                    \
+       .wake_idx               = 1,                    \
+       .forkexec_idx           = 1,                    \
        .flags                  = SD_LOAD_BALANCE       \
                                | SD_BALANCE_NEWIDLE    \
                                | SD_BALANCE_EXEC       \
-                               | SD_WAKE_AFFINE        \
-                               | SD_WAKE_IDLE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_WAKE_AFFINE,       \
        .last_balance           = jiffies,              \
        .balance_interval       = 1,                    \
        .nr_balance_failed      = 0,                    \
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/wait.h
--- a/xen/include/asm-ia64/linux/wait.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/wait.h Thu Sep  1 18:46:28 2005
@@ -33,7 +33,7 @@
 struct __wait_queue {
        unsigned int flags;
 #define WQ_FLAG_EXCLUSIVE      0x01
-       struct task_struct * task;
+       void *private;
        wait_queue_func_t func;
        struct list_head task_list;
 };
@@ -60,7 +60,7 @@
  */
 
 #define __WAITQUEUE_INITIALIZER(name, tsk) {                           \
-       .task           = tsk,                                          \
+       .private        = tsk,                                          \
        .func           = default_wake_function,                        \
        .task_list      = { NULL, NULL } }
 
@@ -79,14 +79,14 @@
 
 static inline void init_waitqueue_head(wait_queue_head_t *q)
 {
-       q->lock = SPIN_LOCK_UNLOCKED;
+       spin_lock_init(&q->lock);
        INIT_LIST_HEAD(&q->task_list);
 }
 
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
        q->flags = 0;
-       q->task = p;
+       q->private = p;
        q->func = default_wake_function;
 }
 
@@ -94,7 +94,7 @@
                                        wait_queue_func_t func)
 {
        q->flags = 0;
-       q->task = NULL;
+       q->private = NULL;
        q->func = func;
 }
 
@@ -110,7 +110,7 @@
  * aio specifies a wait queue entry with an async notification
  * callback routine, not associated with any task.
  */
-#define is_sync_wait(wait)     (!(wait) || ((wait)->task))
+#define is_sync_wait(wait)     (!(wait) || ((wait)->private))
 
 extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * 
wait));
 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, 
wait_queue_t * wait));
@@ -169,6 +169,18 @@
        finish_wait(&wq, &__wait);                                      \
 } while (0)
 
+/**
+ * wait_event - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
 #define wait_event(wq, condition)                                      \
 do {                                                                   \
        if (condition)                                                  \
@@ -191,6 +203,22 @@
        finish_wait(&wq, &__wait);                                      \
 } while (0)
 
+/**
+ * wait_event_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if the @timeout elapsed, and the remaining
+ * jiffies if the condition evaluated to true before the timeout elapsed.
+ */
 #define wait_event_timeout(wq, condition, timeout)                     \
 ({                                                                     \
        long __ret = timeout;                                           \
@@ -217,6 +245,21 @@
        finish_wait(&wq, &__wait);                                      \
 } while (0)
 
+/**
+ * wait_event_interruptible - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
 #define wait_event_interruptible(wq, condition)                                
\
 ({                                                                     \
        int __ret = 0;                                                  \
@@ -245,6 +288,23 @@
        finish_wait(&wq, &__wait);                                      \
 } while (0)
 
+/**
+ * wait_event_interruptible_timeout - sleep until a condition gets true or a 
timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
+ * was interrupted by a signal, and the remaining jiffies otherwise
+ * if the condition evaluated to true before the timeout elapsed.
+ */
 #define wait_event_interruptible_timeout(wq, condition, timeout)       \
 ({                                                                     \
        long __ret = timeout;                                           \
@@ -324,18 +384,16 @@
 
 #define DEFINE_WAIT(name)                                              \
        wait_queue_t name = {                                           \
-               .task           = current,                              \
+               .private        = current,                              \
                .func           = autoremove_wake_function,             \
-               .task_list      = {     .next = &(name).task_list,      \
-                                       .prev = &(name).task_list,      \
-                               },                                      \
+               .task_list      = LIST_HEAD_INIT((name).task_list),     \
        }
 
 #define DEFINE_WAIT_BIT(name, word, bit)                               \
        struct wait_bit_queue name = {                                  \
                .key = __WAIT_BIT_KEY_INITIALIZER(word, bit),           \
                .wait   = {                                             \
-                       .task           = current,                      \
+                       .private        = current,                      \
                        .func           = wake_bit_function,            \
                        .task_list      =                               \
                                LIST_HEAD_INIT((name).wait.task_list),  \
@@ -344,7 +402,7 @@
 
 #define init_wait(wait)                                                        
\
        do {                                                            \
-               (wait)->task = current;                                 \
+               (wait)->private = current;                              \
                (wait)->func = autoremove_wake_function;                \
                INIT_LIST_HEAD(&(wait)->task_list);                     \
        } while (0)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/mm.h Thu Sep  1 18:46:28 2005
@@ -316,6 +316,7 @@
 #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
 #endif
 
+#if 0  /* removed when rebasing to 2.6.13 */
 /*
  * The zone field is never updated after free_area_init_core()
  * sets it, so none of the operations on it need to be atomic.
@@ -347,6 +348,7 @@
        page->flags &= ~(~0UL << NODEZONE_SHIFT);
        page->flags |= nodezone_num << NODEZONE_SHIFT;
 }
+#endif
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/time.h
--- a/xen/include/asm-ia64/time.h       Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/time.h       Thu Sep  1 18:46:28 2005
@@ -1,1 +1,1 @@
-#include <xen/linuxtime.h>
+#include <asm/linux/time.h>
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/bitop.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/bitop.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,88 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+
+/*
+ * Find next zero bit in a bitmap reasonably efficiently..
+ */
+
+int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long 
offset)
+{
+       unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
+       unsigned long result = offset & ~63UL;
+       unsigned long tmp;
+
+       if (offset >= size)
+               return size;
+       size -= result;
+       offset &= 63UL;
+       if (offset) {
+               tmp = *(p++);
+               tmp |= ~0UL >> (64-offset);
+               if (size < 64)
+                       goto found_first;
+               if (~tmp)
+                       goto found_middle;
+               size -= 64;
+               result += 64;
+       }
+       while (size & ~63UL) {
+               if (~(tmp = *(p++)))
+                       goto found_middle;
+               result += 64;
+               size -= 64;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+found_first:
+       tmp |= ~0UL << size;
+       if (tmp == ~0UL)                /* any bits zero? */
+               return result + size;   /* nope */
+found_middle:
+       return result + ffz(tmp);
+}
+EXPORT_SYMBOL(__find_next_zero_bit);
+
+/*
+ * Find next bit in a bitmap reasonably efficiently..
+ */
+int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
+{
+       unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
+       unsigned long result = offset & ~63UL;
+       unsigned long tmp;
+
+       if (offset >= size)
+               return size;
+       size -= result;
+       offset &= 63UL;
+       if (offset) {
+               tmp = *(p++);
+               tmp &= ~0UL << offset;
+               if (size < 64)
+                       goto found_first;
+               if (tmp)
+                       goto found_middle;
+               size -= 64;
+               result += 64;
+       }
+       while (size & ~63UL) {
+               if ((tmp = *(p++)))
+                       goto found_middle;
+               result += 64;
+               size -= 64;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+  found_first:
+       tmp &= ~0UL >> (64-size);
+       if (tmp == 0UL)         /* Are any bits set? */
+               return result + size; /* Nope. */
+  found_middle:
+       return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(__find_next_bit);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/clear_page.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/clear_page.S  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 1999-2002 Hewlett-Packard Co
+ *     Stephane Eranian <eranian@xxxxxxxxxx>
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
+ *
+ * 1/06/01 davidm      Tuned for Itanium.
+ * 2/12/02 kchen       Tuned for both Itanium and McKinley
+ * 3/08/02 davidm      Some more tweaking
+ */
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_ITANIUM
+# define L3_LINE_SIZE  64      // Itanium L3 line size
+# define PREFETCH_LINES        9       // magic number
+#else
+# define L3_LINE_SIZE  128     // McKinley L3 line size
+# define PREFETCH_LINES        12      // magic number
+#endif
+
+#define saved_lc       r2
+#define dst_fetch      r3
+#define dst1           r8
+#define dst2           r9
+#define dst3           r10
+#define dst4           r11
+
+#define dst_last       r31
+
+GLOBAL_ENTRY(clear_page)
+       .prologue
+       .regstk 1,0,0,0
+       mov r16 = PAGE_SIZE/L3_LINE_SIZE-1      // main loop count, 
-1=repeat/until
+       .save ar.lc, saved_lc
+       mov saved_lc = ar.lc
+
+       .body
+       mov ar.lc = (PREFETCH_LINES - 1)
+       mov dst_fetch = in0
+       adds dst1 = 16, in0
+       adds dst2 = 32, in0
+       ;;
+.fetch:        stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+       adds dst3 = 48, in0             // executing this multiple times is 
harmless
+       br.cloop.sptk.few .fetch
+       ;;
+       addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
+       mov ar.lc = r16                 // one L3 line per iteration
+       adds dst4 = 64, in0
+       ;;
+#ifdef CONFIG_ITANIUM
+       // Optimized for Itanium
+1:     stf.spill.nta [dst1] = f0, 64
+       stf.spill.nta [dst2] = f0, 64
+       cmp.lt p8,p0=dst_fetch, dst_last
+       ;;
+#else
+       // Optimized for McKinley
+1:     stf.spill.nta [dst1] = f0, 64
+       stf.spill.nta [dst2] = f0, 64
+       stf.spill.nta [dst3] = f0, 64
+       stf.spill.nta [dst4] = f0, 128
+       cmp.lt p8,p0=dst_fetch, dst_last
+       ;;
+       stf.spill.nta [dst1] = f0, 64
+       stf.spill.nta [dst2] = f0, 64
+#endif
+       stf.spill.nta [dst3] = f0, 64
+(p8)   stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+       br.cloop.sptk.few 1b
+       ;;
+       mov ar.lc = saved_lc            // restore lc
+       br.ret.sptk.many rp
+END(clear_page)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/copy_page_mck.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/copy_page_mck.S       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,185 @@
+/*
+ * McKinley-optimized version of copy_page().
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ *     David Mosberger <davidm@xxxxxxxxxx>
+ *
+ * Inputs:
+ *     in0:    address of target page
+ *     in1:    address of source page
+ * Output:
+ *     no return value
+ *
+ * General idea:
+ *     - use regular loads and stores to prefetch data to avoid consuming 
M-slot just for
+ *       lfetches => good for in-cache performance
+ *     - avoid l2 bank-conflicts by not storing into the same 16-byte bank 
within a single
+ *       cycle
+ *
+ * Principle of operation:
+ *     First, note that L1 has a line-size of 64 bytes and L2 a line-size of 
128 bytes.
+ *     To avoid secondary misses in L2, we prefetch both source and 
destination with a line-size
+ *     of 128 bytes.  When both of these lines are in the L2 and the first 
half of the
+ *     source line is in L1, we start copying the remaining words.  The second 
half of the
+ *     source line is prefetched in an earlier iteration, so that by the time 
we start
+ *     accessing it, it's also present in the L1.
+ *
+ *     We use a software-pipelined loop to control the overall operation.  The 
pipeline
+ *     has 2*PREFETCH_DIST+K stages.  The first PREFETCH_DIST stages are used 
for prefetching
+ *     source cache-lines.  The second PREFETCH_DIST stages are used for 
prefetching destination
+ *     cache-lines, the last K stages are used to copy the cache-line words 
not copied by
+ *     the prefetches.  The four relevant points in the pipelined are called 
A, B, C, D:
+ *     p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a 
destination-line
+ *     should be prefetched, p[C] is TRUE if the second half of an L2 line 
should be brought
+ *     into L1D and p[D] is TRUE if a cacheline needs to be copied.
+ *
+ *     This all sounds very complicated, but thanks to the modulo-scheduled 
loop support,
+ *     the resulting code is very regular and quite easy to follow (once you 
get the idea).
+ *
+ *     As a secondary optimization, the first 2*PREFETCH_DIST iterations are 
implemented
+ *     as the separate .prefetch_loop.  Logically, this loop performs exactly 
like the
+ *     main-loop (.line_copy), but has all known-to-be-predicated-off 
instructions removed,
+ *     so that each loop iteration is faster (again, good for cached case).
+ *
+ *     When reading the code, it helps to keep the following picture in mind:
+ *
+ *            word 0 word 1
+ *            +------+------+---
+ *           | v[x] |  t1  | ^
+ *           | t2   |  t3  | |
+ *           | t4   |  t5  | |
+ *           | t6   |  t7  | | 128 bytes
+ *                   | n[y] |  t9  | | (L2 cache line)
+ *           | t10  |  t11 | |
+ *           | t12  |  t13 | |
+ *           | t14  |  t15 | v
+ *           +------+------+---
+ *
+ *     Here, v[x] is copied by the (memory) prefetch.  n[y] is loaded at p[C]
+ *     to fetch the second-half of the L2 cache line into L1, and the tX words 
are copied in
+ *     an order that avoids bank conflicts.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PREFETCH_DIST  8               // McKinley sustains 16 outstanding L2 
misses (8 ld, 8 st)
+
+#define src0           r2
+#define src1           r3
+#define dst0           r9
+#define dst1           r10
+#define src_pre_mem    r11
+#define dst_pre_mem    r14
+#define src_pre_l2     r15
+#define dst_pre_l2     r16
+#define t1             r17
+#define t2             r18
+#define t3             r19
+#define t4             r20
+#define t5             t1      // alias!
+#define t6             t2      // alias!
+#define t7             t3      // alias!
+#define t9             t5      // alias!
+#define t10            t4      // alias!
+#define t11            t7      // alias!
+#define t12            t6      // alias!
+#define t14            t10     // alias!
+#define t13            r21
+#define t15            r22
+
+#define saved_lc       r23
+#define saved_pr       r24
+
+#define        A       0
+#define B      (PREFETCH_DIST)
+#define C      (B + PREFETCH_DIST)
+#define D      (C + 3)
+#define N      (D + 1)
+#define Nrot   ((N + 7) & ~7)
+
+GLOBAL_ENTRY(copy_page)
+       .prologue
+       alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
+
+       .rotr v[2*PREFETCH_DIST], n[D-C+1]
+       .rotp p[N]
+
+       .save ar.lc, saved_lc
+       mov saved_lc = ar.lc
+       .save pr, saved_pr
+       mov saved_pr = pr
+       .body
+
+       mov src_pre_mem = in1
+       mov pr.rot = 0x10000
+       mov ar.ec = 1                           // special unrolled loop
+
+       mov dst_pre_mem = in0
+       mov ar.lc = 2*PREFETCH_DIST - 1
+
+       add src_pre_l2 = 8*8, in1
+       add dst_pre_l2 = 8*8, in0
+       add src0 = 8, in1                       // first t1 src
+       add src1 = 3*8, in1                     // first t3 src
+       add dst0 = 8, in0                       // first t1 dst
+       add dst1 = 3*8, in0                     // first t3 dst
+       mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
+       nop.m 0
+       nop.i 0
+       ;;
+       // same as .line_copy loop, but with all predicated-off instructions 
removed:
+.prefetch_loop:
+(p[A]) ld8 v[A] = [src_pre_mem], 128           // M0
+(p[B]) st8 [dst_pre_mem] = v[B], 128           // M2
+       br.ctop.sptk .prefetch_loop
+       ;;
+       cmp.eq p16, p0 = r0, r0                 // reset p16 to 1 (br.ctop 
cleared it to zero)
+       mov ar.lc = t1                          // with 64KB pages, t1 is too 
big to fit in 8 bits!
+       mov ar.ec = N                           // # of stages in pipeline
+       ;;
+.line_copy:
+(p[D]) ld8 t2 = [src0], 3*8                    // M0
+(p[D]) ld8 t4 = [src1], 3*8                    // M1
+(p[B]) st8 [dst_pre_mem] = v[B], 128           // M2 prefetch dst from memory
+(p[D]) st8 [dst_pre_l2] = n[D-C], 128          // M3 prefetch dst from L2
+       ;;
+(p[A]) ld8 v[A] = [src_pre_mem], 128           // M0 prefetch src from memory
+(p[C]) ld8 n[0] = [src_pre_l2], 128            // M1 prefetch src from L2
+(p[D]) st8 [dst0] =  t1, 8                     // M2
+(p[D]) st8 [dst1] =  t3, 8                     // M3
+       ;;
+(p[D]) ld8  t5 = [src0], 8
+(p[D]) ld8  t7 = [src1], 3*8
+(p[D]) st8 [dst0] =  t2, 3*8
+(p[D]) st8 [dst1] =  t4, 3*8
+       ;;
+(p[D]) ld8  t6 = [src0], 3*8
+(p[D]) ld8 t10 = [src1], 8
+(p[D]) st8 [dst0] =  t5, 8
+(p[D]) st8 [dst1] =  t7, 3*8
+       ;;
+(p[D]) ld8  t9 = [src0], 3*8
+(p[D]) ld8 t11 = [src1], 3*8
+(p[D]) st8 [dst0] =  t6, 3*8
+(p[D]) st8 [dst1] = t10, 8
+       ;;
+(p[D]) ld8 t12 = [src0], 8
+(p[D]) ld8 t14 = [src1], 8
+(p[D]) st8 [dst0] =  t9, 3*8
+(p[D]) st8 [dst1] = t11, 3*8
+       ;;
+(p[D]) ld8 t13 = [src0], 4*8
+(p[D]) ld8 t15 = [src1], 4*8
+(p[D]) st8 [dst0] = t12, 8
+(p[D]) st8 [dst1] = t14, 8
+       ;;
+(p[D-1])ld8  t1 = [src0], 8
+(p[D-1])ld8  t3 = [src1], 8
+(p[D]) st8 [dst0] = t13, 4*8
+(p[D]) st8 [dst1] = t15, 4*8
+       br.ctop.sptk .line_copy
+       ;;
+       mov ar.lc = saved_lc
+       mov pr = saved_pr, -1
+       br.ret.sptk.many rp
+END(copy_page)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/flush.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/flush.S       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,61 @@
+/*
+ * Cache flushing routines.
+ *
+ * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 05/28/05 Zoltan Menyhart    Dynamic stride size
+ */
+
+#include <asm/asmmacro.h>
+
+
+       /*
+        * flush_icache_range(start,end)
+        *
+        *      Make i-cache(s) coherent with d-caches.
+        *
+        *      Must deal with range from start to end-1 but nothing else (need to
+        *      be careful not to touch addresses that may be unmapped).
+        *
+        *      Note: "in0" and "in1" are preserved for debugging purposes.
+        */
+GLOBAL_ENTRY(flush_icache_range)
+
+       .prologue
+       alloc   r2=ar.pfs,2,0,0,0
+       movl    r3=ia64_i_cache_stride_shift
+       mov     r21=1
+       ;;
+       ld8     r20=[r3]                // r20: stride shift
+       sub     r22=in1,r0,1            // last byte address
+       ;;
+       shr.u   r23=in0,r20             // start / (stride size)
+       shr.u   r22=r22,r20             // (last byte address) / (stride size)
+       shl     r21=r21,r20             // r21: stride size of the i-cache(s)
+       ;;
+       sub     r8=r22,r23              // number of strides - 1
+       shl     r24=r23,r20             // r24: addresses for "fc.i" =
+                                       //      "start" rounded down to stride boundary
+       .save   ar.lc,r3
+       mov     r3=ar.lc                // save ar.lc
+       ;;
+
+       .body
+       mov     ar.lc=r8
+       ;;
+       /*
+        * 32 byte aligned loop, even number of (actually 2) bundles
+        */
+.Loop: fc.i    r24                     // issuable on M0 only
+       add     r24=r21,r24             // we flush "stride size" bytes per 
iteration
+       nop.i   0
+       br.cloop.sptk.few .Loop
+       ;;
+       sync.i
+       ;;
+       srlz.i
+       ;;
+       mov     ar.lc=r3                // restore ar.lc
+       br.ret.sptk.many rp
+END(flush_icache_range)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv32.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/idiv32.S      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 32-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture".  This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP    mod
+#else
+# define OP    div
+#endif
+
+#ifdef UNSIGNED
+# define SGN   u
+# define EXTEND        zxt4
+# define INT_TO_FP(a,b)        fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b)        fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define EXTEND        sxt4
+# define INT_TO_FP(a,b)        fcvt.xf a=b
+# define FP_TO_INT(a,b)        fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b)    a##b
+#define PASTE(a,b)     PASTE1(a,b)
+#define NAME           PASTE(PASTE(__,SGN),PASTE(OP,si3))
+
+GLOBAL_ENTRY(NAME)
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       mov r2 = 0xffdd                 // r2 = -34 + 65535 (fp reg format bias)
+       EXTEND in0 = in0                // in0 = a
+       EXTEND in1 = in1                // in1 = b
+       ;;
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+#ifdef MODULO
+       sub in1 = r0, in1               // in1 = -b
+#endif
+       ;;
+       // Convert the inputs to FP, to avoid FP software-assist faults.
+       INT_TO_FP(f8, f8)
+       INT_TO_FP(f9, f9)
+       ;;
+       setf.exp f7 = r2                // f7 = 2^-34
+       frcpa.s1 f6, p6 = f8, f9        // y0 = frcpa(b)
+       ;;
+(p6)   fmpy.s1 f8 = f8, f6             // q0 = a*y0
+(p6)   fnma.s1 f6 = f9, f6, f1         // e0 = -b*y0 + 1 
+       ;;
+#ifdef MODULO
+       setf.sig f9 = in1               // f9 = -b
+#endif
+(p6)   fma.s1 f8 = f6, f8, f8          // q1 = e0*q0 + q0
+(p6)   fma.s1 f6 = f6, f6, f7          // e1 = e0*e0 + 2^-34
+       ;;
+#ifdef MODULO
+       setf.sig f7 = in0
+#endif
+(p6)   fma.s1 f6 = f6, f8, f8          // q2 = e1*q1 + q1
+       ;;
+       FP_TO_INT(f6, f6)               // q = trunc(q2)
+       ;;
+#ifdef MODULO
+       xma.l f6 = f6, f9, f7           // r = q*(-b) + a
+       ;;
+#endif
+       getf.sig r8 = f6                // transfer result to result register
+       br.ret.sptk.many rp
+END(NAME)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv64.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/idiv64.S      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 64-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture".  This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP    mod
+#else
+# define OP    div
+#endif
+
+#ifdef UNSIGNED
+# define SGN   u
+# define INT_TO_FP(a,b)        fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b)        fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define INT_TO_FP(a,b)        fcvt.xf a=b
+# define FP_TO_INT(a,b)        fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b)    a##b
+#define PASTE(a,b)     PASTE1(a,b)
+#define NAME           PASTE(PASTE(__,SGN),PASTE(OP,di3))
+
+GLOBAL_ENTRY(NAME)
+       .regstk 2,0,0,0
+       // Transfer inputs to FP registers.
+       setf.sig f8 = in0
+       setf.sig f9 = in1
+       ;;
+       // Convert the inputs to FP, to avoid FP software-assist faults.
+       INT_TO_FP(f8, f8)
+       INT_TO_FP(f9, f9)
+       ;;
+       frcpa.s1 f11, p6 = f8, f9       // y0 = frcpa(b)
+       ;;
+(p6)   fmpy.s1 f7 = f8, f11            // q0 = a*y0
+(p6)   fnma.s1 f6 = f9, f11, f1        // e0 = -b*y0 + 1
+       ;;
+(p6)   fma.s1 f10 = f7, f6, f7         // q1 = q0*e0 + q0
+(p6)   fmpy.s1 f7 = f6, f6             // e1 = e0*e0
+       ;;
+#ifdef MODULO
+       sub in1 = r0, in1               // in1 = -b
+#endif
+(p6)   fma.s1 f10 = f10, f7, f10       // q2 = q1*e1 + q1
+(p6)   fma.s1 f6 = f11, f6, f11        // y1 = y0*e0 + y0
+       ;;
+(p6)   fma.s1 f6 = f6, f7, f6          // y2 = y1*e1 + y1
+(p6)   fnma.s1 f7 = f9, f10, f8        // r = -b*q2 + a
+       ;;
+#ifdef MODULO
+       setf.sig f8 = in0               // f8 = a
+       setf.sig f9 = in1               // f9 = -b
+#endif
+(p6)   fma.s1 f11 = f7, f6, f10        // q3 = r*y2 + q2
+       ;;
+       FP_TO_INT(f11, f11)             // q = trunc(q3)
+       ;;
+#ifdef MODULO
+       xma.l f11 = f11, f9, f8         // r = q*(-b) + a
+       ;;
+#endif
+       getf.sig r8 = f11               // transfer result to result register
+       br.ret.sptk.many rp
+END(NAME)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memcpy_mck.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/memcpy_mck.S  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,661 @@
+/*
+ * Itanium 2-optimized version of memcpy and copy_user function
+ *
+ * Inputs:
+ *     in0:    destination address
+ *     in1:    source address
+ *     in2:    number of bytes to copy
+ * Output:
+ *     0 if success, or number of byte NOT copied if error occurred.
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define EK(y...) EX(y)
+
+/* McKinley specific optimization */
+
+#define retval         r8
+#define saved_pfs      r31
+#define saved_lc       r10
+#define saved_pr       r11
+#define saved_in0      r14
+#define saved_in1      r15
+#define saved_in2      r16
+
+#define src0           r2
+#define src1           r3
+#define dst0           r17
+#define dst1           r18
+#define cnt            r9
+
+/* r19-r30 are temp for each code section */
+#define PREFETCH_DIST  8
+#define src_pre_mem    r19
+#define dst_pre_mem    r20
+#define src_pre_l2     r21
+#define dst_pre_l2     r22
+#define t1             r23
+#define t2             r24
+#define t3             r25
+#define t4             r26
+#define t5             t1      // alias!
+#define t6             t2      // alias!
+#define t7             t3      // alias!
+#define n8             r27
+#define t9             t5      // alias!
+#define t10            t4      // alias!
+#define t11            t7      // alias!
+#define t12            t6      // alias!
+#define t14            t10     // alias!
+#define t13            r28
+#define t15            r29
+#define tmp            r30
+
+/* defines for long_copy block */
+#define        A       0
+#define B      (PREFETCH_DIST)
+#define C      (B + PREFETCH_DIST)
+#define D      (C + 1)
+#define N      (D + 1)
+#define Nrot   ((N + 7) & ~7)
+
+/* alias */
+#define in0            r32
+#define in1            r33
+#define in2            r34
+
+GLOBAL_ENTRY(memcpy)
+       and     r28=0x7,in0
+       and     r29=0x7,in1
+       mov     f6=f0
+       br.cond.sptk .common_code
+       ;;
+END(memcpy)
+GLOBAL_ENTRY(__copy_user)
+       .prologue
+// check dest alignment
+       and     r28=0x7,in0
+       and     r29=0x7,in1
+       mov     f6=f1
+       mov     saved_in0=in0   // save dest pointer
+       mov     saved_in1=in1   // save src pointer
+       mov     saved_in2=in2   // save len
+       ;;
+.common_code:
+       cmp.gt  p15,p0=8,in2    // check for small size
+       cmp.ne  p13,p0=0,r28    // check dest alignment
+       cmp.ne  p14,p0=0,r29    // check src alignment
+       add     src0=0,in1
+       sub     r30=8,r28       // for .align_dest
+       mov     retval=r0       // initialize return value
+       ;;
+       add     dst0=0,in0
+       add     dst1=1,in0      // dest odd index
+       cmp.le  p6,p0 = 1,r30   // for .align_dest
+(p15)  br.cond.dpnt .memcpy_short
+(p13)  br.cond.dpnt .align_dest
+(p14)  br.cond.dpnt .unaligned_src
+       ;;
+
+// both dest and src are aligned on 8-byte boundary
+.aligned_src:
+       .save ar.pfs, saved_pfs
+       alloc   saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+       .save pr, saved_pr
+       mov     saved_pr=pr
+
+       shr.u   cnt=in2,7       // this much cache line
+       ;;
+       cmp.lt  p6,p0=2*PREFETCH_DIST,cnt
+       cmp.lt  p7,p8=1,cnt
+       .save ar.lc, saved_lc
+       mov     saved_lc=ar.lc
+       .body
+       add     cnt=-1,cnt
+       add     src_pre_mem=0,in1       // prefetch src pointer
+       add     dst_pre_mem=0,in0       // prefetch dest pointer
+       ;;
+(p7)   mov     ar.lc=cnt       // prefetch count
+(p8)   mov     ar.lc=r0
+(p6)   br.cond.dpnt .long_copy
+       ;;
+
+.prefetch:
+       lfetch.fault      [src_pre_mem], 128
+       lfetch.fault.excl [dst_pre_mem], 128
+       br.cloop.dptk.few .prefetch
+       ;;
+
+.medium_copy:
+       and     tmp=31,in2      // copy length after iteration
+       shr.u   r29=in2,5       // number of 32-byte iteration
+       add     dst1=8,dst0     // 2nd dest pointer
+       ;;
+       add     cnt=-1,r29      // ctop iteration adjustment
+       cmp.eq  p10,p0=r29,r0   // do we really need to loop?
+       add     src1=8,src0     // 2nd src pointer
+       cmp.le  p6,p0=8,tmp
+       ;;
+       cmp.le  p7,p0=16,tmp
+       mov     ar.lc=cnt       // loop setup
+       cmp.eq  p16,p17 = r0,r0
+       mov     ar.ec=2
+(p10)  br.dpnt.few .aligned_src_tail
+       ;;
+       TEXT_ALIGN(32)
+1:
+EX(.ex_handler, (p16)  ld8     r34=[src0],16)
+EK(.ex_handler, (p16)  ld8     r38=[src1],16)
+EX(.ex_handler, (p17)  st8     [dst0]=r33,16)
+EK(.ex_handler, (p17)  st8     [dst1]=r37,16)
+       ;;
+EX(.ex_handler, (p16)  ld8     r32=[src0],16)
+EK(.ex_handler, (p16)  ld8     r36=[src1],16)
+EX(.ex_handler, (p16)  st8     [dst0]=r34,16)
+EK(.ex_handler, (p16)  st8     [dst1]=r38,16)
+       br.ctop.dptk.few 1b
+       ;;
+
+.aligned_src_tail:
+EX(.ex_handler, (p6)   ld8     t1=[src0])
+       mov     ar.lc=saved_lc
+       mov     ar.pfs=saved_pfs
+EX(.ex_hndlr_s, (p7)   ld8     t2=[src1],8)
+       cmp.le  p8,p0=24,tmp
+       and     r21=-8,tmp
+       ;;
+EX(.ex_hndlr_s, (p8)   ld8     t3=[src1])
+EX(.ex_handler, (p6)   st8     [dst0]=t1)      // store byte 1
+       and     in2=7,tmp       // remaining length
+EX(.ex_hndlr_d, (p7)   st8     [dst1]=t2,8)    // store byte 2
+       add     src0=src0,r21   // setting up src pointer
+       add     dst0=dst0,r21   // setting up dest pointer
+       ;;
+EX(.ex_handler, (p8)   st8     [dst1]=t3)      // store byte 3
+       mov     pr=saved_pr,-1
+       br.dptk.many .memcpy_short
+       ;;
+
+/* code taken from copy_page_mck */
+.long_copy:
+       .rotr v[2*PREFETCH_DIST]
+       .rotp p[N]
+
+       mov src_pre_mem = src0
+       mov pr.rot = 0x10000
+       mov ar.ec = 1                           // special unrolled loop
+
+       mov dst_pre_mem = dst0
+
+       add src_pre_l2 = 8*8, src0
+       add dst_pre_l2 = 8*8, dst0
+       ;;
+       add src0 = 8, src_pre_mem               // first t1 src
+       mov ar.lc = 2*PREFETCH_DIST - 1
+       shr.u cnt=in2,7                         // number of lines
+       add src1 = 3*8, src_pre_mem             // first t3 src
+       add dst0 = 8, dst_pre_mem               // first t1 dst
+       add dst1 = 3*8, dst_pre_mem             // first t3 dst
+       ;;
+       and tmp=127,in2                         // remaining bytes after this block
+       add cnt = -(2*PREFETCH_DIST) - 1, cnt
+       // same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+EX(.ex_hndlr_lcpy_1, (p[A])    ld8 v[A] = [src_pre_mem], 128)          // M0
+EK(.ex_hndlr_lcpy_1, (p[B])    st8 [dst_pre_mem] = v[B], 128)          // M2
+       br.ctop.sptk .prefetch_loop
+       ;;
+       cmp.eq p16, p0 = r0, r0                 // reset p16 to 1
+       mov ar.lc = cnt
+       mov ar.ec = N                           // # of stages in pipeline
+       ;;
+.line_copy:
+EX(.ex_handler,        (p[D])  ld8 t2 = [src0], 3*8)                   // M0
+EK(.ex_handler,        (p[D])  ld8 t4 = [src1], 3*8)                   // M1
+EX(.ex_handler_lcpy,   (p[B])  st8 [dst_pre_mem] = v[B], 128)          // M2 prefetch dst from memory
+EK(.ex_handler_lcpy,   (p[D])  st8 [dst_pre_l2] = n8, 128)             // M3 prefetch dst from L2
+       ;;
+EX(.ex_handler_lcpy,   (p[A])  ld8 v[A] = [src_pre_mem], 128)          // M0 prefetch src from memory
+EK(.ex_handler_lcpy,   (p[C])  ld8 n8 = [src_pre_l2], 128)             // M1 prefetch src from L2
+EX(.ex_handler,        (p[D])  st8 [dst0] =  t1, 8)                    // M2
+EK(.ex_handler,        (p[D])  st8 [dst1] =  t3, 8)                    // M3
+       ;;
+EX(.ex_handler,        (p[D])  ld8  t5 = [src0], 8)
+EK(.ex_handler,        (p[D])  ld8  t7 = [src1], 3*8)
+EX(.ex_handler,        (p[D])  st8 [dst0] =  t2, 3*8)
+EK(.ex_handler,        (p[D])  st8 [dst1] =  t4, 3*8)
+       ;;
+EX(.ex_handler,        (p[D])  ld8  t6 = [src0], 3*8)
+EK(.ex_handler,        (p[D])  ld8 t10 = [src1], 8)
+EX(.ex_handler,        (p[D])  st8 [dst0] =  t5, 8)
+EK(.ex_handler,        (p[D])  st8 [dst1] =  t7, 3*8)
+       ;;
+EX(.ex_handler,        (p[D])  ld8  t9 = [src0], 3*8)
+EK(.ex_handler,        (p[D])  ld8 t11 = [src1], 3*8)
+EX(.ex_handler,        (p[D])  st8 [dst0] =  t6, 3*8)
+EK(.ex_handler,        (p[D])  st8 [dst1] = t10, 8)
+       ;;
+EX(.ex_handler,        (p[D])  ld8 t12 = [src0], 8)
+EK(.ex_handler,        (p[D])  ld8 t14 = [src1], 8)
+EX(.ex_handler,        (p[D])  st8 [dst0] =  t9, 3*8)
+EK(.ex_handler,        (p[D])  st8 [dst1] = t11, 3*8)
+       ;;
+EX(.ex_handler,        (p[D])  ld8 t13 = [src0], 4*8)
+EK(.ex_handler,        (p[D])  ld8 t15 = [src1], 4*8)
+EX(.ex_handler,        (p[D])  st8 [dst0] = t12, 8)
+EK(.ex_handler,        (p[D])  st8 [dst1] = t14, 8)
+       ;;
+EX(.ex_handler,        (p[C])  ld8  t1 = [src0], 8)
+EK(.ex_handler,        (p[C])  ld8  t3 = [src1], 8)
+EX(.ex_handler,        (p[D])  st8 [dst0] = t13, 4*8)
+EK(.ex_handler,        (p[D])  st8 [dst1] = t15, 4*8)
+       br.ctop.sptk .line_copy
+       ;;
+
+       add dst0=-8,dst0
+       add src0=-8,src0
+       mov in2=tmp
+       .restore sp
+       br.sptk.many .medium_copy
+       ;;
+
+#define BLOCK_SIZE     128*32
+#define blocksize      r23
+#define curlen         r24
+
+// dest is on 8-byte boundary, src is not. We need to do
+// ld8-ld8, shrp, then st8.  Max 8 byte copy per cycle.
+.unaligned_src:
+       .prologue
+       .save ar.pfs, saved_pfs
+       alloc   saved_pfs=ar.pfs,3,5,0,8
+       .save ar.lc, saved_lc
+       mov     saved_lc=ar.lc
+       .save pr, saved_pr
+       mov     saved_pr=pr
+       .body
+.4k_block:
+       mov     saved_in0=dst0  // need to save all input arguments
+       mov     saved_in2=in2
+       mov     blocksize=BLOCK_SIZE
+       ;;
+       cmp.lt  p6,p7=blocksize,in2
+       mov     saved_in1=src0
+       ;;
+(p6)   mov     in2=blocksize
+       ;;
+       shr.u   r21=in2,7       // this much cache line
+       shr.u   r22=in2,4       // number of 16-byte iteration
+       and     curlen=15,in2   // copy length after iteration
+       and     r30=7,src0      // source alignment
+       ;;
+       cmp.lt  p7,p8=1,r21
+       add     cnt=-1,r21
+       ;;
+
+       add     src_pre_mem=0,src0      // prefetch src pointer
+       add     dst_pre_mem=0,dst0      // prefetch dest pointer
+       and     src0=-8,src0            // 1st src pointer
+(p7)   mov     ar.lc = cnt
+(p8)   mov     ar.lc = r0
+       ;;
+       TEXT_ALIGN(32)
+1:     lfetch.fault      [src_pre_mem], 128
+       lfetch.fault.excl [dst_pre_mem], 128
+       br.cloop.dptk.few 1b
+       ;;
+
+       shladd  dst1=r22,3,dst0 // 2nd dest pointer
+       shladd  src1=r22,3,src0 // 2nd src pointer
+       cmp.eq  p8,p9=r22,r0    // do we really need to loop?
+       cmp.le  p6,p7=8,curlen; // have at least 8 byte remaining?
+       add     cnt=-1,r22      // ctop iteration adjustment
+       ;;
+EX(.ex_handler, (p9)   ld8     r33=[src0],8)   // loop primer
+EK(.ex_handler, (p9)   ld8     r37=[src1],8)
+(p8)   br.dpnt.few .noloop
+       ;;
+
+// The jump address is calculated based on src alignment. The COPYU
+// macro below needs to confine its size to a power of two, so an entry
+// can be calculated using shl instead of an expensive multiply. The
+// size is then hard coded by the following #define to match the
+// actual size.  This makes it somewhat tedious when the COPYU macro gets
+// changed and this needs to be adjusted to match.
+#define LOOP_SIZE 6
+1:
+       mov     r29=ip          // jmp_table thread
+       mov     ar.lc=cnt
+       ;;
+       add     r29=.jump_table - 1b - (.jmp1-.jump_table), r29
+       shl     r28=r30, LOOP_SIZE      // jmp_table thread
+       mov     ar.ec=2         // loop setup
+       ;;
+       add     r29=r29,r28             // jmp_table thread
+       cmp.eq  p16,p17=r0,r0
+       ;;
+       mov     b6=r29                  // jmp_table thread
+       ;;
+       br.cond.sptk.few b6
+
+// for 8-15 byte case
+// We will skip the loop, but need to replicate the side effect
+// that the loop produces.
+.noloop:
+EX(.ex_handler, (p6)   ld8     r37=[src1],8)
+       add     src0=8,src0
+(p6)   shl     r25=r30,3
+       ;;
+EX(.ex_handler, (p6)   ld8     r27=[src1])
+(p6)   shr.u   r28=r37,r25
+(p6)   sub     r26=64,r25
+       ;;
+(p6)   shl     r27=r27,r26
+       ;;
+(p6)   or      r21=r28,r27
+
+.unaligned_src_tail:
+/* check if we have more than blocksize to copy, if so go back */
+       cmp.gt  p8,p0=saved_in2,blocksize
+       ;;
+(p8)   add     dst0=saved_in0,blocksize
+(p8)   add     src0=saved_in1,blocksize
+(p8)   sub     in2=saved_in2,blocksize
+(p8)   br.dpnt .4k_block
+       ;;
+
+/* we have up to 15 byte to copy in the tail.
+ * part of work is already done in the jump table code
+ * we are at the following state.
+ * src side:
+ * 
+ *   xxxxxx xx                   <----- r21 has xxxxxxxx already
+ * -------- -------- --------
+ * 0        8        16
+ *          ^
+ *          |
+ *          src1
+ * 
+ * dst
+ * -------- -------- --------
+ * ^
+ * |
+ * dst1
+ */
+EX(.ex_handler, (p6)   st8     [dst1]=r21,8)   // more than 8 byte to copy
+(p6)   add     curlen=-8,curlen        // update length
+       mov     ar.pfs=saved_pfs
+       ;;
+       mov     ar.lc=saved_lc
+       mov     pr=saved_pr,-1
+       mov     in2=curlen      // remaining length
+       mov     dst0=dst1       // dest pointer
+       add     src0=src1,r30   // forward by src alignment
+       ;;
+
+// 7 byte or smaller.
+.memcpy_short:
+       cmp.le  p8,p9   = 1,in2
+       cmp.le  p10,p11 = 2,in2
+       cmp.le  p12,p13 = 3,in2
+       cmp.le  p14,p15 = 4,in2
+       add     src1=1,src0     // second src pointer
+       add     dst1=1,dst0     // second dest pointer
+       ;;
+
+EX(.ex_handler_short, (p8)     ld1     t1=[src0],2)
+EK(.ex_handler_short, (p10)    ld1     t2=[src1],2)
+(p9)   br.ret.dpnt rp          // 0 byte copy
+       ;;
+
+EX(.ex_handler_short, (p8)     st1     [dst0]=t1,2)
+EK(.ex_handler_short, (p10)    st1     [dst1]=t2,2)
+(p11)  br.ret.dpnt rp          // 1 byte copy
+
+EX(.ex_handler_short, (p12)    ld1     t3=[src0],2)
+EK(.ex_handler_short, (p14)    ld1     t4=[src1],2)
+(p13)  br.ret.dpnt rp          // 2 byte copy
+       ;;
+
+       cmp.le  p6,p7   = 5,in2
+       cmp.le  p8,p9   = 6,in2
+       cmp.le  p10,p11 = 7,in2
+
+EX(.ex_handler_short, (p12)    st1     [dst0]=t3,2)
+EK(.ex_handler_short, (p14)    st1     [dst1]=t4,2)
+(p15)  br.ret.dpnt rp          // 3 byte copy
+       ;;
+
+EX(.ex_handler_short, (p6)     ld1     t5=[src0],2)
+EK(.ex_handler_short, (p8)     ld1     t6=[src1],2)
+(p7)   br.ret.dpnt rp          // 4 byte copy
+       ;;
+
+EX(.ex_handler_short, (p6)     st1     [dst0]=t5,2)
+EK(.ex_handler_short, (p8)     st1     [dst1]=t6,2)
+(p9)   br.ret.dptk rp          // 5 byte copy
+
+EX(.ex_handler_short, (p10)    ld1     t7=[src0],2)
+(p11)  br.ret.dptk rp          // 6 byte copy
+       ;;
+
+EX(.ex_handler_short, (p10)    st1     [dst0]=t7,2)
+       br.ret.dptk rp          // done all cases
+
+
+/* Align dest to nearest 8-byte boundary. We know we have at
+ * least 7 bytes to copy, enough to crawl to 8-byte boundary.
+ * Actual number of byte to crawl depend on the dest alignment.
+ * 7 byte or less is taken care at .memcpy_short
+
+ * src0 - source even index
+ * src1 - source  odd index
+ * dst0 - dest even index
+ * dst1 - dest  odd index
+ * r30  - distance to 8-byte boundary
+ */
+
+.align_dest:
+       add     src1=1,in1      // source odd index
+       cmp.le  p7,p0 = 2,r30   // for .align_dest
+       cmp.le  p8,p0 = 3,r30   // for .align_dest
+EX(.ex_handler_short, (p6)     ld1     t1=[src0],2)
+       cmp.le  p9,p0 = 4,r30   // for .align_dest
+       cmp.le  p10,p0 = 5,r30
+       ;;
+EX(.ex_handler_short, (p7)     ld1     t2=[src1],2)
+EK(.ex_handler_short, (p8)     ld1     t3=[src0],2)
+       cmp.le  p11,p0 = 6,r30
+EX(.ex_handler_short, (p6)     st1     [dst0] = t1,2)
+       cmp.le  p12,p0 = 7,r30
+       ;;
+EX(.ex_handler_short, (p9)     ld1     t4=[src1],2)
+EK(.ex_handler_short, (p10)    ld1     t5=[src0],2)
+EX(.ex_handler_short, (p7)     st1     [dst1] = t2,2)
+EK(.ex_handler_short, (p8)     st1     [dst0] = t3,2)
+       ;;
+EX(.ex_handler_short, (p11)    ld1     t6=[src1],2)
+EK(.ex_handler_short, (p12)    ld1     t7=[src0],2)
+       cmp.eq  p6,p7=r28,r29
+EX(.ex_handler_short, (p9)     st1     [dst1] = t4,2)
+EK(.ex_handler_short, (p10)    st1     [dst0] = t5,2)
+       sub     in2=in2,r30
+       ;;
+EX(.ex_handler_short, (p11)    st1     [dst1] = t6,2)
+EK(.ex_handler_short, (p12)    st1     [dst0] = t7)
+       add     dst0=in0,r30    // setup arguments
+       add     src0=in1,r30
+(p6)   br.cond.dptk .aligned_src
+(p7)   br.cond.dpnt .unaligned_src
+       ;;
+
+/* main loop body in jump table format */
+#define COPYU(shift)                                                           
        \
+1:                                                                             
        \
+EX(.ex_handler,  (p16) ld8     r32=[src0],8);          /* 1 */                 
        \
+EK(.ex_handler,  (p16) ld8     r36=[src1],8);                                  
        \
+                (p17)  shrp    r35=r33,r34,shift;;     /* 1 */                 
        \
+EX(.ex_handler,  (p6)  ld8     r22=[src1]);    /* common, prime for tail 
section */    \
+                nop.m  0;                                                      
        \
+                (p16)  shrp    r38=r36,r37,shift;                              
        \
+EX(.ex_handler,  (p17) st8     [dst0]=r35,8);          /* 1 */                 
        \
+EK(.ex_handler,  (p17) st8     [dst1]=r39,8);                                  
        \
+                br.ctop.dptk.few 1b;;                                          
        \
+                (p7)   add     src1=-8,src1;   /* back out for <8 byte case */ 
        \
+                shrp   r21=r22,r38,shift;      /* speculative work */          
        \
+                br.sptk.few .unaligned_src_tail /* branch out of jump table */ 
        \
+                ;;
+       TEXT_ALIGN(32)
+.jump_table:
+       COPYU(8)        // unaligned cases
+.jmp1:
+       COPYU(16)
+       COPYU(24)
+       COPYU(32)
+       COPYU(40)
+       COPYU(48)
+       COPYU(56)
+
+#undef A
+#undef B
+#undef C
+#undef D
+
+/*
+ * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
+ * instruction failed in the bundle.  The exception algorithm is that we
+ * first figure out the faulting address, then detect if there is any
+ * progress made on the copy, if so, redo the copy from last known copied
+ * location up to the faulting address (exclusive). In the copy_from_user
+ * case, remaining byte in kernel buffer will be zeroed.
+ *
+ * Take copy_from_user as an example, in the code there are multiple loads
+ * in a bundle and those multiple loads could span over two pages, the
+ * faulting address is calculated as page_round_down(max(src0, src1)).
+ * This is based on knowledge that if we can access one byte in a page, we
+ * can access any byte in that page.
+ *
+ * predicate used in the exception handler:
+ * p6-p7: direction
+ * p10-p11: src faulting addr calculation
+ * p12-p13: dst faulting addr calculation
+ */
+
+#define A      r19
+#define B      r20
+#define C      r21
+#define D      r22
+#define F      r28
+
+#define memset_arg0    r32
+#define memset_arg2    r33
+
+#define saved_retval   loc0
+#define saved_rtlink   loc1
+#define saved_pfs_stack        loc2
+
+.ex_hndlr_s:
+       add     src0=8,src0
+       br.sptk .ex_handler
+       ;;
+.ex_hndlr_d:
+       add     dst0=8,dst0
+       br.sptk .ex_handler
+       ;;
+.ex_hndlr_lcpy_1:
+       mov     src1=src_pre_mem
+       mov     dst1=dst_pre_mem
+       cmp.gtu p10,p11=src_pre_mem,saved_in1
+       cmp.gtu p12,p13=dst_pre_mem,saved_in0
+       ;;
+(p10)  add     src0=8,saved_in1
+(p11)  mov     src0=saved_in1
+(p12)  add     dst0=8,saved_in0
+(p13)  mov     dst0=saved_in0
+       br.sptk .ex_handler
+.ex_handler_lcpy:
+       // in line_copy block, the preload addresses should always ahead
+       // of the other two src/dst pointers.  Furthermore, src1/dst1 should
+       // always ahead of src0/dst0.
+       mov     src1=src_pre_mem
+       mov     dst1=dst_pre_mem
+.ex_handler:
+       mov     pr=saved_pr,-1          // first restore pr, lc, and pfs
+       mov     ar.lc=saved_lc
+       mov     ar.pfs=saved_pfs
+       ;;
+.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs
+       cmp.ltu p6,p7=saved_in0, saved_in1      // get the copy direction
+       cmp.ltu p10,p11=src0,src1
+       cmp.ltu p12,p13=dst0,dst1
+       fcmp.eq p8,p0=f6,f0             // is it memcpy?
+       mov     tmp = dst0
+       ;;
+(p11)  mov     src1 = src0             // pick the larger of the two
+(p13)  mov     dst0 = dst1             // make dst0 the smaller one
+(p13)  mov     dst1 = tmp              // and dst1 the larger one
+       ;;
+(p6)   dep     F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
+(p7)   dep     F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
+       ;;
+(p6)   cmp.le  p14,p0=dst0,saved_in0   // no progress has been made on store
+(p7)   cmp.le  p14,p0=src0,saved_in1   // no progress has been made on load
+       mov     retval=saved_in2
+(p8)   ld1     tmp=[src1]              // force an oops for memcpy call
+(p8)   st1     [dst1]=r0               // force an oops for memcpy call
+(p14)  br.ret.sptk.many rp
+
+/*
+ * The remaining byte to copy is calculated as:
+ *
+ * A = (faulting_addr - orig_src)      -> len to faulting ld address
+ *     or 
+ *     (faulting_addr - orig_dst)      -> len to faulting st address
+ * B = (cur_dst - orig_dst)            -> len copied so far
+ * C = A - B                           -> len need to be copied
+ * D = orig_len - A                    -> len need to be zeroed
+ */
+(p6)   sub     A = F, saved_in0
+(p7)   sub     A = F, saved_in1
+       clrrrb
+       ;;
+       alloc   saved_pfs_stack=ar.pfs,3,3,3,0
+       sub     B = dst0, saved_in0     // how many byte copied so far
+       ;;
+       sub     C = A, B
+       sub     D = saved_in2, A
+       ;;
+       cmp.gt  p8,p0=C,r0              // more than 1 byte?
+       add     memset_arg0=saved_in0, A
+(p6)   mov     memset_arg2=0           // copy_to_user should not call memset
+(p7)   mov     memset_arg2=D           // copy_from_user need to have kbuf zeroed
+       mov     r8=0
+       mov     saved_retval = D
+       mov     saved_rtlink = b0
+
+       add     out0=saved_in0, B
+       add     out1=saved_in1, B
+       mov     out2=C
+(p8)   br.call.sptk.few b0=__copy_user // recursive call
+       ;;
+
+       add     saved_retval=saved_retval,r8    // above might return non-zero value
+       cmp.gt  p8,p0=memset_arg2,r0    // more than 1 byte?
+       mov     out0=memset_arg0        // *s
+       mov     out1=r0                 // c
+       mov     out2=memset_arg2        // n
+(p8)   br.call.sptk.few b0=memset
+       ;;
+
+       mov     retval=saved_retval
+       mov     ar.pfs=saved_pfs_stack
+       mov     b0=saved_rtlink
+       br.ret.sptk.many rp
+
+/* end of McKinley specific optimization */
+END(__copy_user)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memset.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/memset.S      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,362 @@
+/* Optimized version of the standard memset() function.
+
+   Copyright (c) 2002 Hewlett-Packard Co/CERN
+       Sverre Jarp <Sverre.Jarp@xxxxxxx>
+
+   Return: dest
+
+   Inputs:
+        in0:    dest
+        in1:    value
+        in2:    count
+
+   The algorithm is fairly straightforward: set byte by byte until we
+   get to a 16B-aligned address, then loop on 128 B chunks using an
+   early store as prefetching, then loop on 32B chucks, then clear remaining
+   words, finally clear remaining bytes.
+   Since a stf.spill f0 can store 16B in one go, we use this instruction
+   to get peak speed when value = 0.  */
+
+#include <asm/asmmacro.h>
+#undef ret
+
+#define dest           in0
+#define value          in1
+#define        cnt             in2
+
+#define tmp            r31
+#define save_lc                r30
+#define ptr0           r29
+#define ptr1           r28
+#define ptr2           r27
+#define ptr3           r26
+#define ptr9           r24
+#define        loopcnt         r23
+#define linecnt                r22
+#define bytecnt                r21
+
+#define fvalue         f6
+
+// This routine uses only scratch predicate registers (p6 - p15)
+#define p_scr          p6                      // default register for 
same-cycle branches
+#define p_nz           p7
+#define p_zr           p8
+#define p_unalgn       p9
+#define p_y            p11
+#define p_n            p12
+#define p_yy           p13
+#define p_nn           p14
+
+#define MIN1           15
+#define MIN1P1HALF     8
+#define LINE_SIZE      128
+#define LSIZE_SH        7                      // shift amount
+#define PREF_AHEAD     8
+
+GLOBAL_ENTRY(memset)
+{ .mmi
+       .prologue
+       alloc   tmp = ar.pfs, 3, 0, 0, 0
+       lfetch.nt1 [dest]                       //
+       .save   ar.lc, save_lc
+       mov.i   save_lc = ar.lc
+       .body
+} { .mmi
+       mov     ret0 = dest                     // return value
+       cmp.ne  p_nz, p_zr = value, r0          // use stf.spill if value is 
zero
+       cmp.eq  p_scr, p0 = cnt, r0
+;; }
+{ .mmi
+       and     ptr2 = -(MIN1+1), dest          // aligned address
+       and     tmp = MIN1, dest                // prepare to check for correct 
alignment
+       tbit.nz p_y, p_n = dest, 0              // Do we have an odd address? 
(M_B_U)
+} { .mib
+       mov     ptr1 = dest
+       mux1    value = value, @brcst           // create 8 identical bytes in 
word
+(p_scr)        br.ret.dpnt.many rp                     // return immediately 
if count = 0
+;; }
+{ .mib
+       cmp.ne  p_unalgn, p0 = tmp, r0          //
+} { .mib
+       sub     bytecnt = (MIN1+1), tmp         // NB: # of bytes to move is 1 
higher than loopcnt
+       cmp.gt  p_scr, p0 = 16, cnt             // is it a minimalistic task?
+(p_scr)        br.cond.dptk.many .move_bytes_unaligned // go move just a few 
(M_B_U)
+;; }
+{ .mmi
+(p_unalgn) add ptr1 = (MIN1+1), ptr2           // after alignment
+(p_unalgn) add ptr2 = MIN1P1HALF, ptr2         // after alignment
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3   // should we do a st8 ?
+;; }
+{ .mib
+(p_y)  add     cnt = -8, cnt                   //
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
+} { .mib
+(p_y)  st8     [ptr2] = value,-4               //
+(p_n)  add     ptr2 = 4, ptr2                  //
+;; }
+{ .mib
+(p_yy) add     cnt = -4, cnt                   //
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1   // should we do a st2 ?
+} { .mib
+(p_yy) st4     [ptr2] = value,-2               //
+(p_nn) add     ptr2 = 2, ptr2                  //
+;; }
+{ .mmi
+       mov     tmp = LINE_SIZE+1               // for compare
+(p_y)  add     cnt = -2, cnt                   //
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
+} { .mmi
+       setf.sig fvalue=value                   // transfer value to FLP side
+(p_y)  st2     [ptr2] = value,-1               //
+(p_n)  add     ptr2 = 1, ptr2                  //
+;; }
+
+{ .mmi
+(p_yy) st1     [ptr2] = value                  //
+       cmp.gt  p_scr, p0 = tmp, cnt            // is it a minimalistic task?
+} { .mbb
+(p_yy) add     cnt = -1, cnt                   //
+(p_scr)        br.cond.dpnt.many .fraction_of_line     // go move just a few
+;; }
+
+{ .mib
+       nop.m 0
+       shr.u   linecnt = cnt, LSIZE_SH
+(p_zr) br.cond.dptk.many .l1b                  // Jump to use stf.spill
+;; }
+
+       TEXT_ALIGN(32) // --------------------- //  L1A: store ahead into cache 
lines; fill later
+{ .mmi
+       and     tmp = -(LINE_SIZE), cnt         // compute end of range
+       mov     ptr9 = ptr1                     // used for prefetching
+       and     cnt = (LINE_SIZE-1), cnt        // remainder
+} { .mmi
+       mov     loopcnt = PREF_AHEAD-1          // default prefetch loop
+       cmp.gt  p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr)        add     loopcnt = -1, linecnt           //
+       add     ptr2 = 8, ptr1                  // start of stores (beyond 
prefetch stores)
+       add     ptr1 = tmp, ptr1                // first address beyond total 
range
+;; }
+{ .mmi
+       add     tmp = -1, linecnt               // next loop count
+       mov.i   ar.lc = loopcnt                 //
+;; }
+.pref_l1a:
+{ .mib
+       stf8 [ptr9] = fvalue, 128               // Do stores one cache line 
apart
+       nop.i   0
+       br.cloop.dptk.few .pref_l1a
+;; }
+{ .mmi
+       add     ptr0 = 16, ptr2                 // Two stores in parallel
+       mov.i   ar.lc = tmp                     //
+;; }
+.l1ax:
+ { .mmi
+       stf8 [ptr2] = fvalue, 8
+       stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 24
+       stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 8
+       stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 24
+       stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 8
+       stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 24
+       stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+       stf8 [ptr2] = fvalue, 8
+       stf8 [ptr0] = fvalue, 32
+       cmp.lt  p_scr, p0 = ptr9, ptr1          // do we need more prefetching?
+ ;; }
+{ .mmb
+       stf8 [ptr2] = fvalue, 24
+(p_scr)        stf8 [ptr9] = fvalue, 128
+       br.cloop.dptk.few .l1ax
+;; }
+{ .mbb
+       cmp.le  p_scr, p0 = 8, cnt              // just a few bytes left ?
+(p_scr) br.cond.dpnt.many  .fraction_of_line   // Branch no. 2
+       br.cond.dpnt.many  .move_bytes_from_alignment   // Branch no. 3
+;; }
+
+       TEXT_ALIGN(32)
+.l1b:  // ------------------------------------ //  L1B: store ahead into cache 
lines; fill later
+{ .mmi
+       and     tmp = -(LINE_SIZE), cnt         // compute end of range
+       mov     ptr9 = ptr1                     // used for prefetching
+       and     cnt = (LINE_SIZE-1), cnt        // remainder
+} { .mmi
+       mov     loopcnt = PREF_AHEAD-1          // default prefetch loop
+       cmp.gt  p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr)        add     loopcnt = -1, linecnt
+       add     ptr2 = 16, ptr1                 // start of stores (beyond 
prefetch stores)
+       add     ptr1 = tmp, ptr1                // first address beyond total 
range
+;; }
+{ .mmi
+       add     tmp = -1, linecnt               // next loop count
+       mov.i   ar.lc = loopcnt
+;; }
+.pref_l1b:
+{ .mib
+       stf.spill [ptr9] = f0, 128              // Do stores one cache line 
apart
+       nop.i   0
+       br.cloop.dptk.few .pref_l1b
+;; }
+{ .mmi
+       add     ptr0 = 16, ptr2                 // Two stores in parallel
+       mov.i   ar.lc = tmp
+;; }
+.l1bx:
+ { .mmi
+       stf.spill [ptr2] = f0, 32
+       stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+       stf.spill [ptr2] = f0, 32
+       stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+       stf.spill [ptr2] = f0, 32
+       stf.spill [ptr0] = f0, 64
+       cmp.lt  p_scr, p0 = ptr9, ptr1          // do we need more prefetching?
+ ;; }
+{ .mmb
+       stf.spill [ptr2] = f0, 32
+(p_scr)        stf.spill [ptr9] = f0, 128
+       br.cloop.dptk.few .l1bx
+;; }
+{ .mib
+       cmp.gt  p_scr, p0 = 8, cnt              // just a few bytes left ?
+(p_scr)        br.cond.dpnt.many  .move_bytes_from_alignment   //
+;; }
+
+.fraction_of_line:
+{ .mib
+       add     ptr2 = 16, ptr1
+       shr.u   loopcnt = cnt, 5                // loopcnt = cnt / 32
+;; }
+{ .mib
+       cmp.eq  p_scr, p0 = loopcnt, r0
+       add     loopcnt = -1, loopcnt
+(p_scr)        br.cond.dpnt.many .store_words
+;; }
+{ .mib
+       and     cnt = 0x1f, cnt                 // compute the remaining cnt
+       mov.i   ar.lc = loopcnt
+;; }
+       TEXT_ALIGN(32)
+.l2:   // ------------------------------------ //  L2A:  store 32B in 2 cycles
+{ .mmb
+       stf8    [ptr1] = fvalue, 8
+       stf8    [ptr2] = fvalue, 8
+;; } { .mmb
+       stf8    [ptr1] = fvalue, 24
+       stf8    [ptr2] = fvalue, 24
+       br.cloop.dptk.many .l2
+;; }
+.store_words:
+{ .mib
+       cmp.gt  p_scr, p0 = 8, cnt              // just a few bytes left ?
+(p_scr)        br.cond.dpnt.many .move_bytes_from_alignment    // Branch
+;; }
+
+{ .mmi
+       stf8    [ptr1] = fvalue, 8              // store
+       cmp.le  p_y, p_n = 16, cnt
+       add     cnt = -8, cnt                   // subtract
+;; }
+{ .mmi
+(p_y)  stf8    [ptr1] = fvalue, 8              // store
+(p_y)  cmp.le.unc p_yy, p_nn = 16, cnt
+(p_y)  add     cnt = -8, cnt                   // subtract
+;; }
+{ .mmi                                         // store
+(p_yy) stf8    [ptr1] = fvalue, 8
+(p_yy) add     cnt = -8, cnt                   // subtract
+;; }
+
+.move_bytes_from_alignment:
+{ .mib
+       cmp.eq  p_scr, p0 = cnt, r0
+       tbit.nz.unc p_y, p0 = cnt, 2            // should we terminate with a 
st4 ?
+(p_scr)        br.cond.dpnt.few .restore_and_exit
+;; }
+{ .mib
+(p_y)  st4     [ptr1] = value,4
+       tbit.nz.unc p_yy, p0 = cnt, 1           // should we terminate with a 
st2 ?
+;; }
+{ .mib
+(p_yy) st2     [ptr1] = value,2
+       tbit.nz.unc p_y, p0 = cnt, 0            // should we terminate with a 
st1 ?
+;; }
+
+{ .mib
+(p_y)  st1     [ptr1] = value
+;; }
+.restore_and_exit:
+{ .mib
+       nop.m   0
+       mov.i   ar.lc = save_lc
+       br.ret.sptk.many rp
+;; }
+
+.move_bytes_unaligned:
+{ .mmi
+       .pred.rel "mutex",p_y, p_n
+       .pred.rel "mutex",p_yy, p_nn
+(p_n)  cmp.le  p_yy, p_nn = 4, cnt
+(p_y)  cmp.le  p_yy, p_nn = 5, cnt
+(p_n)  add     ptr2 = 2, ptr1
+} { .mmi
+(p_y)  add     ptr2 = 3, ptr1
+(p_y)  st1     [ptr1] = value, 1               // fill 1 (odd-aligned) byte 
[15, 14 (or less) left]
+(p_y)  add     cnt = -1, cnt
+;; }
+{ .mmi
+(p_yy) cmp.le.unc p_y, p0 = 8, cnt
+       add     ptr3 = ptr1, cnt                // prepare last store
+       mov.i   ar.lc = save_lc
+} { .mmi
+(p_yy) st2     [ptr1] = value, 4               // fill 2 (aligned) bytes
+(p_yy) st2     [ptr2] = value, 4               // fill 2 (aligned) bytes [11, 
10 (or less) left]
+(p_yy) add     cnt = -4, cnt
+;; }
+{ .mmi
+(p_y)  cmp.le.unc p_yy, p0 = 8, cnt
+       add     ptr3 = -1, ptr3                 // last store
+       tbit.nz p_scr, p0 = cnt, 1              // will there be a st2 at the 
end ?
+} { .mmi
+(p_y)  st2     [ptr1] = value, 4               // fill 2 (aligned) bytes
+(p_y)  st2     [ptr2] = value, 4               // fill 2 (aligned) bytes [7, 6 
(or less) left]
+(p_y)  add     cnt = -4, cnt
+;; }
+{ .mmi
+(p_yy) st2     [ptr1] = value, 4               // fill 2 (aligned) bytes
+(p_yy) st2     [ptr2] = value, 4               // fill 2 (aligned) bytes [3, 2 
(or less) left]
+       tbit.nz p_y, p0 = cnt, 0                // will there be a st1 at the 
end ?
+} { .mmi
+(p_yy) add     cnt = -4, cnt
+;; }
+{ .mmb
+(p_scr)        st2     [ptr1] = value                  // fill 2 (aligned) 
bytes
+(p_y)  st1     [ptr3] = value                  // fill last byte (using ptr3)
+       br.ret.sptk.many rp
+}
+END(memset)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/strlen.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/linux/strlen.S      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,192 @@
+/*
+ *
+ * Optimized version of the standard strlen() function
+ *
+ *
+ * Inputs:
+ *     in0     address of string
+ *
+ * Outputs:
+ *     ret0    the number of characters in the string (0 if empty string)
+ *     does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ *     Stephane Eranian <eranian@xxxxxxxxxx>
+ *
+ * 09/24/99 S.Eranian add speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+//
+// This is an enhanced version of the basic strlen. it includes a combination
+// of compute zero index (czx), parallel comparisons, speculative loads and
+// loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+//       The goal is to look at the string in chunks of 8 bytes.
+//       so we need to do a few extra checks at the beginning because the
+//       string may not be 8-byte aligned. In this case we load the 8byte
+//       quantity which includes the start of the string and mask the unused
+//       bytes with 0xff to avoid confusing czx.
+//       We use speculative loads and software pipelining to hide memory
+//       latency and do read ahead safely. This way we defer any exception.
+//
+//       Because we don't want the kernel to be relying on particular
+//       settings of the DCR register, we provide recovery code in case
+//       speculation fails. The recovery code is going to "redo" the work using
+//       only normal loads. If we still get a fault then we generate a
+//       kernel panic. Otherwise we return the strlen as usual.
+//
+//       The fact that speculation may fail can be caused, for instance, by
+//       the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+//       a NaT bit will be set if the translation is not present. The normal
+//       load, on the other hand, will cause the translation to be inserted
+//       if the mapping exists.
+//
+//       It should be noted that we execute recovery code only when we need
+//       to use the data that has been speculatively loaded: we don't execute
+//       recovery code on pure read ahead data.
+//
+// Remarks:
+//     - the cmp r0,r0 is used as a fast way to initialize a predicate
+//       register to 1. This is required to make sure that we get the parallel
+//       compare correct.
+//
+//     - we don't use the epilogue counter to exit the loop but we need to set
+//       it to zero beforehand.
+//
+//     - after the loop we must test for Nat values because neither the
+//       czx nor cmp instruction raise a NaT consumption fault. We must be
+//       careful not to look too far for a Nat for which we don't care.
+//       For instance we don't need to look at a NaT in val2 if the zero byte
+//       was in val1.
+//
+//     - Clearly performance tuning is required.
+//
+//
+//
+#define saved_pfs      r11
+#define        tmp             r10
+#define base           r16
+#define orig           r17
+#define saved_pr       r18
+#define src            r19
+#define mask           r20
+#define val            r21
+#define val1           r22
+#define val2           r23
+
+GLOBAL_ENTRY(strlen)
+       .prologue
+       .save ar.pfs, saved_pfs
+       alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
+
+       .rotr v[2], w[2]        // declares our 4 aliases
+
+       extr.u tmp=in0,0,3      // tmp=least significant 3 bits
+       mov orig=in0            // keep track of initial byte address
+       dep src=0,in0,0,3       // src=8byte-aligned in0 address
+       .save pr, saved_pr
+       mov saved_pr=pr         // preserve predicates (rotation)
+       ;;
+
+       .body
+
+       ld8 v[1]=[src],8        // must not speculate: can fail here
+       shl tmp=tmp,3           // multiply by 8bits/byte
+       mov mask=-1             // our mask
+       ;;
+       ld8.s w[1]=[src],8      // speculatively load next
+       cmp.eq p6,p0=r0,r0      // sets p6 to true for cmp.and
+       sub tmp=64,tmp          // how many bits to shift our mask on the right
+       ;;
+       shr.u   mask=mask,tmp   // zero enough bits to hold v[1] valuable part
+       mov ar.ec=r0            // clear epilogue counter (saved in ar.pfs)
+       ;;
+       add base=-16,src        // keep track of aligned base
+       or v[1]=v[1],mask       // now we have a safe initial byte pattern
+       ;;
+1:
+       ld8.s v[0]=[src],8      // speculatively load next
+       czx1.r val1=v[1]        // search 0 byte from right
+       czx1.r val2=w[1]        // search 0 byte from right following 8bytes
+       ;;
+       ld8.s w[0]=[src],8      // speculatively load next to next
+       cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+       cmp.eq.and p6,p0=8,val2 // p6 = p6 and val2==8
+(p6)   br.wtop.dptk 1b         // loop until p6 == 0
+       ;;
+       //
+       // We must try the recovery code iff
+       // val1_is_nat || (val1==8 && val2_is_nat)
+       //
+       // XXX Fixme
+       //      - there must be a better way of doing the test
+       //
+       cmp.eq  p8,p9=8,val1    // p8 = val1 had zero (disambiguate)
+       tnat.nz p6,p7=val1      // test NaT on val1
+(p6)   br.cond.spnt .recover   // jump to recovery if val1 is NaT
+       ;;
+       //
+       // if we come here p7 is true, i.e., initialized for // cmp
+       //
+       cmp.eq.and  p7,p0=8,val1// val1==8?
+       tnat.nz.and p7,p0=val2  // test NaT if val2
+(p7)   br.cond.spnt .recover   // jump to recovery if val2 is NaT
+       ;;
+(p8)   mov val1=val2           // the other test got us out of the loop
+(p8)   adds src=-16,src        // correct position when 3 ahead
+(p9)   adds src=-24,src        // correct position when 4 ahead
+       ;;
+       sub ret0=src,orig       // distance from base
+       sub tmp=8,val1          // which byte in word
+       mov pr=saved_pr,0xffffffffffff0000
+       ;;
+       sub ret0=ret0,tmp       // adjust
+       mov ar.pfs=saved_pfs    // because of ar.ec, restore no matter what
+       br.ret.sptk.many rp     // end of normal execution
+
+       //
+       // Outlined recovery code when speculation failed
+       //
+       // This time we don't use speculation and rely on the normal exception
+       // mechanism. That's why the loop is not as good as the previous one
+       // because read ahead is not possible
+       //
+       // IMPORTANT:
+       // Please note that in the case of strlen() as opposed to strlen_user()
+       // we don't use the exception mechanism, as this function is not
+       // supposed to fail. If that happens it means we have a bug and the
+       // code will cause a kernel fault.
+       //
+       // XXX Fixme
+       //      - today we restart from the beginning of the string instead
+       //        of trying to continue where we left off.
+       //
+.recover:
+       ld8 val=[base],8        // will fail if unrecoverable fault
+       ;;
+       or val=val,mask         // remask first bytes
+       cmp.eq p0,p6=r0,r0      // nullify first ld8 in loop
+       ;;
+       //
+       // ar.ec is still zero here
+       //
+2:
+(p6)   ld8 val=[base],8        // will fail if unrecoverable fault
+       ;;
+       czx1.r val1=val         // search 0 byte from right
+       ;;
+       cmp.eq p6,p0=8,val1     // val1==8 ?
+(p6)   br.wtop.dptk 2b         // loop until p6 == 0
+       ;;                      // (avoid WAW on p63)
+       sub ret0=base,orig      // distance from base
+       sub tmp=8,val1
+       mov pr=saved_pr,0xffffffffffff0000
+       ;;
+       sub ret0=ret0,tmp       // length=now - back -1
+       mov ar.pfs=saved_pfs    // because of ar.ec, restore no matter what
+       br.ret.sptk.many rp     // end of successful recovery code
+END(strlen)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mm.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/mm.c    Thu Sep  1 18:46:28 2005
@@ -0,0 +1,152 @@
+/******************************************************************************
+ * arch/ia64/mm.c
+ * 
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ * Copyright (c) 2005, Intel Corporation.
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * A description of the x86 page table API:
+ * 
+ * Domains trap to do_mmu_update with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ * 
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ * 
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the free pool when it still holds
+ * a reference to it.
+ * 
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writable by the domain [of course, a
+ * frame may not be used in any of these three ways!].
+ * So, type_count is a count of the number of times a frame is being 
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ * 
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ * 
+ * A further note on writable page mappings:
+ * -----------------------------------------
+ * For simplicity, the count of writable mappings for a page may not
+ * correspond to reality. The 'writable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the 
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ * 
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the 
+ * bit is clear then any mapped page is writable.
+ * 
+ * We get round this by always setting the WP bit and disallowing 
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
+
+#include <xen/config.h>
+#include <public/xen.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmmu.h>
+#include <asm/regionreg.h>
+#include <asm/vmx_mm_def.h>
+/*
+        uregs->ptr is virtual address
+        uregs->val is pte value
+ */
+#ifdef CONFIG_VTI
+int do_mmu_update(mmu_update_t *ureqs,u64 count,u64 *pdone,u64 foreigndom)
+{
+    int i,cmd;
+    u64 mfn, gpfn;
+    VCPU *vcpu;
+    mmu_update_t req;
+    ia64_rr rr;
+    thash_cb_t *hcb;
+    thash_data_t entry={0},*ovl;
+    vcpu = current;
+    search_section_t sections;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    for ( i = 0; i < count; i++ )
+    {
+        copy_from_user(&req, ureqs, sizeof(req));
+        cmd = req.ptr&3;
+        req.ptr &= ~3;
+        if(cmd ==MMU_NORMAL_PT_UPDATE){
+            entry.page_flags = req.val;
+            entry.locked = 1;
+            entry.tc = 1;
+            entry.cl = DSIDE_TLB;
+            rr = vmx_vcpu_rr(vcpu, req.ptr);
+            entry.ps = rr.ps;
+            entry.key = redistribute_rid(rr.rid);
+            entry.rid = rr.rid;
+            entry.vadr = PAGEALIGN(req.ptr,entry.ps);
+            sections.tr = 1;
+            sections.tc = 0;
+            ovl = thash_find_overlap(hcb, &entry, sections);
+            if (ovl) {
+                  // generate MCA.
+                panic("Tlb conflict!!");
+                return;
+            }
+            thash_purge_and_insert(hcb, &entry);
+        }else if(cmd == MMU_MACHPHYS_UPDATE){
+            mfn = req.ptr >>PAGE_SHIFT;
+            gpfn = req.val;
+            set_machinetophys(mfn,gpfn);
+        }else{
+            printf("Unkown command of mmu_update:ptr: %lx,val: %lx 
\n",req.ptr,req.val);
+            while(1);
+        }
+        ureqs ++;
+    }
+    return 0;
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mmio.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/mmio.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,515 @@
+
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ *  Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/privop.h>
+#include <asm/types.h>
+#include <public/io/ioreq.h>
+#include <asm/mm.h>
+#include <asm/vmx.h>
+
+/*
+struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
+{
+    int     i;
+    for (i=0; mio_base[i].iot != NOT_IO; i++ ) {
+        if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end )
+            return &mio_base[i];
+    }
+    return NULL;
+}
+*/
+
+#define        PIB_LOW_HALF(ofst)      !(ofst&(1<<20))
+#define PIB_OFST_INTA           0x1E0000
+#define PIB_OFST_XTP            0x1E0008
+
+static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int 
ma)
+{
+    switch (pib_off) {
+    case PIB_OFST_INTA:
+        panic("Undefined write on PIB INTA\n");
+        break;
+    case PIB_OFST_XTP:
+        if ( s == 1 && ma == 4 /* UC */) {
+            vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
+        }
+        else {
+            panic("Undefined write on PIB XTP\n");
+        }
+        break;
+    default:
+        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
+            if ( s != 8 || ma != 0x4 /* UC */ ) {
+                panic("Undefined IPI-LHF write with s %d, ma %d!\n", s, ma);
+            }
+            else {
+                write_ipi(vcpu, pib_off, *(uint64_t *)src);
+                // TODO for SM-VP
+            }
+        }
+        else {      // upper half
+            printf("IPI-UHF write %lx\n",pib_off);
+            panic("Not support yet for SM-VP\n");
+        }
+        break;
+    }
+}
+
+static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int 
ma)
+{
+    switch (pib_off) {
+    case PIB_OFST_INTA:
+        // todo --- emit on processor system bus.
+        if ( s == 1 && ma == 4) { // 1 byte load
+            // TODO: INTA read from IOSAPIC
+        }
+        else {
+            panic("Undefined read on PIB INTA\n");
+        }
+        break;
+    case PIB_OFST_XTP:
+        if ( s == 1 && ma == 4) {
+            *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
+        }
+        else {
+            panic("Undefined read on PIB XTP\n");
+        }
+        break;
+    default:
+        if ( PIB_LOW_HALF(pib_off) ) {   // lower half
+            if ( s != 8 || ma != 4 ) {
+                panic("Undefined IPI-LHF read!\n");
+            }
+            else {
+#ifdef  IPI_DEBUG
+                printf("IPI-LHF read %lx\n",pib_off);
+#endif
+                *(uint64_t *)dest = 0;  // TODO for SM-VP
+            }
+        }
+        else {      // upper half
+            if ( s != 1 || ma != 4 ) {
+                panic("Undefined PIB-UHF read!\n");
+            }
+            else {
+#ifdef  IPI_DEBUG
+                printf("IPI-UHF read %lx\n",pib_off);
+#endif
+                *(uint8_t *)dest = 0;   // TODO for SM-VP
+            }
+        }
+        break;
+    }
+}
+
+static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    unsigned long addr;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == 0) {
+        panic("bad shared page: %lx", (unsigned long)vio);
+    }
+    p = &vio->vp_ioreq;
+    p->addr = pa;
+    p->size = s;
+    p->count = 1;
+    p->dir = dir;
+    if(dir==IOREQ_WRITE)     //write;
+        p->u.data = *val;
+    p->pdata_valid = 0;
+    p->port_mm = 1;
+    p->df = 0;
+
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    p->state = STATE_IOREQ_READY;
+    evtchn_send(iopacket_port(v->domain));
+    vmx_wait_io();
+    if(dir==IOREQ_READ){ //read
+        *val=p->u.data;
+    }
+    return;
+}
+#define TO_LEGACY_IO(pa)  (((pa)>>12<<2)|((pa)&0x3))
+
+static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    unsigned long addr;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == 0) {
+        panic("bad shared page: %lx");
+    }
+    p = &vio->vp_ioreq;
+    p->addr = TO_LEGACY_IO(pa&0x3ffffffUL);
+    p->size = s;
+    p->count = 1;
+    p->dir = dir;
+    if(dir==IOREQ_WRITE)     //write;
+        p->u.data = *val;
+    p->pdata_valid = 0;
+    p->port_mm = 0;
+    p->df = 0;
+
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    p->state = STATE_IOREQ_READY;
+    evtchn_send(iopacket_port(v->domain));
+
+    vmx_wait_io();
+    if(dir==IOREQ_READ){ //read
+        *val=p->u.data;
+    }
+#ifdef DEBUG_PCI
+    if(dir==IOREQ_WRITE)
+        if(p->addr == 0xcf8UL)
+            printk("Write 0xcf8, with val [0x%lx]\n", p->u.data);
+    else
+        if(p->addr == 0xcfcUL)
+            printk("Read 0xcfc, with val [0x%lx]\n", p->u.data);
+#endif //DEBUG_PCI
+    return;
+}
+
+/*
+ * Dispatch an emulated MMIO access according to the I/O type of the
+ * guest frame: PIB, guest firmware, low MMIO / IOSAPIC / frame buffer,
+ * or legacy I/O space.
+ *  src_pa: guest physical address of the access.
+ *  dest:   data buffer (source on write, destination on read).
+ *  s:      access size in bytes.
+ *  ma:     memory attribute of the access.
+ *  dir:    per this file's convention, 1 = read, 0 = write.
+ */
+static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, 
int dir)
+{
+    struct virutal_platform_def *v_plat;
+    //mmio_type_t iot;
+    unsigned long iot;
+    iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT);
+    v_plat = vmx_vcpu_get_plat(vcpu);
+
+    switch (iot) {
+    case GPFN_PIB:
+        /* !dir (dir==0) selects the write path, per the dir
+         * convention documented above emulate_io_inst(). */
+        if(!dir)
+            pib_write(vcpu, dest, src_pa - v_plat->pib_base, s, ma);
+        else
+            pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
+        break;
+    case GPFN_GFW:
+        /* Guest firmware range: accesses are silently ignored. */
+        break;
+    case GPFN_IOSAPIC:
+    case GPFN_FRAME_BUFFER:
+    case GPFN_LOW_MMIO:
+        low_mmio_access(vcpu, src_pa, dest, s, dir);
+        break;
+    case GPFN_LEGACY_IO:
+        legacy_io_access(vcpu, src_pa, dest, s, dir);
+        break;
+    default:
+        panic("Bad I/O access\n");
+        break;
+    }
+    return;
+}
+
+/*
+ * Read or write data in guest virtual address mode.
+ */
+/*
+void
+memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
+{
+    uint64_t pa;
+
+    if (!vtlb->nomap)
+        panic("Normal memory write shouldn't go to this point!");
+    pa = PPN_2_PA(vtlb->ppn);
+    pa += POFFSET((u64)dest, vtlb->ps);
+    mmio_write (vcpu, src, pa, s, vtlb->ma);
+}
+
+
+void
+memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
+{
+    uint64_t pa = (uint64_t)dest;
+    int    ma;
+
+    if ( pa & (1UL <<63) ) {
+        // UC
+        ma = 4;
+        pa <<=1;
+        pa >>=1;
+    }
+    else {
+        // WBL
+        ma = 0;     // using WB for WBL
+    }
+    mmio_write (vcpu, src, pa, s, ma);
+}
+
+void
+memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
+{
+    uint64_t pa;
+
+    if (!vtlb->nomap)
+        panic("Normal memory write shouldn't go to this point!");
+    pa = PPN_2_PA(vtlb->ppn);
+    pa += POFFSET((u64)src, vtlb->ps);
+
+    mmio_read(vcpu, pa, dest, s, vtlb->ma);
+}
+
+void
+memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
+{
+    uint64_t pa = (uint64_t)src;
+    int    ma;
+
+    if ( pa & (1UL <<63) ) {
+        // UC
+        ma = 4;
+        pa <<=1;
+        pa >>=1;
+    }
+    else {
+        // WBL
+        ma = 0;     // using WB for WBL
+    }
+    mmio_read(vcpu, pa, dest, s, ma);
+}
+*/
+
+
+/*
+ * Deliver IPI message. (Only U-VP is supported now)
+ *  offset: address offset to IPI space.
+ *  value:  deliver value.
+ */
+/*
+ * Deliver an IPI to @vcpu according to the delivery mode @dm
+ * (encoding per the Itanium interruption model): 0=INT, 2=PMI,
+ * 4=NMI, 5=INIT, 7=ExtINT; 1/3/6 are reserved.  PMI and INIT
+ * injection are not implemented and panic.
+ */
+static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector)
+{
+#ifdef  IPI_DEBUG
+  printf ("deliver_ipi %lx %lx\n",dm,vector);
+#endif
+    switch ( dm ) {
+    case 0:     // INT
+        vmx_vcpu_pend_interrupt (vcpu, vector);
+        break;
+    case 2:     // PMI
+        // TODO -- inject guest PMI
+        panic ("Inject guest PMI!\n");
+        break;
+    case 4:     // NMI
+        vmx_vcpu_pend_interrupt (vcpu, 2);
+        break;
+    case 5:     // INIT
+        // TODO -- inject guest INIT
+        panic ("Inject guest INIT!\n");
+        break;
+    case 7:     // ExtINT
+        vmx_vcpu_pend_interrupt (vcpu, 0);
+        break;
+    case 1:
+    case 3:
+    case 6:
+    default:
+        panic ("Deliver reserved IPI!\n");
+        break;
+    }
+}
+
+/*
+ * TODO: Use hash table for the lookup.
+ */
+/*
+ * Map an (id, eid) pair from a vcpu's LID register to the owning VCPU
+ * by a linear scan of the domain's vcpus.  Returns NULL if no vcpu
+ * matches.  (See TODO above: a hash table would avoid the O(n) scan.)
+ */
+static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid)
+{
+       int   i;
+       VCPU  *vcpu;
+       LID       lid;
+       for (i=0; i<MAX_VIRT_CPUS; i++) {
+               vcpu = d->vcpu[i];
+               if (!vcpu)
+                       continue;
+               lid.val = VPD_CR(vcpu, lid);
+               if ( lid.id == id && lid.eid == eid ) {
+                   return vcpu;
+               }
+       }
+       return NULL;
+}
+
+/*
+ * execute write IPI op.
+ */
+/*
+ * Emulate a store to the IPI space: locate the target vcpu from the
+ * id/eid fields of @addr and deliver the IPI described by @value.
+ * Returns 1 on delivery.  Only self-IPI is supported; cross-vcpu
+ * (SMP guest) IPIs panic, and an unknown target also panics.
+ */
+static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value)
+{
+    VCPU   *target_cpu;
+ 
+    target_cpu = lid_2_vcpu(vcpu->domain, 
+                               ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid);
+    if ( target_cpu == NULL ) panic("Unknown IPI cpu\n");
+    if ( target_cpu == vcpu ) {
+       // IPI to self
+        deliver_ipi (vcpu, ((ipi_d_t)value).dm, 
+                ((ipi_d_t)value).vector);
+        return 1;
+    }
+    else {
+       // TODO: send Host IPI to inject guest SMP IPI interruption
+        panic ("No SM-VP supported!\n");
+        return 0;
+    }
+}
+
+
+/*
+   dir 1: read 0:write
+    inst_type 0:integer 1:floating point
+ */
+extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip);
+#define SL_INTEGER  0        // store/load interger
+#define SL_FLOATING    1       // store/load floating
+
+/*
+ * Emulate the memory-access instruction that faulted on an I/O
+ * address: fetch the bundle at the guest IP, decode the load/store
+ * form (M1/M2/M3/M5), perform the access through mmio_access(), merge
+ * the read data into the target register preserving its upper bytes
+ * for sub-8-byte accesses, and advance the guest IP.
+ *  padr: faulting guest physical address.
+ *  ma:   memory attribute of the access.
+ */
+void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma)
+{
+    REGS *regs;
+    IA64_BUNDLE bundle;
+    int slot, dir, inst_type;
+    size_t size;
+    u64 data, value,post_update, slot1a, slot1b, temp;
+    INST64 inst;
+    regs=vcpu_regs(vcpu);
+    bundle = __vmx_get_domain_bundle(regs->cr_iip);
+    slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+    // NOTE(review): if slot is not 0/1/2, inst is left uninitialized.
+    if (!slot) inst.inst = bundle.slot0;
+    else if (slot == 1){
+        slot1a=bundle.slot1a;
+        slot1b=bundle.slot1b;
+        inst.inst =slot1a + (slot1b<<18);
+    }
+    else if (slot == 2) inst.inst = bundle.slot2;
+
+
+    // Integer Load/Store
+    if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){
+        inst_type = SL_INTEGER;  //
+        size=(inst.M1.x6&0x3);
+        // NOTE(review): when (x6>>2)==0xb neither branch is taken and
+        // `dir` stays uninitialized -- confirm x6 encoding excludes it.
+        if((inst.M1.x6>>2)>0xb){      // write
+            dir=IOREQ_WRITE;     //write
+            vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data);
+        }else if((inst.M1.x6>>2)<0xb){   //  read
+            dir=IOREQ_READ;
+            vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value);
+        }
+    }
+    // Integer Load + Reg update
+    else if(inst.M2.major==4&&inst.M2.m==1&&inst.M2.x==0){
+        inst_type = SL_INTEGER;
+        dir = IOREQ_READ;     //write
+        size = (inst.M2.x6&0x3);
+        vmx_vcpu_get_gr(vcpu,inst.M2.r1,&value);
+        vmx_vcpu_get_gr(vcpu,inst.M2.r3,&temp);
+        vmx_vcpu_get_gr(vcpu,inst.M2.r2,&post_update);
+        // Register-form post-increment: r3 += r2.
+        temp += post_update;
+        vmx_vcpu_set_gr(vcpu,inst.M2.r3,temp,0);
+    }
+    // Integer Load/Store + Imm update
+    else if(inst.M3.major==5){
+        inst_type = SL_INTEGER;  //
+        size=(inst.M3.x6&0x3);
+        if((inst.M5.x6>>2)>0xb){      // write
+            dir=IOREQ_WRITE;     //write
+            vmx_vcpu_get_gr(vcpu,inst.M5.r2,&data);
+            vmx_vcpu_get_gr(vcpu,inst.M5.r3,&temp);
+            // Immediate-form post-increment: sign (s) selects +/-.
+            post_update = (inst.M5.i<<7)+inst.M5.imm7;
+            if(inst.M5.s)
+                temp -= post_update;
+            else
+                temp += post_update;
+            vmx_vcpu_set_gr(vcpu,inst.M5.r3,temp,0);
+
+        }else if((inst.M3.x6>>2)<0xb){   //  read
+            dir=IOREQ_READ;
+            vmx_vcpu_get_gr(vcpu,inst.M3.r1,&value);
+            vmx_vcpu_get_gr(vcpu,inst.M3.r3,&temp);
+            post_update = (inst.M3.i<<7)+inst.M3.imm7;
+            if(inst.M3.s)
+                temp -= post_update;
+            else
+                temp += post_update;
+            vmx_vcpu_set_gr(vcpu,inst.M3.r3,temp,0);
+
+        }
+    }
+    // Floating-point Load/Store
+//    else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){
+//        inst_type=SL_FLOATING;  //fp
+//        dir=IOREQ_READ;
+//        size=3;     //ldfd
+//    }
+    else{
+        printf("This memory access instruction can't be emulated two: %lx\n 
",inst.inst);
+        // NOTE(review): deliberate hang on undecodable instruction.
+        while(1);
+    }
+
+    // Convert encoded size (0..3) to bytes (1/2/4/8).
+    size = 1 << size;
+    if(dir==IOREQ_WRITE){
+        mmio_access(vcpu, padr, &data, size, ma, dir);
+    }else{
+        mmio_access(vcpu, padr, &data, size, ma, dir);
+        // Merge the read bytes into the old register value so the
+        // untouched upper bytes are preserved for sub-8-byte loads.
+        if(size==0)
+            data = (value & 0xffffffffffffff00U) | (data & 0xffU);
+        else if(size==1)
+            data = (value & 0xffffffffffff0000U) | (data & 0xffffU);
+        else if(size==2)
+            data = (value & 0xffffffff00000000U) | (data & 0xffffffffU);
+
+        if(inst_type==SL_INTEGER){       //gp
+            vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0);
+        }else{
+            panic("Don't support ldfd now !");
+/*            switch(inst.M6.f1){
+
+            case 6:
+                regs->f6=(struct ia64_fpreg)data;
+            case 7:
+                regs->f7=(struct ia64_fpreg)data;
+            case 8:
+                regs->f8=(struct ia64_fpreg)data;
+            case 9:
+                regs->f9=(struct ia64_fpreg)data;
+            case 10:
+                regs->f10=(struct ia64_fpreg)data;
+            case 11:
+                regs->f11=(struct ia64_fpreg)data;
+            default :
+                ia64_ldfs(inst.M6.f1,&data);
+            }
+*/
+        }
+    }
+    vmx_vcpu_increment_iip(vcpu);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/pal_emul.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/pal_emul.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,280 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@xxxxxxxxx>
+ * Copyright (c) 2005 Yu Ke <ke.yu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <asm/vmx_vcpu.h>
+
+/* Fetch the standard PAL argument registers GR29-GR31 from the guest. */
+static void
+get_pal_parameters (VCPU *vcpu, UINT64 *gr29,
+                       UINT64 *gr30, UINT64 *gr31) {
+
+       vmx_vcpu_get_gr(vcpu,29,gr29);
+       vmx_vcpu_get_gr(vcpu,30,gr30); 
+       vmx_vcpu_get_gr(vcpu,31,gr31);
+}
+
+/* Write a PAL return value into the guest's GR8-GR11
+ * (status, v0, v1, v2) per the PAL calling convention. */
+static void
+set_pal_result (VCPU *vcpu,struct ia64_pal_retval result) {
+
+       vmx_vcpu_set_gr(vcpu,8, result.status,0);
+       vmx_vcpu_set_gr(vcpu,9, result.v0,0);
+       vmx_vcpu_set_gr(vcpu,10, result.v1,0);
+       vmx_vcpu_set_gr(vcpu,11, result.v2,0);
+}
+
+
+/*
+ * Delegate a guest PAL_CACHE_FLUSH to the host PAL, forcing the
+ * "call with interrupts enabled" bit in gr30 clear of bit 1.
+ * Any non-zero host status is fatal (the restart loop is disabled).
+ */
+static struct ia64_pal_retval
+pal_cache_flush (VCPU *vcpu) {
+       UINT64 gr28,gr29, gr30, gr31;
+       struct ia64_pal_retval result;
+
+       get_pal_parameters (vcpu, &gr29, &gr30, &gr31);
+       vmx_vcpu_get_gr(vcpu,28,&gr28);
+
+       /* Always call Host Pal in int=1 */
+       gr30 = gr30 &(~(0x2UL));
+
+       /* call Host PAL cache flush */
+       result=ia64_pal_call_static(gr28 ,gr29, gr30,gr31,1);  // Clear psr.ic 
when call PAL_CACHE_FLUSH
+
+       /* If host PAL call is interrupted, then loop to complete it */
+//     while (result.status == 1) {
+//             ia64_pal_call_static(gr28 ,gr29, gr30, 
+//                             result.v1,1LL);
+//     }
+       /* NOTE(review): the "loop" body only panics, and %d is used for
+        * a 64-bit status -- should be %ld; confirm and fix upstream. */
+       while (result.status != 0) {
+        panic("PAL_CACHE_FLUSH ERROR, status %d", result.status);
+       }
+
+       return result;
+}
+
+/* PAL_VM_TR_READ: not virtualized yet; report failure to the guest. */
+static struct ia64_pal_retval
+pal_vm_tr_read (VCPU *vcpu ) {
+#warning pal_vm_tr_read: to be implemented
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+
+static struct ia64_pal_retval
+pal_prefetch_visibility (VCPU *vcpu)  {
+       /* Due to current MM virtualization algorithm,
+        * We do not allow guest to change mapping attribute.
+        * Thus we will not support PAL_PREFETCH_VISIBILITY
+        */
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+/* PAL_PLATFORM_ADDR: accept the call; no address change performed. */
+static struct ia64_pal_retval
+pal_platform_addr(VCPU *vcpu) {
+       struct ia64_pal_retval result;
+
+       result.status= 0; //success
+
+       return result;
+}
+
+/* PAL_HALT: not virtualized yet; report failure to the guest. */
+static struct ia64_pal_retval
+pal_halt (VCPU *vcpu) {
+#warning pal_halt: to be implemented
+       //bugbug: to be implement. 
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+
+/* PAL_HALT_LIGHT: not virtualized yet; report failure to the guest. */
+static struct ia64_pal_retval
+pal_halt_light (VCPU *vcpu) {
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+/* PAL_CACHE_READ: not virtualized yet; report failure to the guest. */
+static struct ia64_pal_retval
+pal_cache_read (VCPU *vcpu) {
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+/* PAL_CACHE_WRITE: not virtualized yet; report failure to the guest. */
+static struct ia64_pal_retval
+pal_cache_write (VCPU *vcpu) {
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+
+       return result;
+}
+
+/*
+ * The PAL services below are not virtualized yet.  The original stubs
+ * had empty bodies, so falling off the end of a non-void function made
+ * the caller consume an indeterminate struct return (undefined
+ * behavior).  Each stub now returns an explicit "unimplemented"
+ * status (-1), matching pal_halt_light() and friends above;
+ * pal_cache_init() keeps reporting success as before.
+ */
+static struct ia64_pal_retval
+pal_bus_get_features(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_summary(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+/* PAL_CACHE_INIT: accept the call; nothing to do for the guest cache. */
+static struct ia64_pal_retval
+pal_cache_init(VCPU *vcpu){
+       struct ia64_pal_retval result;
+       result.status=0;
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_prot_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_shared_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_mem_attrib(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_debug_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_fixed_addr(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_freq_base(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_freq_ratios(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_halt_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_logical_to_physica(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_perf_mon_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_proc_get_features(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_ptce_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_register_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_rse_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_test_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_vm_summary(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_vm_info(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+static struct ia64_pal_retval
+pal_vm_page_size(VCPU *vcpu){
+       struct ia64_pal_retval result;
+
+       result.status= -1; //unimplemented
+       return result;
+}
+
+/*
+ * Top-level PAL call emulation: dispatch on the procedure index in
+ * guest GR28 and write the four-register result back to GR8-GR11.
+ * Unsupported indices panic (so `result` is never read uninitialized
+ * as long as panic() does not return).
+ */
+void
+pal_emul( VCPU *vcpu) {
+       UINT64 gr28;
+       struct ia64_pal_retval result;
+
+
+       vmx_vcpu_get_gr(vcpu,28,&gr28);  //bank1
+
+       switch (gr28) {
+               case PAL_CACHE_FLUSH:
+                       result = pal_cache_flush (vcpu);
+                       break;
+
+               case PAL_PREFETCH_VISIBILITY:
+                       result = pal_prefetch_visibility (vcpu);
+                       break;
+
+               case PAL_VM_TR_READ:
+                       result = pal_vm_tr_read (vcpu);
+                       break;
+
+               case PAL_HALT:
+                       result = pal_halt (vcpu);
+                       break;
+
+               case PAL_HALT_LIGHT:
+                       result = pal_halt_light (vcpu);
+                       break;
+
+               case PAL_CACHE_READ:
+                       result = pal_cache_read (vcpu);
+                       break;
+
+               case PAL_CACHE_WRITE:
+                       result = pal_cache_write (vcpu);
+                       break;
+                       
+               case PAL_PLATFORM_ADDR:
+                       result = pal_platform_addr (vcpu);
+                       break;
+
+               default:
+                       panic("pal_emul(): guest call unsupported pal" );
+  }
+               set_pal_result (vcpu, result);
+}
+
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vlsapic.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vlsapic.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,620 @@
+
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vlsapic.c: virtual lsapic model including ITC timer.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+#define  SHARED_VLAPIC_INF
+#ifdef V_IOSAPIC_READY
+/* Return this vcpu's virtual-lapic info slot inside the shared I/O page. */
+static inline vl_apic_info* get_psapic(VCPU *vcpu)
+{
+    shared_iopage_t  *sp = get_sp(vcpu->domain);
+    return &(sp->vcpu_iodata[vcpu->vcpu_id].apic_intr);
+}
+#endif
+//u64  fire_itc;
+//u64  fire_itc2;
+//u64  fire_itm;
+//u64  fire_itm2;
+/*
+ * Update the checked last_itc.
+ */
+/* Record the most recent guest ITC value handed out, so later reads
+ * can be kept monotonic (see now_itc()). */
+static void update_last_itc(vtime_t *vtm, uint64_t cur_itc)
+{
+    vtm->last_itc = cur_itc;
+}
+
+/*
+ * ITC value saw in guest (host+offset+drift).
+ * Guaranteed never to go backwards: if the computed value is behind
+ * last_itc (e.g. after a logical-processor switch), last_itc is
+ * returned instead.
+ */
+static uint64_t now_itc(vtime_t *vtm)
+{
+        uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc();
+        
+        if ( vtm->vtm_local_drift ) {
+//          guest_itc -= vtm->vtm_local_drift;
+        }       
+        if ( (long)(guest_itc - vtm->last_itc) > 0 ) {
+            return guest_itc;
+
+        }
+        else {
+            /* guest ITC backwarded due after LP switch */
+            return vtm->last_itc;
+        }
+}
+
+/*
+ * Interval time components reset.
+ * itv = 0x10000 sets the mask bit, so the timer vector starts masked.
+ */
+static void vtm_reset(VCPU *vcpu)
+{
+    uint64_t    cur_itc;
+    vtime_t     *vtm;
+    
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+    vtm->vtm_offset = 0;
+    vtm->vtm_local_drift = 0;
+    VPD_CR(vcpu, itm) = 0;
+    VPD_CR(vcpu, itv) = 0x10000;
+    cur_itc = ia64_get_itc();
+    vtm->last_itc = vtm->vtm_offset + cur_itc;
+}
+
+/* callback function when vtm_timer expires: pend the guest timer
+ * interrupt (vector from cr.itv) and refresh the virtual ITC. */
+static void vtm_timer_fn(void *data)
+{
+    vtime_t *vtm;
+    VCPU    *vcpu = data;
+    u64            cur_itc,vitm;
+
+    UINT64  vec;
+    
+    vec = VPD_CR(vcpu, itv) & 0xff;
+    vmx_vcpu_pend_interrupt(vcpu, vec);
+
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+    cur_itc = now_itc(vtm);
+    vitm =VPD_CR(vcpu, itm);
+ //fire_itc2 = cur_itc;
+ //fire_itm2 = vitm;
+    update_last_itc(vtm,cur_itc);  // pseudo read to update vITC
+}
+
+/* One-time initialization of a vcpu's virtual timer: derive jump/run
+ * bounds from the host ITC frequency, install the expiry callback,
+ * and reset all timer state. */
+void vtm_init(VCPU *vcpu)
+{
+    vtime_t     *vtm;
+    uint64_t    itc_freq;
+    
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+
+    itc_freq = local_cpu_data->itc_freq;
+    vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
+    vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
+    init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
+    vtm_reset(vcpu);
+}
+
+/*
+ * Action when guest read ITC.
+ */
+/* Guest read of the ITC: return the monotonic virtual ITC, with host
+ * interrupts disabled around the computation. */
+uint64_t vtm_get_itc(VCPU *vcpu)
+{
+    uint64_t    guest_itc, spsr;
+    vtime_t    *vtm;
+
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+    // FIXME: should use local_irq_disable & local_irq_enable ??
+    local_irq_save(spsr);
+    guest_itc = now_itc(vtm);
+//    update_last_itc(vtm, guest_itc);
+
+    local_irq_restore(spsr);
+    return guest_itc;
+}
+
+/* Guest write of the ITC: recompute the guest/host offset, reset the
+ * monotonic floor, and re-evaluate the timer interrupt condition. */
+void vtm_set_itc(VCPU *vcpu, uint64_t new_itc)
+{
+    uint64_t    spsr;
+    vtime_t     *vtm;
+
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+    local_irq_save(spsr);
+    vtm->vtm_offset = new_itc - ia64_get_itc();
+    vtm->last_itc = new_itc;
+    vtm_interruption_update(vcpu, vtm);
+    local_irq_restore(spsr);
+}
+
+/* Guest write of cr.itv: if the new value masks the timer vector,
+ * cancel the pending ac_timer; then re-evaluate the fire condition. */
+void vtm_set_itv(VCPU *vcpu)
+{
+    uint64_t    spsr,itv;
+    vtime_t     *vtm;
+
+    vtm=&(vcpu->arch.arch_vmx.vtm);
+    local_irq_save(spsr);
+    itv = VPD_CR(vcpu, itv);
+    if ( ITV_IRQ_MASK(itv) )
+        rem_ac_timer(&vtm->vtm_timer);
+    vtm_interruption_update(vcpu, vtm);
+    local_irq_restore(spsr);
+}
+
+
+/*
+ * Update interrupt or hook the vtm ac_timer for fire 
+ * At this point vtm_timer should be removed if itv is masked.
+ */
+/* Interrupt must be disabled at this point */
+
+extern u64 tick_to_ns(u64 tick);
+#define TIMER_SLOP (50*1000) /* ns */  /* copy from ac_timer.c */
+/*
+ * Re-evaluate the virtual timer against cr.itm:
+ *  - already fired (last_itc >= itm): cancel the ac_timer;
+ *  - fires now (cur_itc >= itm > last_itc): pend the ITV vector;
+ *  - not yet (both below itm): arm the ac_timer for the remaining
+ *    ticks (plus slop).
+ * No-op when the ITV vector is masked.
+ */
+void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
+{
+    uint64_t    cur_itc,vitm,vitv;
+    uint64_t    expires;
+    long        diff_now, diff_last;
+    uint64_t    spsr;
+    
+    vitv = VPD_CR(vcpu, itv);
+    if ( ITV_IRQ_MASK(vitv) ) {
+        return;
+    }
+    
+    vitm =VPD_CR(vcpu, itm);
+    local_irq_save(spsr);
+    cur_itc =now_itc(vtm);
+    diff_last = vtm->last_itc - vitm;
+    diff_now = cur_itc - vitm;
+    update_last_itc (vtm,cur_itc);
+    
+    if ( diff_last >= 0 ) {
+        // interrupt already fired.
+        rem_ac_timer(&vtm->vtm_timer);
+    }
+    else if ( diff_now >= 0 ) {
+        // ITV is fired.
+        vmx_vcpu_pend_interrupt(vcpu, vitv&0xff);
+    }
+    /* Both last_itc & cur_itc < itm, wait for fire condition */
+    else {
+        expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP;
+        set_ac_timer(&vtm->vtm_timer, expires);
+    }
+    local_irq_restore(spsr);
+}
+
+/*
+ * Action for vtm when the domain is scheduled out.
+ * Remove the ac_timer for vtm.
+ */
+void vtm_domain_out(VCPU *vcpu)
+{
+    if(!is_idle_task(vcpu->domain))
+       rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
+}
+
+/*
+ * Action for vtm when the domain is scheduled in.
+ * Fire vtm IRQ or add the ac_timer for vtm.
+ */
+void vtm_domain_in(VCPU *vcpu)
+{
+    vtime_t     *vtm;
+
+    if(!is_idle_task(vcpu->domain)) {
+       vtm=&(vcpu->arch.arch_vmx.vtm);
+       vtm_interruption_update(vcpu, vtm);
+    }
+}
+
+/*
+ * Next for vLSapic
+ */
+
+#define  NMI_VECTOR         2
+#define  ExtINT_VECTOR      0
+#define  NULL_VECTOR        -1
+/* Per-vcpu in-service bitmap (4 x 64 bits, one bit per vector). */
+#define  VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i])
+/*
+ * Publish the highest pending interrupt into the VPD vhpi field using
+ * the priority-class encoding (0 = none, 32 = NMI, 16 = ExtINT,
+ * vector/16 otherwise), and notify the VP service if asynchronous
+ * interrupt assist (vac.a_int) is enabled.
+ */
+static void update_vhpi(VCPU *vcpu, int vec)
+{
+    u64     vhpi;
+    if ( vec == NULL_VECTOR ) {
+        vhpi = 0;
+    }
+    else if ( vec == NMI_VECTOR ) { // NMI
+        vhpi = 32;
+    } else if (vec == ExtINT_VECTOR) { //ExtINT
+        vhpi = 16;
+    }
+    else {
+        vhpi = vec / 16;
+    }
+
+    VMX_VPD(vcpu,vhpi) = vhpi;
+    // TODO: Add support for XENO
+    if ( VMX_VPD(vcpu,vac).a_int ) {
+        ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, 
+                (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
+    }
+}
+
+#ifdef V_IOSAPIC_READY
+/*
+ * Publish this vcpu's lapic identity into the shared I/O page so the
+ * device model can route interrupts.  Skipped for dom0.  Only
+ * physical destination mode is supported (logical dest/format and
+ * arbitration fields are left zero).
+ */
+void vlapic_update_shared_info(VCPU *vcpu)
+{
+    //int      i;
+    
+    vl_apic_info *ps;
+
+    if (vcpu->domain == dom0)
+       return;
+
+    ps = get_psapic(vcpu);
+    ps->vl_lapic_id = ((VPD_CR(vcpu, lid) >> 16) & 0xffff) << 16; 
+    printf("vl_lapic_id = %x\n", ps->vl_lapic_id);
+    ps->vl_apr = 0;
+    // skip ps->vl_logical_dest && ps->vl_dest_format
+    // IPF support physical destination mode only
+    ps->vl_arb_id = 0;
+    /*
+    for ( i=0; i<4; i++ ) {
+       ps->tmr[i] = 0;         // edge trigger 
+    }
+    */
+}
+
+/*
+ * Drain interrupts posted by the device model into the shared-page IRR
+ * and pend each one on the vcpu.  NOTE(review): highest_bits() is
+ * defined further down this file -- relies on an implicit declaration.
+ */
+void vlapic_update_ext_irq(VCPU *vcpu)
+{
+    int  vec;
+    
+    vl_apic_info *ps = get_psapic(vcpu);
+    while ( (vec = highest_bits(ps->irr)) != NULL_VECTOR ) {
+       clear_bit (vec, ps->irr);
+        vmx_vcpu_pend_interrupt(vcpu, vec);
+    }
+}
+#endif
+
+/*
+ * Reset the virtual LSAPIC of @vcpu to architectural defaults: mirror
+ * the physical LID, clear IVR/EOI/IRR and the in-service words, and
+ * mask ITV/PMV/CMCV/LRR0/LRR1 (0x10000 sets the mask bit).
+ */
+void vlsapic_reset(VCPU *vcpu)
+{
+    int     i;
+
+    VPD_CR(vcpu, lid) = ia64_getreg(_IA64_REG_CR_LID);
+    VPD_CR(vcpu, ivr) = 0;
+    VPD_CR(vcpu,tpr) = 0x10000;
+    VPD_CR(vcpu, eoi) = 0;
+    VPD_CR(vcpu, irr[0]) = 0;
+    VPD_CR(vcpu, irr[1]) = 0;
+    VPD_CR(vcpu, irr[2]) = 0;
+    VPD_CR(vcpu, irr[3]) = 0;
+    VPD_CR(vcpu, pmv) = 0x10000;
+    VPD_CR(vcpu, cmcv) = 0x10000;
+    VPD_CR(vcpu, lrr0) = 0x10000;   // default reset value?
+    VPD_CR(vcpu, lrr1) = 0x10000;   // default reset value?
+    update_vhpi(vcpu, NULL_VECTOR);
+    for ( i=0; i<4; i++) {
+        VLSAPIC_INSVC(vcpu,i) = 0;
+    }
+#ifdef V_IOSAPIC_READY
+    vlapic_update_shared_info(vcpu);
+    //vlapic_update_shared_irr(vcpu);
+#endif
+    /* Fixed: "%lp" is not a valid printf conversion; use %p for the
+     * pointer argument.  (Also dropped an unused `psapic` local.) */
+    DPRINTK("VLSAPIC inservice base=%p\n", &VLSAPIC_INSVC(vcpu,0) );
+}
+
+/*
+ *  Find highest signaled bits in 4 words (long). 
+ *
+ *  return 0-255: highest bits.
+ *          -1 : Not found.
+ */
+/* Scan the 4x64-bit vector bitmap from the top word down; the first
+ * non-zero word's most-significant set bit is the highest vector. */
+static __inline__ int highest_bits(uint64_t *dat)
+{
+    uint64_t  bits, bitnum;
+    int i;
+    
+    /* loop for all 256 bits */
+    for ( i=3; i >= 0 ; i -- ) {
+        bits = dat[i];
+        if ( bits ) {
+            bitnum = ia64_fls(bits);
+            return i*64+bitnum;
+        }
+    }
+   return NULL_VECTOR;
+}
+
+/*
+ * Return 0-255 for pending irq.
+ *        NULL_VECTOR: when no pending.
+ */
+/* Highest pending vector in cr.irr; NMI and ExtINT short-circuit the
+ * bitmap scan because they outrank all ordinary vectors. */
+static int highest_pending_irq(VCPU *vcpu)
+{
+    if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+    if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+    return highest_bits(&VPD_CR(vcpu, irr[0]));
+}
+
+/* Same priority scan, applied to the in-service bitmap. */
+static int highest_inservice_irq(VCPU *vcpu)
+{
+    if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+    if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+    return highest_bits(&(VLSAPIC_INSVC(vcpu, 0)));
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ * Priority is compared by 16-vector class (vector >> 4); anything
+ * pending beats an empty in-service slot.
+ */
+static int is_higher_irq(int pending, int inservice)
+{
+    return ( (pending >> 4) > (inservice>>4) || 
+                ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) );
+}
+
+/* Does the pending vector's class exceed the TPR mask-in-class (mic)? */
+static int is_higher_class(int pending, int mic)
+{
+    return ( (pending >> 4) > mic );
+}
+
+/* Vectors 1 and 3..14 are reserved/invalid for external interrupts. */
+static int is_invalid_irq(int vec)
+{
+    return (vec == 1 || ((vec <= 14 && vec >= 3)));
+}
+
+#define   IRQ_NO_MASKED         0
+#define   IRQ_MASKED_BY_VTPR    1
+#define   IRQ_MASKED_BY_INSVC   2   // masked by inservice IRQ
+
+/* See Table 5-8 in SDM vol2 for the definition */
+/*
+ * Decide whether the pending vector h_pending may interrupt given the
+ * in-service vector h_inservice and the virtual TPR: NMI beats
+ * everything except an in-service NMI; ExtINT is blocked by tpr.mmi;
+ * ordinary vectors must beat both the in-service class and tpr.mic.
+ */
+static int
+_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+    tpr_t    vtpr;
+    uint64_t    mmi;
+    
+    vtpr.val = VPD_CR(vcpu, tpr);
+
+    if ( h_inservice == NMI_VECTOR ) {
+        return IRQ_MASKED_BY_INSVC;
+    }
+    if ( h_pending == NMI_VECTOR ) {
+        // Non Maskable Interrupt
+        return IRQ_NO_MASKED;
+    }
+    if ( h_inservice == ExtINT_VECTOR ) {
+        return IRQ_MASKED_BY_INSVC;
+    }
+    mmi = vtpr.mmi;
+    if ( h_pending == ExtINT_VECTOR ) {
+        if ( mmi ) {
+            // mask all external IRQ
+            return IRQ_MASKED_BY_VTPR;
+        }
+        else {
+            return IRQ_NO_MASKED;
+        }
+    }
+
+    if ( is_higher_irq(h_pending, h_inservice) ) {
+        if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) {
+            return IRQ_NO_MASKED;
+        }
+        else {
+            return IRQ_MASKED_BY_VTPR;
+        }
+    }
+    else {
+        return IRQ_MASKED_BY_INSVC;
+    }
+}
+
+/* Thin wrapper around _xirq_masked(); returns one of the IRQ_* codes. */
+static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+    int mask;
+    
+    mask = _xirq_masked(vcpu, h_pending, h_inservice);
+    return mask;
+}
+
+
+/*
+ * May come from virtualization fault or
+ * nested host interrupt.
+ */
+/*
+ * Pend a single interrupt vector (0..255) in cr.irr and flag the vcpu
+ * so the new interrupt is noticed on the next check.  Out-of-range
+ * vectors are rejected with a diagnostic.
+ */
+void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+{
+    uint64_t    spsr;
+
+    if (vector & ~0xff) {
+        DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n");
+        return;
+    }
+    local_irq_save(spsr);
+    VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63);
+    //vlapic_update_shared_irr(vcpu);
+    local_irq_restore(spsr);
+    vcpu->arch.irq_new_pending = 1;
+}
+
+/*
+ * Add batch of pending interrupt.
+ * The interrupt source is contained in pend_irr[0-3] with
+ * each bits stand for one interrupt.
+ */
+void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr)
+{
+    uint64_t    spsr;
+    int     i;
+
+    local_irq_save(spsr);
+    for (i=0 ; i<4; i++ ) {
+        VPD_CR(vcpu,irr[i]) |= pend_irr[i];
+    }
+    //vlapic_update_shared_irr(vcpu);
+    local_irq_restore(spsr);
+    vcpu->arch.irq_new_pending = 1;
+}
+
+/*
+ * If the new pending interrupt is enabled and not masked, we directly inject 
+ * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when 
+ * the interrupt becomes unmasked, it gets injected.
+ * RETURN:
+ *  TRUE:   Interrupt is injected.
+ *  FALSE:  Not injected but may be in VHPI when vac.a_int=1
+ *
+ * Optimization: We defer setting the VHPI until the EOI time, if a higher 
+ *               priority interrupt is in-service. The idea is to reduce the 
+ *               number of unnecessary calls to inject_vhpi.
+ */
+/*
+ * See the contract documented in the comment block above: inject the
+ * highest pending interrupt if vpsr.i is set and nothing masks it;
+ * otherwise record it in the VHPI unless a higher in-service
+ * interrupt already masks it.  Returns 1 if an interrupt was
+ * reflected into the guest, 0 otherwise.
+ */
+int vmx_check_pending_irq(VCPU *vcpu)
+{
+    uint64_t  spsr, mask;
+    int     h_pending, h_inservice;
+    int injected=0;
+    uint64_t    isr;
+    IA64_PSR    vpsr;
+
+    local_irq_save(spsr);
+    h_pending = highest_pending_irq(vcpu);
+    if ( h_pending == NULL_VECTOR ) goto chk_irq_exit;
+    h_inservice = highest_inservice_irq(vcpu);
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    mask = irq_masked(vcpu, h_pending, h_inservice);
+    if (  vpsr.i && IRQ_NO_MASKED == mask ) {
+        isr = vpsr.val & IA64_PSR_RI;
+        if ( !vpsr.ic )
+            panic("Interrupt when IC=0\n");
+        vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+        injected = 1;
+    }
+    else if ( mask == IRQ_MASKED_BY_INSVC ) {
+        // cann't inject VHPI
+//        DPRINTK("IRQ masked by higher inservice\n");
+    }
+    else {
+        // masked by vpsr.i or vtpr.
+        update_vhpi(vcpu,h_pending);
+    }
+
+chk_irq_exit:
+    local_irq_restore(spsr);
+    return injected;
+}
+
+/*
+ * Only coming from virtualization fault.
+ */
+/*
+ * Emulate a guest write to cr.eoi: retire the highest in-service
+ * vector, clear the EOI register, and re-check for a newly unmasked
+ * pending interrupt.  Panics if nothing is in service.
+ */
+void guest_write_eoi(VCPU *vcpu)
+{
+    int vec;
+    uint64_t  spsr;
+
+    vec = highest_inservice_irq(vcpu);
+    if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
+    local_irq_save(spsr);
+    VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
+    local_irq_restore(spsr);
+    VPD_CR(vcpu, eoi)=0;    // overwrite the data
+    vmx_check_pending_irq(vcpu);
+}
+
+/*
+ * Emulate a guest read of cr.ivr: if the highest pending vector is
+ * deliverable, move it from IRR to the in-service set, clear the
+ * VHPI, and return it; otherwise return the spurious vector.
+ */
+uint64_t guest_read_vivr(VCPU *vcpu)
+{
+    int vec, next, h_inservice;
+    uint64_t  spsr;
+
+    local_irq_save(spsr);
+    vec = highest_pending_irq(vcpu);
+    h_inservice = highest_inservice_irq(vcpu);
+    if ( vec == NULL_VECTOR || 
+        irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) {
+        local_irq_restore(spsr);
+        return IA64_SPURIOUS_INT_VECTOR;
+    }
+ 
+    VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
+    VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
+    update_vhpi(vcpu, NULL_VECTOR);     // clear VHPI till EOI or IRR write
+    //vlapic_update_shared_irr(vcpu);
+    local_irq_restore(spsr);
+    return (uint64_t)vec;
+}
+
+/* Reflect an external-interrupt vector-12 interruption into the guest
+ * after clearing the VHPI.  Requires guest psr.ic to be set. */
+static void generate_exirq(VCPU *vcpu)
+{
+    IA64_PSR    vpsr;
+    uint64_t    isr;
+    
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    update_vhpi(vcpu, NULL_VECTOR);
+    isr = vpsr.val & IA64_PSR_RI;
+    if ( !vpsr.ic )
+        panic("Interrupt when IC=0\n");
+    vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+}
+
+/*
+ * Compare the recorded VHPI against the current masking state
+ * (psr.i, tpr.mmi, tpr.mic) and inject an external interrupt into
+ * the guest if the pending priority beats the threshold.
+ * Fixed: the function had no return type (implicit int is invalid in
+ * C99 and it returns nothing) -- declared void.
+ */
+void
+vhpi_detection(VCPU *vcpu)
+{
+    uint64_t    threshold,vhpi;
+    tpr_t       vtpr;
+    IA64_PSR    vpsr;
+    
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    vtpr.val = VPD_CR(vcpu, tpr);
+
+    /* bit5 = interrupts disabled (psr.i == 0), bit4 = tpr.mmi,
+     * low 4 bits = tpr.mic -- same scale as vhpi (= vector/16). */
+    threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+    vhpi = VMX_VPD(vcpu,vhpi);
+    if ( vhpi > threshold ) {
+        // interrupt actived
+        generate_exirq (vcpu);
+    }
+}
+
+/*
+ * Handle a virtual external-interrupt request: count it, log it, and
+ * reflect an external interrupt into the guest.
+ * Fixed: missing return type (implicit int, invalid C99) -> void, and
+ * %ld -> %lu for the unsigned 64-bit counter.
+ */
+void
+vmx_vexirq(VCPU *vcpu)
+{
+    static  uint64_t  vexirq_count=0;
+
+    vexirq_count ++;
+    printk("Virtual ex-irq %lu\n", vexirq_count);
+    generate_exirq (vcpu);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmmu.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmmu.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,846 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.c: virtual memory management unit components.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/tlb.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vcpu.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+/*
+ * Architecture ppn is in 4KB unit while XEN
+ * page may be different(1<<PAGE_SHIFT).
+ */
+static inline u64 arch_ppn_to_xen_ppn(u64 appn)
+{
+    return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT;
+}
+
+static inline u64 xen_ppn_to_arch_ppn(u64 xppn)
+{
+    return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT;
+}
+
+
+/*
+ * Get the machine page frame number in 16KB unit
+ * Input:
+ *  d: 
+ */
+u64 get_mfn(domid_t domid, u64 gpfn, u64 pages)
+{
+    struct domain *d;
+    u64    i, xen_gppn, xen_mppn, mpfn;
+    
+    if ( domid == DOMID_SELF ) {
+        d = current->domain;
+    }
+    else {
+        d = find_domain_by_id(domid);
+    }
+    xen_gppn = arch_ppn_to_xen_ppn(gpfn);
+    xen_mppn = __gpfn_to_mfn(d, xen_gppn);
+/*
+    for (i=0; i<pages; i++) {
+        if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) {
+            return INVALID_MFN;
+        }
+    }
+*/
+    mpfn= xen_ppn_to_arch_ppn(xen_mppn);
+    mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn);
+    return mpfn;
+    
+}
+
+/*
+ * The VRN bits of va stand for which rr to get.
+ */
+ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va)
+{
+    ia64_rr   vrr;
+    vmx_vcpu_get_rr(vcpu, va, &vrr.rrval);
+    return vrr;
+}
+
+
+void recycle_message(thash_cb_t *hcb, u64 para)
+{
+    printk("hcb=%p recycled with %lx\n",hcb,para);
+}
+
+
+/*
+ * Purge all guest TCs in logical processor.
+ * Instead of purging all LP TCs, we should only purge   
+ * TCs that belong to this guest.
+ */
+void
+purge_machine_tc_by_domid(domid_t domid)
+{
+#ifndef PURGE_GUEST_TC_ONLY
+    // purge all TCs
+    struct ia64_pal_retval  result;
+    u64 addr;
+    u32 count1,count2;
+    u32 stride1,stride2;
+    u32 i,j;
+    u64 psr;
+    
+
+    result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0);
+    if ( result.status != 0 ) {
+        panic ("PAL_PTCE_INFO failed\n");
+    }
+    addr = result.v0;
+    count1 = HIGH_32BITS(result.v1);
+    count2 = LOW_32BITS (result.v1);
+    stride1 = HIGH_32BITS(result.v2);
+    stride2 = LOW_32BITS (result.v2);
+    
+    local_irq_save(psr);
+    for (i=0; i<count1; i++) {
+        for (j=0; j<count2; j++) {
+            ia64_ptce(addr);
+            addr += stride2;
+        }
+        addr += stride1;
+    }
+    local_irq_restore(psr);
+#else
+    // purge all TCs belong to this guest.
+#endif
+}
+
+static thash_cb_t *init_domain_vhpt(struct vcpu *d)
+{
+    struct pfn_info *page;
+    void   *vbase,*vcur;
+    vhpt_special *vs;
+    thash_cb_t  *vhpt;
+    PTA pta_value;
+    
+    page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+    if ( page == NULL ) {
+        panic("No enough contiguous memory for init_domain_mm\n");
+    }
+    vbase = page_to_virt(page);
+    printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase);
+    memset(vbase, 0, VCPU_TLB_SIZE);
+    vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+    vhpt = --((thash_cb_t*)vcur);
+    vhpt->ht = THASH_VHPT;
+    vhpt->vcpu = d;
+    vhpt->hash_func = machine_thash;
+    vs = --((vhpt_special *)vcur);
+
+    /* Setup guest pta */
+    pta_value.val = 0;
+    pta_value.ve = 1;
+    pta_value.vf = 1;
+    pta_value.size = VCPU_TLB_SHIFT - 1;    /* 2M */
+    pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT;
+    d->arch.arch_vmx.mpta = pta_value.val;
+   
+    vhpt->vs = vs;
+    vhpt->vs->get_mfn = get_mfn;
+    vhpt->vs->tag_func = machine_ttag;
+    vhpt->hash = vbase;
+    vhpt->hash_sz = VCPU_TLB_SIZE/2;
+    vhpt->cch_buf = (u64)vbase + vhpt->hash_sz;
+    vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf;
+    vhpt->recycle_notifier = recycle_message;
+    thash_init(vhpt,VCPU_TLB_SHIFT-1);
+    return vhpt;
+}
+
+
+thash_cb_t *init_domain_tlb(struct vcpu *d)
+{
+    struct pfn_info *page;
+    void    *vbase,*vcur;
+    tlb_special_t  *ts;
+    thash_cb_t  *tlb;
+    
+    page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+    if ( page == NULL ) {
+        panic("No enough contiguous memory for init_domain_mm\n");
+    }
+    vbase = page_to_virt(page);
+    printk("Allocate domain tlb at 0x%lx\n", (u64)vbase);
+    memset(vbase, 0, VCPU_TLB_SIZE);
+    vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+    tlb = --((thash_cb_t*)vcur);
+    tlb->ht = THASH_TLB;
+    tlb->vcpu = d;
+    ts = --((tlb_special_t *)vcur);
+    tlb->ts = ts;
+    tlb->ts->vhpt = init_domain_vhpt(d);
+    tlb->hash_func = machine_thash;
+    tlb->hash = vbase;
+    tlb->hash_sz = VCPU_TLB_SIZE/2;
+    tlb->cch_buf = (u64)vbase + tlb->hash_sz;
+    tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf;
+    tlb->recycle_notifier = recycle_message;
+    thash_init(tlb,VCPU_TLB_SHIFT-1);
+    return tlb;
+}
+
+/* Allocate physical to machine mapping table for domN
+ * FIXME: Later this interface may be removed, if that table is provided
+ * by control panel. Dom0 has gpfn identical to mfn, which doesn't need
+ * this interface at all.
+ */
+void
+alloc_pmt(struct domain *d)
+{
+    struct pfn_info *page;
+
+    /* Only called once */
+    ASSERT(d->arch.pmt);
+
+    page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0);
+    ASSERT(page);
+
+    d->arch.pmt = page_to_virt(page);
+    memset(d->arch.pmt, 0x55, d->max_pages * 8);
+}
+
+/*
+ * Insert guest TLB to machine TLB.
+ *  data:   In TLB format
+ */
+void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb)
+{
+    u64     saved_itir, saved_ifa, saved_rr;
+    u64     pages;
+    thash_data_t    mtlb;
+    ia64_rr vrr;
+    unsigned int    cl = tlb->cl;
+
+    mtlb.ifa = tlb->vadr;
+    mtlb.itir = tlb->itir & ~ITIR_RV_MASK;
+    vrr = vmmu_get_rr(d,mtlb.ifa);
+    //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value);
+    pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+    mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+    mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages);
+    if (mtlb.ppn == INVALID_MFN)
+    panic("Machine tlb insert with invalid mfn number.\n");
+
+    __asm __volatile("rsm   psr.ic|psr.i;; srlz.i" );
+    
+    saved_itir = ia64_getreg(_IA64_REG_CR_ITIR);
+    saved_ifa = ia64_getreg(_IA64_REG_CR_IFA);
+    saved_rr = ia64_get_rr(mtlb.ifa);
+
+    ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir);
+    ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa);
+    /* Only access memory stack which is mapped by TR,
+     * after rr is switched.
+     */
+    ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval));
+    ia64_srlz_d();
+    if ( cl == ISIDE_TLB ) {
+        ia64_itci(mtlb.page_flags);
+    ia64_srlz_i();
+    }
+    else {
+        ia64_itcd(mtlb.page_flags);
+    ia64_srlz_d();
+    }
+    ia64_set_rr(mtlb.ifa,saved_rr);
+    ia64_srlz_d();
+    ia64_setreg(_IA64_REG_CR_IFA, saved_ifa);
+    ia64_setreg(_IA64_REG_CR_ITIR, saved_itir);
+    __asm __volatile("ssm   psr.ic|psr.i;; srlz.i" );
+}
+
+u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps)
+{
+    u64     saved_pta, saved_rr0;
+    u64     hash_addr, tag;
+    unsigned long psr;
+    struct vcpu *v = current;
+    ia64_rr vrr;
+
+    
+    saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+    saved_rr0 = ia64_get_rr(0);
+    vrr.rrval = saved_rr0;
+    vrr.rid = rid;
+    vrr.ps = ps;
+
+    va = (va << 3) >> 3;    // set VRN to 0.
+    // TODO: Set to enforce lazy mode
+    local_irq_save(psr);
+    ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+    ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
+    ia64_srlz_d();
+
+    hash_addr = ia64_thash(va);
+    ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+    ia64_set_rr(0, saved_rr0);
+    ia64_srlz_d();
+    local_irq_restore(psr);
+    return hash_addr;
+}
+
+u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps)
+{
+    u64     saved_pta, saved_rr0;
+    u64     hash_addr, tag;
+    u64     psr;
+    struct vcpu *v = current;
+    ia64_rr vrr;
+
+    // TODO: Set to enforce lazy mode    
+    saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+    saved_rr0 = ia64_get_rr(0);
+    vrr.rrval = saved_rr0;
+    vrr.rid = rid;
+    vrr.ps = ps;
+
+    va = (va << 3) >> 3;    // set VRN to 0.
+    local_irq_save(psr);
+    ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+    ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
+    ia64_srlz_d();
+
+    tag = ia64_ttag(va);
+    ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+    ia64_set_rr(0, saved_rr0);
+    ia64_srlz_d();
+    local_irq_restore(psr);
+    return tag;
+}
+
+/*
+ *  Purge machine tlb.
+ *  INPUT
+ *      rr:     guest rr.
+ *      va:     only bits 0:60 is valid
+ *      size:   bits format (1<<size) for the address range to purge.
+ *
+ */
+void machine_tlb_purge(u64 rid, u64 va, u64 ps)
+{
+    u64       saved_rr0;
+    u64       psr;
+    ia64_rr vrr;
+
+    va = (va << 3) >> 3;    // set VRN to 0.
+    saved_rr0 = ia64_get_rr(0);
+    vrr.rrval = saved_rr0;
+    vrr.rid = rid;
+    vrr.ps = ps;
+    local_irq_save(psr);
+    ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) );
+    ia64_srlz_d();
+    ia64_ptcl(va, ps << 2);
+    ia64_set_rr( 0, saved_rr0 );
+    ia64_srlz_d();
+    local_irq_restore(psr);
+}
+
+
+int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
+{
+    ia64_rr  vrr;
+    PTA   vpta;
+    IA64_PSR  vpsr; 
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    vrr = vmx_vcpu_rr(vcpu, vadr);
+    vmx_vcpu_get_pta(vcpu,&vpta.val);
+
+    if ( vrr.ve & vpta.ve ) {
+        switch ( ref ) {
+        case DATA_REF:
+        case NA_REF:
+            return vpsr.dt;
+        case INST_REF:
+            return vpsr.dt && vpsr.it && vpsr.ic;
+        case RSE_REF:
+            return vpsr.dt && vpsr.rt;
+
+        }
+    }
+    return 0;
+}
+
+
+int unimplemented_gva(VCPU *vcpu,u64 vadr)
+{
+    int bit=vcpu->domain->arch.imp_va_msb;
+    u64 ladr =(vadr<<3)>>(3+bit);
+    if(!ladr||ladr==(1U<<(61-bit))-1){
+        return 0;
+    }else{
+        return 1;
+    }
+}
+
+
+/*
+ * Prefetch guest bundle code.
+ * INPUT:
+ *  code: buffer pointer to hold the read data.
+ *  num:  number of dwords (8 bytes each) to read.
+ */
+int
+fetch_code(VCPU *vcpu, u64 gip, u64 *code)
+{
+    u64     gpip;   // guest physical IP
+    u64     mpa;
+    thash_data_t    *tlb;
+    ia64_rr vrr;
+    u64     mfn;
+    
+    if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) {   // I-side physical mode
+        gpip = gip;
+    }
+    else {
+        vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval);
+        tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu), 
+                vrr.rid, gip, ISIDE_TLB );
+        if ( tlb == NULL ) panic("No entry found in ITLB\n");
+        gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) );
+    }
+    mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
+    if ( mfn == INVALID_MFN ) return 0;
+    
+    mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT);
+    *code = *(u64*)__va(mpa);
+    return 1;
+}
+
+IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+    thash_data_t data, *ovl;
+    thash_cb_t  *hcb;
+    search_section_t sections;
+    ia64_rr vrr;
+
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+    data.itir=itir;
+    data.vadr=PAGEALIGN(ifa,data.ps);
+    data.tc = 1;
+    data.cl=ISIDE_TLB;
+    vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+    data.rid = vrr.rid;
+    
+    sections.tr = 1;
+    sections.tc = 0;
+
+    ovl = thash_find_overlap(hcb, &data, sections);
+    while (ovl) {
+        // generate MCA.
+        panic("Tlb conflict!!");
+        return;
+    }
+    thash_purge_and_insert(hcb, &data);
+    return IA64_NO_FAULT;
+}
+
+
+
+
+IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+    thash_data_t data, *ovl;
+    thash_cb_t  *hcb;
+    search_section_t sections;
+    ia64_rr vrr;
+
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+    data.itir=itir;
+    data.vadr=PAGEALIGN(ifa,data.ps);
+    data.tc = 1;
+    data.cl=DSIDE_TLB;
+    vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+    data.rid = vrr.rid;
+    sections.tr = 1;
+    sections.tc = 0;
+
+    ovl = thash_find_overlap(hcb, &data, sections);
+    if (ovl) {
+          // generate MCA.
+        panic("Tlb conflict!!");
+        return;
+    }
+    thash_purge_and_insert(hcb, &data);
+    return IA64_NO_FAULT;
+}
+
+/*
+ * Return TRUE/FALSE for success of lock operation
+ */
+int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock)
+{
+
+    thash_cb_t  *hcb;
+    ia64_rr vrr;
+    u64          preferred_size;
+
+    vmx_vcpu_get_rr(vcpu, va, &vrr);
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    va = PAGEALIGN(va,vrr.ps);
+    preferred_size = PSIZE(vrr.ps);
+    return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock);
+}
+
+IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+    thash_data_t data, *ovl;
+    thash_cb_t  *hcb;
+    search_section_t sections;
+    ia64_rr vrr;
+
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+    data.itir=itir;
+    data.vadr=PAGEALIGN(ifa,data.ps);
+    data.tc = 0;
+    data.cl=ISIDE_TLB;
+    vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+    data.rid = vrr.rid;
+    sections.tr = 1;
+    sections.tc = 0;
+
+    ovl = thash_find_overlap(hcb, &data, sections);
+    if (ovl) {
+        // generate MCA.
+        panic("Tlb conflict!!");
+        return;
+    }
+    sections.tr = 0;
+    sections.tc = 1;
+    thash_purge_entries(hcb, &data, sections);
+    thash_tr_insert(hcb, &data, ifa, idx);
+    return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+    thash_data_t data, *ovl;
+    thash_cb_t  *hcb;
+    search_section_t sections;
+    ia64_rr    vrr;
+
+
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+    data.itir=itir;
+    data.vadr=PAGEALIGN(ifa,data.ps);
+    data.tc = 0;
+    data.cl=DSIDE_TLB;
+    vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+    data.rid = vrr.rid;
+    sections.tr = 1;
+    sections.tc = 0;
+
+    ovl = thash_find_overlap(hcb, &data, sections);
+    while (ovl) {
+        // generate MCA.
+        panic("Tlb conflict!!");
+        return;
+    }
+    sections.tr = 0;
+    sections.tc = 1;
+    thash_purge_entries(hcb, &data, sections);
+    thash_tr_insert(hcb, &data, ifa, idx);
+    return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+    thash_cb_t  *hcb;
+    ia64_rr rr;
+    search_section_t sections;
+
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    rr=vmx_vcpu_rr(vcpu,vadr);
+    sections.tr = 1;
+    sections.tc = 1;
+    thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB);
+    return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+    thash_cb_t  *hcb;
+    ia64_rr rr;
+    search_section_t sections;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    rr=vmx_vcpu_rr(vcpu,vadr);
+    sections.tr = 1;
+    sections.tc = 1;
+    thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB);
+    return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+    thash_cb_t  *hcb;
+    ia64_rr vrr;
+    search_section_t sections;
+    thash_data_t data, *ovl;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    vrr=vmx_vcpu_rr(vcpu,vadr);
+    sections.tr = 0;
+    sections.tc = 1;
+    vadr = PAGEALIGN(vadr, ps);
+
+    thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB);
+    thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB);
+    return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
+{
+    thash_cb_t  *hcb;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    thash_purge_all(hcb);
+    return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+    vmx_vcpu_ptc_l(vcpu, vadr, ps);
+    return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+    vmx_vcpu_ptc_l(vcpu, vadr, ps);
+    return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+    PTA vpta;
+    ia64_rr vrr;
+    u64 vhpt_offset,tmp;
+    vmx_vcpu_get_pta(vcpu, &vpta.val);
+    vrr=vmx_vcpu_rr(vcpu, vadr);
+    if(vpta.vf){
+        panic("THASH,Don't support long format VHPT");
+        *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0);
+    }else{
+        vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1);
+        *pval = (vadr&VRN_MASK)|
+            (vpta.val<<3>>(vpta.size+3)<<(vpta.size))|
+            vhpt_offset;
+    }
+    return  IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+    ia64_rr vrr;
+    PTA vpta;
+    vmx_vcpu_get_pta(vcpu, &vpta.val);
+    vrr=vmx_vcpu_rr(vcpu, vadr);
+    if(vpta.vf){
+        panic("THASH,Don't support long format VHPT");
+        *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0);
+    }else{
+        *pval = 1;
+    }
+    return  IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+    thash_data_t *data;
+    thash_cb_t  *hcb;
+    ia64_rr vrr;
+    ISR visr,pt_isr;
+    REGS *regs;
+    u64 vhpt_adr;
+    IA64_PSR vpsr;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    vrr=vmx_vcpu_rr(vcpu,vadr);
+    regs=vcpu_regs(vcpu);
+    pt_isr.val=regs->cr_isr;
+    visr.val=0;
+    visr.ei=pt_isr.ei;
+    visr.ir=pt_isr.ir;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if(vpsr.ic==0){
+         visr.ni=1;
+    }
+    visr.na=1;
+    data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB);
+    if(data){
+        if(data->p==0){
+            visr.na=1;
+            vmx_vcpu_set_isr(vcpu,visr.val);
+            page_not_present(vcpu, vadr);
+            return IA64_FAULT;
+        }else if(data->ma == VA_MATTR_NATPAGE){
+            visr.na = 1;
+            vmx_vcpu_set_isr(vcpu, visr.val);
+            dnat_page_consumption(vcpu, vadr);
+            return IA64_FAULT;
+        }else{
+            *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1));
+            return IA64_NO_FAULT;
+        }
+    }else{
+        if(!vhpt_enabled(vcpu, vadr, NA_REF)){
+            if(vpsr.ic){
+                vmx_vcpu_set_isr(vcpu, visr.val);
+                alt_dtlb(vcpu, vadr);
+                return IA64_FAULT;
+            }
+            else{
+                nested_dtlb(vcpu);
+                return IA64_FAULT;
+            }
+        }
+        else{
+            vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+            vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+            data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB);
+            if(data){
+                if(vpsr.ic){
+                    vmx_vcpu_set_isr(vcpu, visr.val);
+                    dtlb_fault(vcpu, vadr);
+                    return IA64_FAULT;
+                }
+                else{
+                    nested_dtlb(vcpu);
+                    return IA64_FAULT;
+                }
+            }
+            else{
+                if(vpsr.ic){
+                    vmx_vcpu_set_isr(vcpu, visr.val);
+                    dvhpt_fault(vcpu, vadr);
+                    return IA64_FAULT;
+                }
+                else{
+                    nested_dtlb(vcpu);
+                    return IA64_FAULT;
+                }
+            }
+        }
+    }
+}
+
+IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+{
+    thash_data_t *data;
+    thash_cb_t  *hcb;
+    ia64_rr rr;
+    PTA vpta;
+    vmx_vcpu_get_pta(vcpu, &vpta.val);
+    if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){
+        *key=1;
+        return IA64_NO_FAULT;
+    }
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    rr=vmx_vcpu_rr(vcpu,vadr);
+    data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB);
+    if(!data||!data->p){
+        *key=1;
+    }else{
+        *key=data->key;
+    }
+    return IA64_NO_FAULT;
+}
+
+/*
+ * [FIXME] Is there any effective way to move this routine
+ * into vmx_uaccess.h? struct exec_domain is an incomplete type
+ * in that way...
+ *
+ * This is the interface to lookup virtual TLB, and then
+ * return corresponding machine address in 2nd parameter.
+ * The 3rd parameter contains how many bytes mapped by
+ * matched vTLB entry, thus to allow caller copy more once.
+ *
+ * If the lookup fails, -EFAULT is returned. Otherwise return
+ * 0. All upper domain access utilities rely on this routine
+ * to determine the real machine address. 
+ *
+ * Yes, put_user and get_user seem to be somehow slow upon it.
+ * However these are the necessary steps for any vmx domain virtual
+ * address, since that's a different address space from HV's one.
+ * Later some short-circuit may be created for special case
+ */
+long
+__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len)
+{
+    unsigned long      mpfn, gpfn, m, n = *len;
+    thash_cb_t         *vtlb;
+    unsigned long      end;    /* end of the area mapped by current entry */
+    thash_data_t       *entry;
+    struct vcpu *v = current;
+    ia64_rr    vrr;
+
+    vtlb = vmx_vcpu_get_vtlb(v); 
+    vrr = vmx_vcpu_rr(v, va);
+    entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB);
+    if (entry == NULL)
+       return -EFAULT;
+
+    gpfn =(entry->ppn>>(PAGE_SHIFT-12));
+    gpfn =PAGEALIGN(gpfn,(entry->ps-PAGE_SHIFT));
+    gpfn = gpfn | POFFSET(va>>PAGE_SHIFT,(entry->ps-PAGE_SHIFT)); 
+
+    mpfn = __gpfn_to_mfn(v->domain, gpfn);
+    m = (mpfn<<PAGE_SHIFT) | (va & (PAGE_SIZE - 1));
+    /* machine address may be not continuous */
+    end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE;
+    /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/
+    /* Current entry can't map all requested area */
+    if ((m + n) > end)
+       n = end - m;
+
+    *ma = m;
+    *len = n;
+    return 0;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_entry.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_entry.S     Thu Sep  1 18:46:28 2005
@@ -0,0 +1,611 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_entry.S:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ *  Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
+ */
+
+#ifndef VCPU_TLB_SHIFT
+#define VCPU_TLB_SHIFT 22
+#endif
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#include "vmx_minstate.h"
+
+/*
+ * prev_task <- vmx_ia64_switch_to(struct task_struct *next)
+ *     With Ingo's new scheduler, interrupts are disabled when this routine 
gets
+ *     called.  The code starting at .map relies on this.  The rest of the code
+ *     doesn't care about the interrupt masking status.
+ *
+ * Since we allocate domain stack in xenheap, there's no need to map new
+ * domain's stack since all xenheap is mapped by TR. Another different task
+ * for vmx_ia64_switch_to is to switch to bank0 and change current pointer.
+ */
+GLOBAL_ENTRY(vmx_ia64_switch_to)
+       .prologue
+       alloc r16=ar.pfs,1,0,0,0
+       DO_SAVE_SWITCH_STACK
+       .body
+
+       bsw.0   // Switch to bank0, because bank0 r21 is current pointer
+       ;;
+       adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+       movl r25=init_task
+       adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0
+       ;;
+       st8 [r22]=sp                    // save kernel stack pointer of old task
+       ;;
+       /*
+        * TR always mapped this task's page, we can skip doing it again.
+        */
+       ld8 sp=[r26]                    // load kernel stack pointer of new task
+       mov r21=in0                     // update "current" application register
+       mov r8=r13                      // return pointer to previously running 
task
+       mov r13=in0                     // set "current" pointer
+       ;;
+       bsw.1
+       ;;
+       DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+       sync.i                          // ensure "fc"s done by this CPU are 
visible on other CPUs
+#endif
+       br.ret.sptk.many rp             // boogie on out in new context
+END(vmx_ia64_switch_to)
+
+GLOBAL_ENTRY(ia64_leave_nested)
+       rsm psr.i
+       ;;
+       adds r21=PT(PR)+16,r12
+       ;;
+
+       lfetch [r21],PT(CR_IPSR)-PT(PR)
+       adds r2=PT(B6)+16,r12
+       adds r3=PT(R16)+16,r12
+       ;;
+       lfetch [r21]
+       ld8 r28=[r2],8          // load b6
+       adds r29=PT(R24)+16,r12
+
+       ld8.fill r16=[r3]
+       adds r3=PT(AR_CSD)-PT(R16),r3
+       adds r30=PT(AR_CCV)+16,r12
+       ;;
+       ld8.fill r24=[r29]
+       ld8 r15=[r30]           // load ar.ccv
+       ;;
+       ld8 r29=[r2],16         // load b7
+       ld8 r30=[r3],16         // load ar.csd
+       ;;
+       ld8 r31=[r2],16         // load ar.ssd
+       ld8.fill r8=[r3],16
+       ;;
+       ld8.fill r9=[r2],16
+       ld8.fill r10=[r3],PT(R17)-PT(R10)
+       ;;
+       ld8.fill r11=[r2],PT(R18)-PT(R11)
+       ld8.fill r17=[r3],16
+       ;;
+       ld8.fill r18=[r2],16
+       ld8.fill r19=[r3],16
+       ;;
+       ld8.fill r20=[r2],16
+       ld8.fill r21=[r3],16
+       mov ar.csd=r30
+       mov ar.ssd=r31
+       ;;
+       rsm psr.i | psr.ic      // initiate turning off of interrupt and 
interruption collection
+       invala                  // invalidate ALAT
+       ;;
+       ld8.fill r22=[r2],24
+       ld8.fill r23=[r3],24
+       mov b6=r28
+       ;;
+       ld8.fill r25=[r2],16
+       ld8.fill r26=[r3],16
+       mov b7=r29
+       ;;
+       ld8.fill r27=[r2],16
+       ld8.fill r28=[r3],16
+       ;;
+       ld8.fill r29=[r2],16
+       ld8.fill r30=[r3],24
+       ;;
+       ld8.fill r31=[r2],PT(F9)-PT(R31)
+       adds r3=PT(F10)-PT(F6),r3
+       ;;
+       ldf.fill f9=[r2],PT(F6)-PT(F9)
+       ldf.fill f10=[r3],PT(F8)-PT(F10)
+       ;;
+       ldf.fill f6=[r2],PT(F7)-PT(F6)
+       ;;
+       ldf.fill f7=[r2],PT(F11)-PT(F7)
+       ldf.fill f8=[r3],32
+       ;;
+       srlz.i                  // ensure interruption collection is off
+       mov ar.ccv=r15
+       ;;
+       bsw.0                   // switch back to bank 0 (no stop bit required 
beforehand...)
+       ;;
+       ldf.fill f11=[r2]
+//     mov r18=r13
+//    mov r21=r13
+       adds r16=PT(CR_IPSR)+16,r12
+       adds r17=PT(CR_IIP)+16,r12
+       ;;
+       ld8 r29=[r16],16        // load cr.ipsr
+       ld8 r28=[r17],16        // load cr.iip
+       ;;
+       ld8 r30=[r16],16        // load cr.ifs
+       ld8 r25=[r17],16        // load ar.unat
+       ;;
+       ld8 r26=[r16],16        // load ar.pfs
+       ld8 r27=[r17],16        // load ar.rsc
+       cmp.eq p9,p0=r0,r0      // set p9 to indicate that we should restore 
cr.ifs
+       ;;
+       ld8 r24=[r16],16        // load ar.rnat (may be garbage)
+       ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+       ;;
+       ld8 r31=[r16],16        // load predicates
+       ld8 r22=[r17],16        // load b0
+       ;;
+       ld8 r19=[r16],16        // load ar.rsc value for "loadrs"
+       ld8.fill r1=[r17],16    // load r1
+       ;;
+       ld8.fill r12=[r16],16
+       ld8.fill r13=[r17],16
+       ;;
+       ld8 r20=[r16],16        // ar.fpsr
+       ld8.fill r15=[r17],16
+       ;;
+       ld8.fill r14=[r16],16
+       ld8.fill r2=[r17]
+       ;;
+       ld8.fill r3=[r16]
+       ;;
+       mov r16=ar.bsp          // get existing backing store pointer
+       ;;
+       mov b0=r22
+       mov ar.pfs=r26
+       mov cr.ifs=r30
+       mov cr.ipsr=r29
+       mov ar.fpsr=r20
+       mov cr.iip=r28
+       ;;
+       mov ar.rsc=r27
+       mov ar.unat=r25
+       mov pr=r31,-1
+       rfi
+END(ia64_leave_nested)
+
+
+
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+    PT_REGS_UNWIND_INFO(0)
+    /*
+     * work.need_resched etc. mustn't get changed by this CPU before it 
returns to
+    ;;
+     * user- or fsys-mode, hence we disable interrupts early on:
+     */
+    rsm psr.i
+    ;;
+    alloc loc0=ar.pfs,0,1,1,0
+    adds out0=16,r12
+    ;;
+    br.call.sptk.many b0=leave_hypervisor_tail
+    mov ar.pfs=loc0
+    adds r8=IA64_VPD_BASE_OFFSET,r13
+    ;;
+    ld8 r8=[r8]
+    ;;
+    adds r9=VPD(VPSR),r8
+    ;;
+    ld8 r9=[r9]
+    ;;
+    tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT
+    ;;
+(pBN0) add r7=VPD(VBNAT),r8;
+(pBN1) add r7=VPD(VNAT),r8;
+    ;;
+    ld8 r7=[r7]
+    ;;
+    mov ar.unat=r7
+(pBN0) add r4=VPD(VBGR),r8;
+(pBN1) add r4=VPD(VGR),r8;
+(pBN0) add r5=VPD(VBGR)+0x8,r8;
+(pBN1) add r5=VPD(VGR)+0x8,r8;
+    ;;
+    ld8.fill r16=[r4],16
+    ld8.fill r17=[r5],16
+    ;;
+    ld8.fill r18=[r4],16
+    ld8.fill r19=[r5],16
+    ;;
+    ld8.fill r20=[r4],16
+    ld8.fill r21=[r5],16
+    ;;
+    ld8.fill r22=[r4],16
+    ld8.fill r23=[r5],16
+    ;;
+    ld8.fill r24=[r4],16
+    ld8.fill r25=[r5],16
+    ;;
+    ld8.fill r26=[r4],16
+    ld8.fill r27=[r5],16
+    ;;
+    ld8.fill r28=[r4],16
+    ld8.fill r29=[r5],16
+    ;;
+    ld8.fill r30=[r4],16
+    ld8.fill r31=[r5],16
+    ;;
+    bsw.0
+    ;;
+    mov r18=r8      //vpd
+    mov r19=r9      //vpsr
+    adds r20=PT(PR)+16,r12
+    ;;
+    lfetch [r20],PT(CR_IPSR)-PT(PR)
+    adds r16=PT(B6)+16,r12
+    adds r17=PT(B7)+16,r12
+    ;;
+    lfetch [r20]
+    mov r21=r13                // get current
+    ;;
+    ld8 r30=[r16],16      // load b6
+    ld8 r31=[r17],16      // load b7
+    add r20=PT(EML_UNAT)+16,r12
+    ;;
+    ld8 r29=[r20]       //load ar_unat
+    mov b6=r30
+    mov b7=r31
+    ld8 r30=[r16],16    //load ar_csd
+    ld8 r31=[r17],16    //load ar_ssd
+    ;;
+    mov ar.unat=r29
+    mov ar.csd=r30
+    mov ar.ssd=r31
+    ;;
+    ld8.fill r8=[r16],16    //load r8
+    ld8.fill r9=[r17],16    //load r9
+    ;;
+    ld8.fill r10=[r16],PT(R1)-PT(R10)    //load r10
+    ld8.fill r11=[r17],PT(R12)-PT(R11)    //load r11
+    ;;
+    ld8.fill r1=[r16],16    //load r1
+    ld8.fill r12=[r17],16    //load r12
+    ;;
+    ld8.fill r13=[r16],16    //load r13
+    ld8 r30=[r17],16    //load ar_fpsr
+    ;;
+    ld8.fill r15=[r16],16    //load r15
+    ld8.fill r14=[r17],16    //load r14
+    mov ar.fpsr=r30
+    ;;
+    ld8.fill r2=[r16],16    //load r2
+    ld8.fill r3=[r17],16    //load r3
+    ;;
+/*
+(pEml) ld8.fill r4=[r16],16    //load r4
+(pEml) ld8.fill r5=[r17],16    //load r5
+    ;;
+(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6)   //load r6
+(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7)   //load r7
+    ;;
+(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16
+(pNonEml) adds r17=PT(F7)-PT(R5),r17
+    ;;
+*/
+    ld8.fill r4=[r16],16    //load r4
+    ld8.fill r5=[r17],16    //load r5
+     ;;
+    ld8.fill r6=[r16],PT(AR_CCV)-PT(R6)   //load r6
+    ld8.fill r7=[r17],PT(F7)-PT(R7)   //load r7
+    ;;
+
+    ld8 r30=[r16],PT(F6)-PT(AR_CCV)
+    rsm psr.i | psr.ic  // initiate turning off of interrupt and interruption collection
+    ;;
+    srlz.i          // ensure interruption collection is off
+    ;;
+    invala          // invalidate ALAT
+    ;;
+    ldf.fill f6=[r16],32
+    ldf.fill f7=[r17],32
+    ;;
+    ldf.fill f8=[r16],32
+    ldf.fill f9=[r17],32
+    ;;
+    ldf.fill f10=[r16]
+    ldf.fill f11=[r17]
+    ;;
+    mov ar.ccv=r30
+    adds r16=PT(CR_IPSR)-PT(F10),r16
+    adds r17=PT(CR_IIP)-PT(F11),r17
+    ;;
+    ld8 r31=[r16],16    // load cr.ipsr
+    ld8 r30=[r17],16    // load cr.iip
+    ;;
+    ld8 r29=[r16],16    // load cr.ifs
+    ld8 r28=[r17],16    // load ar.unat
+    ;;
+    ld8 r27=[r16],16    // load ar.pfs
+    ld8 r26=[r17],16    // load ar.rsc
+    ;;
+    ld8 r25=[r16],16    // load ar.rnat (may be garbage)
+    ld8 r24=[r17],16// load ar.bspstore (may be garbage)
+    ;;
+    ld8 r23=[r16],16    // load predicates
+    ld8 r22=[r17],PT(RFI_PFS)-PT(B0)    // load b0
+    ;;
+    ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+    ;;
+//rbs_switch
+    // loadrs has already been shifted
+    alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+    ;;
+    mov ar.rsc=r20
+    ;;
+    loadrs
+    ;;
+    mov ar.bspstore=r24
+    ;;
+    ld8 r24=[r17]       //load rfi_pfs
+    mov ar.unat=r28
+    mov ar.rnat=r25
+    mov ar.rsc=r26
+    ;;
+    mov cr.ipsr=r31
+    mov cr.iip=r30
+    mov cr.ifs=r29
+    cmp.ne p6,p0=r24,r0
+(p6)br.sptk vmx_dorfirfi
+    ;;
+vmx_dorfirfi_back:
+    mov ar.pfs=r27
+
+//vsa_sync_write_start
+    movl r20=__vsa_base
+    ;;
+    ld8 r20=[r20]       // read entry point
+    mov r25=r18
+    ;;
+    add r16=PAL_VPS_SYNC_WRITE,r20
+    movl r24=switch_rr7  // calculate return address
+    ;;
+    mov b0=r16
+    br.cond.sptk b0         // call the service
+    ;;
+// switch rr7 and rr5
+switch_rr7:
+    adds r24=SWITCH_MRR5_OFFSET, r21
+    adds r26=SWITCH_MRR6_OFFSET, r21
+    adds r16=SWITCH_MRR7_OFFSET ,r21
+    movl r25=(5<<61)
+    movl r27=(6<<61)
+    movl r17=(7<<61)
+    ;;
+    ld8 r24=[r24]
+    ld8 r26=[r26]
+    ld8 r16=[r16]
+    ;;
+    mov rr[r25]=r24
+    mov rr[r27]=r26
+    mov rr[r17]=r16
+    ;;
+    srlz.i
+    ;;
+    add r24=SWITCH_MPTA_OFFSET, r21
+    ;;
+    ld8 r24=[r24]
+    ;;
+    mov cr.pta=r24
+    ;;
+    srlz.i
+    ;;
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ *  must be at bank 0
+ *  parameter:
+ *  r18:vpd
+ *  r19:vpsr
+ *  r20:__vsa_base
+ *  r22:b0
+ *  r23:predicate
+ */
+    mov r24=r22
+    mov r25=r18
+    tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+    ;;
+    mov pr=r23,-2
+    mov b0=r29
+    ;;
+    br.cond.sptk b0             // call pal service
+END(ia64_leave_hypervisor)
+
+//r24 rfi_pfs
+//r17 address of rfi_pfs
+GLOBAL_ENTRY(vmx_dorfirfi)
+    mov r16=ar.ec
+    movl r20 = vmx_dorfirfi_back
+       ;;
+// clean rfi_pfs
+    st8 [r17]=r0
+    mov b0=r20
+// pfs.pec=ar.ec
+    dep r24 = r16, r24, 52, 6
+    ;;
+    mov ar.pfs=r24
+       ;;
+    br.ret.sptk b0
+       ;;
+END(vmx_dorfirfi)
+
+
+#define VMX_PURGE_RR7  0
+#define VMX_INSERT_RR7 1
+/*
+ * in0: old rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ */
+GLOBAL_ENTRY(vmx_purge_double_mapping)
+    alloc loc1 = ar.pfs,5,9,0,0
+    mov loc0 = rp
+    movl r8 = 1f
+    ;;
+    movl loc4 = KERNEL_TR_PAGE_SHIFT
+    movl loc5 = VCPU_TLB_SHIFT
+    mov loc6 = psr
+    movl loc7 = XEN_RR7_SWITCH_STUB
+    mov loc8 = (1<<VMX_PURGE_RR7)
+    ;;
+    srlz.i
+    ;;
+    rsm psr.i | psr.ic
+    ;;
+    srlz.i
+    ;;
+    mov ar.rsc = 0
+    mov b6 = loc7
+    mov rp = r8
+    ;;
+    br.sptk b6
+1:
+    mov ar.rsc = 3
+    mov rp = loc0
+    ;;
+    mov psr.l = loc6
+    ;;
+    srlz.i
+    ;;
+    br.ret.sptk rp
+END(vmx_purge_double_mapping)
+
+/*
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ */
+GLOBAL_ENTRY(vmx_insert_double_mapping)
+    alloc loc1 = ar.pfs,5,9,0,0
+    mov loc0 = rp
+    movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image
+    ;;
+    movl loc3 = IA64_TR_VHPT_IN_DOM    // TR number for vhpt table
+    movl r8 = 1f
+    movl loc4 = KERNEL_TR_PAGE_SHIFT
+    ;;
+    movl loc5 = VCPU_TLB_SHIFT
+    mov loc6 = psr
+    movl loc7 = XEN_RR7_SWITCH_STUB
+    ;;
+    srlz.i
+    ;;
+    rsm psr.i | psr.ic
+    mov loc8 = (1<<VMX_INSERT_RR7)
+    ;;
+    srlz.i
+    ;;
+    mov ar.rsc = 0
+    mov b6 = loc7
+    mov rp = r8
+    ;;
+    br.sptk b6
+1:
+    mov ar.rsc = 3
+    mov rp = loc0
+    ;;
+    mov psr.l = loc6
+    ;;
+    srlz.i
+    ;;
+    br.ret.sptk rp
+END(vmx_insert_double_mapping)
+
+    .align PAGE_SIZE
+/*
+ * Stub to add double mapping for new domain, which shouldn't
+ * access any memory when active. Before reaching this point,
+ * both psr.i/ic is cleared and rse is set in lazy mode.
+ *
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ * loc2: TR number for xen image
+ * loc3: TR number for vhpt table
+ * loc4: page size for xen image
+ * loc5: page size of vhpt table
+ * loc7: free to use
+ * loc8: purge or insert
+ * r8: will contain old rid value
+ */
+GLOBAL_ENTRY(vmx_switch_rr7)
+    movl loc7 = (7<<61)
+    dep.z loc4 = loc4, 2, 6
+    dep.z loc5 = loc5, 2, 6
+    ;;
+    tbit.nz p6,p7=loc8, VMX_INSERT_RR7
+    mov r8 = rr[loc7]
+    ;;
+    mov rr[loc7] = in0
+(p6)mov cr.ifa = in1
+(p6)mov cr.itir = loc4
+    ;;
+    srlz.i
+    ;;
+(p6)itr.i itr[loc2] = in3
+(p7)ptr.i in1, loc4
+    ;;
+(p6)itr.d dtr[loc2] = in3
+(p7)ptr.d in1, loc4
+    ;;
+    srlz.i
+    ;;
+(p6)mov cr.ifa = in2
+(p6)mov cr.itir = loc5
+    ;;
+(p6)itr.d dtr[loc3] = in4
+(p7)ptr.d in2, loc5
+    ;;
+    srlz.i
+    ;;
+    mov rr[loc7] = r8
+    ;;
+    srlz.i
+    br.sptk rp
+END(vmx_switch_rr7)
+    .align PAGE_SIZE
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_hypercall.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_hypercall.c Thu Sep  1 18:46:28 2005
@@ -0,0 +1,235 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_hypercall.c: handling hypercall from domain
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <public/xen.h>
+#include <public/event_channel.h>
+#include <asm/vmmu.h>
+#include <asm/tlb.h>
+#include <asm/regionreg.h>
+#include <asm/page.h>
+#include <xen/mm.h>
+#include <xen/multicall.h>
+
+
+void hyper_not_support(void)
+{
+    VCPU *vcpu=current;
+    vmx_vcpu_set_gr(vcpu, 8, -1, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_mmu_update(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,r35,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+    vmx_vcpu_get_gr(vcpu,19,&r35);
+    ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+unsigned long __hypercall_create_continuation(
+    unsigned int op, unsigned int nr_args, ...)
+{
+    struct mc_state *mcs = &mc_state[smp_processor_id()];
+    VCPU *vcpu = current;
+    struct cpu_user_regs *regs = vcpu_regs(vcpu);
+    unsigned int i;
+    va_list args;
+
+    va_start(args, nr_args);
+    if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) {
+       panic("PREEMPT happen in multicall\n"); // Not support yet
+    } else {
+       vmx_vcpu_set_gr(vcpu, 15, op, 0);
+       for ( i = 0; i < nr_args; i++) {
+           switch (i) {
+           case 0: vmx_vcpu_set_gr(vcpu, 16, va_arg(args, unsigned long), 0);
+                   break;
+           case 1: vmx_vcpu_set_gr(vcpu, 17, va_arg(args, unsigned long), 0);
+                   break;
+           case 2: vmx_vcpu_set_gr(vcpu, 18, va_arg(args, unsigned long), 0);
+                   break;
+           case 3: vmx_vcpu_set_gr(vcpu, 19, va_arg(args, unsigned long), 0);
+                   break;
+           case 4: vmx_vcpu_set_gr(vcpu, 20, va_arg(args, unsigned long), 0);
+                   break;
+           default: panic("Too many args for hypercall continuation\n");
+                   break;
+           }
+       }
+    }
+    vcpu->arch.hypercall_continuation = 1;
+    va_end(args);
+    return op;
+}
+
+void hyper_dom_mem_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,r35,r36;
+    u64 ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+    vmx_vcpu_get_gr(vcpu,19,&r35);
+    vmx_vcpu_get_gr(vcpu,20,&r36);
+    ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36);
+    printf("do_dom_mem return value: %lx\n", ret);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    /* Hard to define a special return value to indicate hypercall restart.
+     * So just add a new mark, which is SMP safe
+     */
+    if (vcpu->arch.hypercall_continuation == 1)
+       vcpu->arch.hypercall_continuation = 0;
+    else
+       vmx_vcpu_increment_iip(vcpu);
+}
+
+
+void hyper_sched_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_sched_op(r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_dom0_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_dom0_op((dom0_op_t *)r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_event_channel_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_event_channel_op((evtchn_op_t *)r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_xen_version(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_xen_version((int )r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_lock_page(VCPU *vcpu, u64 va, u64 lock)
+{
+    int i;
+    ia64_rr rr;
+    thash_cb_t *hcb;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    rr = vmx_vcpu_rr(vcpu, va);
+    return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock);
+}
+
+/*
+ * Lock guest page in vTLB, so that it's not relinquished by recycle
+ * session when HV is servicing that hypercall.
+ */
+void hyper_lock_page(void)
+{
+//TODO:
+    VCPU *vcpu=current;
+    u64 va,lock, ret;
+    vmx_vcpu_get_gr(vcpu,16,&va);
+    vmx_vcpu_get_gr(vcpu,17,&lock);
+    ret=do_lock_page(vcpu, va, lock);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_set_shared_page(VCPU *vcpu, u64 gpa)
+{
+    u64 shared_info, o_info;
+    struct domain *d = vcpu->domain;
+    struct vcpu *v;
+    if(vcpu->domain!=dom0)
+        return -EPERM;
+    shared_info = __gpa_to_mpa(vcpu->domain, gpa);
+    o_info = (u64)vcpu->domain->shared_info;
+    d->shared_info= (shared_info_t *)__va(shared_info);
+
+    /* Copy existing shared info into new page */
+    if (o_info) {
+       memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE);
+       for_each_vcpu(d, v) {
+               v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+       }
+       /* If original page belongs to xen heap, then relinguish back
+        * to xen heap. Or else, leave to domain itself to decide.
+        */
+       if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
+               free_xenheap_page(o_info);
+    } else
+        memset(d->shared_info, 0, PAGE_SIZE);
+    return 0;
+}
+
+void hyper_set_shared_page(void)
+{
+    VCPU *vcpu=current;
+    u64 gpa,ret;
+    vmx_vcpu_get_gr(vcpu,16,&gpa);
+
+    ret=do_set_shared_page(vcpu, gpa);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+/*
+void hyper_grant_table_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+
+    ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+}
+*/
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_init.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_init.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,375 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_init.c: initialization work for vt specific domain
+ * Copyright (c) 2005, Intel Corporation.
+ *     Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
+ *     Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx>
+ *     Fred Yang <fred.yang@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ * 05/08/16 Kun tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
+ * Disable doubling mapping
+ *
+ * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
+ * Simplified design in first step:
+ *     - One virtual environment
+ *     - Domain is bound to one LP
+ * Later to support guest SMP:
+ *     - Need interface to handle VP scheduled to different LP
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/pal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/vmx_vcpu.h>
+#include <xen/lib.h>
+#include <asm/vmmu.h>
+#include <public/arch-ia64.h>
+#include <public/io/ioreq.h>
+#include <asm/vmx_phy_mode.h>
+#include <asm/processor.h>
+#include <asm/vmx.h>
+#include <xen/mm.h>
+
+/* Global flag to identify whether Intel vmx feature is on */
+u32 vmx_enabled = 0;
+static u32 vm_order;
+static u64 buffer_size;
+static u64 vp_env_info;
+static u64 vm_buffer = 0;      /* Buffer required to bring up VMX feature */
+u64 __vsa_base = 0;    /* Run-time service base of VMX */
+
+/* Check whether vt feature is enabled or not. */
+void
+identify_vmx_feature(void)
+{
+       pal_status_t ret;
+       u64 avail = 1, status = 1, control = 1;
+
+       vmx_enabled = 0;
+       /* Check VT-i feature */
+       ret = ia64_pal_proc_get_features(&avail, &status, &control);
+       if (ret != PAL_STATUS_SUCCESS) {
+               printk("Get proc features failed.\n");
+               goto no_vti;
+       }
+
+       /* FIXME: do we need to check status field, to see whether
+        * PSR.vm is actually enabled? If yes, aonther call to
+        * ia64_pal_proc_set_features may be reuqired then.
+        */
+       printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n",
+               avail, status, control, avail & PAL_PROC_VM_BIT);
+       if (!(avail & PAL_PROC_VM_BIT)) {
+               printk("No VT feature supported.\n");
+               goto no_vti;
+       }
+
+       ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
+       if (ret != PAL_STATUS_SUCCESS) {
+               printk("Get vp environment info failed.\n");
+               goto no_vti;
+       }
+
+       /* Does xen has ability to decode itself? */
+       if (!(vp_env_info & VP_OPCODE))
+               printk("WARNING: no opcode provided from hardware(%lx)!!!\n", 
vp_env_info);
+       vm_order = get_order(buffer_size);
+       printk("vm buffer size: %d, order: %d\n", buffer_size, vm_order);
+
+       vmx_enabled = 1;
+no_vti:
+       return;
+}
+
+/*
+ * Init virtual environment on current LP
+ * vsa_base is the indicator whether it's first LP to be initialized
+ * for current domain.
+ */ 
+void
+vmx_init_env(void)
+{
+       u64 status, tmp_base;
+
+       if (!vm_buffer) {
+               vm_buffer = alloc_xenheap_pages(vm_order);
+               ASSERT(vm_buffer);
+               printk("vm_buffer: 0x%lx\n", vm_buffer);
+       }
+
+       status=ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : 
VP_INIT_ENV_INITALIZE,
+                                   __pa(vm_buffer),
+                                   vm_buffer,
+                                   &tmp_base);
+
+       if (status != PAL_STATUS_SUCCESS) {
+               printk("ia64_pal_vp_init_env failed.\n");
+               return -1;
+       }
+
+       if (!__vsa_base)
+               __vsa_base = tmp_base;
+       else
+               ASSERT(tmp_base != __vsa_base);
+
+#ifdef XEN_DBL_MAPPING
+       /* Init stub for rr7 switch */
+       vmx_init_double_mapping_stub();
+#endif 
+}
+
+void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c)
+{
+       struct domain *d = v->domain;
+       shared_iopage_t *sp;
+
+       ASSERT(d != dom0); /* only for non-privileged vti domain */
+       d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
+       sp = get_sp(d);
+       memset((char *)sp,0,PAGE_SIZE);
+       /* FIXME: temp due to old CP */
+       sp->sp_global.eport = 2;
+#ifdef V_IOSAPIC_READY
+       sp->vcpu_number = 1;
+#endif
+       /* TEMP */
+       d->arch.vmx_platform.pib_base = 0xfee00000UL;
+
+       /* One more step to enable interrupt assist */
+       set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
+       /* Only open one port for I/O and interrupt emulation */
+       if (v == d->vcpu[0]) {
+           memset(&d->shared_info->evtchn_mask[0], 0xff,
+               sizeof(d->shared_info->evtchn_mask));
+           clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
+       }
+
+       /* FIXME: only support PMT table continuously by far */
+       d->arch.pmt = __va(c->pt_base);
+       d->arch.max_pfn = c->pt_max_pfn;
+
+       vmx_final_setup_domain(d);
+}
+
+typedef union {
+       u64 value;
+       struct {
+               u64 number : 8;
+               u64 revision : 8;
+               u64 model : 8;
+               u64 family : 8;
+               u64 archrev : 8;
+               u64 rv : 24;
+       };
+} cpuid3_t;
+
+/* Allocate vpd from xenheap */
+static vpd_t *alloc_vpd(void)
+{
+       int i;
+       cpuid3_t cpuid3;
+       vpd_t *vpd;
+
+       vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
+       if (!vpd) {
+               printk("VPD allocation failed.\n");
+               return NULL;
+       }
+
+       printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t));
+       memset(vpd, 0, VPD_SIZE);
+       /* CPUID init */
+       for (i = 0; i < 5; i++)
+               vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+       /* Limit the CPUID number to 5 */
+       cpuid3.value = vpd->vcpuid[3];
+       cpuid3.number = 4;      /* 5 - 1 */
+       vpd->vcpuid[3] = cpuid3.value;
+
+       vpd->vdc.d_vmsw = 1;
+       return vpd;
+}
+
+
+#ifdef CONFIG_VTI
+/*
+ * Create a VP on intialized VMX environment.
+ */
+static void
+vmx_create_vp(struct vcpu *v)
+{
+       u64 ret;
+       vpd_t *vpd = v->arch.arch_vmx.vpd;
+       u64 ivt_base;
+    extern char vmx_ia64_ivt;
+       /* ia64_ivt is function pointer, so need this tranlation */
+       ivt_base = (u64) &vmx_ia64_ivt;
+       printk("ivt_base: 0x%lx\n", ivt_base);
+       ret = ia64_pal_vp_create(vpd, ivt_base, 0);
+       if (ret != PAL_STATUS_SUCCESS)
+               panic("ia64_pal_vp_create failed. \n");
+}
+
+#ifdef XEN_DBL_MAPPING
+void vmx_init_double_mapping_stub(void)
+{
+       u64 base, psr;
+       extern void vmx_switch_rr7(void);
+
+       base = (u64) &vmx_switch_rr7;
+       base = *((u64*)base);
+
+       psr = ia64_clear_ic();
+       ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB,
+                pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)),
+                RR7_SWITCH_SHIFT);
+       ia64_set_psr(psr);
+       ia64_srlz_i();
+       printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", 
(u64)(__pa(base)));
+}
+#endif
+
+/* Other non-context related tasks can be done in context switch */
+void
+vmx_save_state(struct vcpu *v)
+{
+       u64 status, psr;
+       u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
+
+       /* FIXME: about setting of pal_proc_vector... time consuming */
+       status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0);
+       if (status != PAL_STATUS_SUCCESS)
+               panic("Save vp status failed\n");
+
+#ifdef XEN_DBL_MAPPING
+       /* FIXME: Do we really need purge double mapping for old vcpu?
+        * Since rid is completely different between prev and next,
+        * it's not overlap and thus no MCA possible... */
+       dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+        vmx_purge_double_mapping(dom_rr7, KERNEL_START,
+                                (u64)v->arch.vtlb->ts->vhpt->hash);
+#endif
+
+       /* Need to save KR when domain switch, though HV itself doesn;t
+        * use them.
+        */
+       v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
+       v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
+       v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
+       v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
+       v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
+       v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
+       v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
+       v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
+}
+
+/* Even guest is in physical mode, we still need such double mapping */
+void
+vmx_load_state(struct vcpu *v)
+{
+       u64 status, psr;
+       u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
+       u64 pte_xen, pte_vhpt;
+       int i;
+
+       status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0);
+       if (status != PAL_STATUS_SUCCESS)
+               panic("Restore vp status failed\n");
+
+#ifdef XEN_DBL_MAPPING
+       dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+       pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+       pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> 
PAGE_SHIFT), PAGE_KERNEL));
+       vmx_insert_double_mapping(dom_rr7, KERNEL_START,
+                                 (u64)v->arch.vtlb->ts->vhpt->hash,
+                                 pte_xen, pte_vhpt);
+#endif
+
+       ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
+       ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
+       ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
+       ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
+       ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
+       ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
+       ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
+       ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
+       /* Guest vTLB is not required to be switched explicitly, since
+        * anchored in vcpu */
+}
+
+#ifdef XEN_DBL_MAPPING
+/* Purge old double mapping and insert new one, due to rr7 change */
+void
+vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7)
+{
+       u64 pte_xen, pte_vhpt, vhpt_base;
+
+    vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash;
+    vmx_purge_double_mapping(oldrr7, KERNEL_START,
+                                vhpt_base);
+
+       pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+       pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), 
PAGE_KERNEL));
+       vmx_insert_double_mapping(newrr7, KERNEL_START,
+                                 vhpt_base,
+                                 pte_xen, pte_vhpt);
+}
+#endif // XEN_DBL_MAPPING
+#endif // CONFIG_VTI
+
+/*
+ * Initialize VMX envirenment for guest. Only the 1st vp/vcpu
+ * is registered here.
+ */
+void
+vmx_final_setup_domain(struct domain *d)
+{
+       struct vcpu *v = d->vcpu[0];
+       vpd_t *vpd;
+
+       /* Allocate resources for vcpu 0 */
+       //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));
+
+       vpd = alloc_vpd();
+       ASSERT(vpd);
+
+       v->arch.arch_vmx.vpd = vpd;
+       vpd->virt_env_vaddr = vm_buffer;
+
+#ifdef CONFIG_VTI
+       /* v->arch.schedule_tail = arch_vmx_do_launch; */
+       vmx_create_vp(v);
+
+       /* Set this ed to be vmx */
+       set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags);
+
+       /* Physical mode emulation initialization, including
+       * emulation ID allcation and related memory request
+       */
+       physical_mode_init(v);
+
+       vlsapic_reset(v);
+       vtm_init(v);
+#endif
+
+       /* Other vmx specific initialization work */
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_interrupt.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Thu Sep  1 18:46:28 2005
@@ -0,0 +1,388 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_interrupt.c: handle inject interruption.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ *  Xiaoyan Feng (Fleming Feng)  <fleming.feng@xxxxxxxxx>
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx_pal_vsa.h>
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+void
+collect_interruption(VCPU *vcpu)
+{
+    u64 ipsr;
+    u64 vdcr;
+    u64 vifs;
+    IA64_PSR vpsr;
+    REGS * regs = vcpu_regs(vcpu);
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+    if(vpsr.ic){
+       extern void vmx_dorfirfi(void);
+       if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
+               panic("COLLECT interruption for vmx_dorfirfi\n");
+
+        /* Sync mpsr id/da/dd/ss/ed bits to vipsr
+         * since after guest do rfi, we still want these bits on in
+         * mpsr
+         */
+
+        ipsr = regs->cr_ipsr;
+        vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+             | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED));
+        vmx_vcpu_set_ipsr(vcpu, vpsr.val);
+
+        /* Currently, for trap, we do not advance IIP to next
+         * instruction. That's because we assume caller already
+         * set up IIP correctly
+         */
+
+        vmx_vcpu_set_iip(vcpu , regs->cr_iip);
+
+        /* set vifs.v to zero */
+        vifs = VPD_CR(vcpu,ifs);
+        vifs &= ~IA64_IFS_V;
+        vmx_vcpu_set_ifs(vcpu, vifs);
+
+        vmx_vcpu_set_iipa(vcpu, regs->cr_iipa);
+    }
+
+    vdcr = VPD_CR(vcpu,dcr);
+
+    /* Set guest psr
+     * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+     * be: set to the value of dcr.be
+     * pp: set to the value of dcr.pp
+     */
+    vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+    vpsr.val |= ( vdcr & IA64_DCR_BE);
+
+    /* VDCR pp bit position is different from VPSR pp bit */
+    if ( vdcr & IA64_DCR_PP ) {
+        vpsr.val |= IA64_PSR_PP;
+    } else {
+        vpsr.val &= ~IA64_PSR_PP;;
+    }
+
+    vmx_vcpu_set_psr(vcpu, vpsr.val);
+
+}
+int
+inject_guest_interruption(VCPU *vcpu, u64 vec)
+{
+    u64 viva;
+    REGS *regs;
+    regs=vcpu_regs(vcpu);
+
+    collect_interruption(vcpu);
+
+    vmx_vcpu_get_iva(vcpu,&viva);
+    regs->cr_iip = viva + vec;
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ *  set_ifa: if true, set vIFA
+ *  set_itir: if true, set vITIR
+ *  set_iha: if true, set vIHA
+ */
+void
+set_ifa_itir_iha (VCPU *vcpu, u64 vadr,
+          int set_ifa, int set_itir, int set_iha)
+{
+    IA64_PSR vpsr;
+    u64 value;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    /* Vol2, Table 8-1 */
+    if ( vpsr.ic ) {
+        if ( set_ifa){
+            vmx_vcpu_set_ifa(vcpu, vadr);
+        }
+        if ( set_itir) {
+            value = vmx_vcpu_get_itir_on_fault(vcpu, vadr);
+            vmx_vcpu_set_itir(vcpu, value);
+        }
+
+        if ( set_iha) {
+            vmx_vcpu_thash(vcpu, vadr, &value);
+            vmx_vcpu_set_iha(vcpu, value);
+        }
+    }
+
+
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dtlb_fault (VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic, IFA, ITIR, IHA */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ *  @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+itlb_fault (VCPU *vcpu, u64 vadr)
+{
+     /* If vPSR.ic, IFA, ITIR, IHA */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ *  @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+nested_dtlb (VCPU *vcpu)
+{
+    inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ *  @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_dtlb (VCPU *vcpu, u64 vadr)
+{
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_itlb (VCPU *vcpu, u64 vadr)
+{
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void
+_vhpt_fault(VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic, IFA, ITIR, IHA*/
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR);
+
+
+}
+
+/*
+ * VHPT Instruction Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+ivhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dvhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void
+_general_exception (VCPU *vcpu)
+{
+    inject_guest_interruption(vcpu,IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_dep (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rsv_reg_field (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void
+privilege_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+unimpl_daddr (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_reg (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vaddr: Optional, if t == REGISTER
+ */
+static void
+_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t)
+{
+    /* If vPSR.ic && t == DATA/INST, IFA */
+    if ( t == DATA || t == INSTRUCTION ) {
+        /* IFA */
+        set_ifa_itir_iha (vcpu, vadr, 1, 0, 0);
+    }
+
+    inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * IR Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+static void
+ir_nat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+inat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rnat_consumption (VCPU *vcpu)
+{
+    _nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dnat_page_consumption (VCPU *vcpu, uint64_t vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+void
+page_not_present(VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic, IFA, ITIR */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_irq_ia64.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_irq_ia64.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,127 @@
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/jiffies.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/slab.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>      /* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/threads.h>
+#include <linux/bitops.h>
+
+#include <asm/delay.h>
+#include <asm/intrinsics.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#define IRQ_DEBUG      0
+
+#ifdef  CONFIG_VTI
+#define vmx_irq_enter()                \
+       add_preempt_count(HARDIRQ_OFFSET);
+
+/* Now softirq will be checked when leaving hypervisor, or else
+ * scheduler irq will be executed too early.
+ */
+#define vmx_irq_exit(void)     \
+       sub_preempt_count(HARDIRQ_OFFSET);
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+{
+       unsigned long saved_tpr;
+       int     wake_dom0 = 0;
+
+
+#if IRQ_DEBUG
+       {
+               unsigned long bsp, sp;
+
+               /*
+                * Note: if the interrupt happened while executing in
+                * the context switch routine (ia64_switch_to), we may
+                * get a spurious stack overflow here.  This is
+                * because the register and the memory stack are not
+                * switched atomically.
+                */
+               bsp = ia64_getreg(_IA64_REG_AR_BSP);
+               sp = ia64_getreg(_IA64_REG_AR_SP);
+
+               if ((sp - bsp) < 1024) {
+                       static unsigned char count;
+                       static long last_time;
+
+                       if (jiffies - last_time > 5*HZ)
+                               count = 0;
+                       if (++count < 5) {
+                               last_time = jiffies;
+                               printk("ia64_handle_irq: DANGER: less than "
+                                      "1KB of free stack space!!\n"
+                                      "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+                       }
+               }
+       }
+#endif /* IRQ_DEBUG */
+
+       /*
+        * Always set TPR to limit maximum interrupt nesting depth to
+        * 16 (without this, it would be ~240, which could easily lead
+        * to kernel stack overflows).
+        */
+       vmx_irq_enter();
+       saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+       ia64_srlz_d();
+       while (vector != IA64_SPURIOUS_INT_VECTOR) {
+           if (!IS_RESCHEDULE(vector)) {
+               ia64_setreg(_IA64_REG_CR_TPR, vector);
+               ia64_srlz_d();
+
+               if (vector != IA64_TIMER_VECTOR) {
+                       /* FIXME: Leave IRQ re-route later */
+                       vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
+                       wake_dom0 = 1;
+               }
+               else {  // FIXME: Handle Timer only now
+                       __do_IRQ(local_vector_to_irq(vector), regs);
+               }
+               
+               /*
+                * Disable interrupts and send EOI:
+                */
+               local_irq_disable();
+               ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+           }
+           else {
+                printf("Oops: RESCHEDULE IPI absorbed by HV\n");
+            }
+           ia64_eoi();
+           vector = ia64_get_ivr();
+       }
+       /*
+        * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
+        * handler needs to be able to wait for further keyboard interrupts, which can't
+        * come through until ia64_eoi() has been done.
+        */
+       vmx_irq_exit();
+       if ( wake_dom0 && current != dom0 ) 
+               vcpu_wake(dom0->vcpu[0]);
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_ivt.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_ivt.S       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1085 @@
+/*
+ * arch/ia64/kernel/vmx_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *     Stephane Eranian <eranian@xxxxxxxxxx>
+ *     David Mosberger <davidm@xxxxxxxxxx>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *     Asit Mallick <asit.k.mallick@xxxxxxxxx>
+ *      Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ *      Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
+ *      Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT.
+ *
+ * 05/3/20 Xuefei Xu  (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ *              Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *             // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ *  entry offset ----/     /         /                  /          /
+ *  entry number ---------/         /                  /          /
+ *  size of the entry -------------/                  /          /
+ *  vector name -------------------------------------/          /
+ *  interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/vhpt.h>
+
+
+#if 0
+  /*
+   * This lets you track the last eight faults that occurred on the CPU.  Make sure ar.k2 isn't
+   * needed for something else before enabling this...
+   */
+# define VMX_DBG_FAULT(i)      mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
+#else
+# define VMX_DBG_FAULT(i)
+#endif
+
+#include "vmx_minstate.h"
+
+
+
+#define VMX_FAULT(n)    \
+vmx_fault_##n:;          \
+    br.sptk vmx_fault_##n;         \
+    ;;                  \
+
+
+#define VMX_REFLECT(n)                         \
+       mov r31=pr;                                                                     \
+       mov r19=n;                      /* prepare to save predicates */                \
+    mov r29=cr.ipsr;        \
+    ;;      \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7) br.sptk.many vmx_dispatch_reflection;        \
+    VMX_FAULT(n);            \
+
+
+GLOBAL_ENTRY(vmx_panic)
+    br.sptk.many vmx_panic
+    ;;
+END(vmx_panic)
+
+
+
+
+
+       .section .text.ivt,"ax"
+
+       .align 32768    // align on 32KB boundary
+       .global vmx_ia64_ivt
+vmx_ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vmx_vhpt_miss)
+    VMX_FAULT(0)
+END(vmx_vhpt_miss)
+
+       .org vmx_ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(vmx_itlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6) br.sptk vmx_fault_1
+    mov r16 = cr.ifa
+    ;;
+    thash r17 = r16
+    ttag r20 = r16
+    ;;
+vmx_itlb_loop:
+    cmp.eq p6,p0 = r0, r17
+(p6) br vmx_itlb_out
+    ;;
+    adds r22 = VLE_TITAG_OFFSET, r17
+    adds r23 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r24 = [r22]
+    ld8 r25 = [r23]
+    ;;
+    lfetch [r25]
+    cmp.eq  p6,p7 = r20, r24
+    ;;
+(p7)    mov r17 = r25;
+(p7)    br.sptk vmx_itlb_loop
+    ;;
+    adds r23 = VLE_PGFLAGS_OFFSET, r17
+    adds r24 = VLE_ITIR_OFFSET, r17
+    ;;
+    ld8 r26 = [r23]
+    ld8 r25 = [r24]
+    ;;
+    mov cr.itir = r25
+    ;;
+    itc.i r26
+    ;;
+    srlz.i
+    ;;
+    mov r23=r31
+    mov r22=b0
+    adds r16=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r16]
+    ;;
+    adds r19=VPD(VPSR),r18
+    movl r20=__vsa_base
+    ;;
+    ld8 r19=[r19]
+    ld8 r20=[r20]
+    ;;
+    br.sptk ia64_vmm_entry
+    ;;
+vmx_itlb_out:
+    mov r19 = 1
+    br.sptk vmx_dispatch_tlb_miss
+    VMX_FAULT(1);
+END(vmx_itlb_miss)
+
+       .org vmx_ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(vmx_dtlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk vmx_fault_2
+    mov r16 = cr.ifa
+    ;;
+    thash r17 = r16
+    ttag r20 = r16
+    ;;
+vmx_dtlb_loop:
+    cmp.eq p6,p0 = r0, r17
+(p6)br vmx_dtlb_out
+    ;;
+    adds r22 = VLE_TITAG_OFFSET, r17
+    adds r23 = VLE_CCHAIN_OFFSET, r17
+    ;;
+    ld8 r24 = [r22]
+    ld8 r25 = [r23]
+    ;;
+    lfetch [r25]
+    cmp.eq  p6,p7 = r20, r24
+    ;;
+(p7)mov r17 = r25;
+(p7)br.sptk vmx_dtlb_loop
+    ;;
+    adds r23 = VLE_PGFLAGS_OFFSET, r17
+    adds r24 = VLE_ITIR_OFFSET, r17
+    ;;
+    ld8 r26 = [r23]
+    ld8 r25 = [r24]
+    ;;
+    mov cr.itir = r25
+    ;;
+    itc.d r26
+    ;;
+    srlz.d;
+    ;;
+    mov r23=r31
+    mov r22=b0
+    adds r16=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r16]
+    ;;
+    adds r19=VPD(VPSR),r18
+    movl r20=__vsa_base
+    ;;
+    ld8 r19=[r19]
+    ld8 r20=[r20]
+    ;;
+    br.sptk ia64_vmm_entry
+    ;;
+vmx_dtlb_out:
+    mov r19 = 2
+    br.sptk vmx_dispatch_tlb_miss
+    VMX_FAULT(2);
+END(vmx_dtlb_miss)
+
+       .org vmx_ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(vmx_alt_itlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_3
+       mov r16=cr.ifa          // get address that caused the TLB miss
+       movl r17=PAGE_KERNEL
+       mov r24=cr.ipsr
+       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+       ;;
+       and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
+       shr.u r18=r16,55        // move address bit 59 to bit 4
+       ;;
+       and r18=0x10,r18        // bit 4=address-bit(61)
+       or r19=r17,r19          // insert PTE control bits into r19
+       ;;
+       or r19=r19,r18          // set bit 4 (uncached) if the access was to region 6
+       ;;
+       itc.i r19               // insert the TLB entry
+       mov pr=r31,-1
+       rfi
+    VMX_FAULT(3);
+END(vmx_alt_itlb_miss)
+
+
+       .org vmx_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(vmx_alt_dtlb_miss)
+       mov r31=pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_4
+       mov r16=cr.ifa          // get address that caused the TLB miss
+       movl r17=PAGE_KERNEL
+       mov r20=cr.isr
+       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+       mov r24=cr.ipsr
+       ;;
+       and r22=IA64_ISR_CODE_MASK,r20          // get the isr.code field
+       tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
+       shr.u r18=r16,55                        // move address bit 59 to bit 4
+       and r19=r19,r16                         // clear ed, reserved bits, and PTE control bits
+       tbit.nz p9,p0=r20,IA64_ISR_NA_BIT       // is non-access bit on?
+       ;;
+       and r18=0x10,r18        // bit 4=address-bit(61)
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22    // check isr.code field
+       dep r24=-1,r24,IA64_PSR_ED_BIT,1
+       or r19=r19,r17          // insert PTE control bits into r19
+       ;;
+       or r19=r19,r18          // set bit 4 (uncached) if the access was to region 6
+(p6) mov cr.ipsr=r24
+       ;;
+(p7) itc.d r19         // insert the TLB entry
+       mov pr=r31,-1
+       rfi
+    VMX_FAULT(4);
+END(vmx_alt_dtlb_miss)
+
+       .org vmx_ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(vmx_nested_dtlb_miss)
+    VMX_FAULT(5)
+END(vmx_nested_dtlb_miss)
+
+       .org vmx_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(vmx_ikey_miss)
+       VMX_REFLECT(6)
+END(vmx_ikey_miss)
+
+       .org vmx_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(vmx_dkey_miss)
+       VMX_REFLECT(7)
+END(vmx_dkey_miss)
+
+       .org vmx_ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(vmx_dirty_bit)
+       VMX_REFLECT(8)
+END(vmx_idirty_bit)
+
+       .org vmx_ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(vmx_iaccess_bit)
+       VMX_REFLECT(9)
+END(vmx_iaccess_bit)
+
+       .org vmx_ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(vmx_daccess_bit)
+       VMX_REFLECT(10)
+END(vmx_daccess_bit)
+
+       .org vmx_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(vmx_break_fault)
+       mov r31=pr
+    mov r19=11
+    mov r30=cr.iim
+    movl r29=0x1100
+    ;;
+    cmp.eq p6,p7=r30,r0
+    (p6) br.sptk vmx_fault_11
+    ;;
+    cmp.eq  p6,p7=r29,r30
+    (p6) br.dptk.few vmx_hypercall_dispatch
+    (p7) br.sptk.many vmx_dispatch_break_fault
+    ;;
+    VMX_FAULT(11);
+END(vmx_break_fault)
+
+       .org vmx_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(vmx_interrupt)
+       mov r31=pr              // prepare to save predicates
+    mov r19=12
+    mov r29=cr.ipsr
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT
+    ;;
+(p7) br.sptk vmx_dispatch_interrupt
+    ;;
+       mov r27=ar.rsc                  /* M */
+       mov r20=r1                      /* A */
+       mov r25=ar.unat         /* M */
+       mov r26=ar.pfs                  /* I */
+       mov r28=cr.iip                  /* M */
+       cover               /* B (or nothing) */
+       ;;
+       mov r1=sp
+       ;;
+       invala                          /* M */
+       mov r30=cr.ifs
+       ;;
+    addl r1=-IA64_PT_REGS_SIZE,r1
+    ;;
+       adds r17=2*L1_CACHE_BYTES,r1            /* really: biggest cache-line size */
+       adds r16=PT(CR_IPSR),r1
+       ;;
+       lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+       st8 [r16]=r29           /* save cr.ipsr */
+       ;;
+       lfetch.fault.excl.nt1 [r17]
+       mov r29=b0
+       ;;
+       adds r16=PT(R8),r1      /* initialize first base pointer */
+       adds r17=PT(R9),r1      /* initialize second base pointer */
+       mov r18=r0                      /* make sure r18 isn't NaT */
+       ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+        ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+        ;;
+       st8 [r16]=r28,16        /* save cr.iip */
+       st8 [r17]=r30,16        /* save cr.ifs */
+       mov r8=ar.fpsr          /* M */
+       mov r9=ar.csd
+       mov r10=ar.ssd
+       movl r11=FPSR_DEFAULT   /* L-unit */
+       ;;
+       st8 [r16]=r25,16        /* save ar.unat */
+       st8 [r17]=r26,16        /* save ar.pfs */
+       shl r18=r18,16          /* compute ar.rsc to be used for "loadrs" */
+       ;;
+    st8 [r16]=r27,16   /* save ar.rsc */
+    adds r17=16,r17    /* skip over ar_rnat field */
+    ;;          /* avoid RAW on r16 & r17 */
+    st8 [r17]=r31,16   /* save predicates */
+    adds r16=16,r16    /* skip over ar_bspstore field */
+    ;;
+    st8 [r16]=r29,16   /* save b0 */
+    st8 [r17]=r18,16   /* save ar.rsc value for "loadrs" */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+    adds r12=-16,r1    /* switch to kernel memory stack (with 16 bytes of scratch) */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+    mov r13=r21    /* establish `current' */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+    dep r14=-1,r0,60,4
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+    adds r2=IA64_PT_REGS_R16_OFFSET,r1
+    ;;
+    mov r8=ar.ccv
+    movl r1=__gp       /* establish kernel global pointer */
+    ;;                                          \
+    bsw.1
+    ;;
+       alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+       mov out0=cr.ivr         // pass cr.ivr as first arg
+       add out1=16,sp          // pass pointer to pt_regs as second arg
+
+       ssm psr.ic
+    ;;
+    srlz.i
+       ;;
+    (p15) ssm psr.i
+       adds r3=8,r2            // set up second base pointer for SAVE_REST
+       srlz.i                  // ensure everybody knows psr.ic is back on
+       ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+    mov r18=b6
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+    mov r19=b7
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+    ;;
+    mov ar.fpsr=r11     /* M-unit */
+    st8 [r2]=r8,8      /* ar.ccv */
+    adds r24=PT(B6)-PT(F7),r3
+    ;;
+    stf.spill [r2]=f6,32
+    stf.spill [r3]=f7,32
+    ;;
+    stf.spill [r2]=f8,32
+    stf.spill [r3]=f9,32
+    ;;
+    stf.spill [r2]=f10
+    stf.spill [r3]=f11
+    adds r25=PT(B7)-PT(F11),r3
+    ;;
+    st8 [r24]=r18,16       /* b6 */
+    st8 [r25]=r19,16       /* b7 */
+    ;;
+    st8 [r24]=r9           /* ar.csd */
+    st8 [r25]=r10          /* ar.ssd */
+    ;;
+       srlz.d                  // make sure we see the effect of cr.ivr
+       movl r14=ia64_leave_nested
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=vmx_ia64_handle_irq
+       ;;
+END(vmx_interrupt)
+
+       .org vmx_ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(vmx_virtual_exirq)
+       VMX_DBG_FAULT(13)
+       mov r31=pr
+        mov r19=13
+        br.sptk vmx_dispatch_vexirq
+END(vmx_virtual_exirq)
+
+       .org vmx_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(14)
+       VMX_FAULT(14)
+
+
+       .org vmx_ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(15)
+       VMX_FAULT(15)
+
+
+       .org vmx_ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(16)
+       VMX_FAULT(16)
+
+       .org vmx_ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(17)
+       VMX_FAULT(17)
+
+       .org vmx_ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(18)
+       VMX_FAULT(18)
+
+       .org vmx_ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+       VMX_DBG_FAULT(19)
+       VMX_FAULT(19)
+
+    .org vmx_ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(vmx_page_not_present)
+       VMX_REFLECT(20)
+END(vmx_page_not_present)
+
+    .org vmx_ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(vmx_key_permission)
+       VMX_REFLECT(21)
+END(vmx_key_permission)
+
+    .org vmx_ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(vmx_iaccess_rights)
+       VMX_REFLECT(22)
+END(vmx_iaccess_rights)
+
+       .org vmx_ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(vmx_daccess_rights)
+       VMX_REFLECT(23)
+END(vmx_daccess_rights)
+
+       .org vmx_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(vmx_general_exception)
+    VMX_FAULT(24)
+//    VMX_REFLECT(24)
+END(vmx_general_exception)
+
+       .org vmx_ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(vmx_disabled_fp_reg)
+       VMX_REFLECT(25)
+END(vmx_disabled_fp_reg)
+
+       .org vmx_ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(vmx_nat_consumption)
+       VMX_REFLECT(26)
+END(vmx_nat_consumption)
+
+       .org vmx_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(vmx_speculation_vector)
+       VMX_REFLECT(27)
+END(vmx_speculation_vector)
+
+       .org vmx_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(28)
+       VMX_FAULT(28)
+
+       .org vmx_ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(vmx_debug_vector)
+       VMX_DBG_FAULT(29)
+       VMX_FAULT(29)
+END(vmx_debug_vector)
+
+       .org vmx_ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(vmx_unaligned_access)
+       VMX_REFLECT(30)
+END(vmx_unaligned_access)
+
+       .org vmx_ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(vmx_unsupported_data_reference)
+       VMX_REFLECT(31)
+END(vmx_unsupported_data_reference)
+
+       .org vmx_ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(vmx_floating_point_fault)
+       VMX_REFLECT(32)
+END(vmx_floating_point_fault)
+
+       .org vmx_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(vmx_floating_point_trap)
+       VMX_REFLECT(33)
+END(vmx_floating_point_trap)
+
+       .org vmx_ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(vmx_lower_privilege_trap)
+       VMX_REFLECT(34)
+END(vmx_lower_privilege_trap)
+
+       .org vmx_ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(vmx_taken_branch_trap)
+       VMX_REFLECT(35)
+END(vmx_taken_branch_trap)
+
+       .org vmx_ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(vmx_single_step_trap)
+       VMX_REFLECT(36)
+END(vmx_single_step_trap)
+
+       .org vmx_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(vmx_virtualization_fault)
+       VMX_DBG_FAULT(37)
+       mov r31=pr
+    mov r19=37
+    br.sptk vmx_dispatch_virtualization_fault
+END(vmx_virtualization_fault)
+
+       .org vmx_ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(38)
+       VMX_FAULT(38)
+
+       .org vmx_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(39)
+       VMX_FAULT(39)
+
+       .org vmx_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(40)
+       VMX_FAULT(40)
+
+       .org vmx_ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(41)
+       VMX_FAULT(41)
+
+       .org vmx_ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(42)
+       VMX_FAULT(42)
+
+       .org vmx_ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(43)
+       VMX_FAULT(43)
+
+       .org vmx_ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(44)
+       VMX_FAULT(44)
+
+       .org vmx_ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(vmx_ia32_exception)
+       VMX_DBG_FAULT(45)
+       VMX_FAULT(45)
+END(vmx_ia32_exception)
+
+       .org vmx_ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(vmx_ia32_intercept)
+       VMX_DBG_FAULT(46)
+       VMX_FAULT(46)
+END(vmx_ia32_intercept)
+
+       .org vmx_ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
+ENTRY(vmx_ia32_interrupt)
+       VMX_DBG_FAULT(47)
+       VMX_FAULT(47)
+END(vmx_ia32_interrupt)
+
+       .org vmx_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(48)
+       VMX_FAULT(48)
+
+       .org vmx_ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(49)
+       VMX_FAULT(49)
+
+       .org vmx_ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(50)
+       VMX_FAULT(50)
+
+       .org vmx_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(51)
+       VMX_FAULT(51)
+
+       .org vmx_ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(52)
+       VMX_FAULT(52)
+
+       .org vmx_ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(53)
+       VMX_FAULT(53)
+
+       .org vmx_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(54)
+       VMX_FAULT(54)
+
+       .org vmx_ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(55)
+       VMX_FAULT(55)
+
+       .org vmx_ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(56)
+       VMX_FAULT(56)
+
+       .org vmx_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(57)
+       VMX_FAULT(57)
+
+       .org vmx_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(58)
+       VMX_FAULT(58)
+
+       .org vmx_ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(59)
+       VMX_FAULT(59)
+
+       .org vmx_ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(60)
+       VMX_FAULT(60)
+
+       .org vmx_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(61)
+       VMX_FAULT(61)
+
+       .org vmx_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(62)
+       VMX_FAULT(62)
+
+       .org vmx_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(63)
+       VMX_FAULT(63)
+
+       .org vmx_ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+    VMX_DBG_FAULT(64)
+       VMX_FAULT(64)
+
+       .org vmx_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(65)
+       VMX_FAULT(65)
+
+       .org vmx_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(66)
+       VMX_FAULT(66)
+
+       .org vmx_ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+       VMX_DBG_FAULT(67)
+       VMX_FAULT(67)
+
+       .org vmx_ia64_ivt+0x8000
+    // There is no particular reason for this code to be here, other than that
+    // there happens to be space here that would go unused otherwise.  If this
+    // fault ever gets "unreserved", simply moved the following code to a more
+    // suitable spot...
+
+
+ENTRY(vmx_dispatch_reflection)
+    /*
+     * Input:
+     *  psr.ic: off
+     *  r19:    intr type (offset into ivt, see ia64_int.h)
+     *  r31:    contains saved predicates (pr)
+     */
+    VMX_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,4,0
+    mov out0=cr.ifa
+    mov out1=cr.isr
+    mov out2=cr.iim
+    mov out3=r15
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_reflect_interruption
+END(vmx_dispatch_reflection)
+
+ENTRY(vmx_dispatch_virtualization_fault)
+    VMX_SAVE_MIN_WITH_COVER_R19
+    ;;
+    alloc r14=ar.pfs,0,0,3,0        // now it's safe (must be first in insn 
group!)
+    mov out0=r13        //vcpu
+    mov out1=r4         //cause
+    mov out2=r5         //opcode
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_emulate
+END(vmx_dispatch_virtualization_fault)
+
+
+ENTRY(vmx_dispatch_vexirq)
+    VMX_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,1,0
+    mov out0=r13
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vexirq
+END(vmx_dispatch_vexirq)
+
+ENTRY(vmx_dispatch_tlb_miss)
+    VMX_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,3,0
+    mov out0=r13
+    mov out1=r15
+    mov out2=cr.ifa
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_hpw_miss
+END(vmx_dispatch_tlb_miss)
+
+
+ENTRY(vmx_dispatch_break_fault)
+    VMX_SAVE_MIN_WITH_COVER_R19
+    ;;
+    ;;
+    alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+    mov out0=cr.ifa
+    adds out1=16,sp
+    mov out2=cr.isr     // FIXME: pity to make this slow access twice
+    mov out3=cr.iim     // FIXME: pity to make this slow access twice
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15)ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    movl r14=ia64_leave_hypervisor
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_ia64_handle_break
+    ;;
+END(vmx_dispatch_break_fault)
+
+
+ENTRY(vmx_hypercall_dispatch)
+    VMX_SAVE_MIN_WITH_COVER
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    (p15) ssm psr.i               // restore psr.i
+    adds r3=16,r2                // set up second base pointer
+    ;;
+    VMX_SAVE_REST
+    ;;
+    movl r14=ia64_leave_hypervisor
+    movl r2=hyper_call_table
+    ;;
+    mov rp=r14
+    shladd r2=r15,3,r2
+    ;;
+    ld8 r2=[r2]
+    ;;
+    mov b6=r2
+    ;;
+    br.call.sptk.many b6=b6
+    ;;
+END(vmx_hypercall_dispatch)
+
+
+
+ENTRY(vmx_dispatch_interrupt)
+       VMX_SAVE_MIN_WITH_COVER_R19     // uses r31; defines r2 and r3
+       ;;
+       alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+       mov out0=cr.ivr         // pass cr.ivr as first arg
+       add out1=16,sp          // pass pointer to pt_regs as second arg
+
+       ssm psr.ic
+       ;;
+    srlz.i
+    ;;
+    (p15) ssm psr.i
+       adds r3=16,r2           // set up second base pointer for SAVE_REST
+       ;;
+       VMX_SAVE_REST
+       movl r14=ia64_leave_hypervisor
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=vmx_ia64_handle_irq
+END(vmx_dispatch_interrupt)
+
+
+
+    .rodata
+    .align 8
+    .globl hyper_call_table
+hyper_call_table:
+    data8 hyper_not_support     //hyper_set_trap_table     /*  0 */
+    data8 hyper_mmu_update
+    data8 hyper_not_support     //hyper_set_gdt
+    data8 hyper_not_support     //hyper_stack_switch
+    data8 hyper_not_support     //hyper_set_callbacks
+    data8 hyper_not_support     //hyper_fpu_taskswitch     /*  5 */
+    data8 hyper_sched_op
+    data8 hyper_dom0_op
+    data8 hyper_not_support     //hyper_set_debugreg
+    data8 hyper_not_support     //hyper_get_debugreg
+    data8 hyper_not_support     //hyper_update_descriptor  /* 10 */
+    data8 hyper_not_support     //hyper_set_fast_trap
+    data8 hyper_dom_mem_op
+    data8 hyper_not_support     //hyper_multicall
+    data8 hyper_not_support     //hyper_update_va_mapping
+    data8 hyper_not_support     //hyper_set_timer_op       /* 15 */
+    data8 hyper_event_channel_op
+    data8 hyper_xen_version
+    data8 hyper_not_support     //hyper_console_io
+    data8 hyper_not_support     //hyper_physdev_op
+    data8 hyper_not_support     //hyper_grant_table_op     /* 20 */
+    data8 hyper_not_support     //hyper_vm_assist
+    data8 hyper_not_support     //hyper_update_va_mapping_otherdomain
+    data8 hyper_not_support     //hyper_switch_vm86
+    data8 hyper_not_support     //hyper_boot_vcpu
+    data8 hyper_not_support     //hyper_ni_hypercall       /* 25 */
+    data8 hyper_not_support     //hyper_mmuext_op
+    data8 hyper_lock_page
+    data8 hyper_set_shared_page
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_minstate.h
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_minstate.h  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,333 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_minstate.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/mmu_context.h>
+#include <asm/offsets.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/vmx_vpd.h>
+#include <asm/cache.h>
+#include "entry.h"
+
+#define VMX_MINSTATE_START_SAVE_MIN         \
+    mov ar.rsc=0;       /* set enforced lazy mode, pl 0, little-endian, 
loadrs=0 */ \
+    ;;                                          \
+    mov.m r28=ar.rnat;                                  \
+    addl r22=IA64_RBS_OFFSET,r1;            /* compute base of RBS */       \
+    ;;                                          \
+    lfetch.fault.excl.nt1 [r22];                                \
+    addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;   /* compute base of memory 
stack */  \
+    mov r23=ar.bspstore;                /* save ar.bspstore */          \
+    ;;                                          \
+    mov ar.bspstore=r22;                /* switch to kernel RBS */      \
+    ;;                                          \
+    mov r18=ar.bsp;                                     \
+    mov ar.rsc=0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */    
 \
+
+
+
+#define VMX_MINSTATE_END_SAVE_MIN           \
+    bsw.1;          /* switch back to bank 1 (must be last in insn group) */   
 \
+    ;;
+
+
+#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL           \
+    /* begin to call pal vps sync_read and cleanup psr.pl */     \
+    add r25=IA64_VPD_BASE_OFFSET, r21;       \
+    movl r20=__vsa_base;     \
+    ;;          \
+    ld8 r25=[r25];      /* read vpd base */     \
+    ld8 r20=[r20];      /* read entry point */  \
+    ;;      \
+    mov r6=r25;     \
+    add r20=PAL_VPS_SYNC_READ,r20;  \
+    ;;  \
+{ .mii;  \
+    add r22=VPD(VPSR),r25;   \
+    mov r24=ip;        \
+    mov b0=r20;     \
+    ;;      \
+};           \
+{ .mmb;      \
+    add r24 = 0x20, r24;    \
+    mov r16 = cr.ipsr;  /* Temp workaround since psr.ic is off */ \
+    br.cond.sptk b0;        /*  call the service */ \
+    ;;              \
+};           \
+    ld8 r7=[r22];   \
+    /* deposite ipsr bit cpl into vpd.vpsr, since epc will change */    \
+    extr.u r30=r16, IA64_PSR_CPL0_BIT, 2;   \
+    ;;      \
+    dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2;   \
+    ;;      \
+    extr.u r30=r16, IA64_PSR_BE_BIT, 5;   \
+    ;;      \
+    dep r7=r30, r7, IA64_PSR_BE_BIT, 5;   \
+    ;;      \
+    extr.u r30=r16, IA64_PSR_RI_BIT, 2;   \
+    ;;      \
+    dep r7=r30, r7, IA64_PSR_RI_BIT, 2;   \
+    ;;      \
+    st8 [r22]=r7;      \
+    ;;
+
+
+
+#define IA64_CURRENT_REG    IA64_KR(CURRENT)  /* r21 is reserved for current 
pointer */
+//#define VMX_MINSTATE_GET_CURRENT(reg)   mov reg=IA64_CURRENT_REG
+#define VMX_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *  psr.ic: off
+ *  r31:    contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *  psr.ic: off
+ *   r2 = points to &pt_regs.r16
+ *   r8 = contents of ar.ccv
+ *   r9 = contents of ar.csd
+ *  r10 = contents of ar.ssd
+ *  r11 = FPSR_DEFAULT
+ *  r12 = kernel sp (kernel virtual address)
+ *  r13 = points to current task_struct (kernel virtual address)
+ *  p15 = TRUE if psr.i is set in cr.ipsr
+ *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *      preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)                           \
+/*  switch rr7 */       \
+    movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | 
(IA64_GRANULE_SHIFT << 2)); \
+    movl r17=(7<<61);        \
+    movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | 
(IA64_GRANULE_SHIFT << 2)); \
+    movl r22=(6<<61);        \
+    movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 
2) | 1);                \
+    movl r23=(5<<61);  \
+    ;;              \
+    mov rr[r17]=r16;             \
+    mov rr[r22]=r20;            \
+    mov rr[r23]=r18;            \
+    ;;      \
+    srlz.i;      \
+    ;;  \
+    VMX_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */                   \
+    mov r27=ar.rsc;         /* M */                         \
+    mov r20=r1;         /* A */                         \
+    mov r26=ar.unat;        /* M */                         \
+    mov r29=cr.ipsr;        /* M */                         \
+    mov r18=cr.isr;         \
+    COVER;              /* B;; (or nothing) */                  \
+    ;;                                          \
+    tbit.z p6,p0=r29,IA64_PSR_VM_BIT;       \
+    tbit.nz.or p6,p0 = r18,39; \
+    ;;        \
+(p6) br.sptk.few vmx_panic;        \
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT;   \
+    mov r1=r16;                     \
+/*    mov r21=r16;     */              \
+    /* switch from user to kernel RBS: */                           \
+    ;;                                          \
+    invala;             /* M */                         \
+    SAVE_IFS;                                       \
+    ;;                                          \
+    VMX_MINSTATE_START_SAVE_MIN                                 \
+    adds r17=2*L1_CACHE_BYTES,r1;       /* really: biggest cache-line size */  
     \
+    adds r16=PT(CR_IPSR),r1;                                \
+    ;;                                          \
+    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;                     \
+    st8 [r16]=r29;      /* save cr.ipsr */                      \
+    ;;                                          \
+    lfetch.fault.excl.nt1 [r17];                                \
+    tbit.nz p15,p0=r29,IA64_PSR_I_BIT;                          \
+    mov r29=b0                                      \
+    ;;                                          \
+    adds r16=PT(R8),r1; /* initialize first base pointer */             \
+    adds r17=PT(R9),r1; /* initialize second base pointer */                \
+    ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r8,16;                             \
+.mem.offset 8,0; st8.spill [r17]=r9,16;                             \
+        ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r10,24;                            \
+.mem.offset 8,0; st8.spill [r17]=r11,24;                            \
+        ;;                                          \
+    mov r8=ar.pfs;         /* I */                         \
+    mov r9=cr.iip;         /* M */                         \
+    mov r10=ar.fpsr;        /* M */                         \
+        ;;                      \
+    st8 [r16]=r9,16;    /* save cr.iip */                       \
+    st8 [r17]=r30,16;   /* save cr.ifs */                       \
+    sub r18=r18,r22;    /* r18=RSE.ndirty*8 */                      \
+    ;;          \
+    st8 [r16]=r26,16;   /* save ar.unat */                      \
+    st8 [r17]=r8,16;    /* save ar.pfs */                       \
+    shl r18=r18,16;     /* compute ar.rsc to be used for "loadrs" */           
 \
+    ;;                                          \
+    st8 [r16]=r27,16;   /* save ar.rsc */                       \
+    st8 [r17]=r28,16;   /* save ar.rnat */                      \
+    ;;          /* avoid RAW on r16 & r17 */                    \
+    st8 [r16]=r23,16;   /* save ar.bspstore */                      \
+    st8 [r17]=r31,16;   /* save predicates */                       \
+    ;;                                          \
+    st8 [r16]=r29,16;   /* save b0 */                           \
+    st8 [r17]=r18,16;   /* save ar.rsc value for "loadrs" */                \
+    ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r20,16;    /* save original r1 */             
 \
+.mem.offset 8,0; st8.spill [r17]=r12,16;                            \
+    adds r12=-16,r1;    /* switch to kernel memory stack (with 16 bytes of 
scratch) */  \
+    ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r13,16;                            \
+.mem.offset 8,0; st8.spill [r17]=r10,16;    /* save ar.fpsr */              \
+    mov r13=r21;   /* establish `current' */               \
+    ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r15,16;                            \
+.mem.offset 8,0; st8.spill [r17]=r14,16;                            \
+    ;;                                          \
+.mem.offset 0,0; st8.spill [r16]=r2,16;                             \
+.mem.offset 8,0; st8.spill [r17]=r3,16;                             \
+    adds r2=PT(F6),r1;                         \
+    ;;                                          \
+ .mem.offset 0,0; st8.spill [r16]=r4,16;                             \
+ .mem.offset 8,0; st8.spill [r17]=r5,16;                             \
+    ;;          \
+ .mem.offset 0,0; st8.spill [r16]=r6,16;     \
+ .mem.offset 8,0; st8.spill [r17]=r7,16;     \
+    mov r20=ar.ccv;      \
+    ;;  \
+  mov r18=cr.iipa;  \
+  mov r4=cr.isr;   \
+  mov r22=ar.unat;    \
+    ;;  \
+  st8 [r16]=r18,16;      \
+  st8 [r17]=r4;      \
+    ;;      \
+    adds r16=PT(EML_UNAT),r1;   \
+    adds r17=PT(AR_CCV),r1;                 \
+    ;;                      \
+    st8 [r16]=r22,8;     \
+    st8 [r17]=r20;       \
+    mov r4=r24;         \
+    mov r5=r25;         \
+     ;;  \
+    st8 [r16]=r0;  \
+    EXTRA;                                          \
+    mov r9=ar.csd;                                      \
+    mov r10=ar.ssd;                                     \
+    movl r11=FPSR_DEFAULT;   /* L-unit */                           \
+    movl r1=__gp;       /* establish kernel global pointer */               \
+    ;;                                          \
+    PAL_VSA_SYNC_READ_CLEANUP_PSR_PL           \
+    VMX_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *  psr.ic: on
+ *  r2: points to &pt_regs.f6
+ *  r3: points to &pt_regs.f7
+ *  r4,r5,scrach
+ *  r6: points to vpd
+ *  r7: vpsr
+ *  r9: contents of ar.csd
+ *  r10:    contents of ar.ssd
+ *  r11:    FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define VMX_SAVE_REST               \
+    tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT;  /* guest bank0 or bank1 ? */      \
+    ;;      \
+(pBN0) add r4=VPD(VBGR),r6;     \
+(pBN0) add r5=VPD(VBGR)+0x8,r6;     \
+(pBN0) add r7=VPD(VBNAT),r6;     \
+    ;;      \
+(pBN1) add r5=VPD(VGR)+0x8,r6;      \
+(pBN1) add r4=VPD(VGR),r6;      \
+(pBN1) add r7=VPD(VNAT),r6;      \
+    ;;      \
+.mem.offset 0,0; st8.spill [r4]=r16,16;     \
+.mem.offset 8,0; st8.spill [r5]=r17,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r18,16;     \
+.mem.offset 8,0; st8.spill [r5]=r19,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r20,16;     \
+.mem.offset 8,0; st8.spill [r5]=r21,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r22,16;     \
+.mem.offset 8,0; st8.spill [r5]=r23,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r24,16;     \
+.mem.offset 8,0; st8.spill [r5]=r25,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r26,16;     \
+.mem.offset 8,0; st8.spill [r5]=r27,16;     \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r28,16;     \
+.mem.offset 8,0; st8.spill [r5]=r29,16;     \
+    mov r26=b6;         \
+    ;;                  \
+.mem.offset 0,0; st8.spill [r4]=r30,16;     \
+.mem.offset 8,0; st8.spill [r5]=r31,16;     \
+    mov r27=b7;     \
+    ;;                  \
+    mov r30=ar.unat;    \
+    ;;      \
+    st8 [r7]=r30;       \
+    mov ar.fpsr=r11;    /* M-unit */    \
+    ;;                  \
+    stf.spill [r2]=f6,32;           \
+    stf.spill [r3]=f7,32;           \
+    ;;                  \
+    stf.spill [r2]=f8,32;           \
+    stf.spill [r3]=f9,32;           \
+    ;;                  \
+    stf.spill [r2]=f10;         \
+    stf.spill [r3]=f11;         \
+    ;;                  \
+    adds r2=PT(B6)-PT(F10),r2;      \
+    adds r3=PT(B7)-PT(F11),r3;      \
+    ;;          \
+    st8 [r2]=r26,16;       /* b6 */    \
+    st8 [r3]=r27,16;       /* b7 */    \
+    ;;                  \
+    st8 [r2]=r9;           /* ar.csd */    \
+    st8 [r3]=r10;          /* ar.ssd */    \
+    ;;
+
+#define VMX_SAVE_MIN_WITH_COVER   VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,)
+#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov 
r15=r19)
+#define VMX_SAVE_MIN      VMX_DO_SAVE_MIN(     , mov r30=r0, )
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_phy_mode.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,433 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_phy_mode.c: emulating domain physical mode.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Arun Sharma (arun.sharma@xxxxxxxxx)
+ * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
+ * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ */
+
+
+#include <asm/processor.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_phy_mode.h>
+#include <xen/sched.h>
+#include <asm/pgtable.h>
+
+
+int valid_mm_mode[8] = {
+    GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */
+    INV_MODE,
+    INV_MODE,
+    GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */
+    INV_MODE,
+    GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */
+    INV_MODE,
+    GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/
+};
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ *   mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+    /*  2004/09/12(Kevin): Allow switch to self */
+        /*
+         *  (it,dt,rt): (0,0,0) -> (1,1,1)
+         *  This kind of transition usually occurs in the very early
+     *  stage of Linux boot up procedure. Another case is in efi
+     *  and pal calls. (see "arch/ia64/kernel/head.S")
+     *
+     *  (it,dt,rt): (0,0,0) -> (0,1,1)
+     *  This kind of transition is found when OSYa exits efi boot
+     *  service. Due to gva = gpa in this case (Same region),
+     *  data access can be satisfied though itlb entry for physical
+     *  emulation is hit.
+         */
+    SW_SELF,0,  0,  SW_NOP, 0,  0,  0,  SW_P2V,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    /*
+     *  (it,dt,rt): (0,1,1) -> (1,1,1)
+     *  This kind of transition is found in OSYa.
+     *
+     *  (it,dt,rt): (0,1,1) -> (0,0,0)
+     *  This kind of transition is found in OSYa
+     */
+    SW_NOP, 0,  0,  SW_SELF,0,  0,  0,  SW_P2V,
+    /* (1,0,0)->(1,1,1) */
+    0,  0,  0,  0,  0,  0,  0,  SW_P2V,
+    /*
+         *  (it,dt,rt): (1,0,1) -> (1,1,1)
+         *  This kind of transition usually occurs when Linux returns
+     *  from the low level TLB miss handlers.
+         *  (see "arch/ia64/kernel/ivt.S")
+         */
+    0,  0,  0,  0,  0,  SW_SELF,0,  SW_P2V,
+    0,  0,  0,  0,  0,  0,  0,  0,
+    /*
+         *  (it,dt,rt): (1,1,1) -> (1,0,1)
+         *  This kind of transition usually occurs in Linux low level
+     *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+     *
+     *  (it,dt,rt): (1,1,1) -> (0,0,0)
+     *  This kind of transition usually occurs in pal and efi calls,
+     *  which requires running in physical mode.
+     *  (see "arch/ia64/kernel/head.S")
+     *  (1,1,1)->(1,0,0)
+     */
+
+    SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF,
+};
+
+void
+physical_mode_init(VCPU *vcpu)
+{
+    UINT64 psr;
+    struct domain * d = vcpu->domain;
+
+    vcpu->arch.old_rsc = 0;
+    vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
+#if 0
+void
+physical_itlb_miss_domn(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn,mpp1,gpp1;
+    struct domain *d;
+    static u64 test=0;
+    d=vcpu->domain;
+    if(test)
+        panic("domn physical itlb miss happen\n");
+    else
+        test=1;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    mppn=(mppn<<12)|(vpsr.cpl<<7);
+    gpp1=0;
+    mpp1 = get_mfn(DOMID_SELF,gpp1,1);
+    mpp1=(mpp1<<12)|(vpsr.cpl<<7);
+//    if(vadr>>63)
+//        mppn |= PHY_PAGE_UC;
+//    else
+//        mppn |= PHY_PAGE_WB;
+    mpp1 |= PHY_PAGE_WB;
+    psr=ia64_clear_ic();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), 
(mppn|PHY_PAGE_WB), 24);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), 
(mppn|PHY_PAGE_WB), 24);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+    ia64_srlz_i();
+    ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+    ia64_stop();
+    ia64_srlz_i();
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+#endif
+
+void
+physical_itlb_miss(VCPU *vcpu, u64 vadr)
+{
+        physical_itlb_miss_dom0(vcpu, vadr);
+}
+
+
+void
+physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    mppn=(mppn<<12)|(vpsr.cpl<<7); 
+//    if(vadr>>63)
+//       mppn |= PHY_PAGE_UC;
+//    else
+    mppn |= PHY_PAGE_WB;
+
+    psr=ia64_clear_ic();
+    ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+
+void
+physical_dtlb_miss(VCPU *vcpu, u64 vadr)
+{
+    u64 psr;
+    IA64_PSR vpsr;
+    u64 mppn,gppn;
+//    if(vcpu->domain!=dom0)
+//        panic("dom n physical dtlb miss happen\n");
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    gppn=(vadr<<1)>>13;
+    mppn = get_mfn(DOMID_SELF,gppn,1);
+    mppn=(mppn<<12)|(vpsr.cpl<<7);
+    if(vadr>>63)
+        mppn |= PHY_PAGE_UC;
+    else
+        mppn |= PHY_PAGE_WB;
+
+    psr=ia64_clear_ic();
+    ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+void
+vmx_init_all_rr(VCPU *vcpu)
+{
+       VMX(vcpu,vrr[VRN0]) = 0x38;
+       VMX(vcpu,vrr[VRN1]) = 0x38;
+       VMX(vcpu,vrr[VRN2]) = 0x38;
+       VMX(vcpu,vrr[VRN3]) = 0x38;
+       VMX(vcpu,vrr[VRN4]) = 0x38;
+       VMX(vcpu,vrr[VRN5]) = 0x38;
+       VMX(vcpu,vrr[VRN6]) = 0x60;
+       VMX(vcpu,vrr[VRN7]) = 0x60;
+
+       VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38);
+       VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60);
+       VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60);
+}
+
+void
+vmx_load_all_rr(VCPU *vcpu)
+{
+       unsigned long psr;
+       ia64_rr phy_rr;
+
+       psr = ia64_clear_ic();
+
+       phy_rr.ps = EMUL_PHY_PAGE_SHIFT; 
+       phy_rr.ve = 1;
+
+       /* WARNING: not allow co-exist of both virtual mode and physical
+        * mode in same region
+        */
+       if (is_physical_mode(vcpu)) {
+               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+                       panic("Unexpected domain switch in phy emul\n");
+               phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+               ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval);
+               phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+               ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval);
+       } else {
+               ia64_set_rr((VRN0 << VRN_SHIFT),
+                            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0])));
+               ia64_set_rr((VRN4 << VRN_SHIFT),
+                            vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4])));
+       }
+
+#if 1
+       /* rr567 will be postponed to last point when resuming back to guest */
+       ia64_set_rr((VRN1 << VRN_SHIFT),
+                    vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1])));
+       ia64_set_rr((VRN2 << VRN_SHIFT),
+                    vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2])));
+       ia64_set_rr((VRN3 << VRN_SHIFT),
+                    vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3])));
+#endif
+       ia64_srlz_d();
+       ia64_set_psr(psr);
+    ia64_srlz_i();
+}
+
+void
+switch_to_physical_rid(VCPU *vcpu)
+{
+    UINT64 psr;
+    ia64_rr phy_rr;
+
+    phy_rr.ps = EMUL_PHY_PAGE_SHIFT; 
+    phy_rr.ve = 1;
+
+    /* Save original virtual mode rr[0] and rr[4] */
+    psr=ia64_clear_ic();
+    phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+    ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval);
+    ia64_srlz_d();
+    phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+    ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval);
+    ia64_srlz_d();
+
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+
+void
+switch_to_virtual_rid(VCPU *vcpu)
+{
+    UINT64 psr;
+    ia64_rr mrr;
+
+    psr=ia64_clear_ic();
+
+    mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT);
+    ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
+    ia64_srlz_d();
+    mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT);
+    ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
+    ia64_srlz_d();
+    ia64_set_psr(psr);
+    ia64_srlz_i();
+    return;
+}
+
+static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr)
+{
+    return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void
+switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
+{
+    int act;
+    REGS * regs=vcpu_regs(vcpu);
+    act = mm_switch_action(old_psr, new_psr);
+    switch (act) {
+    case SW_V2P:
+        vcpu->arch.old_rsc = regs->ar_rsc;
+        switch_to_physical_rid(vcpu);
+        /*
+         * Set rse to enforced lazy, to prevent active rse save/restor when
+         * guest physical mode.
+         */
+        regs->ar_rsc &= ~(IA64_RSC_MODE);
+        vcpu->arch.mode_flags |= GUEST_IN_PHY;
+        break;
+    case SW_P2V:
+        switch_to_virtual_rid(vcpu);
+        /*
+         * recover old mode which is saved when entering
+         * guest physical mode
+         */
+        regs->ar_rsc = vcpu->arch.old_rsc;
+        vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+        break;
+    case SW_SELF:
+        printf("Switch to self-0x%lx!!! MM mode doesn't change...\n",
+            old_psr.val);
+        break;
+    case SW_NOP:
+        printf("No action required for mode transition: (0x%lx -> 0x%lx)\n",
+            old_psr.val, new_psr.val);
+        break;
+    default:
+        /* Sanity check */
+    printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
+        panic("Unexpected virtual <--> physical mode transition");
+        break;
+    }
+    return;
+}
+
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void
+check_mm_mode_switch (VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr)
+{
+
+    if ( (old_psr.dt != new_psr.dt ) ||
+         (old_psr.it != new_psr.it ) ||
+         (old_psr.rt != new_psr.rt )
+         ) {
+        switch_mm_mode (vcpu, old_psr, new_psr);
+    }
+
+    return 0;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void
+prepare_if_physical_mode(VCPU *vcpu)
+{
+    if (is_physical_mode(vcpu)) {
+       vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+        switch_to_virtual_rid(vcpu);
+    }
+    return;
+}
+
+/* Recover always follows prepare */
+void
+recover_if_physical_mode(VCPU *vcpu)
+{
+    if (is_physical_mode(vcpu)) {
+       vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+        switch_to_physical_rid(vcpu);
+    }
+    return;
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_process.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_process.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,375 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_process.c: handling VMX architecture-related VM exits
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Xiaoyan Feng (Fleming Feng)  <fleming.feng@xxxxxxxxx>
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+
+#include <linux/efi.h>  /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h>    /* FOR struct ia64_sal_retval */
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/regionreg.h>
+#include <asm/privop.h>
+#include <asm/ia64_int.h>
+#include <asm/hpsim_ssc.h>
+#include <asm/dom_fw.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/kregs.h>
+#include <asm/vmx.h>
+#include <asm/vmx_mm_def.h>
+#include <xen/mm.h>
+/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+
+
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval 
sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+extern void rnat_consumption (VCPU *vcpu);
+#define DOMN_PAL_REQUEST    0x110000
+IA64FAULT
+vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long 
isr, unsigned long iim)
+{
+       static int first_time = 1;
+       struct domain *d = (struct domain *) current->domain;
+       struct vcpu *v = (struct domain *) current;
+       extern unsigned long running_on_sim;
+       unsigned long i, sal_param[8];
+
+#if 0
+       if (first_time) {
+               if (platform_is_hp_ski()) running_on_sim = 1;
+               else running_on_sim = 0;
+               first_time = 0;
+       }
+       if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
+               if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
+               else do_ssc(vcpu_get_gr(current,36), regs);
+       }
+#endif
+       if (iim == d->arch.breakimm) {
+               struct ia64_sal_retval x;
+               switch (regs->r2) {
+                   case FW_HYPERCALL_PAL_CALL:
+                       //printf("*** PAL hypercall: index=%d\n",regs->r28);
+                       //FIXME: This should call a C routine
+                       x = pal_emulator_static(VMX_VPD(v, vgr[12]));
+                       regs->r8 = x.status; regs->r9 = x.v0;
+                       regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+                       if (regs->r8)
+                               printk("Failed vpal emulation, with 
index:0x%lx\n",
+                                       VMX_VPD(v, vgr[12]));
+#endif
+                       break;
+                   case FW_HYPERCALL_SAL_CALL:
+                       for (i = 0; i < 8; i++)
+                               vmx_vcpu_get_gr(v, 32+i, &sal_param[i]);
+                       x = sal_emulator(sal_param[0], sal_param[1],
+                                        sal_param[2], sal_param[3],
+                                        sal_param[4], sal_param[5],
+                                        sal_param[6], sal_param[7]);
+                       regs->r8 = x.status; regs->r9 = x.v0;
+                       regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+                       if (regs->r8)
+                               printk("Failed vsal emulation, with 
index:0x%lx\n",
+                                       sal_param[0]);
+#endif
+                       break;
+                   case FW_HYPERCALL_EFI_RESET_SYSTEM:
+                       printf("efi.reset_system called ");
+                       if (current->domain == dom0) {
+                               printf("(by dom0)\n ");
+                               (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+                       }
+                       printf("(not supported for non-0 domain)\n");
+                       regs->r8 = EFI_UNSUPPORTED;
+                       break;
+                   case FW_HYPERCALL_EFI_GET_TIME:
+                       {
+                       unsigned long *tv, *tc;
+                       vmx_vcpu_get_gr(v, 32, &tv);
+                       vmx_vcpu_get_gr(v, 33, &tc);
+                       printf("efi_get_time(%p,%p) called...",tv,tc);
+                       tv = __va(translate_domain_mpaddr(tv));
+                       if (tc) tc = __va(translate_domain_mpaddr(tc));
+                       regs->r8 = (*efi.get_time)(tv,tc);
+                       printf("and returns %lx\n",regs->r8);
+                       }
+                       break;
+                   case FW_HYPERCALL_EFI_SET_TIME:
+                   case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+                   case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+                       // FIXME: need fixes in efi.h from 2.6.9
+                   case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+                       // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+                       // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
+                       // POINTER ARGUMENTS WILL BE VIRTUAL!!
+                   case FW_HYPERCALL_EFI_GET_VARIABLE:
+                       // FIXME: need fixes in efi.h from 2.6.9
+                   case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+                   case FW_HYPERCALL_EFI_SET_VARIABLE:
+                   case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+                       // FIXME: need fixes in efi.h from 2.6.9
+                       regs->r8 = EFI_UNSUPPORTED;
+                       break;
+               }
+#if 0
+               if (regs->r8)
+                       printk("Failed vgfw emulation, with index:0x%lx\n",
+                               regs->r2);
+#endif
+               vmx_vcpu_increment_iip(current);
+       }else if(iim == DOMN_PAL_REQUEST){
+        pal_emul(current);
+               vmx_vcpu_increment_iip(current);
+    }  else
+               vmx_reflect_interruption(ifa,isr,iim,11);
+}
+
+static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800,
+    0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000,
+    0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600,
+    0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000,
+    0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00,
+    0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400,
+    0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00,
+    0x7f00,
+};
+
+
+
+void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim,
+     UINT64 vector)
+{
+    VCPU *vcpu = current;
+    REGS *regs=vcpu_regs(vcpu);
+    UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu);
+    if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){
+        panic("Guest nested fault!");
+    }
+    VPD_CR(vcpu,isr)=isr;
+    VPD_CR(vcpu,iipa) = regs->cr_iip;
+    vector=vec2off[vector];
+    if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+        VPD_CR(vcpu,iim) = iim;
+    else {
+        set_ifa_itir_iha(vcpu,ifa,1,1,1);
+    }
+    inject_guest_interruption(vcpu, vector);
+}
+
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+void leave_hypervisor_tail(struct pt_regs *regs)
+{
+       struct domain *d = current->domain;
+       struct vcpu *v = current;
+       // FIXME: Will this work properly if doing an RFI???
+       if (!is_idle_task(d) ) {        // always comes from guest
+               extern void vmx_dorfirfi(void);
+               struct pt_regs *user_regs = vcpu_regs(current);
+
+               if (local_softirq_pending())
+                       do_softirq();
+               local_irq_disable();
+ 
+               if (user_regs != regs)
+                       printk("WARNING: checking pending interrupt in nested 
interrupt!!!\n");
+
+               /* VMX Domain N has other interrupt source, saying DM  */
+                if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags))
+                      vmx_intr_assist(v);
+
+               /* FIXME: Check event pending indicator, and set
+                * pending bit if necessary to inject back to guest.
+                * Should be careful about window between this check
+                * and above assist, since IOPACKET_PORT shouldn't be
+                * injected into vmx domain.
+                *
+                * Now hardcode the vector as 0x10 temporarily
+                */
+               if 
(event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) {
+                       VPD_CR(v, irr[0]) |= 1UL << 0x10;
+                       v->arch.irq_new_pending = 1;
+               }
+ 
+               if ( v->arch.irq_new_pending ) {
+                       v->arch.irq_new_pending = 0;
+                       vmx_check_pending_irq(v);
+               }
+       }
+}
+
+extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
+
+/* We came here because the H/W VHPT walker failed to find an entry */
+void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr)
+{
+    IA64_PSR vpsr;
+    CACHE_LINE_TYPE type;
+    u64 vhpt_adr;
+    ISR misr;
+    ia64_rr vrr;
+    REGS *regs;
+    thash_cb_t *vtlb, *vhpt;
+    thash_data_t *data, me;
+    vtlb=vmx_vcpu_get_vtlb(vcpu);
+#ifdef  VTLB_DEBUG
+    check_vtlb_sanity(vtlb);
+    dump_vtlb(vtlb);
+#endif
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    regs = vcpu_regs(vcpu);
+    misr.val=regs->cr_isr;
+/*  TODO
+    if(vcpu->domain->id && vec == 2 &&
+       vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){
+        emulate_ins(&v);
+        return;
+    }
+*/
+
+    if((vec==1)&&(!vpsr.it)){
+        physical_itlb_miss(vcpu, vadr);
+        return;
+    }
+    if((vec==2)&&(!vpsr.dt)){
+        
if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){
+            emulate_io_inst(vcpu,((vadr<<1)>>1),4);   //  UC
+        }else{
+            physical_dtlb_miss(vcpu, vadr);
+        }
+        return;
+    }
+    vrr = vmx_vcpu_rr(vcpu,vadr);
+    if(vec == 1) type = ISIDE_TLB;
+    else if(vec == 2) type = DSIDE_TLB;
+    else panic("wrong vec\n");
+
+//    prepare_if_physical_mode(vcpu);
+
+    if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){
+        if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, 
data->ppn>>(PAGE_SHIFT-12))){
+            
vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps);
+            emulate_io_inst(vcpu, vadr, data->ma);
+            return IA64_FAULT;
+        }
+       if ( data->ps != vrr.ps ) {
+               machine_tlb_insert(vcpu, data);
+       }
+       else {
+               thash_insert(vtlb->ts->vhpt,data,vadr);
+           }
+    }else if(type == DSIDE_TLB){
+        if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+            if(vpsr.ic){
+                vmx_vcpu_set_isr(vcpu, misr.val);
+                alt_dtlb(vcpu, vadr);
+                return IA64_FAULT;
+            } else{
+                if(misr.sp){
+                    //TODO  lds emulation
+                    panic("Don't support speculation load");
+                }else{
+                    nested_dtlb(vcpu);
+                    return IA64_FAULT;
+                }
+            }
+        } else{
+            vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+            vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+            data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+            if(data){
+                if(vpsr.ic){
+                    vmx_vcpu_set_isr(vcpu, misr.val);
+                    dtlb_fault(vcpu, vadr);
+                    return IA64_FAULT;
+                }else{
+                    if(misr.sp){
+                        //TODO  lds emulation
+                        panic("Don't support speculation load");
+                    }else{
+                        nested_dtlb(vcpu);
+                        return IA64_FAULT;
+                    }
+                }
+            }else{
+                if(vpsr.ic){
+                    vmx_vcpu_set_isr(vcpu, misr.val);
+                    dvhpt_fault(vcpu, vadr);
+                    return IA64_FAULT;
+                }else{
+                    if(misr.sp){
+                        //TODO  lds emulation
+                        panic("Don't support speculation load");
+                    }else{
+                        nested_dtlb(vcpu);
+                        return IA64_FAULT;
+                    }
+                }
+            }
+        }
+    }else if(type == ISIDE_TLB){
+        if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+            if(!vpsr.ic){
+                misr.ni=1;
+            }
+            vmx_vcpu_set_isr(vcpu, misr.val);
+            alt_itlb(vcpu, vadr);
+            return IA64_FAULT;
+        } else{
+            vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+            vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+            data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+            if(data){
+                if(!vpsr.ic){
+                    misr.ni=1;
+                }
+                vmx_vcpu_set_isr(vcpu, misr.val);
+                itlb_fault(vcpu, vadr);
+                return IA64_FAULT;
+            }else{
+                if(!vpsr.ic){
+                    misr.ni=1;
+                }
+                vmx_vcpu_set_isr(vcpu, misr.val);
+                ivhpt_fault(vcpu, vadr);
+                return IA64_FAULT;
+            }
+        }
+    }
+}
+
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_support.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_support.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,164 @@
+
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_support.c: vmx specific support interface.
+ * Copyright (c) 2005, Intel Corporation.
+ *     Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <public/io/ioreq.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+
+/*
+ * I/O emulation should be atomic from domain point of view. However,
+ * when emulation code is waiting for I/O completion by do_block,
+ * other events like DM interrupt, VBD, etc. may come and unblock
+ * current exection flow. So we have to prepare for re-block if unblocked
+ * by non I/O completion event.
+ */
+void vmx_wait_io(void)
+{
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    extern void do_block();
+    int port = iopacket_port(d);
+
+    do {
+       if (!test_bit(port,
+               &d->shared_info->evtchn_pending[0]))
+           do_block();
+
+       /* Unblocked when some event is coming. Clear pending indication
+        * immediately if deciding to go for io assist
+         */
+       if (test_and_clear_bit(port,
+               &d->shared_info->evtchn_pending[0])) {
+           clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+           clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+           vmx_io_assist(v);
+       }
+
+
+       if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+           /*
+            * Latest event is not I/O completion, so clear corresponding
+            * selector and pending indication, to allow real event coming
+            */
+           clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+
+           /* Here atually one window is leaved before selector is cleared.
+            * However this window only delay the indication to coming event,
+            * nothing losed. Next loop will check I/O channel to fix this
+            * window.
+            */
+           clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+       }
+       else
+           break;
+    } while (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
+}
+
+/*
+ * Only place to call vmx_io_assist is mmio/legacy_io emulation.
+ * Since I/O emulation is synchronous, it shouldn't be called in
+ * other places. This is not like x86, since IA-64 implements a
+ * per-vp stack without continuation.
+ */
+void vmx_io_assist(struct vcpu *v)
+{
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    /*
+     * This shared page contains I/O request between emulation code
+     * and device model.
+     */
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (!vio)
+       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+
+    p = &vio->vp_ioreq;
+
+    if (p->state == STATE_IORESP_HOOK)
+       panic("Not supported: No hook available for DM request\n");
+
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+       if (p->state != STATE_IORESP_READY) {
+           /* Can't do_block here, for the same reason as other places to
+            * use vmx_wait_io. Simple return is safe since vmx_wait_io will
+            * try to block again
+            */
+           return; 
+       } else
+           p->state = STATE_INVALID;
+
+       clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+    } else
+       return; /* Spurous event? */
+}
+
+/*
+ * VMX domainN has two types of interrupt source: lsapic model within
+ * HV, and device model within domain 0 (service OS). There're another
+ * pending array in share page, manipulated by device model directly.
+ * To conform to VT-i spec, we have to sync pending bits in shared page
+ * into VPD. This has to be done before checking pending interrupt at
+ * resume to guest. For domain 0, all the interrupt sources come from
+ * HV, which then doesn't require this assist.
+ */
+void vmx_intr_assist(struct vcpu *v)
+{
+    vcpu_iodata_t *vio;
+    struct domain *d = v->domain;
+    extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu,
+                                       unsigned long *pend_irr);
+    int port = iopacket_port(d);
+
+    /* I/O emulation is atomic, so it's impossible to see execution flow
+     * out of vmx_wait_io, when guest is still waiting for response.
+     */
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
+       panic("!!!Bad resume to guest before I/O emulation is done.\n");
+
+    /* Clear indicator specific to interrupt delivered from DM */
+    if (test_and_clear_bit(port,
+               &d->shared_info->evtchn_pending[0])) {
+       if (!d->shared_info->evtchn_pending[port >> 5])
+           clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+
+       if (!v->vcpu_info->evtchn_pending_sel)
+           clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+    }
+
+    /* Even without event pending, we still need to sync pending bits
+     * between DM and vlsapic. The reason is that interrupt delivery
+     * shares same event channel as I/O emulation, with corresponding
+     * indicator possibly cleared when vmx_wait_io().
+     */
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (!vio)
+       panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+
+#ifdef V_IOSAPIC_READY
+    vlapic_update_ext_irq(v);
+#else
+    panic("IOSAPIC model is missed in qemu\n");
+#endif
+    return;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_utility.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_utility.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,659 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_utility.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ *  Xiaoyan Feng (Fleming Feng)  <fleming.feng@xxxxxxxxx>
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/vmx_mm_def.h>
+
+
+/*
+ * Return:
+ *  0:  Not reserved indirect registers
+ *  1:  Is reserved indirect registers
+ */
+int
+is_reserved_indirect_register (
+    int type,
+    int index )
+{
+    switch (type) {
+        case IA64_CPUID:
+            if ( index >= 5 ) {
+                return 1;
+            }
+
+        case IA64_DBR:
+        case IA64_IBR:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PMC:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PMD:
+            //bugbugbug:check with pal about the max ibr/dbr!!!!
+            break;
+
+        case IA64_PKR:
+            //bugbugbug:check with pal about the max pkr!!!!
+            break;
+
+        case IA64_RR:
+            //bugbugbug:check with pal about the max rr!!!!
+            break;
+
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return 0;
+
+}
+
+/*
+ * Return:
+ *  Set all ignored fields in value to 0 and return
+ */
+u64
+indirect_reg_igfld_MASK (
+    int type,
+    int index,
+    u64 value
+    )
+{
+    u64 nvalue;
+
+    nvalue = value;
+    switch ( type ) {
+        case IA64_CPUID:
+            if ( index == 2 ) {
+                nvalue = 0;
+            }
+            break;
+
+        case IA64_DBR:
+        case IA64_IBR:
+            /* Refer to SDM Vol2 Table 7-1,7-2 */
+            if ( index % 2 != 0) {
+                /* Ignore field: {61:60} */
+                nvalue = value & (~MASK (60, 2));
+            }
+            break;
+        case IA64_PMC:
+            if ( index == 0 ) {
+                /* Ignore field: 3:1 */
+                nvalue = value & (~MASK (1, 3));
+            }
+            break;
+        case IA64_PMD:
+            if ( index >= 4 ) {
+                /* Ignore field: 7:7 */
+                /* bugbug: this code is correct for generic
+                 * PMD. However, for implementation specific
+                 * PMD, it's WRONG. need more info to judge
+                 * what's implementation specific PMD.
+                 */
+                nvalue = value & (~MASK (7, 1));
+            }
+            break;
+        case IA64_PKR:
+        case IA64_RR:
+            break;
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return nvalue;
+}
+
+/*
+ * Return:
+ *  Set all ignored fields in value to 0 and return
+ */
+u64
+cr_igfld_mask (int index, u64 value)
+{
+    u64 nvalue;
+
+    nvalue = value;
+
+    switch ( index ) {
+    case IA64_REG_CR_IVA:
+        /* Ignore filed: 14:0 */
+        nvalue = value & (~MASK (0, 15));
+        break;
+
+    case IA64_REG_CR_IHA:
+        /* Ignore filed: 1:0 */
+        nvalue = value & (~MASK (0, 2));
+        break;
+
+    case IA64_REG_CR_LID:
+        /* Ignore filed: 63:32 */
+        nvalue = value & (~MASK (32, 32));
+        break;
+
+    case IA64_REG_CR_TPR:
+        /* Ignore filed: 63:17,3:0 */
+        nvalue = value & (~MASK (17, 47));
+        nvalue = nvalue & (~MASK (0, 4));
+        break;
+
+    case IA64_REG_CR_EOI:
+        /* Ignore filed: 63:0 */
+        nvalue = 0;
+        break;
+
+    case IA64_REG_CR_ITV:
+    case IA64_REG_CR_PMV:
+    case IA64_REG_CR_CMCV:
+    case IA64_REG_CR_LRR0:
+    case IA64_REG_CR_LRR1:
+        /* Ignore filed: 63:17,12:12 */
+        nvalue = value & (~MASK (17, 47));
+        nvalue = nvalue & (~MASK (12, 1));
+        break;
+    }
+
+    return nvalue;
+}
+
+
+/*
+ * Return:
+ *  1: PSR reserved fields are not zero
+ *  0:  PSR reserved fields are all zero
+ */
+int
+check_psr_rsv_fields (u64 value)
+{
+    /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46
+     * These reserved fields shall all be zero
+     * Otherwise we will panic
+     */
+
+    if ( value & MASK (0, 1) ||
+         value & MASK (6, 7) ||
+         value & MASK (16, 1) ||
+         value & MASK (28, 4) ||
+         value & MASK (46, 18)
+         ) {
+             return 1;
+         }
+
+    return 0;
+}
+
+
+
+/*
+ * Return:
+ *  1: CR reserved fields are not zero
+ *  0:  CR reserved fields are all zero
+ */
+int
+check_cr_rsv_fields (int index, u64 value)
+{
+    switch (index) {
+        case IA64_REG_CR_DCR:
+            if ( (value & MASK ( 3, 5 )) ||
+                (value & MASK (15, 49))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITM:
+        case IA64_REG_CR_IVA:
+        case IA64_REG_CR_IIP:
+        case IA64_REG_CR_IFA:
+        case IA64_REG_CR_IIPA:
+        case IA64_REG_CR_IIM:
+        case IA64_REG_CR_IHA:
+        case IA64_REG_CR_EOI:
+            return 0;
+
+        case IA64_REG_CR_PTA:
+            if ( (value & MASK ( 1, 1 )) ||
+                (value & MASK (9, 6))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IPSR:
+            return check_psr_rsv_fields (value);
+
+
+        case IA64_REG_CR_ISR:
+            if ( (value & MASK ( 24, 8 )) ||
+                (value & MASK (44, 20))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITIR:
+            if ( (value & MASK ( 0, 2 )) ||
+                (value & MASK (32, 32))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IFS:
+            if ( (value & MASK ( 38, 25 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_LID:
+            if ( (value & MASK ( 0, 16 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IVR:
+            if ( (value & MASK ( 8, 56 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_TPR:
+            if ( (value & MASK ( 8, 8 ))) {
+                return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_IRR0:
+            if ( (value & MASK ( 1, 1 )) ||
+                (value & MASK (3, 13))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_ITV:
+        case IA64_REG_CR_PMV:
+        case IA64_REG_CR_CMCV:
+            if ( (value & MASK ( 8, 4 )) ||
+                (value & MASK (13, 3))) {
+                    return 1;
+            }
+            return 0;
+
+        case IA64_REG_CR_LRR0:
+        case IA64_REG_CR_LRR1:
+            if ( (value & MASK ( 11, 1 )) ||
+                (value & MASK (14, 1))) {
+                    return 1;
+            }
+            return 0;
+    }
+
+
+    panic ("Unsupported CR");
+}
+
+
+
+/*
+ * Return:
+ *  0:  Indirect Reg reserved fields are not zero
+ *  1:  Indirect Reg reserved fields are all zero
+ */
+int
+check_indirect_reg_rsv_fields ( int type, int index, u64 value )
+{
+
+    switch ( type ) {
+        case IA64_CPUID:
+            if ( index == 3 ) {
+                if ( value & MASK (40, 24 )) {
+                    return 0;
+                }
+            } else if ( index == 4 ) {
+                if ( value & MASK (2, 62 )) {
+                    return 0;
+                }
+            }
+            break;
+
+        case IA64_DBR:
+        case IA64_IBR:
+        case IA64_PMC:
+        case IA64_PMD:
+            break;
+
+        case IA64_PKR:
+            if ( value & MASK (4, 4) ||
+                value & MASK (32, 32 )) {
+                return 0;
+                }
+            break;
+
+        case IA64_RR:
+            if ( value & MASK (1, 1) ||
+                value & MASK (32, 32 )) {
+                return 0;
+                }
+            break;
+
+        default:
+            panic ("Unsupported instruction!");
+    }
+
+    return 1;
+}
+
+
+
+
+/* Return
+ * Same format as isr_t
+ * Only ei/ni bits are valid, all other bits are zero
+ */
+u64
+set_isr_ei_ni (VCPU *vcpu)
+{
+
+    IA64_PSR vpsr,ipsr;
+    ISR visr;
+    REGS *regs;
+
+    regs=vcpu_regs(vcpu);
+
+    visr.val = 0;
+
+    vpsr.val = vmx_vcpu_get_psr (vcpu);
+
+    if (!vpsr.ic == 1 ) {
+        /* Set ISR.ni */
+        visr.ni = 1;
+    }
+    ipsr.val = regs->cr_ipsr;
+
+    visr.ei = ipsr.ri;
+    return visr.val;
+}
+
+
+/* Set up ISR.na/code{3:0}/r/w for no-access instructions
+ * Refer to SDM Vol Table 5-1
+ * Parameter:
+ *  setr: if 1, indicates this function will set up ISR.r
+ *  setw: if 1, indicates this function will set up ISR.w
+ * Return:
+ *  Same format as ISR. All fields are zero, except na/code{3:0}/r/w
+ */
+u64
+set_isr_for_na_inst(VCPU *vcpu, int op)
+{
+    ISR visr;
+    visr.val = 0;
+    switch (op) {
+        case IA64_INST_TPA:
+            visr.na = 1;
+            visr.code = 0;
+            break;
+        case IA64_INST_TAK:
+            visr.na = 1;
+            visr.code = 3;
+            break;
+    }
+    return visr.val;
+}
+
+
+
+/*
+ * Set up ISR for registe Nat consumption fault
+ * Parameters:
+ *  read: if 1, indicates this is a read access;
+ *  write: if 1, indicates this is a write access;
+ */
+void
+set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write)
+{
+    ISR visr;
+    u64 value;
+    /* Need set up ISR: code, ei, ni, na, r/w */
+    visr.val = 0;
+
+    /* ISR.code{7:4} =1,
+     * Set up ISR.code{3:0}, ISR.na
+     */
+    visr.code = (1 << 4);
+    if (inst) {
+
+        value = set_isr_for_na_inst (vcpu,inst);
+        visr.val = visr.val | value;
+    }
+
+    /* Set up ISR.r/w */
+    visr.r = read;
+    visr.w = write;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu,visr.val);
+}
+
+
+
+/*
+ * Set up ISR for break fault
+ */
+void set_break_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: ei, ni */
+
+    visr.val = 0;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr(vcpu, visr.val);
+}
+
+
+
+
+
+
+/*
+ * Set up ISR for Priviledged Operation fault
+ */
+void set_privileged_operation_isr (VCPU *vcpu,int inst)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: code, ei, ni, na */
+
+    visr.val = 0;
+
+    /* Set up na, code{3:0} for no-access instruction */
+    value = set_isr_for_na_inst (vcpu, inst);
+    visr.val = visr.val | value;
+
+
+    /* ISR.code{7:4} =1 */
+    visr.code = (1 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+/*
+ * Set up ISR for Priviledged Register fault
+ */
+void set_privileged_reg_isr (VCPU *vcpu, int inst)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: code, ei, ni */
+
+    visr.val = 0;
+
+    /* ISR.code{7:4} =2 */
+    visr.code = 2 << 4;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+
+/*
+ * Set up ISR for Reserved Register/Field fault
+ */
+void set_rsv_reg_field_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: code, ei, ni */
+
+    visr.val = 0;
+
+    /* ISR.code{7:4} =4 */
+    visr.code = (3 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+/*
+ * Set up ISR for Illegal Operation fault
+ */
+void set_illegal_op_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need set up ISR: ei, ni */
+
+    visr.val = 0;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access)
+{
+    ISR isr;
+
+    isr.val = 0;
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag;
+    isr.na = non_access;
+    isr.r = 1;
+    isr.w = 0;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+    return;
+}
+
+void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access)
+{
+    u64 value;
+    ISR isr;
+
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_PRIV_OP_FAULT;
+    isr.na = non_access;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+
+    return;
+}
+
+
+IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index)
+{
+    u64 sof;
+    REGS *regs;
+    regs=vcpu_regs(vcpu);
+    sof = regs->cr_ifs & 0x7f;
+    if(reg_index >= sof + 32)
+        return IA64_FAULT;
+    return IA64_NO_FAULT;;
+}
+
+
+int is_reserved_rr_register(VCPU* vcpu, int reg_index)
+{
+    return (reg_index >= 8);
+}
+
+#define  ITIR_RSV_MASK         (0x3UL | (((1UL<<32)-1) << 32))
+int is_reserved_itir_field(VCPU* vcpu, u64 itir)
+{
+       if ( itir & ITIR_RSV_MASK ) {
+               return 1;
+       }
+       return 0;
+}
+
+int is_reserved_rr_field(VCPU* vcpu, u64 reg_value)
+{
+    ia64_rr rr;
+    rr.rrval = reg_value;
+
+    if(rr.reserved0 != 0 || rr.reserved1 != 0){
+        return 1;
+    }
+    if(rr.ps < 12 || rr.ps > 28){
+        // page too big or small.
+        return 1;
+    }
+    if(rr.ps > 15 && rr.ps % 2 != 0){
+        // unsupported page size.
+        return 1;
+    }
+    return 0;
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vcpu.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,446 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Fred yang (fred.yang@xxxxxxxxx)
+ *  Arun Sharma (arun.sharma@xxxxxxxxx)
+ *  Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+
+//u64  fire_itc;
+//u64  fire_itc2;
+//u64  fire_itm;
+//u64  fire_itm2;
+/*
+ * Copyright (c) 2005 Intel Corporation.
+ *    Anthony Xu (anthony.xu@xxxxxxxxx)
+ *    Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+//unsigned long last_guest_rsm = 0x0;
+struct guest_psr_bundle{
+    unsigned long ip;
+    unsigned long psr;
+};
+
+struct guest_psr_bundle guest_psr_buf[100];
+unsigned long guest_psr_index = 0;
+
+/* Install a new guest PSR value: panics on PSR features that are not
+ * virtualized (pk/is/vm), mirrors the value into the VPD vpsr and the
+ * machine cr.ipsr, and re-evaluates pending interrupts / the memory
+ * mode when the relevant bits change.
+ * Fix: this function is declared void but returned IA64_NO_FAULT;
+ * the value return is removed.
+ */
+void
+vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value)
+{
+
+    UINT64 mask;
+    REGS *regs;
+    IA64_PSR old_psr, new_psr;
+    old_psr.val=vmx_vcpu_get_psr(vcpu);
+
+    regs=vcpu_regs(vcpu);
+    /* We only support guest as:
+     *  vpsr.pk = 0
+     *  vpsr.is = 0
+     * Otherwise panic
+     */
+    if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
+        panic ("Setting unsupport guest psr!");
+    }
+
+    /*
+     * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+     * Since these bits will become 0, after success execution of each
+     * instruction, we will change set them to mIA64_PSR
+     */
+    VMX_VPD(vcpu,vpsr) = value &
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    if ( !old_psr.i && (value & IA64_PSR_I) ) {
+        // vpsr.i 0->1: interrupts just became unmasked, so re-check
+        // for a pending interrupt before resuming the guest.
+        vcpu->arch.irq_new_condition = 1;
+    }
+    new_psr.val=vmx_vcpu_get_psr(vcpu);
+    {
+    /* Debug ring buffer of recent guest psr transitions. */
+    struct pt_regs *regs = vcpu_regs(vcpu);
+    guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
+    guest_psr_buf[guest_psr_index].psr = new_psr.val;
+    if (++guest_psr_index >= 100)
+        guest_psr_index = 0;
+    }
+#if 0
+    if (old_psr.i != new_psr.i) {
+    if (old_psr.i)
+        last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
+    else
+        last_guest_rsm = 0;
+    }
+#endif
+
+    /*
+     * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+     * , except for the following bits:
+     *  ic/i/dt/si/rt/mc/it/bn/vm
+     */
+    mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+        IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+        IA64_PSR_VM;
+
+    regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
+
+    check_mm_mode_switch(vcpu, old_psr, new_psr);
+    return;
+}
+
+/* Adjust slot both in pt_regs and vpd, upon vpsr.ri which
+ * should have sync with ipsr in entry.
+ *
+ * Clear some bits due to successfully emulation.
+ */
+IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu)
+{
+    // TODO: trap_bounce?? Eddie
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+    IA64_PSR *ipsr = (IA64_PSR *)&regs->cr_ipsr;
+
+    /* Advance the restart instruction slot: a bundle holds three
+     * slots (ri = 0..2); after slot 2 move to the next 16-byte bundle. */
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if (vpsr.ri == 2) {
+    vpsr.ri = 0;
+    regs->cr_iip += 16;
+    } else {
+    vpsr.ri++;
+    }
+
+    ipsr->ri = vpsr.ri;
+    /* id/da/dd/ss/ed/ia are single-step/fault-resume bits that the
+     * hardware clears after an instruction completes; clear them in
+     * both the virtual psr and the machine ipsr. */
+    vpsr.val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    VMX_VPD(vcpu, vpsr) = vpsr.val;
+
+    ipsr->val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    return (IA64_NO_FAULT);
+}
+
+
+/* Emulate 'cover': allocate a new zero-size register frame.  When
+ * interruption collection is off (vpsr.ic == 0) the outgoing CFM is
+ * preserved in the virtual IFS, matching architected behaviour. */
+IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
+{
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+    if(!vpsr.ic)
+        VPD_CR(vcpu,ifs) = regs->cr_ifs;
+    regs->cr_ifs = IA64_IFS_V;
+    return (IA64_NO_FAULT);
+}
+
+
+/* Accessor for this vcpu's virtual TLB control block. */
+thash_cb_t *
+vmx_vcpu_get_vtlb(VCPU *vcpu)
+{
+    return vcpu->arch.vtlb;
+}
+
+
+/* Accessor for the virtual platform definition of the vcpu's domain. */
+struct virutal_platform_def *
+vmx_vcpu_get_plat(VCPU *vcpu)
+{
+    return &vcpu->domain->arch.vmx_platform;
+}
+
+
+/* Return the virtual region register covering vadr; bits 63:61 of a
+ * virtual address select the region. */
+ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr)
+{
+        return (ia64_rr)VMX(vcpu,vrr[vadr>>61]);
+}
+
+
+/* Emulate a guest write of region register 'reg' (passed as the
+ * virtual address selecting the region).  Stores the virtual value,
+ * then updates the corresponding machine rr; VRN5-7 are cached in the
+ * per-vcpu mrr fields rather than written to hardware directly. */
+IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    ia64_rr oldrr,newrr;
+    thash_cb_t *hcb;
+    oldrr=vmx_vcpu_rr(vcpu,reg);
+    newrr.rrval=val;
+#if 1
+    /* A preferred-page-size change invalidates every cached
+     * translation for the region, so flush the whole vTLB. */
+    if(oldrr.ps!=newrr.ps){
+        hcb = vmx_vcpu_get_vtlb(vcpu);
+        thash_purge_all(hcb);
+    }
+#endif
+    VMX(vcpu,vrr[reg>>61]) = val;
+    switch((u64)(reg>>61)) {
+    case VRN5:
+        VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val);
+        break;
+    case VRN6:
+        VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val);
+        break;
+    case VRN7:
+        VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
+        /* Change double mapping for this domain */
+#ifdef XEN_DBL_MAPPING
+        vmx_change_double_mapping(vcpu,
+                      vmx_vrrtomrr(vcpu,oldrr.rrval),
+                      vmx_vrrtomrr(vcpu,newrr.rrval));
+#endif
+        break;
+    default:
+        /* Other regions: program the machine rr immediately. */
+        ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
+        break;
+    }
+
+    return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+/* Read protection key register 'reg'.  Keys are not virtualized here:
+ * the machine pkr is returned directly. */
+IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+    *pval = (UINT64)ia64_get_pkr(reg);
+    return (IA64_NO_FAULT);
+}
+
+/* Write protection key register 'reg' straight to the hardware. */
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    ia64_set_pkr(reg, val);
+    return (IA64_NO_FAULT);
+}
+
+#if 0
+/* Disabled debug hook: traps on inserts covering two hard-coded
+ * addresses and optionally traces every TLB insert. */
+int tlb_debug=0;
+check_entry(u64 va, u64 ps, char *str)
+{
+     va &= ~ (PSIZE(ps)-1);
+     if ( va == 0x2000000002908000UL ||
+      va == 0x600000000000C000UL ) {
+    stop();
+     }
+     if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
+}
+#endif
+
+
+/* Synthesize the ITIR value reported on a fault at address 'ifa':
+ * the ps and rid of the region containing ifa, all other fields zero.
+ */
+u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa)
+{
+    ia64_rr region, itir;
+
+    region = vmx_vcpu_rr(vcpu, ifa);
+    itir.rrval = 0;
+    itir.ps  = region.ps;
+    itir.rid = region.rid;
+    return itir.rrval;
+}
+
+
+
+
+/* Emulate 'rfi': restore psr from vcr.ipsr, the register frame from
+ * vcr.ifs (when its valid bit is set and the body is non-zero), and
+ * resume at vcr.iip. */
+IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
+{
+    // TODO: Only allowed for current vcpu
+    UINT64 ifs, psr;
+    REGS *regs = vcpu_regs(vcpu);
+    psr = VPD_CR(vcpu,ipsr);
+    vmx_vcpu_set_psr(vcpu,psr);
+    ifs=VPD_CR(vcpu,ifs);
+    /* ifs>>63 tests the valid bit; ifs<<1 tests that the remaining
+     * bits are not all zero. */
+    if((ifs>>63)&&(ifs<<1)){
+        ifs=(regs->cr_ifs)&0x7f;
+        /* rfi_pfs: pfm (sof in both fields) used when unwinding. */
+        regs->rfi_pfs = (ifs<<7)|ifs;
+        regs->cr_ifs = VPD_CR(vcpu,ifs);
+    }
+    regs->cr_iip = VPD_CR(vcpu,iip);
+    return (IA64_NO_FAULT);
+}
+
+
+/* Return the guest's virtual psr as kept in the VPD. */
+UINT64
+vmx_vcpu_get_psr(VCPU *vcpu)
+{
+    return VMX_VPD(vcpu, vpsr);
+}
+
+
+/* Read banked general register 'reg' (16..31) from whichever bank is
+ * selected by vpsr.bn; returns IA64_FAULT if the register's NAT bit
+ * is set.
+ */
+IA64FAULT
+vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val)
+{
+    IA64_PSR vpsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        *val=VMX_VPD(vcpu,vgr[reg-16]);
+        // Check NAT bit
+        if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) {
+            // TODO
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    else {
+        *val=VMX_VPD(vcpu,vbgr[reg-16]);
+        /* Fix: vbgr[] is indexed by reg-16, so the matching NAT bit is
+         * bit (reg-16) of vbnat, not bit reg. */
+        if ( VMX_VPD(vcpu,vbnat) & (1UL<<(reg-16)) ) {
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    return IA64_NO_FAULT;
+}
+
+/* Write banked general register 'reg' (16..31) into the bank selected
+ * by vpsr.bn, and update the matching NAT collection bit.
+ * Fixes: the old code (a) cleared vbnat instead of vnat in the bank-1
+ * path, leaving a stale vnat bit when nat==0, and (b) used bit index
+ * 'reg' instead of 'reg-16' (and the wrong collection for set) in the
+ * bank-0 path, which must mirror vbgr[reg-16].
+ */
+IA64FAULT
+vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat)
+{
+    IA64_PSR vpsr;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        VMX_VPD(vcpu,vgr[reg-16]) = val;
+        if(nat){
+            VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    else {
+        VMX_VPD(vcpu,vbgr[reg-16]) = val;
+        if(nat){
+            VMX_VPD(vcpu,vbnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    return IA64_NO_FAULT;
+}
+
+
+
+/* Read guest general register 'reg'.  Registers 16-31 are banked and
+ * go through vmx_vcpu_get_bgr(); everything else is read from the
+ * trap frame.  Returns IA64_FAULT on a set NAT bit. */
+IA64FAULT
+vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val)
+{
+    REGS *regs=vcpu_regs(vcpu);
+    int nat;
+    //TODO, Eddie
+    // NOTE(review): returns 0 (== IA64_NO_FAULT?) on a NULL trap
+    // frame without setting *val -- confirm intended behaviour.
+    if (!regs) return 0;
+    if (reg >= 16 && reg < 32) {
+        return vmx_vcpu_get_bgr(vcpu,reg,val);
+    }
+    getreg(reg,val,&nat,regs);    // FIXME: handle NATs later
+    if(nat){
+        return IA64_FAULT;
+    }
+    return IA64_NO_FAULT;
+}
+
+// returns:
+//   IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+//   IA64_NO_FAULT otherwise
+
+/* Write guest general register 'reg'.  Rejects targets outside the
+ * current frame (statics + sof from cr.ifs); registers 16-31 are
+ * banked and go through vmx_vcpu_set_bgr().
+ * Fix: the NULL check on 'regs' now happens before the first
+ * dereference (the old code read regs->cr_ifs first, making the
+ * check useless).
+ */
+IA64FAULT
+vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat)
+{
+    REGS *regs = vcpu_regs(vcpu);
+    long sof;
+    //TODO Eddie
+
+    if (!regs) return IA64_ILLOP_FAULT;
+    sof = (regs->cr_ifs) & 0x7f;
+    if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+    if ( reg >= 16 && reg < 32 ) {
+        return vmx_vcpu_set_bgr(vcpu,reg, value, nat);
+    }
+    setreg(reg,value,nat,regs);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate 'rsm imm24': clear the given psr system-mask bits. */
+IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 new_psr;
+
+    new_psr = vmx_vcpu_get_psr(vcpu) & ~imm24;
+    vmx_vcpu_set_psr(vcpu, new_psr);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate 'ssm imm24': set the given psr system-mask bits. */
+IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 new_psr;
+
+    new_psr = vmx_vcpu_get_psr(vcpu) | imm24;
+    vmx_vcpu_set_psr(vcpu, new_psr);
+    return IA64_NO_FAULT;
+}
+
+
+/* Emulate 'mov psr.l = r2': install the complete new psr value. */
+IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+    vmx_vcpu_set_psr(vcpu, val);
+    return IA64_NO_FAULT;
+}
+
+/* Write the virtual cr.tpr and flag that pending-interrupt state must
+ * be re-evaluated (a lowered tpr may unmask an interrupt). */
+IA64FAULT
+vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
+{
+    VPD_CR(vcpu,tpr)=val;
+    vcpu->arch.irq_new_condition = 1;
+    return IA64_NO_FAULT;
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_virt.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_virt.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1511 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_virt.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Fred yang (fred.yang@xxxxxxxxx)
+ *  Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+
+
+#include <asm/privop.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/delay.h> // Debug only
+#include <asm/vmmu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/smp.h>
+
+#include <asm/virt_event.h>
+extern UINT64 privop_trace;
+
+/* Decode a privileged/virtualization-sensitive instruction and report
+ * the matching EVENT_* cause code in *cause (0 if unrecognized).
+ * Only M-unit and B-unit encodings are inspected; the x3/x4/x6
+ * comparisons follow the IA-64 M/B instruction format fields. */
+void
+ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64  * cause)
+{
+    *cause=0;
+    switch (slot_type) {
+        case M:
+        /* Major opcode 0: system/mask and mov-to-ar immediates. */
+        if (inst.generic.major==0){
+            if(inst.M28.x3==0){
+                if(inst.M44.x4==6){
+                    *cause=EVENT_SSM;
+                }else if(inst.M44.x4==7){
+                    *cause=EVENT_RSM;
+                }else if(inst.M30.x4==8&&inst.M30.x2==2){
+                    *cause=EVENT_MOV_TO_AR_IMM;
+                }
+            }
+        }
+        /* Major opcode 1: system register moves, TLB ops, key ops. */
+        else if(inst.generic.major==1){
+            if(inst.M28.x3==0){
+                if(inst.M32.x6==0x2c){
+                    *cause=EVENT_MOV_TO_CR;
+                }else if(inst.M33.x6==0x24){
+                    *cause=EVENT_MOV_FROM_CR;
+                }else if(inst.M35.x6==0x2d){
+                    *cause=EVENT_MOV_TO_PSR;
+                }else if(inst.M36.x6==0x25){
+                    *cause=EVENT_MOV_FROM_PSR;
+                }else if(inst.M29.x6==0x2A){
+                    *cause=EVENT_MOV_TO_AR;
+                }else if(inst.M31.x6==0x22){
+                    *cause=EVENT_MOV_FROM_AR;
+                }else if(inst.M45.x6==0x09){
+                    *cause=EVENT_PTC_L;
+                }else if(inst.M45.x6==0x0A){
+                    *cause=EVENT_PTC_G;
+                }else if(inst.M45.x6==0x0B){
+                    *cause=EVENT_PTC_GA;
+                }else if(inst.M45.x6==0x0C){
+                    *cause=EVENT_PTR_D;
+                }else if(inst.M45.x6==0x0D){
+                    *cause=EVENT_PTR_I;
+                }else if(inst.M46.x6==0x1A){
+                    *cause=EVENT_THASH;
+                }else if(inst.M46.x6==0x1B){
+                    *cause=EVENT_TTAG;
+                }else if(inst.M46.x6==0x1E){
+                    *cause=EVENT_TPA;
+                }else if(inst.M46.x6==0x1F){
+                    *cause=EVENT_TAK;
+                }else if(inst.M47.x6==0x34){
+                    *cause=EVENT_PTC_E;
+                }else if(inst.M41.x6==0x2E){
+                    *cause=EVENT_ITC_D;
+                }else if(inst.M41.x6==0x2F){
+                    *cause=EVENT_ITC_I;
+                }else if(inst.M42.x6==0x00){
+                    *cause=EVENT_MOV_TO_RR;
+                }else if(inst.M42.x6==0x01){
+                    *cause=EVENT_MOV_TO_DBR;
+                }else if(inst.M42.x6==0x02){
+                    *cause=EVENT_MOV_TO_IBR;
+                }else if(inst.M42.x6==0x03){
+                    *cause=EVENT_MOV_TO_PKR;
+                }else if(inst.M42.x6==0x04){
+                    *cause=EVENT_MOV_TO_PMC;
+                }else if(inst.M42.x6==0x05){
+                    *cause=EVENT_MOV_TO_PMD;
+                }else if(inst.M42.x6==0x0E){
+                    *cause=EVENT_ITR_D;
+                }else if(inst.M42.x6==0x0F){
+                    *cause=EVENT_ITR_I;
+                }else if(inst.M43.x6==0x10){
+                    *cause=EVENT_MOV_FROM_RR;
+                }else if(inst.M43.x6==0x11){
+                    *cause=EVENT_MOV_FROM_DBR;
+                }else if(inst.M43.x6==0x12){
+                    *cause=EVENT_MOV_FROM_IBR;
+                }else if(inst.M43.x6==0x13){
+                    *cause=EVENT_MOV_FROM_PKR;
+                }else if(inst.M43.x6==0x14){
+                    *cause=EVENT_MOV_FROM_PMC;
+/*
+                }else if(inst.M43.x6==0x15){
+                    *cause=EVENT_MOV_FROM_PMD;
+*/
+                }else if(inst.M43.x6==0x17){
+                    *cause=EVENT_MOV_FROM_CPUID;
+                }
+            }
+        }
+        break;
+        /* B-unit: cover/rfi/bsw. */
+        case B:
+        if(inst.generic.major==0){
+            if(inst.B8.x6==0x02){
+                *cause=EVENT_COVER;
+            }else if(inst.B8.x6==0x08){
+                *cause=EVENT_RFI;
+            }else if(inst.B8.x6==0x0c){
+                *cause=EVENT_BSW_0;
+            }else if(inst.B8.x6==0x0d){
+                *cause=EVENT_BSW_1;
+            }
+        }
+    }
+}
+
+/* Emulate 'rsm imm24': reassemble the 24-bit immediate from its
+ * i/i2/imm fields and clear those psr system-mask bits. */
+IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst)
+{
+    UINT64 imm24;
+
+    imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) | inst.M44.imm;
+    return vmx_vcpu_reset_psr_sm(vcpu, imm24);
+}
+
+/* Emulate 'ssm imm24': reassemble the 24-bit immediate from its
+ * i/i2/imm fields and set those psr system-mask bits. */
+IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst)
+{
+    UINT64 imm24;
+
+    imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) | inst.M44.imm;
+    return vmx_vcpu_set_psr_sm(vcpu, imm24);
+}
+
+unsigned long last_guest_psr = 0x0;  /* debug: last psr value handed to the guest */
+/* Emulate 'mov r1 = psr': deliver psr bits 0-31 plus 35-36 to the
+ * target register (the architected readable subset). */
+IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst)
+{
+    UINT64 tgt = inst.M33.r1;
+    UINT64 val;
+    IA64FAULT fault;
+
+/*
+    if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
+        return vmx_vcpu_set_gr(vcpu, tgt, val);
+    else return fault;
+    */
+    val = vmx_vcpu_get_psr(vcpu);
+    val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+    last_guest_psr = val;
+    return vmx_vcpu_set_gr(vcpu, tgt, val, 0);
+}
+
+/**
+ * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
+ */
+IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
+{
+    UINT64 val;
+    IA64FAULT fault;
+    /* A NATed source register is not handled yet; see panic below. */
+    if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
+       panic(" get_psr nat bit fault\n");
+
+       /* Only psr bits 0-31 are writable here; the upper half is
+        * preserved from the current virtual psr. */
+       val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32));
+#if 0
+       if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32))))
+               while(1);
+       else
+               last_mov_from_psr = 0;
+#endif
+        return vmx_vcpu_set_psr_l(vcpu,val);
+}
+
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+/* Emulate 'rfi'.  With CHECK_FAULT, a non-zero guest cpl raises a
+ * Privileged Operation fault; returning to IA-32 mode (ipsr.is) is
+ * unsupported and panics. */
+IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst)
+{
+    IA64_PSR  vpsr;
+    REGS *regs;
+#ifdef  CHECK_FAULT
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    regs=vcpu_regs(vcpu);
+    vpsr.val=regs->cr_ipsr;
+    if ( vpsr.is == 1 ) {
+        panic ("We do not support IA32 instruction yet");
+    }
+
+    return vmx_vcpu_rfi(vcpu);
+}
+
+/* Emulate 'bsw.0': switch to register bank 0. */
+IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst)
+{
+#ifdef  CHECK_FAULT
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+   return vmx_vcpu_bsw0(vcpu);
+}
+
+/* Emulate 'bsw.1': switch to register bank 1. */
+IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst)
+{
+#ifdef  CHECK_FAULT
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    return vmx_vcpu_bsw1(vcpu);
+}
+
+/* Emulate 'cover' -- all the work happens in vmx_vcpu_cover(). */
+IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst)
+{
+    return vmx_vcpu_cover(vcpu);
+}
+
+/* Emulate 'ptc.l r3, r2': purge the local TLB for the address in r3
+ * with the page size encoded in r2 bits 2-7.  Guest cpl != 0 raises a
+ * Privileged Operation fault.
+ * Fix: the vmx_vcpu_get_gr() condition line had been wrapped by the
+ * mail gateway (losing its '+' prefix), corrupting the patch; it is
+ * re-joined into a single diff line here.
+ */
+IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst)
+{
+    u64 r2,r3;
+    ISR isr;
+    IA64_PSR  vpsr;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if (unimplemented_gva(vcpu,r3) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+    return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7));
+}
+
+/* Emulate 'ptc.e r3': purge the entire translation cache; r3 supplies
+ * the (implementation-defined) purge argument. */
+IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst)
+{
+    u64 r3;
+    ISR isr;
+    IA64_PSR  vpsr;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+    return vmx_vcpu_ptc_e(vcpu,r3);
+}
+
+/* 'ptc.g' is handled identically to 'ptc.l' for a virtual TLB. */
+IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst)
+{
+    return vmx_emul_ptc_l(vcpu, inst);
+}
+
+/* 'ptc.ga' is handled identically to 'ptc.l' for a virtual TLB. */
+IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst)
+{
+    return vmx_emul_ptc_l(vcpu, inst);
+}
+
+/* Shared operand-fetch/fault-check helper for ptr.d / ptr.i: reads
+ * r3 (address) into *pr3 and r2 (page-size field) into *pr2.
+ * Returns IA64_FAULT if a guest fault was injected. */
+IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3)
+{
+    ISR isr;
+    IA64FAULT  ret1, ret2;
+
+#ifdef  VMAL_NO_FAULT_CHECK
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3);
+    ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) {
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+    }
+    /* NOTE(review): 'r3' is not declared in this function -- this
+     * #ifdef arm would not compile; presumably *pr3 was intended. */
+    if (unimplemented_gva(vcpu,r3) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+   return IA64_NO_FAULT;
+}
+
+/* Emulate 'ptr.d r3, r2': purge a data translation register. */
+IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst)
+{
+    u64 r2, r3;
+
+    if (ptr_fault_check(vcpu, inst, &r2, &r3) == IA64_FAULT)
+        return IA64_FAULT;
+    /* Page size lives in bits 2-7 of r2. */
+    return vmx_vcpu_ptr_d(vcpu, r3, bits(r2, 2, 7));
+}
+
+/* Emulate 'ptr.i r3, r2': purge an instruction translation register. */
+IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst)
+{
+    u64 r2, r3;
+
+    if (ptr_fault_check(vcpu, inst, &r2, &r3) == IA64_FAULT)
+        return IA64_FAULT;
+    /* Page size lives in bits 2-7 of r2. */
+    return vmx_vcpu_ptr_i(vcpu, r3, bits(r2, 2, 7));
+}
+
+
+/* Emulate 'thash r1 = r3': compute the VHPT hash address for r3 and
+ * store it in r1.  With CHECK_FAULT, a NATed or unimplemented address
+ * writes a NATed r1 instead of faulting. */
+IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst)
+{
+    u64 r1,r3;
+    ISR visr;
+    IA64_PSR vpsr;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(unimplemented_gva(vcpu, r3)){
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_thash(vcpu, r3, &r1);
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+
+/* Emulate 'ttag r1 = r3': compute the VHPT tag for r3 and store it in
+ * r1.  Fault handling mirrors vmx_emul_thash(). */
+IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst)
+{
+    u64 r1,r3;
+    ISR visr;
+    IA64_PSR vpsr;
+ #ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(unimplemented_gva(vcpu, r3)){
+        vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+        return IA64_NO_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_ttag(vcpu, r3, &r1);
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+
+/* Emulate 'tpa r1 = r3': translate virtual address r3 to a physical
+ * address in r1; fails (injecting the appropriate fault elsewhere)
+ * when vmx_vcpu_tpa() cannot translate. */
+IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst)
+{
+    u64 r1,r3;
+    ISR visr;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if(vpsr.cpl!=0){
+        visr.val=0;
+        vcpu_set_isr(vcpu, visr.val);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,1);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if (unimplemented_gva(vcpu,r3) ) {
+        // inject unimplemented_data_address_fault
+        visr.val = set_isr_ei_ni(vcpu);
+        visr.code = IA64_RESERVED_REG_FAULT;
+        // NOTE(review): 'isr' is not declared here -- 'visr.val' was
+        // presumably intended; this #ifdef arm would not compile.
+        vcpu_set_isr(vcpu, isr.val);
+        // FAULT_UNIMPLEMENTED_DATA_ADDRESS.
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif  //CHECK_FAULT
+
+    if(vmx_vcpu_tpa(vcpu, r3, &r1)){
+        return IA64_FAULT;
+    }
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+/* Emulate 'tak r1 = r3': read the access key for the translation of
+ * r3 into r1. */
+IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst)
+{
+    u64 r1,r3;
+    ISR visr;
+    IA64_PSR vpsr;
+    int fault=IA64_NO_FAULT;
+#ifdef  CHECK_FAULT
+    visr.val=0;
+    if(check_target_register(vcpu, inst.M46.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if(vpsr.cpl!=0){
+        vcpu_set_isr(vcpu, visr.val);
+        return IA64_FAULT;
+    }
+#endif
+    if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,1);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif
+    }
+    if(vmx_vcpu_tak(vcpu, r3, &r1)){
+        return IA64_FAULT;
+    }
+    vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+    return(IA64_NO_FAULT);
+}
+
+
+/************************************
+ * Insert translation register/cache
+************************************/
+
+/* Emulate 'itr.d': insert a data translation register.  Legal only
+ * with psr.ic off; the slot number comes from r3, the pte from r2,
+ * and itir/ifa from the virtual control registers.
+ * Fix: the vmx_vcpu_get_gr() condition line had been wrapped by the
+ * mail gateway (losing its '+' prefix), corrupting the patch; it is
+ * re-joined into a single diff line here.
+ */
+IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst)
+{
+    UINT64 fault, itir, ifa, pte, slot;
+    ISR isr;
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.ic ) {
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if(is_reserved_rr_register(vcpu, slot)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+
+    if (vmx_vcpu_get_itir(vcpu,&itir)){
+        return(IA64_FAULT);
+    }
+    if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+        return(IA64_FAULT);
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if (is_reserved_itir_field(vcpu, itir)) {
+       // TODO
+       return IA64_FAULT;
+    }
+    if (unimplemented_gva(vcpu,ifa) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+
+    return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot));
+}
+
+/* Emulate 'itr.i': insert an instruction translation register; same
+ * structure as vmx_emul_itr_d().
+ * Fix: the vmx_vcpu_get_gr() condition line had been wrapped by the
+ * mail gateway (losing its '+' prefix), corrupting the patch; it is
+ * re-joined into a single diff line here.
+ */
+IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst)
+{
+    UINT64 fault, itir, ifa, pte, slot;
+    ISR isr;
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.ic ) {
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef  VMAL_NO_FAULT_CHECK
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if(is_reserved_rr_register(vcpu, slot)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+
+    if (vmx_vcpu_get_itir(vcpu,&itir)){
+        return(IA64_FAULT);
+    }
+    if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+        return(IA64_FAULT);
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if (is_reserved_itir_field(vcpu, itir)) {
+       // TODO
+       return IA64_FAULT;
+    }
+    if (unimplemented_gva(vcpu,ifa) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+
+   return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot));
+}
+
+/* Shared operand-fetch/fault-check helper for itc.d / itc.i: requires
+ * psr.ic off, reads the pte from r2 and itir/ifa from the virtual
+ * control registers.  Returns IA64_FAULT if a guest fault was raised.
+ * Fix: the signature line had been wrapped by the mail gateway
+ * (continuation lost its '+' prefix), corrupting the patch; it is
+ * re-joined into a single diff line here.
+ */
+IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte)
+{
+    UINT64 fault;
+    ISR isr;
+    IA64_PSR  vpsr;
+    IA64FAULT  ret1;
+
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.ic ) {
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+
+#ifdef  VMAL_NO_FAULT_CHECK
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+    ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte);
+#ifdef  VMAL_NO_FAULT_CHECK
+    if( ret1 != IA64_NO_FAULT ){
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+    }
+#endif // VMAL_NO_FAULT_CHECK
+
+    if (vmx_vcpu_get_itir(vcpu,itir)){
+        return(IA64_FAULT);
+    }
+    if (vmx_vcpu_get_ifa(vcpu,ifa)){
+        return(IA64_FAULT);
+    }
+#ifdef  VMAL_NO_FAULT_CHECK
+    if (unimplemented_gva(vcpu,ifa) ) {
+        isr.val = set_isr_ei_ni(vcpu);
+        isr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, isr.val);
+        unimpl_daddr(vcpu);
+        return IA64_FAULT;
+   }
+#endif // VMAL_NO_FAULT_CHECK
+   return IA64_NO_FAULT;
+}
+
+/* Emulate 'itc.d': insert a data translation-cache entry. */
+IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst)
+{
+    UINT64 itir, ifa, pte;
+
+    if (itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT)
+        return IA64_FAULT;
+    return vmx_vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+/* Emulate 'itc.i': insert an instruction translation-cache entry. */
+IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst)
+{
+    UINT64 itir, ifa, pte;
+
+    if (itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT)
+        return IA64_FAULT;
+    return vmx_vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+*************************************/
+
+// Emulate "mov ar[imm]=..." -- only AR 44 (ar.itc) is supported; any
+// other application register panics.
+IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
+{
+    // I27 and M30 are identical for these fields
+    if(inst.M30.ar3!=44){
+        panic("Can't support ar register other than itc");
+    }
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    UINT64  imm;
+    // NOTE(review): negating the raw imm field assumes a
+    // sign-magnitude split of the M30 immediate -- confirm against the
+    // M30 instruction format.
+    if(inst.M30.s){
+        imm = -inst.M30.imm;
+    }else{
+        imm = inst.M30.imm;
+    }
+    return (vmx_vcpu_set_itc(vcpu, imm));
+}
+
+// Emulate "mov ar[reg]=r2" -- again only ar.itc (AR 44) is handled.
+// NOTE(review): when CHECK_FAULT is undefined, a failing
+// vmx_vcpu_get_gr() falls through with r2 uninitialized -- confirm.
+IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
+{
+    // I26 and M29 are identical for these fields
+    u64 r2;
+    if(inst.M29.ar3!=44){
+        panic("Can't support ar register other than itc");
+    }
+    if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    return (vmx_vcpu_set_itc(vcpu, r2));
+}
+
+
+// Emulate "mov r1=ar[reg]" -- reads ar.itc (AR 44) into GR[r1]; any
+// other application register panics.
+IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst)
+{
+    // I28 and M31 are identical for these fields
+    // (was mislabelled "I27 and M30"; the code decodes M31)
+    if(inst.M31.ar3!=44){
+        panic("Can't support ar register other than itc");
+    }
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu,inst.M31.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    // Reading ar.itc is privileged only when PSR.si is set -- hence
+    // the combined vpsr.si && cpl test (confirm against PSR.si spec).
+    if (vpsr.si&& vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    u64 r1;
+    vmx_vcpu_get_itc(vcpu,&r1);
+    vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0);
+    return IA64_NO_FAULT;
+}
+
+
+/********************************
+ * Moves to privileged registers
+********************************/
+
+/*
+ * mov-to privileged/indirect register emulators (M42 format).
+ * All share one shape: optional CHECK_FAULT privilege screen, fetch
+ * r3 (index) and r2 (value) from the guest GRs, then forward to the
+ * corresponding vmx_vcpu_set_* handler.
+ * NOTE(review): when CHECK_FAULT is undefined, a failing
+ * vmx_vcpu_get_gr() falls through with r3/r2 uninitialized -- confirm
+ * this is intended in the non-debug build.
+ */
+// Emulate "mov pkr[r3]=r2" -- write a guest protection-key register.
+IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_pkr(vcpu,r3,r2));
+}
+
+// Emulate "mov rr[r3]=r2" -- write a guest region register.
+IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_rr(vcpu,r3,r2));
+}
+
+// Emulate "mov dbr[r3]=r2" -- write a guest data breakpoint register.
+IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_dbr(vcpu,r3,r2));
+}
+
+// Emulate "mov ibr[r3]=r2" -- write a guest instruction breakpoint register.
+IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_ibr(vcpu,r3,r2));
+}
+
+// Emulate "mov pmc[r3]=r2" -- write a guest performance monitor config register.
+IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_pmc(vcpu,r3,r2));
+}
+
+// Emulate "mov pmd[r3]=r2" -- write a guest performance monitor data register.
+IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r2;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    
if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+    return (vmx_vcpu_set_pmd(vcpu,r3,r2));
+}
+
+
+/**********************************
+ * Moves from privileged registers
+ **********************************/
+
+// Emulate "mov r1=rr[r3]" -- read a guest region register; the region
+// number is the top bits of r3 (r3 >> VRN_SHIFT).
+IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    // NOTE(review): unlike the sibling mov_from_* emulators, this
+    // branch injects the fault but does NOT "return IA64_FAULT" --
+    // execution continues into vmx_vcpu_get_rr().  Confirm intended.
+    if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_rr(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+/*
+ * mov-from indirect register emulators (M43 format).
+ * Shape: optional CHECK_FAULT target/privilege/reserved-index screens,
+ * fetch index from GR[r3], read via vmx_vcpu_get_*, write GR[r1].
+ * The cpuid variant below omits the privilege screen (its code has no
+ * vpsr test -- presumably cpuid is readable at any PL; confirm).
+ */
+// Emulate "mov r1=pkr[r3]" -- read a guest protection-key register.
+IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_indirect_register(vcpu,r3)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_pkr(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+// Emulate "mov r1=dbr[r3]" -- read a guest data breakpoint register.
+IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_indirect_register(vcpu,r3)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_dbr(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+// Emulate "mov r1=ibr[r3]" -- read a guest instruction breakpoint register.
+IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_indirect_register(vcpu,r3)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_ibr(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+// Emulate "mov r1=pmc[r3]" -- read a guest performance monitor config register.
+IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_indirect_register(vcpu,r3)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_pmc(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+// Emulate "mov r1=cpuid[r3]" -- read a guest CPUID register (no
+// privilege screen here, unlike the emulators above).
+IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst)
+{
+    u64 r3,r1;
+#ifdef  CHECK_FAULT
+    if(check_target_register(vcpu, inst.M43.r1)){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+#endif //CHECK_FAULT
+     if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef  CHECK_FAULT
+    if(is_reserved_indirect_register(vcpu,r3)){
+        set_rsv_reg_field_isr(vcpu);
+        rsv_reg_field(vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    vmx_vcpu_get_cpuid(vcpu,r3,&r1);
+    return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+// Emulate "mov cr[cr3]=r2": mask ignored fields, mirror the value into
+// the per-vcpu VPD shadow (vcr[]), then dispatch to the per-CR setter.
+// Writes to read-only CRs (65 IVR, 68-71 IRRs) are silently accepted.
+IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst)
+{
+    u64 r2,cr3;  // NOTE(review): cr3 is never used
+#ifdef  CHECK_FAULT
+    IA64_PSR  vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    
if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){
+#ifdef  CHECK_FAULT
+        set_isr_reg_nat_consumption(vcpu,0,0);
+        rnat_comsumption(vcpu);
+        return IA64_FAULT;
+#endif  //CHECK_FAULT
+    }
+#ifdef   CHECK_FAULT
+    if ( check_cr_rsv_fields (inst.M32.cr3, r2)) {
+        /* Inject Reserved Register/Field fault
+         * into guest */
+        set_rsv_reg_field_isr (vcpu,0);
+        rsv_reg_field (vcpu);
+        return IA64_FAULT;
+    }
+#endif  //CHECK_FAULT
+    // Drop bits the architecture ignores for this CR before shadowing.
+    extern u64 cr_igfld_mask(int index, u64 value);
+    r2 = cr_igfld_mask(inst.M32.cr3,r2);
+    VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2;
+    switch (inst.M32.cr3) {
+        case 0: return vmx_vcpu_set_dcr(vcpu,r2);
+        case 1: return vmx_vcpu_set_itm(vcpu,r2);
+        case 2: return vmx_vcpu_set_iva(vcpu,r2);
+        case 8: return vmx_vcpu_set_pta(vcpu,r2);
+        case 16:return vmx_vcpu_set_ipsr(vcpu,r2);
+        case 17:return vmx_vcpu_set_isr(vcpu,r2);
+        case 19:return vmx_vcpu_set_iip(vcpu,r2);
+        case 20:return vmx_vcpu_set_ifa(vcpu,r2);
+        case 21:return vmx_vcpu_set_itir(vcpu,r2);
+        case 22:return vmx_vcpu_set_iipa(vcpu,r2);
+        case 23:return vmx_vcpu_set_ifs(vcpu,r2);
+        case 24:return vmx_vcpu_set_iim(vcpu,r2);
+        case 25:return vmx_vcpu_set_iha(vcpu,r2);
+        case 64:printk("SET LID to 0x%lx\n", r2);
+               return vmx_vcpu_set_lid(vcpu,r2);
+        case 65:return IA64_NO_FAULT;
+        case 66:return vmx_vcpu_set_tpr(vcpu,r2);
+        case 67:return vmx_vcpu_set_eoi(vcpu,r2);
+        case 68:return IA64_NO_FAULT;
+        case 69:return IA64_NO_FAULT;
+        case 70:return IA64_NO_FAULT;
+        case 71:return IA64_NO_FAULT;
+        case 72:return vmx_vcpu_set_itv(vcpu,r2);
+        case 73:return vmx_vcpu_set_pmv(vcpu,r2);
+        case 74:return vmx_vcpu_set_cmcv(vcpu,r2);
+        case 80:return vmx_vcpu_set_lrr0(vcpu,r2);
+        case 81:return vmx_vcpu_set_lrr1(vcpu,r2);
+        default: return IA64_NO_FAULT;
+    }
+}
+
+
+// Helper for vmx_emul_mov_from_cr: read CR "cr" into 'val' and, on
+// success, store it to GR[tgt]; otherwise propagate the fault code.
+// Relies on the caller's locals 'vcpu', 'tgt', 'val' and 'fault'.
+// (The trailing ';' means "return cr_get(x);" expands to an extra
+// empty statement -- harmless.)
+#define cr_get(cr) \
+    ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\
+        vmx_vcpu_set_gr(vcpu, tgt, val,0):fault;
+
+
+// Emulate "mov r1=cr[cr3]": read the guest control register and write
+// it to GR[r1].  CR 65 (IVR) is read via the interrupt path; CR 67
+// (EOI) always reads as 0; reserved CRs panic.
+IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+    UINT64 tgt = inst.M33.r1;
+    UINT64 val;
+    IA64FAULT fault;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    // NOTE(review): parenthesis bug -- the "||(vpsr.ic&&...)" clause is
+    // INSIDE the is_read_only_cr() argument list, so is_read_only_cr()
+    // is called on a boolean and the ic-check never stands alone.
+    // Likely intended: is_read_only_cr(inst.M33.cr3) || (vpsr.ic && ...).
+    if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3||
+        (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3)))){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+
+//    from_cr_cnt[inst.M33.cr3]++;
+    switch (inst.M33.cr3) {
+        case 0: return cr_get(dcr);
+        case 1: return cr_get(itm);
+        case 2: return cr_get(iva);
+        case 8: return cr_get(pta);
+        case 16:return cr_get(ipsr);
+        case 17:return cr_get(isr);
+        case 19:return cr_get(iip);
+        case 20:return cr_get(ifa);
+        case 21:return cr_get(itir);
+        case 22:return cr_get(iipa);
+        case 23:return cr_get(ifs);
+        case 24:return cr_get(iim);
+        case 25:return cr_get(iha);
+//     case 64:val = ia64_getreg(_IA64_REG_CR_LID);
+//          return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+        case 64:return cr_get(lid);
+        case 65:
+             vmx_vcpu_get_ivr(vcpu,&val);
+             return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+        case 66:return cr_get(tpr);
+        case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0);  // EOI reads as 0
+        case 68:return cr_get(irr0);
+        case 69:return cr_get(irr1);
+        case 70:return cr_get(irr2);
+        case 71:return cr_get(irr3);
+        case 72:return cr_get(itv);
+        case 73:return cr_get(pmv);
+        case 74:return cr_get(cmcv);
+        case 80:return cr_get(lrr0);
+        case 81:return cr_get(lrr1);
+        default:
+            // NOTE(review): panics rather than injecting a fault; also
+            // no return after panic (falls off a non-void function).
+            panic("Read reserved cr register");
+    }
+}
+
+
+// Run after each emulated instruction: if emulation raised a new
+// interrupt condition, clear the flag and re-run vhpi detection.
+static void post_emulation_action(VCPU *vcpu)
+{
+    if ( vcpu->arch.irq_new_condition ) {
+        vcpu->arch.irq_new_condition = 0;
+        vhpi_detection(vcpu);
+    }
+}
+
+//#define  BYPASS_VMAL_OPCODE
+extern IA64_SLOT_TYPE  slot_types[0x20][3];
+// Fetch the 16-byte instruction bundle at guest IP 'iip' from the
+// current domain, 8 bytes at a time.
+IA64_BUNDLE __vmx_get_domain_bundle(u64 iip)
+{
+       IA64_BUNDLE bundle;
+
+       fetch_code( current,iip, &bundle.i64[0]);
+       fetch_code( current,iip+8, &bundle.i64[1]);
+       return bundle;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void
+vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode)
+{
+    IA64_BUNDLE bundle;       // used only under BYPASS_VMAL_OPCODE
+    int slot;                 // used only under BYPASS_VMAL_OPCODE
+    IA64_SLOT_TYPE slot_type; // used only under BYPASS_VMAL_OPCODE
+    IA64FAULT status;
+    INST64 inst;
+    REGS * regs;
+    UINT64 iip;
+    regs = vcpu_regs(vcpu);
+    iip = regs->cr_iip;
+    IA64_PSR vpsr;            // NOTE(review): unused in this function
+/*
+    if (privop_trace) {
+        static long i = 400;
+        //if (i > 0) printf("privop @%p\n",iip);
+        if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
+            iip,ia64_get_itc(),ia64_get_itm());
+        i--;
+    }
+*/
+#ifdef  VTLB_DEBUG
+    check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
+    dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
+#endif
+#if 0
+if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
+               printf ("VMAL decode error: cause - %lx; op - %lx\n", 
+                       cause, opcode );
+               return;
+}
+#endif
+#ifdef BYPASS_VMAL_OPCODE
+    // make a local copy of the bundle containing the privop
+    bundle = __vmx_get_domain_bundle(iip);
+    slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+    if (!slot) inst.inst = bundle.slot0;
+    else if (slot == 1)
+        inst.inst = bundle.slot1a + (bundle.slot1b<<18);
+    else if (slot == 2) inst.inst = bundle.slot2;
+    else printf("priv_handle_op: illegal slot: %d\n", slot);
+    slot_type = slot_types[bundle.template][slot];
+    ia64_priv_decoder(slot_type, inst, &cause);
+    if(cause==0){
+        printf("This instruction at 0x%lx slot %d can't be  virtualized", iip, 
slot);
+        panic("123456\n");
+    }
+#else
+    inst.inst=opcode;
+#endif /* BYPASS_VMAL_OPCODE */
+
+    /*
+     * Switch to actual virtual rid in rr0 and rr4,
+     * which is required by some tlb related instructions.
+     */
+    prepare_if_physical_mode(vcpu);
+
+    // Dispatch on the virtualization-fault cause to the matching
+    // per-instruction emulation routine.
+    switch(cause) {
+    case EVENT_RSM:
+        status=vmx_emul_rsm(vcpu, inst);
+        break;
+    case EVENT_SSM:
+        status=vmx_emul_ssm(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PSR:
+        status=vmx_emul_mov_to_psr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PSR:
+        status=vmx_emul_mov_from_psr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_CR:
+        status=vmx_emul_mov_from_cr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_CR:
+        status=vmx_emul_mov_to_cr(vcpu, inst);
+        break;
+    case EVENT_BSW_0:
+        status=vmx_emul_bsw0(vcpu, inst);
+        break;
+    case EVENT_BSW_1:
+        status=vmx_emul_bsw1(vcpu, inst);
+        break;
+    case EVENT_COVER:
+        status=vmx_emul_cover(vcpu, inst);
+        break;
+    case EVENT_RFI:
+        status=vmx_emul_rfi(vcpu, inst);
+        break;
+    case EVENT_ITR_D:
+        status=vmx_emul_itr_d(vcpu, inst);
+        break;
+    case EVENT_ITR_I:
+        status=vmx_emul_itr_i(vcpu, inst);
+        break;
+    case EVENT_PTR_D:
+        status=vmx_emul_ptr_d(vcpu, inst);
+        break;
+    case EVENT_PTR_I:
+        status=vmx_emul_ptr_i(vcpu, inst);
+        break;
+    case EVENT_ITC_D:
+        status=vmx_emul_itc_d(vcpu, inst);
+        break;
+    case EVENT_ITC_I:
+        status=vmx_emul_itc_i(vcpu, inst);
+        break;
+    case EVENT_PTC_L:
+        status=vmx_emul_ptc_l(vcpu, inst);
+        break;
+    case EVENT_PTC_G:
+        status=vmx_emul_ptc_g(vcpu, inst);
+        break;
+    case EVENT_PTC_GA:
+        status=vmx_emul_ptc_ga(vcpu, inst);
+        break;
+    case EVENT_PTC_E:
+        status=vmx_emul_ptc_e(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_RR:
+        status=vmx_emul_mov_to_rr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_RR:
+        status=vmx_emul_mov_from_rr(vcpu, inst);
+        break;
+    case EVENT_THASH:
+        status=vmx_emul_thash(vcpu, inst);
+        break;
+    case EVENT_TTAG:
+        status=vmx_emul_ttag(vcpu, inst);
+        break;
+    case EVENT_TPA:
+        status=vmx_emul_tpa(vcpu, inst);
+        break;
+    case EVENT_TAK:
+        status=vmx_emul_tak(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_AR_IMM:
+        status=vmx_emul_mov_to_ar_imm(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_AR:
+        status=vmx_emul_mov_to_ar_reg(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_AR:
+        status=vmx_emul_mov_from_ar_reg(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_DBR:
+        status=vmx_emul_mov_to_dbr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_IBR:
+        status=vmx_emul_mov_to_ibr(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PMC:
+        status=vmx_emul_mov_to_pmc(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PMD:
+        status=vmx_emul_mov_to_pmd(vcpu, inst);
+        break;
+    case EVENT_MOV_TO_PKR:
+        status=vmx_emul_mov_to_pkr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_DBR:
+        status=vmx_emul_mov_from_dbr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_IBR:
+        status=vmx_emul_mov_from_ibr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PMC:
+        status=vmx_emul_mov_from_pmc(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_PKR:
+        status=vmx_emul_mov_from_pkr(vcpu, inst);
+        break;
+    case EVENT_MOV_FROM_CPUID:
+        status=vmx_emul_mov_from_cpuid(vcpu, inst);
+        break;
+    case EVENT_VMSW:
+        printf ("Unimplemented instruction %d\n", cause);
+       status=IA64_FAULT;
+        break;
+    default:
+        // NOTE(review): "%d" with a UINT64 'cause' is a printf format
+        // mismatch on LP64 -- confirm and use %lx/%ld.
+        printf("unknown cause %d, iip: %lx, ipsr: %lx\n", 
cause,regs->cr_iip,regs->cr_ipsr);
+        while(1);
+        // NOTE(review): the spin above never exits, so the IA64_RETRY
+        // fallback below is unreachable dead code.
+       /* For unknown cause, let hardware to re-execute */
+       status=IA64_RETRY;
+        break;
+//        panic("unknown cause in virtualization intercept");
+    };
+
+#if 0
+    if (status == IA64_FAULT)
+       panic("Emulation failed with cause %d:\n", cause);
+#endif
+
+    // Advance the guest IP past the emulated instruction, except when
+    // the instruction itself redirected control flow (rfi).
+    if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) {
+        vmx_vcpu_increment_iip(vcpu);
+    }
+
+    recover_if_physical_mode(vcpu);
+    post_emulation_action (vcpu);
+//TODO    set_irq_check(v);
+    return;
+
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vsa.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_vsa.S       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,84 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vsa.c: Call PAL virtualization services.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Arun Sharma <arun.sharma@xxxxxxxxx>
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <asm/asmmacro.h>
+
+
+    .text
+
+/*
+ * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2,
+ *                  UINT64 arg3, UINT64 arg4, UINT64 arg5,
+ *                  UINT64 arg6, UINT64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ *  rest are not consumed.
+ */
+// Call a PAL virtualization (VSA) service: compute the service entry
+// as *(__vsa_base) + proc offset (in0), pass up to 3 args in r25-r27,
+// branch with interrupts and interruption collection disabled, and
+// restore PSR.i/PSR.ic afterwards from the saved PSR bits.
+GLOBAL_ENTRY(ia64_call_vsa)
+    .regstk 4,4,0,0
+
+rpsave  =   loc0
+pfssave =   loc1
+psrsave =   loc2
+entry   =   loc3
+hostret =   r24
+
+    alloc   pfssave=ar.pfs,4,4,0,0
+    mov rpsave=rp
+    movl    entry=@gprel(__vsa_base)
+1:  mov hostret=ip
+    mov r25=in1         // copy arguments
+    mov r26=in2
+    mov r27=in3
+    mov psrsave=psr
+    ;;
+    add entry=entry,gp
+    tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+    tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+    ;;
+    ld8 entry=[entry]       // read entry point
+    ;;
+    add hostret=2f-1b,hostret   // calculate return address
+    add entry=entry,in0
+    ;;
+    rsm psr.i | psr.ic
+    ;;
+    srlz.d
+    mov b6=entry
+    br.cond.sptk b6         // call the service
+2:
+    // Architectural sequence for enabling interrupts if necessary
+(p7)    ssm psr.ic
+    ;;
+(p7)    srlz.d
+    ;;
+(p6)    ssm psr.i
+    ;;
+    mov rp=rpsave
+    mov ar.pfs=pfssave
+    // NOTE(review): the service result apparently comes back in r31
+    // and is copied into r8 (C return register) -- confirm vs VSA spec.
+    mov r8=r31
+    ;;
+    srlz.d
+    br.ret.sptk rp
+
+END(ia64_call_vsa)
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vtlb.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vtlb.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1094 @@
+
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ *  XiaoYan Feng (Fleming Feng) (Fleming.feng@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/mm.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#define  MAX_CCH_LENGTH     40
+
+
+// Thread the collision-chain buffer (hcb->cch_buf, hcb->cch_sz bytes)
+// into a singly linked free list rooted at hcb->cch_freelist.
+static void cch_mem_init(thash_cb_t *hcb)
+{
+    thash_cch_mem_t *p, *q;
+
+    hcb->cch_freelist = p = hcb->cch_buf;
+
+    for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz;
+        p++, q++ ) {
+        p->next = q;
+    }
+    p->next = NULL;
+}
+
+// Pop one chain node from the free list.
+// NOTE(review): if the free list is empty, p stays NULL and
+// "&(p->data)" yields a bogus NULL-based pointer instead of NULL --
+// callers must guarantee the pool is not exhausted, or this should
+// return NULL explicitly.
+static thash_data_t *cch_alloc(thash_cb_t *hcb)
+{
+    thash_cch_mem_t *p;
+
+    if ( (p = hcb->cch_freelist) != NULL ) {
+        hcb->cch_freelist = p->next;
+    }
+    return &(p->data);
+}
+
+// Push a chain node back onto the free list.
+static void cch_free(thash_cb_t *hcb, thash_data_t *cch)
+{
+    thash_cch_mem_t *p = (thash_cch_mem_t*)cch;
+
+    p->next = hcb->cch_freelist;
+    hcb->cch_freelist = p;
+}
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+// Return 1 iff rid:va (of cache-line type cl) is covered by 'tlb':
+// same rid and cl, and va inside the page spanned by tlb->vadr/ps.
+// The "ea1 == 0" term handles a page whose end wraps past the top of
+// the 64-bit address space.
+static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE 
cl)
+{
+    u64  size1,sa1,ea1;
+
+    if ( tlb->rid != rid || tlb->cl != cl )
+        return 0;
+    size1 = PSIZE(tlb->ps);
+    sa1 = tlb->vadr & ~(size1-1);   // mask the low address bits
+    ea1 = sa1 + size1;
+
+    if ( va >= sa1 && (va < ea1 || ea1 == 0) )
+        return 1;
+    else
+        return 0;
+}
+
+/*
+ * Only for TLB format.
+ * Return 1 iff 'entry' (valid, same rid/cl) overlaps [sva, eva);
+ * the "!= 0" terms again tolerate end addresses that wrapped to 0.
+ */
+static int
+__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 
sva, u64 eva)
+{
+    uint64_t size1,size2,sa1,ea1,ea2;   // NOTE(review): size2, ea2 unused
+
+    if ( entry->invalid || entry->rid != rid || entry->cl != cl ) {
+        return 0;
+    }
+    size1=PSIZE(entry->ps);
+    sa1 = entry->vadr & ~(size1-1); // mask the low address bits
+    ea1 = sa1 + size1;
+    if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) ) 
+        return 0;
+    else
+        return 1;
+
+}
+
+// Invalidate one TR slot, giving the owner a chance to observe the
+// removal via the optional remove_notifier callback first.
+static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr)
+{
+    if ( hcb->remove_notifier ) {
+        (hcb->remove_notifier)(hcb,tr);
+    }
+    tr->invalid = 1;
+}
+
+// Copy 'data' into TR slot 'tr' and record its slot index.
+static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx)
+{
+    *tr = *data;
+    tr->tr_idx = idx;
+}
+
+
+// Mark every ITR and DTR slot invalid.
+static void __init_tr(thash_cb_t *hcb)
+{
+    int i;
+    thash_data_t *tr;
+
+    for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) {
+        tr[i].invalid = 1;
+    }
+    for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) {
+        tr[i].invalid = 1;
+    }
+}
+
+/*
+ * Replace TR entry.
+ */
+// Install 'insert' into ITR/DTR slot 'idx' (side chosen by insert->cl),
+// removing any valid entry currently occupying the slot.
+static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx)
+{
+    thash_data_t *tr;
+
+    if ( insert->cl == ISIDE_TLB ) {
+        tr = &ITR(hcb,idx);
+    }
+    else {
+        tr = &DTR(hcb,idx);
+    }
+    if ( !INVALID_TLB(tr) ) {
+        __rem_tr(hcb, tr);
+    }
+    __set_tr (tr, insert, idx);
+}
+
+/*
+ * remove TR entry.
+ */
+static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx)
+{
+    thash_data_t *tr;
+
+    if ( cl == ISIDE_TLB ) {
+        tr = &ITR(hcb,idx);
+    }
+    else {
+        tr = &DTR(hcb,idx);
+    }
+    if ( !INVALID_TLB(tr) ) {
+        __rem_tr(hcb, tr);
+    }
+}
+
+/*
+ * Delete an thash entry in collision chain.
+ *  prev: the previous entry.
+ *  rem: the removed entry.
+ */
+// Free a non-head collision-chain node after notifying the owner.
+// (The 'prev' unlink was removed -- the caller is expected to have
+// unlinked 'rem' already; see the commented-out line.)
+static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t 
*rem)
+{
+    //prev->next = rem->next;
+    if ( hcb->remove_notifier ) {
+         (hcb->remove_notifier)(hcb,rem);
+    }
+    cch_free (hcb, rem);
+}
+
+/*
+ * Delete a thash entry that leads a collision chain: the head slot is
+ * fixed storage, so the second node (if any) is copied into the head
+ * and freed; otherwise the head is simply invalidated.
+ */
+static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash)
+{
+    thash_data_t *next=hash->next;
+
+    if ( hcb->remove_notifier ) {
+        (hcb->remove_notifier)(hcb,hash);
+    }
+    if ( next != NULL ) {
+        *hash = *next;
+        cch_free (hcb, next);
+    }
+    else {
+        INVALIDATE_HASH(hcb, hash);
+    }
+}
+
+// Linear search of the ITR or DTR array (side chosen by cl) for a
+// valid entry translating rid:va; returns the entry or NULL.
+thash_data_t *__vtr_lookup(thash_cb_t *hcb,
+            u64 rid, u64 va,
+            CACHE_LINE_TYPE cl)
+{
+    thash_data_t    *tr;
+    int   num,i;
+
+    if ( cl == ISIDE_TLB ) {
+        tr = &ITR(hcb,0);
+        num = NITRS;
+    }
+    else {
+        tr = &DTR(hcb,0);
+        num = NDTRS;
+    }
+    for ( i=0; i<num; i++ ) {
+        if ( !INVALID_ENTRY(hcb,&tr[i]) &&
+            __is_translated(&tr[i], rid, va, cl) )
+            return &tr[i];
+    }
+    return NULL;
+}
+
+
+/*
+ * Find overlap VHPT entry within current collision chain
+ * base on internal priv info.
+ */
+// VHPT variant: walk the current collision chain (hcb->priv cursor)
+// and return the next entry whose tag matches the search tag.
+static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb)
+{
+    thash_data_t    *cch;
+    thash_internal_t *priv = &hcb->priv;
+
+
+    for (cch=priv->cur_cch; cch; cch = cch->next) {
+        if ( priv->tag == cch->etag  ) {
+            return cch;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the next overlapping TLB/VHPT entry within the current
+ * collision chain, based on the internal priv search state (rid, cl,
+ * address range, and whether TC and/or TR sections are of interest).
+ */
+static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb)
+{
+    thash_data_t    *cch;
+    thash_internal_t *priv = &hcb->priv;
+
+    /* Find overlap TLB entry */
+    for (cch=priv->cur_cch; cch; cch = cch->next) {
+        if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr )  &&
+            __is_tlb_overlap(hcb, cch, priv->rid, priv->cl,
+                priv->_curva, priv->_eva) ) {
+            return cch;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Get the machine format of VHPT entry.
+ *    PARAS:
+ *  1: tlb: means the tlb format hash entry converting to VHPT.
+ *  2: va means the guest virtual address that must be coverd by
+ *     the translated machine VHPT.
+ *  3: vhpt: means the machine format VHPT converting from tlb.
+ *    NOTES:
+ *  1: In case of the machine address is discontiguous,
+ *     "tlb" needs to be covered by several machine VHPT. va
+ *     is used to choice one of them.
+ *  2: Foreign map is supported in this API.
+ *    RETURN:
+ *  0/1: means successful or fail.
+ *
+ */
+int __tlb_to_vhpt(thash_cb_t *hcb,
+            thash_data_t *tlb, u64 va,
+            thash_data_t *vhpt)
+{
+    u64 pages,mfn;
+    ia64_rr vrr;
+
+    ASSERT ( hcb->ht == THASH_VHPT );
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+    pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+    mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages);
+    if ( mfn == INVALID_MFN ) return 0;
+
+    // TODO with machine discontinuous address space issue.
+    vhpt->etag = (hcb->vs->tag_func)( hcb->pta,
+            tlb->vadr, tlb->rid, tlb->ps);
+    //vhpt->ti = 0;
+    vhpt->itir = tlb->itir & ~ITIR_RV_MASK;
+    vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+    vhpt->ppn = mfn;
+    vhpt->next = 0;
+    return 1;
+}
+
+
+/*
+ * Insert an entry to hash table. 
+ *    NOTES:
+ *  1: TLB entry may be TR, TC or Foreign Map. For TR entry,
+ *     itr[]/dtr[] need to be updated too.
+ *  2: Inserting to collision chain may trigger recycling if 
+ *     the buffer for collision chain is empty.
+ *  3: The new entry is inserted at the next of hash table.
+ *     (I.e. head of the collision chain)
+ *  4: The buffer holding the entry is allocated internally
+ *     from cch_buf or just in the hash table.
+ *  5: Return the entry in hash table or collision chain.
+ *  6: Input parameter, entry, should be in TLB format.
+ *      I.e. Has va, rid, ps...
+ *  7: This API is invoked by emulating ITC/ITR and tlb_miss.
+ *
+ */
+
+void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx)
+{
+    if ( hcb->ht != THASH_TLB || entry->tc ) {
+        panic("wrong parameter\n");
+    }
+    entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+    entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+    rep_tr(hcb, entry, idx);
+    return ;
+}
+
+thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry)
+{
+    thash_data_t *cch;
+    
+    cch = cch_alloc(hcb);
+    if(cch == NULL){
+        // recycle
+        if ( hcb->recycle_notifier ) {
+                hcb->recycle_notifier(hcb,(u64)entry);
+        }
+        thash_purge_all(hcb);
+        cch = cch_alloc(hcb);
+    }
+    return cch;
+}
+ 
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ *  1: When inserting VHPT to thash, "va" is a must covered
+ *  address by the inserted machine VHPT entry.
+ *  2: The format of entry is always in TLB.
+ *  3: The caller need to make sure the new entry will not overlap 
+ *     with any existed entry.
+ */
+void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+    thash_data_t    *hash_table, *cch;
+    int flag;
+    ia64_rr vrr;
+    u64 gppn;
+    u64 ppns, ppne;
+    
+    hash_table = (hcb->hash_func)(hcb->pta,
+                        va, entry->rid, entry->ps);
+    if( INVALID_ENTRY(hcb, hash_table) ) {
+        *hash_table = *entry;
+        hash_table->next = 0;
+    }
+    else {
+        // TODO: Add collision chain length limitation.
+        cch = __alloc_chain(hcb,entry);
+        
+        *cch = *hash_table;
+        *hash_table = *entry;
+        hash_table->next = cch;
+    }
+    if(hcb->vcpu->domain->domain_id==0){
+       thash_insert(hcb->ts->vhpt, entry, va);
+        return;
+    }
+    flag = 1;
+    gppn = 
(POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT;
+    ppns = PAGEALIGN((entry->ppn<<12),entry->ps);
+    ppne = ppns + PSIZE(entry->ps);
+    if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000)))
+        flag = 0;
+    if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag))
+       thash_insert(hcb->ts->vhpt, entry, va);
+    return ;
+}
+
+static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+    thash_data_t    *hash_table, *cch;
+    ia64_rr vrr;
+    
+    hash_table = (hcb->hash_func)(hcb->pta,
+                        va, entry->rid, entry->ps);
+    if( INVALID_ENTRY(hcb, hash_table) ) {
+        if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+            panic("Can't convert to machine VHPT entry\n");
+        }
+        hash_table->next = 0;
+    }
+    else {
+        // TODO: Add collision chain length limitation.
+        cch = __alloc_chain(hcb,entry);
+        
+        *cch = *hash_table;
+        if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+            panic("Can't convert to machine VHPT entry\n");
+        }
+        hash_table->next = cch;
+        if(hash_table->tag==hash_table->next->tag)
+            while(1);
+    }
+    return /*hash_table*/;
+}
+
+void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+    thash_data_t    *hash_table;
+    ia64_rr vrr;
+    
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr);
+    if ( entry->ps != vrr.ps && entry->tc ) {
+        panic("Not support for multiple page size now\n");
+    }
+    entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+    entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+    (hcb->ins_hash)(hcb, entry, va);
+    
+}
+
+static void rem_thash(thash_cb_t *hcb, thash_data_t *entry)
+{
+    thash_data_t    *hash_table, *p, *q;
+    thash_internal_t *priv = &hcb->priv;
+    int idx;
+
+    hash_table = priv->hash_base;
+    if ( hash_table == entry ) {
+//        if ( PURGABLE_ENTRY(hcb, entry) ) {
+            __rem_hash_head (hcb, entry);
+//        }
+        return ;
+    }
+    // remove from collision chain
+    p = hash_table;
+    for ( q=p->next; q; q = p->next ) {
+        if ( q == entry ){
+//            if ( PURGABLE_ENTRY(hcb,q ) ) {
+                p->next = q->next;
+                __rem_chain(hcb, entry);
+//            }
+            return ;
+        }
+        p = q;
+    }
+    panic("Entry not existed or bad sequence\n");
+}
+
+static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry)
+{
+    thash_data_t    *hash_table, *p, *q;
+    thash_internal_t *priv = &hcb->priv;
+    int idx;
+    
+    if ( !entry->tc ) {
+        return rem_tr(hcb, entry->cl, entry->tr_idx);
+    }
+    rem_thash(hcb, entry);
+}    
+
+int   cch_depth=0;
+/*
+ * Purge the collision chain starting from cch.
+ * NOTE:
+ *     For those UN-Purgable entries(FM), this function will return
+ * the head of left collision chain.
+ */
+static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch)
+{
+    thash_data_t *next;
+
+    if ( ++cch_depth > MAX_CCH_LENGTH ) {
+        printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n");
+        while(1);
+   }
+    if ( cch -> next ) {
+        next = thash_rem_cch(hcb, cch->next);
+    }
+    else {
+        next = NULL;
+    }
+    if ( PURGABLE_ENTRY(hcb, cch) ) {
+        __rem_chain(hcb, cch);
+        return next;
+    }
+    else {
+        cch->next = next;
+        return cch;
+    }
+}
+
+/*
+ * Purge one hash line (include the entry in hash table).
+ * Can only be called by thash_purge_all.
+ * Input:
+ *  hash: The head of collision chain (hash table)
+ *
+ */
+static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash)
+{
+    if ( INVALID_ENTRY(hcb, hash) ) return;
+    
+    if ( hash->next ) {
+        cch_depth = 0;
+        hash->next = thash_rem_cch(hcb, hash->next);
+    }
+    // Then hash table itself.
+    if ( PURGABLE_ENTRY(hcb, hash) ) {
+        __rem_hash_head(hcb, hash);
+    }
+}
+
+
+/*
+ * Find an overlap entry in hash table and its collision chain.
+ * Refer to SDM2 4.1.1.4 for overlap definition.
+ *    PARAS:
+ *  1: in: TLB format entry, rid:ps must be same with vrr[].
+ *         va & ps identify the address space for overlap lookup
+ *  2: section can be combination of TR, TC and FM. (THASH_SECTION_XX)
+ *  3: cl means I side or D side.
+ *    RETURNS:
+ *  NULL to indicate the end of findings.
+ *    NOTES:
+ *
+ */
+thash_data_t *thash_find_overlap(thash_cb_t *hcb, 
+            thash_data_t *in, search_section_t s_sect)
+{
+    return (hcb->find_overlap)(hcb, in->vadr, 
+            PSIZE(in->ps), in->rid, in->cl, s_sect);
+}
+
+static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb, 
+        u64 va, u64 size, int rid, char cl, search_section_t s_sect)
+{
+    thash_data_t    *hash_table;
+    thash_internal_t *priv = &hcb->priv;
+    u64     tag;
+    ia64_rr vrr;
+
+    priv->_curva = va & ~(size-1);
+    priv->_eva = priv->_curva + size;
+    priv->rid = rid;
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+    priv->ps = vrr.ps;
+    hash_table = (hcb->hash_func)(hcb->pta,
+        priv->_curva, rid, priv->ps);
+
+    priv->s_sect = s_sect;
+    priv->cl = cl;
+    priv->_tr_idx = 0;
+    priv->hash_base = hash_table;
+    priv->cur_cch = hash_table;
+    return (hcb->next_overlap)(hcb);
+}
+
+static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb, 
+        u64 va, u64 size, int rid, char cl, search_section_t s_sect)
+{
+    thash_data_t    *hash_table;
+    thash_internal_t *priv = &hcb->priv;
+    u64     tag;
+    ia64_rr vrr;
+
+    priv->_curva = va & ~(size-1);
+    priv->_eva = priv->_curva + size;
+    priv->rid = rid;
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+    priv->ps = vrr.ps;
+    hash_table = (hcb->hash_func)( hcb->pta,
+        priv->_curva, rid, priv->ps);
+    tag = (hcb->vs->tag_func)( hcb->pta,
+        priv->_curva, rid, priv->ps);
+
+    priv->tag = tag;
+    priv->hash_base = hash_table;
+    priv->cur_cch = hash_table;
+    return (hcb->next_overlap)(hcb);
+}
+
+
+static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb)
+{
+    thash_data_t    *tr;
+    thash_internal_t *priv = &hcb->priv;
+    int   num;
+
+    if ( priv->cl == ISIDE_TLB ) {
+        num = NITRS;
+        tr = &ITR(hcb,0);
+    }
+    else {
+        num = NDTRS;
+        tr = &DTR(hcb,0);
+    }
+    for (; priv->_tr_idx < num; priv->_tr_idx ++ ) {
+        if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx],
+                priv->rid, priv->cl,
+                priv->_curva, priv->_eva) ) {
+            return &tr[priv->_tr_idx++];
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Similar with vtlb_next_overlap but find next entry.
+ *    NOTES:
+ *  Intermediate position information is stored in hcb->priv.
+ */
+static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb)
+{
+    thash_data_t    *ovl;
+    thash_internal_t *priv = &hcb->priv;
+    u64 addr,rr_psize;
+    ia64_rr vrr;
+
+    if ( priv->s_sect.tr ) {
+        ovl = vtr_find_next_overlap (hcb);
+        if ( ovl ) return ovl;
+        priv->s_sect.tr = 0;
+    }
+    if ( priv->s_sect.v == 0 ) return NULL;
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+    rr_psize = PSIZE(vrr.ps);
+
+    while ( priv->_curva < priv->_eva ) {
+        if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+            ovl = _vtlb_next_overlap_in_chain(hcb);
+            if ( ovl ) {
+                priv->cur_cch = ovl->next;
+                return ovl;
+            }
+        }
+        priv->_curva += rr_psize;
+        priv->hash_base = (hcb->hash_func)( hcb->pta,
+            priv->_curva, priv->rid, priv->ps);
+        priv->cur_cch = priv->hash_base;
+    }
+    return NULL;
+}
+
+static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb)
+{
+    thash_data_t    *ovl;
+    thash_internal_t *priv = &hcb->priv;
+    u64 addr,rr_psize;
+    ia64_rr vrr;
+
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+    rr_psize = PSIZE(vrr.ps);
+
+    while ( priv->_curva < priv->_eva ) {
+        if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+            ovl = _vhpt_next_overlap_in_chain(hcb);
+            if ( ovl ) {
+                priv->cur_cch = ovl->next;
+                return ovl;
+            }
+        }
+        priv->_curva += rr_psize;
+        priv->hash_base = (hcb->hash_func)( hcb->pta,
+            priv->_curva, priv->rid, priv->ps);
+        priv->tag = (hcb->vs->tag_func)( hcb->pta,
+                priv->_curva, priv->rid, priv->ps);
+        priv->cur_cch = priv->hash_base;
+    }
+    return NULL;
+}
+
+
+/*
+ * Find and purge overlap entries in hash table and its collision chain.
+ *    PARAS:
+ *  1: in: TLB format entry, rid:ps must be same with vrr[].
+ *         rid, va & ps identify the address space for purge
+ *  2: section can be combination of TR, TC and FM. (thash_SECTION_XX)
+ *  3: cl means I side or D side.
+ *    NOTES:
+ *
+ */
+void thash_purge_entries(thash_cb_t *hcb, 
+            thash_data_t *in, search_section_t p_sect)
+{
+    return thash_purge_entries_ex(hcb, in->rid, in->vadr,
+            in->ps, p_sect, in->cl);
+}
+
+void thash_purge_entries_ex(thash_cb_t *hcb,
+            u64 rid, u64 va, u64 ps, 
+            search_section_t p_sect, 
+            CACHE_LINE_TYPE cl)
+{
+    thash_data_t    *ovl;
+
+    ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect);
+    while ( ovl != NULL ) {
+        (hcb->rem_hash)(hcb, ovl);
+        ovl = (hcb->next_overlap)(hcb);
+    };
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ *    Notes: Only TC entry can purge and insert.
+ */
+void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in)
+{
+    thash_data_t    *ovl;
+    search_section_t sections;
+
+#ifdef   XEN_DEBUGGER
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,in->vadr);
+       if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) {
+               panic ("Oops, wrong call for purge_and_insert\n");
+               return;
+       }
+#endif
+    in->vadr = PAGEALIGN(in->vadr,in->ps);
+    in->ppn = PAGEALIGN(in->ppn, in->ps-12);
+    sections.tr = 0;
+    sections.tc = 1;
+    ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps),
+                                in->rid, in->cl, sections);
+    if(ovl)
+        (hcb->rem_hash)(hcb, ovl);
+#ifdef   XEN_DEBUGGER
+    ovl = (hcb->next_overlap)(hcb);
+    if ( ovl ) {
+               panic ("Oops, 2+ overlaps for purge_and_insert\n");
+               return;
+    }
+#endif
+    (hcb->ins_hash)(hcb, in, in->vadr);
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+// TODO: add sections.
+void thash_purge_all(thash_cb_t *hcb)
+{
+    thash_data_t    *hash_table;
+    
+#ifdef  VTLB_DEBUG
+       extern u64  sanity_check;
+    static u64 statistics_before_purge_all=0;
+    if ( statistics_before_purge_all ) {
+       sanity_check = 1;
+        check_vtlb_sanity(hcb);
+    }
+#endif
+
+    hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+    
+    for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+        thash_rem_line(hcb, hash_table);
+    }
+}
+
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ *  in: TLB format for both VHPT & TLB.
+ */
+thash_data_t *vtlb_lookup(thash_cb_t *hcb, 
+            thash_data_t *in)
+{
+    return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl);
+}
+
+thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, 
+            u64 rid, u64 va,
+            CACHE_LINE_TYPE cl)
+{
+    thash_data_t    *hash_table, *cch;
+    u64     tag;
+    ia64_rr vrr;
+   
+    ASSERT ( hcb->ht == THASH_VTLB );
+    
+    cch = __vtr_lookup(hcb, rid, va, cl);;
+    if ( cch ) return cch;
+
+    vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+    hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps);
+
+    if ( INVALID_ENTRY(hcb, hash_table ) )
+        return NULL;
+
+        
+    for (cch=hash_table; cch; cch = cch->next) {
+        if ( __is_translated(cch, rid, va, cl) )
+            return cch;
+    }
+    return NULL;
+}
+
+/*
+ * Lock/Unlock TC if found.
+ *     NOTES: Only the page in prefered size can be handled.
+ *   return:
+ *          1: failure
+ *          0: success
+ */
+int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int 
lock)
+{
+       thash_data_t    *ovl;
+       search_section_t        sections;
+
+    sections.tr = 1;
+    sections.tc = 1;
+       ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections);
+       if ( ovl ) {
+               if ( !ovl->tc ) {
+//                     panic("Oops, TR for lock\n");
+                       return 0;
+               }
+               else if ( lock ) {
+                       if ( ovl->locked ) {
+                               DPRINTK("Oops, already locked entry\n");
+                       }
+                       ovl->locked = 1;
+               }
+               else if ( !lock ) {
+                       if ( !ovl->locked ) {
+                               DPRINTK("Oops, already unlocked entry\n");
+                       }
+                       ovl->locked = 0;
+               }
+               return 0;
+       }
+       return 1;
+}
+
+/*
+ * Notifier when TLB is deleted from hash table and its collision chain.
+ * NOTES:
+ *  The typical situation is that TLB remove needs to inform
+ * VHPT to remove too.
+ * PARAS:
+ *  1: hcb is TLB object.
+ *  2: The format of entry is always in TLB.
+ *
+ */
+void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry)
+{
+    thash_cb_t  *vhpt;
+    search_section_t    s_sect;
+    
+    s_sect.v = 0;
+    thash_purge_entries(hcb->ts->vhpt, entry, s_sect);
+    machine_tlb_purge(entry->rid, entry->vadr, entry->ps);
+}
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(thash_cb_t *hcb, u64 sz)
+{
+    thash_data_t    *hash_table;
+
+    cch_mem_init (hcb);
+    hcb->magic = THASH_CB_MAGIC;
+    hcb->pta.val = hcb->hash;
+    hcb->pta.vf = 1;
+    hcb->pta.ve = 1;
+    hcb->pta.size = sz;
+    hcb->get_rr_fn = vmmu_get_rr;
+    ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 );
+    if ( hcb->ht == THASH_TLB ) {
+        hcb->remove_notifier =  tlb_remove_notifier;
+        hcb->find_overlap = vtlb_find_overlap;
+        hcb->next_overlap = vtlb_next_overlap;
+        hcb->rem_hash = rem_vtlb;
+        hcb->ins_hash = vtlb_insert;
+        __init_tr(hcb);
+    }
+    else {
+        hcb->remove_notifier =  NULL;
+        hcb->find_overlap = vhpt_find_overlap;
+        hcb->next_overlap = vhpt_next_overlap;
+        hcb->rem_hash = rem_thash;
+        hcb->ins_hash = vhpt_insert;
+    }
+    hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+    
+    for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+        INVALIDATE_HASH(hcb,hash_table);
+    }
+}
+
+#ifdef  VTLB_DEBUG
+static  u64 cch_length_statistics[MAX_CCH_LENGTH+1];
+u64  sanity_check=0;
+u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash)
+{
+    thash_data_t *cch;
+    thash_data_t    *ovl;
+    search_section_t s_sect;
+    u64     num=0;
+    
+    s_sect.v = 0;
+    for (cch=hash; cch; cch=cch->next) {
+        ovl = thash_find_overlap(vhpt, cch, s_sect);
+        while ( ovl != NULL ) {
+            ovl->checked = 1;
+            ovl = (vhpt->next_overlap)(vhpt);
+        };
+        num ++;
+    }
+    if ( num >= MAX_CCH_LENGTH ) {
+       cch_length_statistics[MAX_CCH_LENGTH] ++;
+    }
+    else {
+       cch_length_statistics[num] ++;
+    }
+    return num;
+}
+
+void check_vtlb_sanity(thash_cb_t *vtlb)
+{
+//    struct pfn_info *page;
+    u64  hash_num, i, psr;
+    static u64 check_ok_num, check_fail_num,check_invalid;
+//  void *vb1, *vb2;
+    thash_data_t  *hash, *cch;
+    thash_data_t    *ovl;
+    search_section_t s_sect;
+    thash_cb_t *vhpt = vtlb->ts->vhpt;
+    u64   invalid_ratio;
+    
+    if ( sanity_check == 0 ) return;
+    sanity_check --;
+    s_sect.v = 0;
+//    page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+//    if ( page == NULL ) {
+//        panic("No enough contiguous memory for init_domain_mm\n");
+//    };
+//    vb1 = page_to_virt(page);
+//    printf("Allocated page=%lp vbase=%lp\n", page, vb1);
+//    vb2 = vb1 + vtlb->hash_sz;
+    hash_num = vhpt->hash_sz / sizeof(thash_data_t);
+//    printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num);
+    printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n", 
+                vtlb, vtlb->hash,vtlb->hash_sz,
+                vhpt, vhpt->hash, vhpt->hash_sz);
+    //memcpy(vb1, vtlb->hash, vtlb->hash_sz);
+    //memcpy(vb2, vhpt->hash, vhpt->hash_sz);
+    for ( i=0; i < 
sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+       cch_length_statistics[i] = 0;
+    }
+    
+    local_irq_save(psr);
+    
+    hash = vhpt->hash;
+    for (i=0; i < hash_num; i++) {
+        if ( !INVALID_ENTRY(vhpt, hash) ) {
+            for ( cch= hash; cch; cch=cch->next) {
+                cch->checked = 0;
+            }
+        }
+        hash ++;
+    }
+    printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num);
+    check_invalid = 0;
+    check_ok_num=0;
+    hash = vtlb->hash;
+    for ( i=0; i< hash_num; i++ ) {
+        if ( !INVALID_ENTRY(vtlb, hash) ) {
+            check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash);
+        }
+        else {
+            check_invalid++;
+        }
+        hash ++;
+    }
+    printf("Done vtlb entry check, hash=%lp\n", hash);
+    printf("check_ok_num = 0x%lx check_invalid=0x%lx\n", 
check_ok_num,check_invalid);
+    invalid_ratio = 1000*check_invalid / hash_num;
+    printf("%02ld.%01ld%% entries are invalid\n", 
+               invalid_ratio/10, invalid_ratio % 10 );
+    for (i=0; i<NDTRS; i++) {
+        ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect);
+        while ( ovl != NULL ) {
+            ovl->checked = 1;
+            ovl = (vhpt->next_overlap)(vhpt);
+        };
+    }
+    printf("Done dTR\n");
+    for (i=0; i<NITRS; i++) {
+        ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect);
+        while ( ovl != NULL ) {
+            ovl->checked = 1;
+            ovl = (vhpt->next_overlap)(vhpt);
+        };
+    }
+    printf("Done iTR\n");
+    check_fail_num = 0;
+    check_invalid = 0;
+    check_ok_num=0;
+    hash = vhpt->hash;
+    for (i=0; i < hash_num; i++) {
+        if ( !INVALID_ENTRY(vhpt, hash) ) {
+            for ( cch= hash; cch; cch=cch->next) {
+                if ( !cch->checked ) {
+                    printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash, 
cch);
+                    check_fail_num ++;
+                }
+                else {
+                    check_ok_num++;
+                }
+            }
+        }
+        else {
+            check_invalid ++;
+        }
+        hash ++;
+    }
+    local_irq_restore(psr);
+    printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n", 
+            check_ok_num, check_fail_num, check_invalid);
+    //memcpy(vtlb->hash, vb1, vtlb->hash_sz);
+    //memcpy(vhpt->hash, vb2, vhpt->hash_sz);
+    printf("The statistics of collision chain length is listed\n");
+    for ( i=0; i < 
sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+       printf("CCH length=%02ld, chain number=%ld\n", i, 
cch_length_statistics[i]);
+    }
+//    free_domheap_pages(page, VCPU_TLB_ORDER);
+    printf("Done check_vtlb\n");
+}
+
+void dump_vtlb(thash_cb_t *vtlb)
+{
+    static u64  dump_vtlb=0;
+    thash_data_t  *hash, *cch, *tr;
+    u64     hash_num,i;
+    
+    if ( dump_vtlb == 0 ) return;
+    dump_vtlb --;
+    hash_num = vtlb->hash_sz / sizeof(thash_data_t);
+    hash = vtlb->hash;
+    
+    printf("Dump vTC\n");
+    for ( i = 0; i < hash_num; i++ ) {
+        if ( !INVALID_ENTRY(vtlb, hash) ) {
+            printf("VTLB at hash=%lp\n", hash);
+            for (cch=hash; cch; cch=cch->next) {
+                printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+                    cch, cch->vadr, cch->ps, cch->rid);
+            }
+        }
+        hash ++;
+    }
+    printf("Dump vDTR\n");
+    for (i=0; i<NDTRS; i++) {
+        tr = &DTR(vtlb,i);
+        printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+                    tr, tr->vadr, tr->ps, tr->rid);
+    }
+    printf("Dump vITR\n");
+    for (i=0; i<NITRS; i++) {
+        tr = &ITR(vtlb,i);
+        printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+                    tr, tr->vadr, tr->ps, tr->rid);
+    }
+    printf("End of vTLB dump\n");
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/acpi.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/acpi.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,678 @@
+/*
+ *  acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ *  Copyright (C) 1999 VA Linux Systems
+ *  Copyright (C) 1999,2000 Walt Drummond <drummond@xxxxxxxxxxx>
+ *  Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *  Copyright (C) 2000 Intel Corp.
+ *  Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@xxxxxxxxx>
+ *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2001 Jenna Hall <jenna.s.hall@xxxxxxxxx>
+ *  Copyright (C) 2001 Takayoshi Kochi <t-kochi@xxxxxxxxxxxxx>
+ *  Copyright (C) 2002 Erich Focht <efocht@xxxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/mmzone.h>
+#include <asm/io.h>
+//#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+#include <asm/sal.h>
+//#include <asm/cyclone.h>
+
+#define BAD_MADT_ENTRY(entry, end) (                                        \
+               (!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
+               ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX                 "ACPI: "
+
+void (*pm_idle) (void);
+EXPORT_SYMBOL(pm_idle);
+void (*pm_power_off) (void);
+
+unsigned char acpi_kbd_controller_present = 1;
+unsigned char acpi_legacy_devices;
+
+const char *
+acpi_get_sysname (void)
+{
+/* #ifdef CONFIG_IA64_GENERIC */
+       unsigned long rsdp_phys;
+       struct acpi20_table_rsdp *rsdp;
+       struct acpi_table_xsdt *xsdt;
+       struct acpi_table_header *hdr;
+
+       rsdp_phys = acpi_find_rsdp();
+       if (!rsdp_phys) {
+               printk(KERN_ERR "ACPI 2.0 RSDP not found, default to 
\"dig\"\n");
+               return "dig";
+       }
+
+       rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+       if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
+               printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to 
\"dig\"\n");
+               return "dig";
+       }
+
+       xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+       hdr = &xsdt->header;
+       if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
+               printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to 
\"dig\"\n");
+               return "dig";
+       }
+
+       if (!strcmp(hdr->oem_id, "HP")) {
+               return "hpzx1";
+       }
+       else if (!strcmp(hdr->oem_id, "SGI")) {
+               return "sn2";
+       }
+
+       return "dig";
+/*
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+       return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+       return "hpzx1";
+# elif defined (CONFIG_IA64_SGI_SN2)
+       return "sn2";
+# elif defined (CONFIG_IA64_DIG)
+       return "dig";
+# else
+#      error Unknown platform.  Fix acpi.c.
+# endif
+#endif
+*/
+}
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define ACPI_MAX_PLATFORM_INTERRUPTS   256
+
+#if 0
+/* Array to record platform interrupt vectors for generic interrupt routing. */
+int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
+       [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
+};
+
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
+
+/*
+ * Interrupt routing API for device drivers.  Provides interrupt vector for
+ * a generic platform event.  Currently only CPEI is implemented.
+ */
+int
+acpi_request_vector (u32 int_type)
+{
+       int vector = -1;
+
+       if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
+               /* corrected platform error interrupt */
+               vector = platform_intr_list[int_type];
+       } else
+               printk(KERN_ERR "acpi_request_vector(): invalid interrupt 
type\n");
+       return vector;
+}
+#endif
+char *
+__acpi_map_table (unsigned long phys_addr, unsigned long size)
+{
+       return __va(phys_addr);
+}
+
+/* --------------------------------------------------------------------------
+                            Boot-time Table Parsing
+   -------------------------------------------------------------------------- 
*/
+
+static int                     total_cpus __initdata;
+static int                     available_cpus __initdata;
+struct acpi_table_madt *       acpi_madt __initdata;
+static u8                      has_8259;
+
+#if 0
+static int __init
+acpi_parse_lapic_addr_ovr (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lapic_addr_ovr *lapic;
+
+       lapic = (struct acpi_table_lapic_addr_ovr *) header;
+
+       if (BAD_MADT_ENTRY(lapic, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       if (lapic->address) {
+               iounmap((void *) ipi_base_addr);
+               ipi_base_addr = (unsigned long) ioremap(lapic->address, 0);
+       }
+       return 0;
+}
+
+
+static int __init
+acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lsapic *lsapic;
+
+       lsapic = (struct acpi_table_lsapic *) header;
+
+       if (BAD_MADT_ENTRY(lsapic, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | 
lsapic->eid);
+
+       if (!lsapic->flags.enabled)
+               printk(" disabled");
+       else {
+               printk(" enabled");
+#ifdef CONFIG_SMP
+               smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | 
lsapic->eid;
+               if (hard_smp_processor_id()
+                   == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus])
+                       printk(" (BSP)");
+#endif
+               ++available_cpus;
+       }
+
+       printk("\n");
+
+       total_cpus++;
+       return 0;
+}
+
+
+static int __init
+acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_lapic_nmi *lacpi_nmi;
+
+       lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+
+       if (BAD_MADT_ENTRY(lacpi_nmi, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* TBD: Support lapic_nmi entries */
+       return 0;
+}
+
+
+static int __init
+acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_iosapic *iosapic;
+
+       iosapic = (struct acpi_table_iosapic *) header;
+
+       if (BAD_MADT_ENTRY(iosapic, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       iosapic_init(iosapic->address, iosapic->global_irq_base);
+
+       return 0;
+}
+
+
+static int __init
+acpi_parse_plat_int_src (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_plat_int_src *plintsrc;
+       int vector;
+
+       plintsrc = (struct acpi_table_plat_int_src *) header;
+
+       if (BAD_MADT_ENTRY(plintsrc, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /*
+        * Get vector assignment for this interrupt, set attributes,
+        * and program the IOSAPIC routing table.
+        */
+       vector = iosapic_register_platform_intr(plintsrc->type,
+                                               plintsrc->global_irq,
+                                               plintsrc->iosapic_vector,
+                                               plintsrc->eid,
+                                               plintsrc->id,
+                                               (plintsrc->flags.polarity == 1) 
? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+                                               (plintsrc->flags.trigger == 1) 
? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+
+       platform_intr_list[plintsrc->type] = vector;
+       return 0;
+}
+
+
+static int __init
+acpi_parse_int_src_ovr (
+       acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_int_src_ovr *p;
+
+       p = (struct acpi_table_int_src_ovr *) header;
+
+       if (BAD_MADT_ENTRY(p, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       iosapic_override_isa_irq(p->bus_irq, p->global_irq,
+                                (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : 
IOSAPIC_POL_LOW,
+                                (p->flags.trigger == 1) ? IOSAPIC_EDGE : 
IOSAPIC_LEVEL);
+       return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+{
+       struct acpi_table_nmi_src *nmi_src;
+
+       nmi_src = (struct acpi_table_nmi_src*) header;
+
+       if (BAD_MADT_ENTRY(nmi_src, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+
+       /* TBD: Support nimsrc entries */
+       return 0;
+}
+/* Hook from generic ACPI tables.c */
+void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       if (!strncmp(oem_id, "IBM", 3) &&
+           (!strncmp(oem_table_id, "SERMOW", 6))){
+
+               /* Unfortunatly ITC_DRIFT is not yet part of the
+                * official SAL spec, so the ITC_DRIFT bit is not
+                * set by the BIOS on this hardware.
+                */
+               sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
+
+               /*Start cyclone clock*/
+               cyclone_setup(0);
+       }
+}
+
+static int __init
+acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+{
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+
+       /* remember the value for reference after free_initmem() */
+#ifdef CONFIG_ITANIUM
+       has_8259 = 1; /* Firmware on old Itanium systems is broken */
+#else
+       has_8259 = acpi_madt->flags.pcat_compat;
+#endif
+       iosapic_system_init(has_8259);
+
+       /* Get base address of IPI Message Block */
+
+       if (acpi_madt->lapic_address)
+               ipi_base_addr = (unsigned long) 
ioremap(acpi_madt->lapic_address, 0);
+
+       printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr);
+
+       acpi_madt_oem_check(acpi_madt->header.oem_id,
+               acpi_madt->header.oem_table_id);
+
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+
+#undef SLIT_DEBUG
+
+#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+
+static int __initdata srat_num_cpus;                   /* number of cpus */
+static u32 __initdata pxm_flag[PXM_FLAG_LEN];
+#define pxm_bit_set(bit)       (set_bit(bit,(void *)pxm_flag))
+#define pxm_bit_test(bit)      (test_bit(bit,(void *)pxm_flag))
+/* maps to convert between proximity domain and logical node ID */
+int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS];
+int __initdata nid_to_pxm_map[MAX_NUMNODES];
+static struct acpi_table_slit __initdata *slit_table;
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ */
+void __init
+acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+       u32 len;
+
+       len = sizeof(struct acpi_table_header) + 8
+               + slit->localities * slit->localities;
+       if (slit->header.length != len) {
+               printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d 
actual\n",
+                      len, slit->header.length);
+               memset(numa_slit, 10, sizeof(numa_slit));
+               return;
+       }
+       slit_table = slit;
+}
+
+void __init
+acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+{
+       /* record this node in proximity bitmap */
+       pxm_bit_set(pa->proximity_domain);
+
+       node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | 
(pa->lsapic_eid);
+       /* nid should be overridden as logical node id later */
+       node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+       srat_num_cpus++;
+}
+
+void __init
+acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+{
+       unsigned long paddr, size;
+       u8 pxm;
+       struct node_memblk_s *p, *q, *pend;
+
+       pxm = ma->proximity_domain;
+
+       /* fill node memory chunk structure */
+       paddr = ma->base_addr_hi;
+       paddr = (paddr << 32) | ma->base_addr_lo;
+       size = ma->length_hi;
+       size = (size << 32) | ma->length_lo;
+
+       /* Ignore disabled entries */
+       if (!ma->flags.enabled)
+               return;
+
+       /* record this node in proximity bitmap */
+       pxm_bit_set(pxm);
+
+       /* Insertion sort based on base address */
+       pend = &node_memblk[num_node_memblks];
+       for (p = &node_memblk[0]; p < pend; p++) {
+               if (paddr < p->start_paddr)
+                       break;
+       }
+       if (p < pend) {
+               for (q = pend - 1; q >= p; q--)
+                       *(q + 1) = *q;
+       }
+       p->start_paddr = paddr;
+       p->size = size;
+       p->nid = pxm;
+       num_node_memblks++;
+}
+
+void __init
+acpi_numa_arch_fixup (void)
+{
+       int i, j, node_from, node_to;
+
+       /* If there's no SRAT, fix the phys_id */
+       if (srat_num_cpus == 0) {
+               node_cpuid[0].phys_id = hard_smp_processor_id();
+               return;
+       }
+
+       /* calculate total number of nodes in system from PXM bitmap */
+       numnodes = 0;           /* init total nodes in system */
+
+       memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+       memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
+       for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+               if (pxm_bit_test(i)) {
+                       pxm_to_nid_map[i] = numnodes;
+                       node_set_online(numnodes);
+                       nid_to_pxm_map[numnodes++] = i;
+               }
+       }
+
+       /* set logical node id in memory chunk structure */
+       for (i = 0; i < num_node_memblks; i++)
+               node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+
+       /* assign memory bank numbers for each chunk on each node */
+       for (i = 0; i < numnodes; i++) {
+               int bank;
+
+               bank = 0;
+               for (j = 0; j < num_node_memblks; j++)
+                       if (node_memblk[j].nid == i)
+                               node_memblk[j].bank = bank++;
+       }
+
+       /* set logical node id in cpu structure */
+       for (i = 0; i < srat_num_cpus; i++)
+               node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+
+       printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes);
+       printk(KERN_INFO "Number of memory chunks in system = %d\n", 
num_node_memblks);
+
+       if (!slit_table) return;
+       memset(numa_slit, -1, sizeof(numa_slit));
+       for (i=0; i<slit_table->localities; i++) {
+               if (!pxm_bit_test(i))
+                       continue;
+               node_from = pxm_to_nid_map[i];
+               for (j=0; j<slit_table->localities; j++) {
+                       if (!pxm_bit_test(j))
+                               continue;
+                       node_to = pxm_to_nid_map[j];
+                       node_distance(node_from, node_to) =
+                               slit_table->entry[i*slit_table->localities + j];
+               }
+       }
+
+#ifdef SLIT_DEBUG
+       printk("ACPI 2.0 SLIT locality table:\n");
+       for (i = 0; i < numnodes; i++) {
+               for (j = 0; j < numnodes; j++)
+                       printk("%03d ", node_distance(i,j));
+               printk("\n");
+       }
+#endif
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+#if 0
+unsigned int
+acpi_register_gsi (u32 gsi, int polarity, int trigger)
+{
+       return acpi_register_irq(gsi, polarity, trigger);
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+static int __init
+acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+{
+       struct acpi_table_header *fadt_header;
+       struct fadt_descriptor_rev2 *fadt;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       fadt_header = (struct acpi_table_header *) __va(phys_addr);
+       if (fadt_header->revision != 3)
+               return -ENODEV;         /* Only deal with ACPI 2.0 FADT */
+
+       fadt = (struct fadt_descriptor_rev2 *) fadt_header;
+
+       if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
+               acpi_kbd_controller_present = 0;
+
+       if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
+               acpi_legacy_devices = 1;
+
+       acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE);
+       return 0;
+}
+#endif
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+       unsigned long rsdp_phys = 0;
+
+       if (efi.acpi20)
+               rsdp_phys = __pa(efi.acpi20);
+       else if (efi.acpi)
+               printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer 
supported\n");
+       return rsdp_phys;
+}
+
+#if 0
+int __init
+acpi_boot_init (void)
+{
+
+       /*
+        * MADT
+        * ----
+        * Parse the Multiple APIC Description Table (MADT), if exists.
+        * Note that this table provides platform SMP configuration
+        * information -- the successor to MPS tables.
+        */
+
+       if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+               printk(KERN_ERR PREFIX "Can't find MADT\n");
+               goto skip_madt;
+       }
+
+       /* Local APIC */
+
+       if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, 
acpi_parse_lapic_addr_ovr, 0) < 0)
+               printk(KERN_ERR PREFIX "Error parsing LAPIC address override 
entry\n");
+
+       if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) 
< 1)
+               printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC 
entries\n");
+
+       if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) 
< 0)
+               printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+
+       /* I/O APIC */
+
+       if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, 
NR_IOSAPICS) < 1)
+               printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC 
entries\n");
+
+       /* System-Level Interrupt Routing */
+
+       if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, 
acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+               printk(KERN_ERR PREFIX "Error parsing platform interrupt source 
entry\n");
+
+       if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, 
acpi_parse_int_src_ovr, 0) < 0)
+               printk(KERN_ERR PREFIX "Error parsing interrupt source 
overrides entry\n");
+
+       if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
+               printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+  skip_madt:
+
+       /*
+        * FADT says whether a legacy keyboard controller is present.
+        * The FADT also contains an SCI_INT line, by which the system
+        * gets interrupts such as power and sleep buttons.  If it's not
+        * on a Legacy interrupt, it needs to be setup.
+        */
+       if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
+               printk(KERN_ERR PREFIX "Can't find FADT\n");
+
+#ifdef CONFIG_SMP
+       if (available_cpus == 0) {
+               printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+               printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+               smp_boot_data.cpu_phys_id[available_cpus] = 
hard_smp_processor_id();
+               available_cpus = 1; /* We've got at least one of these, no? */
+       }
+       smp_boot_data.cpu_count = available_cpus;
+
+       smp_build_cpu_map();
+# ifdef CONFIG_ACPI_NUMA
+       if (srat_num_cpus == 0) {
+               int cpu, i = 1;
+               for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
+                       if (smp_boot_data.cpu_phys_id[cpu] != 
hard_smp_processor_id())
+                               node_cpuid[i++].phys_id = 
smp_boot_data.cpu_phys_id[cpu];
+       }
+       build_cpu_to_node_map();
+# endif
+#endif
+       /* Make boot-up look pretty */
+       printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, 
total_cpus);
+       return 0;
+}
+int
+acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+{
+       int vector;
+
+       if (has_8259 && gsi < 16)
+               *irq = isa_irq_to_vector(gsi);
+       else {
+               vector = gsi_to_vector(gsi);
+               if (vector == -1)
+                       return -1;
+
+               *irq = vector;
+       }
+       return 0;
+}
+
+int
+acpi_register_irq (u32 gsi, u32 polarity, u32 trigger)
+{
+       if (has_8259 && gsi < 16)
+               return isa_irq_to_vector(gsi);
+
+       return iosapic_register_intr(gsi,
+                       (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : 
IOSAPIC_POL_LOW,
+                       (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : 
IOSAPIC_LEVEL);
+}
+EXPORT_SYMBOL(acpi_register_irq);
+#endif
+#endif /* CONFIG_ACPI_BOOT */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom0_ops.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/dom0_ops.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,237 @@
+/******************************************************************************
+ * Arch-specific dom0_ops.c
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <public/dom0_ops.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/pdb.h>
+#include <xen/trace.h>
+#include <xen/console.h>
+#include <public/sched_ctl.h>
+
+long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
+{
+    long ret = 0;
+
+    if ( !IS_PRIV(current->domain) )
+        return -EPERM;
+
+    switch ( op->cmd )
+    {
+    case DOM0_GETPAGEFRAMEINFO:
+    {
+        struct pfn_info *page;
+        unsigned long pfn = op->u.getpageframeinfo.pfn;
+        domid_t dom = op->u.getpageframeinfo.domain;
+        struct domain *d;
+
+        ret = -EINVAL;
+
+        if ( unlikely(pfn >= max_page) || 
+             unlikely((d = find_domain_by_id(dom)) == NULL) )
+            break;
+
+        page = &frame_table[pfn];
+
+        if ( likely(get_page(page, d)) )
+        {
+            ret = 0;
+
+            op->u.getpageframeinfo.type = NOTAB;
+
+            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+            {
+                switch ( page->u.inuse.type_info & PGT_type_mask )
+                {
+               default:
+                   panic("No such page type\n");
+                    break;
+                }
+            }
+            
+            put_page(page);
+        }
+
+        put_domain(d);
+
+        copy_to_user(u_dom0_op, op, sizeof(*op));
+    }
+    break;
+
+    case DOM0_GETPAGEFRAMEINFO2:
+    {
+#define GPF2_BATCH 128
+        int n,j;
+        int num = op->u.getpageframeinfo2.num;
+        domid_t dom = op->u.getpageframeinfo2.domain;
+        unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
+        struct domain *d;
+        unsigned long *l_arr;
+        ret = -ESRCH;
+
+        if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
+            break;
+
+        if ( unlikely(num > 1024) )
+        {
+            ret = -E2BIG;
+            break;
+        }
+
+        l_arr = (unsigned long *)alloc_xenheap_page();
+ 
+        ret = 0;
+        for( n = 0; n < num; )
+        {
+            int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
+
+            if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
+            {
+                ret = -EINVAL;
+                break;
+            }
+     
+            for( j = 0; j < k; j++ )
+            {      
+                struct pfn_info *page;
+                unsigned long mfn = l_arr[j];
+
+                if ( unlikely(mfn >= max_page) )
+                    goto e2_err;
+
+                page = &frame_table[mfn];
+  
+                if ( likely(get_page(page, d)) )
+                {
+                    unsigned long type = 0;
+
+                    switch( page->u.inuse.type_info & PGT_type_mask )
+                    {
+                   default:
+                       panic("No such page type\n");
+                       break;
+                    }
+
+                    if ( page->u.inuse.type_info & PGT_pinned )
+                        type |= LPINTAB;
+                    l_arr[j] |= type;
+                    put_page(page);
+                }
+                else
+                {
+                e2_err:
+                    l_arr[j] |= XTAB;
+                }
+
+            }
+
+            if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
+            {
+                ret = -EINVAL;
+                break;
+            }
+
+            n += j;
+        }
+
+        free_xenheap_page((unsigned long)l_arr);
+
+        put_domain(d);
+    }
+    break;
+#ifndef CONFIG_VTI
+    /*
+     * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
+     * it actually allocates and maps pages.
+     */
+    case DOM0_GETMEMLIST:
+    {
+        unsigned long i;
+        struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
+        unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
+        unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
+        unsigned long pfn;
+        unsigned long *buffer = op->u.getmemlist.buffer;
+        struct page *page;
+
+        ret = -EINVAL;
+        if ( d != NULL )
+        {
+            ret = 0;
+
+            for ( i = start_page; i < (start_page + nr_pages); i++ )
+            {
+                page = map_new_domain_page(d, i << PAGE_SHIFT);
+                if ( page == NULL )
+                {
+                    ret = -ENOMEM;
+                    break;
+                }
+                pfn = page_to_pfn(page);
+                if ( put_user(pfn, buffer) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+                buffer++;
+            }
+
+            op->u.getmemlist.num_pfns = i - start_page;
+            copy_to_user(u_dom0_op, op, sizeof(*op));
+            
+            put_domain(d);
+        }
+    }
+    break;
+#else
+    case DOM0_GETMEMLIST:
+    {
+       int i;
+       struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
+       unsigned long max_pfns = op->u.getmemlist.max_pfns;
+       unsigned long pfn;
+       unsigned long *buffer = op->u.getmemlist.buffer;
+       struct list_head *list_ent;
+
+       ret = -EINVAL;
+       if (!d) {
+           ret = 0;
+
+           spin_lock(&d->page_alloc_lock);
+           list_ent = d->page_list.next;
+           for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) {
+               pfn = list_entry(list_ent, struct pfn_info, list) -
+                   frame_table;
+               if (put_user(pfn, buffer)) {
+                   ret = -EFAULT;
+                   break;
+               }
+               buffer++;
+               list_ent = frame_table[pfn].list.next;
+           }
+           spin_unlock(&d->page_alloc_lock);
+
+           op->u.getmemlist.num_pfns = i;
+           copy_to_user(u_dom0_op, op, sizeof(*op));
+
+           put_domain(d);
+       }
+    }
+    break;
+#endif // CONFIG_VTI
+    default:
+        ret = -ENOSYS;
+
+    }
+
+    return ret;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom_fw.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/dom_fw.c        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,688 @@
+/*
+ *  Xen domain firmware emulation support
+ *  Copyright (C) 2004 Hewlett-Packard Co.
+ *       Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+
+#include <linux/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <xen/acpi.h>
+
+#include <asm/dom_fw.h>
+
+struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+extern struct domain *dom0;
+extern unsigned long dom0_start;
+
+extern unsigned long running_on_sim;
+
+
+unsigned long dom_fw_base_mpa = -1;
+unsigned long imva_fw_base = -1;
+
+// return domain (meta)physical address for a given imva
+// this function is a call-back from dom_fw_init
+unsigned long dom_pa(unsigned long imva)
+{
+       if (dom_fw_base_mpa == -1 || imva_fw_base == -1) {
+               printf("dom_pa: uninitialized! (spinning...)\n");
+               while(1);
+       }
+       if (imva - imva_fw_base > PAGE_SIZE) {
+               printf("dom_pa: bad offset! imva=%p, imva_fw_base=%p 
(spinning...)\n",imva,imva_fw_base);
+               while(1);
+       }
+       return dom_fw_base_mpa + (imva - imva_fw_base);
+}
+
+// builds a hypercall bundle at domain physical address
+void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned 
long hypercall)
+{
+       unsigned long imva;
+
+       if (d == dom0) paddr += dom0_start;
+       imva = domain_mpa_to_imva(d,paddr);
+       build_hypercall_bundle(imva,d->arch.breakimm,hypercall,1);
+}
+
+
+// builds a hypercall bundle at domain physical address
+void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned 
long hypercall,unsigned long ret)
+{
+       unsigned long imva;
+
+       if (d == dom0) paddr += dom0_start;
+       imva = domain_mpa_to_imva(d,paddr);
+       build_hypercall_bundle(imva,d->arch.breakimm,hypercall,ret);
+}
+
+
+// FIXME: This is really a hack: Forcing the boot parameter block
+// at domain mpaddr 0 page, then grabbing only the low bits of the
+// Xen imva, which is the offset into the page
+unsigned long dom_fw_setup(struct domain *d, char *args, int arglen)
+{
+       struct ia64_boot_param *bp;
+
+       dom_fw_base_mpa = 0;
+       if (d == dom0) dom_fw_base_mpa += dom0_start;
+       imva_fw_base = domain_mpa_to_imva(d,dom_fw_base_mpa);
+       bp = dom_fw_init(d,args,arglen,imva_fw_base,PAGE_SIZE);
+       return dom_pa((unsigned long)bp);
+}
+
+
+/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */
+
+#define MB     (1024*1024UL)
+
+#define NUM_EFI_SYS_TABLES 6
+#define PASS_THRU_IOPORT_SPACE
+#ifdef PASS_THRU_IOPORT_SPACE
+# define NUM_MEM_DESCS 4
+#else
+# define NUM_MEM_DESCS 3
+#endif
+
+
+#define SECS_PER_HOUR   (60 * 60)
+#define SECS_PER_DAY    (SECS_PER_HOUR * 24)
+
+/* Compute the `struct tm' representation of *T,
+   offset OFFSET seconds east of UTC,
+   and store year, yday, mon, mday, wday, hour, min, sec into *TP.
+   Return nonzero if successful.  */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+       const unsigned short int __mon_yday[2][13] =
+       {
+               /* Normal years.  */
+               { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+               /* Leap years.  */
+               { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+       };
+       long int days, rem, y;
+       const unsigned short int *ip;
+
+       days = t / SECS_PER_DAY;
+       rem = t % SECS_PER_DAY;
+       while (rem < 0) {
+               rem += SECS_PER_DAY;
+               --days;
+       }
+       while (rem >= SECS_PER_DAY) {
+               rem -= SECS_PER_DAY;
+               ++days;
+       }
+       tp->hour = rem / SECS_PER_HOUR;
+       rem %= SECS_PER_HOUR;
+       tp->minute = rem / 60;
+       tp->second = rem % 60;
+       /* January 1, 1970 was a Thursday.  */
+       y = 1970;
+
+#      define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+#      define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+#      define __isleap(year) \
+         ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+       while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+               /* Guess a corrected year, assuming 365 days per year.  */
+               long int yg = y + days / 365 - (days % 365 < 0);
+
+               /* Adjust DAYS and Y to match the guessed year.  */
+               days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+                        - LEAPS_THRU_END_OF (y - 1));
+               y = yg;
+       }
+       tp->year = y;
+       ip = __mon_yday[__isleap(y)];
+       for (y = 11; days < (long int) ip[y]; --y)
+               continue;
+       days -= ip[y];
+       tp->month = y + 1;
+       tp->day = days + 1;
+       return 1;
+}
+
+extern struct ia64_pal_retval pal_emulator_static (unsigned long);
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr)                ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr)       (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr)  (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr)       (0x0000000000FF0000 & (addr))
+
+#ifndef XEN
+static efi_status_t
+fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+       struct {
+               int tv_sec;     /* must be 32bits to work */
+               int tv_usec;
+       } tv32bits;
+
+       ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+       memset(tm, 0, sizeof(*tm));
+       offtime(tv32bits.tv_sec, tm);
+
+       if (tc)
+               memset(tc, 0, sizeof(*tc));
+#else
+#      error Not implemented yet...
+#endif
+       return EFI_SUCCESS;
+}
+
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long 
data_size, efi_char16_t *data)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+       ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+#      error Not implemented yet...
+#endif
+}
+
+static efi_status_t
+efi_unimplemented (void)
+{
+       return EFI_UNSUPPORTED;
+}
+#endif /* !XEN */
+
+struct sal_ret_values
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+             unsigned long in3, unsigned long in4, unsigned long in5,
+             unsigned long in6, unsigned long in7)
+{
+       long r9  = 0;
+       long r10 = 0;
+       long r11 = 0;
+       long status;
+
+       /*
+        * Don't do a "switch" here since that gives us code that
+        * isn't self-relocatable.
+        */
+       status = 0;
+       if (index == SAL_FREQ_BASE) {
+               if (!running_on_sim)
+                       status = ia64_sal_freq_base(in1,&r9,&r10);
+               else switch (in1) {
+                     case SAL_FREQ_BASE_PLATFORM:
+                       r9 = 200000000;
+                       break;
+
+                     case SAL_FREQ_BASE_INTERVAL_TIMER:
+                       r9 = 700000000;
+                       break;
+
+                     case SAL_FREQ_BASE_REALTIME_CLOCK:
+                       r9 = 1;
+                       break;
+
+                     default:
+                       status = -1;
+                       break;
+               }
+       } else if (index == SAL_PCI_CONFIG_READ) {
+               if (current->domain == dom0) {
+                       u64 value;
+                       // note that args 2&3 are swapped!!
+                       status = ia64_sal_pci_config_read(in1,in3,in2,&value);
+                       r9 = value;
+               }
+               else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n");
+       } else if (index == SAL_PCI_CONFIG_WRITE) {
+               if (current->domain == dom0) {
+                       if (((in1 & ~0xffffffffUL) && (in4 == 0)) ||
+                           (in4 > 1) ||
+                           (in2 > 8) || (in2 & (in2-1)))
+                               printf("*** 
SAL_PCI_CONF_WRITE?!?(adr=%p,typ=%p,sz=%p,val=%p)\n",in1,in4,in2,in3);
+                       // note that args are in a different order!!
+                       status = ia64_sal_pci_config_write(in1,in4,in2,in3);
+               }
+               else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n");
+       } else if (index == SAL_SET_VECTORS) {
+               printf("*** CALLED SAL_SET_VECTORS.  IGNORED...\n");
+       } else if (index == SAL_GET_STATE_INFO) {
+               printf("*** CALLED SAL_GET_STATE_INFO.  IGNORED...\n");
+       } else if (index == SAL_GET_STATE_INFO_SIZE) {
+               printf("*** CALLED SAL_GET_STATE_INFO_SIZE.  IGNORED...\n");
+       } else if (index == SAL_CLEAR_STATE_INFO) {
+               printf("*** CALLED SAL_CLEAR_STATE_INFO.  IGNORED...\n");
+       } else if (index == SAL_MC_RENDEZ) {
+               printf("*** CALLED SAL_MC_RENDEZ.  IGNORED...\n");
+       } else if (index == SAL_MC_SET_PARAMS) {
+               printf("*** CALLED SAL_MC_SET_PARAMS.  IGNORED...\n");
+       } else if (index == SAL_CACHE_FLUSH) {
+               printf("*** CALLED SAL_CACHE_FLUSH.  IGNORED...\n");
+       } else if (index == SAL_CACHE_INIT) {
+               printf("*** CALLED SAL_CACHE_INIT.  IGNORED...\n");
+       } else if (index == SAL_UPDATE_PAL) {
+               printf("*** CALLED SAL_UPDATE_PAL.  IGNORED...\n");
+       } else {
+               printf("*** CALLED SAL_ WITH UNKNOWN INDEX.  IGNORED...\n");
+               status = -1;
+       }
+       return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+struct ia64_pal_retval
+xen_pal_emulator(unsigned long index, unsigned long in1,
+       unsigned long in2, unsigned long in3)
+{
+       long r9  = 0;
+       long r10 = 0;
+       long r11 = 0;
+       long status = -1;
+
+#define USE_PAL_EMULATOR
+#ifdef USE_PAL_EMULATOR
+       return pal_emulator_static(index);
+#endif
+       if (running_on_sim) return pal_emulator_static(index);
+       if (index >= PAL_COPY_PAL) {
+               printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+                               index);
+       }
+       else switch (index) {
+           case PAL_MEM_ATTRIB:
+               status = ia64_pal_mem_attrib(&r9);
+               break;
+           case PAL_FREQ_BASE:
+               status = ia64_pal_freq_base(&r9);
+               break;
+           case PAL_PROC_GET_FEATURES:
+               status = ia64_pal_proc_get_features(&r9,&r10,&r11);
+               break;
+           case PAL_BUS_GET_FEATURES:
+               status = ia64_pal_bus_get_features(&r9,&r10,&r11);
+               break;
+           case PAL_FREQ_RATIOS:
+               status = ia64_pal_freq_ratios(&r9,&r10,&r11);
+               break;
+           case PAL_PTCE_INFO:
+               {
+                       // return hard-coded xen-specific values because ptc.e
+                       // is emulated on xen to always flush everything
+                       // these values result in only one ptc.e instruction
+                       status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0;
+               }
+               break;
+           case PAL_VERSION:
+               status = ia64_pal_version(&r9,&r10);
+               break;
+           case PAL_VM_PAGE_SIZE:
+               status = ia64_pal_vm_page_size(&r9,&r10);
+               break;
+           case PAL_DEBUG_INFO:
+               status = ia64_pal_debug_info(&r9,&r10);
+               break;
+           case PAL_CACHE_SUMMARY:
+               status = ia64_pal_cache_summary(&r9,&r10);
+               break;
+           case PAL_VM_SUMMARY:
+               // FIXME: what should xen return for these, figure out later
+               // For now, linux does the right thing if pal call fails
+               // In particular, rid_size must be set properly!
+               //status = ia64_pal_vm_summary(&r9,&r10);
+               break;
+           case PAL_RSE_INFO:
+               status = ia64_pal_rse_info(&r9,&r10);
+               break;
+           case PAL_VM_INFO:
+               status = ia64_pal_vm_info(in1,in2,&r9,&r10);
+               break;
+           case PAL_REGISTER_INFO:
+               status = ia64_pal_register_info(in1,&r9,&r10);
+               break;
+           case PAL_CACHE_FLUSH:
+               /* FIXME */
+               printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n");
+               BUG();
+               break;
+           case PAL_PERF_MON_INFO:
+               {
+                       unsigned long pm_buffer[16];
+                       int i;
+                       status = ia64_pal_perf_mon_info(pm_buffer,&r9);
+                       if (status != 0) {
+                               while(1)
+                               printk("PAL_PERF_MON_INFO fails 
ret=%d\n",status);
+                               break;
+                       }
+                       if (copy_to_user((void __user *)in1,pm_buffer,128)) {
+                               while(1)
+                               printk("xen_pal_emulator: PAL_PERF_MON_INFO "
+                                       "can't copy to user!!!!\n");
+                               status = -1;
+                               break;
+                       }
+               }
+               break;
+           case PAL_CACHE_INFO:
+               {
+                       pal_cache_config_info_t ci;
+                       status = ia64_pal_cache_config_info(in1,in2,&ci);
+                       if (status != 0) break;
+                       r9 = ci.pcci_info_1.pcci1_data;
+                       r10 = ci.pcci_info_2.pcci2_data;
+               }
+               break;
+           case PAL_VM_TR_READ:        /* FIXME: vcpu_get_tr?? */
+               printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n");
+               break;
+           case PAL_HALT_INFO:         /* inappropriate info for guest? */
+               printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n");
+               break;
+           default:
+               printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+                               index);
+               break;
+       }
+       return ((struct ia64_pal_retval) {status, r9, r10, r11});
+}
+
+#define NFUNCPTRS 20
+
+void print_md(efi_memory_desc_t *md)
+{
+#if 1
+       printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) 
(%luMB)\n",
+               md->type, md->attribute, md->phys_addr,
+               md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+               md->num_pages >> (20 - EFI_PAGE_SHIFT));
+#endif
+}
+
#define LSAPIC_NUM 16  // TEMP
/* Non-zero until the first enabled LSAPIC MADT entry has been seen;
 * acpi_update_lsapic() then clears it and forces every later enabled
 * entry to disabled, so the guest sees a single logical processor. */
static u32 lsapic_flag=1;
+
/* Provide only one LP to guest */
/*
 * MADT-walk callback: keep the first enabled LSAPIC entry and force every
 * subsequent enabled entry to disabled (see lsapic_flag above).
 * Returns 0 on success, -EINVAL for a NULL entry.
 */
static int 
acpi_update_lsapic (acpi_table_entry_header *header)
{
	struct acpi_table_lsapic *lsapic;

	lsapic = (struct acpi_table_lsapic *) header;
	if (!lsapic)
		return -EINVAL;

	if (lsapic->flags.enabled && lsapic_flag) {
		/* First enabled LSAPIC: keep it, disable all that follow. */
		printk("enable lsapic entry: 0x%lx\n", (u64)lsapic);
		lsapic_flag = 0; /* disable all the following processors */
	} else if (lsapic->flags.enabled) {
		printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic);
		lsapic->flags.enabled = 0;
	} else
		printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic);

	return 0;
}
+
+static int
+acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size)
+{
+       u8 checksum=0;
+       u8* ptr;
+       int len;
+       struct acpi_table_madt* acpi_madt;
+
+       if (!phys_addr || !size)
+               return -EINVAL;
+
+       acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+       acpi_madt->header.checksum=0;
+
+       /* re-calculate MADT checksum */
+       ptr = (u8*)acpi_madt;
+       len = acpi_madt->header.length;
+       while (len>0){
+               checksum = (u8)( checksum + (*ptr++) );
+               len--;
+       }
+       acpi_madt->header.checksum = 0x0 - checksum;    
+       
+       return 0;
+}
+
+/* base is physical address of acpi table */
+void touch_acpi_table(void)
+{
+       u64 count = 0;
+       count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 
NR_CPUS);
+       if ( count < 1)
+               printk("Error parsing MADT - no LAPIC entires\n");
+       printk("Total %d lsapic entry\n", count);
+       acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
+
+       return;
+}
+
+
+struct ia64_boot_param *
+dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int 
fw_mem_size)
+{
+       efi_system_table_t *efi_systab;
+       efi_runtime_services_t *efi_runtime;
+       efi_config_table_t *efi_tables;
+       struct ia64_sal_systab *sal_systab;
+       efi_memory_desc_t *efi_memmap, *md;
+       unsigned long *pal_desc, *sal_desc;
+       struct ia64_sal_desc_entry_point *sal_ed;
+       struct ia64_boot_param *bp;
+       unsigned long *pfn;
+       unsigned char checksum = 0;
+       char *cp, *cmd_line, *fw_vendor;
+       int i = 0;
+       unsigned long maxmem = d->max_pages * PAGE_SIZE;
+       unsigned long start_mpaddr = ((d==dom0)?dom0_start:0);
+
+#      define MAKE_MD(typ, attr, start, end, abs)      \       
+       do {                                            \
+               md = efi_memmap + i++;                  \
+               md->type = typ;                         \
+               md->pad = 0;                            \
+               md->phys_addr = abs ? start : start_mpaddr + start;     \
+               md->virt_addr = 0;                      \
+               md->num_pages = (end - start) >> 12;    \
+               md->attribute = attr;                   \
+               print_md(md);                           \
+       } while (0)
+
+/* FIXME: should check size but for now we have a whole MB to play with.
+   And if stealing code from fw-emu.c, watch out for new fw_vendor on the end!
+       if (fw_mem_size < sizeof(fw_mem_proto)) {
+               printf("sys_fw_init: insufficient space for fw_mem\n");
+               return 0;
+       }
+*/
+       memset(fw_mem, 0, fw_mem_size);
+
+#ifdef XEN
+#else
+       pal_desc = (unsigned long *) &pal_emulator_static;
+       sal_desc = (unsigned long *) &sal_emulator;
+#endif
+
+       cp = fw_mem;
+       efi_systab  = (void *) cp; cp += sizeof(*efi_systab);
+       efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+       efi_tables  = (void *) cp; cp += NUM_EFI_SYS_TABLES * 
sizeof(*efi_tables);
+       sal_systab  = (void *) cp; cp += sizeof(*sal_systab);
+       sal_ed      = (void *) cp; cp += sizeof(*sal_ed);
+       efi_memmap  = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+       bp          = (void *) cp; cp += sizeof(*bp);
+       pfn        = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
+       cmd_line    = (void *) cp;
+
+       if (args) {
+               if (arglen >= 1024)
+                       arglen = 1023;
+               memcpy(cmd_line, args, arglen);
+       } else {
+               arglen = 0;
+       }
+       cmd_line[arglen] = '\0';
+
+       memset(efi_systab, 0, sizeof(efi_systab));
+       efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+       efi_systab->hdr.revision  = EFI_SYSTEM_TABLE_REVISION;
+       efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+       cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit 
boundary
+#define FW_VENDOR 
"X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+       cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 
64-bit boundary
+
+       memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR));
+       efi_systab->fw_vendor = dom_pa(fw_vendor);
+       
+       efi_systab->fw_revision = 1;
+       efi_systab->runtime = (void *) dom_pa(efi_runtime);
+       efi_systab->nr_tables = NUM_EFI_SYS_TABLES;
+       efi_systab->tables = dom_pa(efi_tables);
+
+       efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+       efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+       efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+#define EFI_HYPERCALL_PATCH(tgt,call) do { \
+    
dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \
+    tgt = dom_pa(pfn); \
+    *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+    *pfn++ = 0; \
+    } while (0)
+
+       EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME);
+       EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME);
+       EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+       EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+       
EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+       EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE);
+       
EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+       EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE);
+       
EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+       EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+
+       efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID;
+       efi_tables[0].table = dom_pa(sal_systab);
+       for (i = 1; i < NUM_EFI_SYS_TABLES; i++) {
+               efi_tables[i].guid = NULL_GUID;
+               efi_tables[i].table = 0;
+       }
+       if (d == dom0) {
+               printf("Domain0 EFI passthrough:");
+               i = 1;
+               if (efi.mps) {
+                       efi_tables[i].guid = MPS_TABLE_GUID;
+                       efi_tables[i].table = __pa(efi.mps);
+                       printf(" MPS=%0xlx",efi_tables[i].table);
+                       i++;
+               }
+
+               touch_acpi_table();
+
+               if (efi.acpi20) {
+                       efi_tables[i].guid = ACPI_20_TABLE_GUID;
+                       efi_tables[i].table = __pa(efi.acpi20);
+                       printf(" ACPI 2.0=%0xlx",efi_tables[i].table);
+                       i++;
+               }
+               if (efi.acpi) {
+                       efi_tables[i].guid = ACPI_TABLE_GUID;
+                       efi_tables[i].table = __pa(efi.acpi);
+                       printf(" ACPI=%0xlx",efi_tables[i].table);
+                       i++;
+               }
+               if (efi.smbios) {
+                       efi_tables[i].guid = SMBIOS_TABLE_GUID;
+                       efi_tables[i].table = __pa(efi.smbios);
+                       printf(" SMBIOS=%0xlx",efi_tables[i].table);
+                       i++;
+               }
+               if (efi.hcdp) {
+                       efi_tables[i].guid = HCDP_TABLE_GUID;
+                       efi_tables[i].table = __pa(efi.hcdp);
+                       printf(" HCDP=%0xlx",efi_tables[i].table);
+                       i++;
+               }
+               printf("\n");
+       }
+
+       /* fill in the SAL system table: */
+       memcpy(sal_systab->signature, "SST_", 4);
+       sal_systab->size = sizeof(*sal_systab);
+       sal_systab->sal_rev_minor = 1;
+       sal_systab->sal_rev_major = 0;
+       sal_systab->entry_count = 1;
+
+       strcpy(sal_systab->oem_id, "Xen/ia64");
+       strcpy(sal_systab->product_id, "Xen/ia64");
+
+       /* fill in an entry point: */
+       sal_ed->type = SAL_DESC_ENTRY_POINT;
+#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \
+    
dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \
+    tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+    } while (0)
+       FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0);
+       FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1);
+       sal_ed->gp = 0;  // will be ignored
+
+       for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+               checksum += *cp;
+
+       sal_systab->checksum = -checksum;
+
+       /* simulate 1MB free memory at physical address zero */
+       i = 0;
+       MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);
+       /* hypercall patches live here, masquerade as reserved PAL memory */
+       MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+       MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0);
+#ifdef PASS_THRU_IOPORT_SPACE
+       if (d == dom0 && !running_on_sim) {
+               /* pass through the I/O port space */
+               efi_memory_desc_t *efi_get_io_md(void);
+               efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md();
+               u32 type;
+               u64 iostart, ioend, ioattr;
+               
+               type = ia64_efi_io_md->type;
+               iostart = ia64_efi_io_md->phys_addr;
+               ioend = ia64_efi_io_md->phys_addr +
+                       (ia64_efi_io_md->num_pages << 12);
+               ioattr = ia64_efi_io_md->attribute;
+               MAKE_MD(type,ioattr,iostart,ioend, 1);
+       }
+       else
+               MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0);
+#endif
+
+       bp->efi_systab = dom_pa(fw_mem);
+       bp->efi_memmap = dom_pa(efi_memmap);
+       bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+       bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+       bp->efi_memdesc_version = 1;
+       bp->command_line = dom_pa(cmd_line);
+       bp->console_info.num_cols = 80;
+       bp->console_info.num_rows = 25;
+       bp->console_info.orig_x = 0;
+       bp->console_info.orig_y = 24;
+       bp->fpswa = 0;
+
+       return bp;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/domain.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/domain.c        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1103 @@
+/*
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
+ *
+ *  Copyright (C) 2005 Intel Co
+ *     Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
+ *
+ * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> Add CONFIG_VTI domain 
support
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/softirq.h>
+#include <xen/mm.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/mpspec.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+//#include <xen/shadow.h>
+#include <xen/console.h>
+
+#include <xen/elf.h>
+//#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>   /* for MAX_DMA_ADDRESS */
+
+#include <asm/asm-offsets.h>  /* for IA64_THREAD_INFO_SIZE */
+
+#include <asm/vcpu.h>   /* for function declarations */
+#include <public/arch-ia64.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_vpd.h>
+#include <asm/pal.h>
+#include <public/io/ioreq.h>
+
#define CONFIG_DOMAIN0_CONTIGUOUS
/* Physical base of dom0's contiguous memory; -1 until initialized
 * (presumably during dom0 construction — TODO confirm against caller). */
unsigned long dom0_start = -1L;
unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
unsigned long dom0_align = 256*1024*1024;
#ifdef DOMU_BUILD_STAGING
/* Staging area parameters used when building unprivileged (domU) images. */
unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
unsigned long domU_staging_start;
unsigned long domU_staging_align = 64*1024;
unsigned long *domU_staging_area;
#endif

// initialized by arch/ia64/setup.c:find_initrd()
unsigned long initrd_start = 0, initrd_end = 0;

/* True iff address 'a' lies within domain d's Xen-reserved VA range. */
#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))

//extern int loadelfimage(char *);
extern int readelfimage_base_and_size(char *, unsigned long,
		      unsigned long *, unsigned long *, unsigned long *);

unsigned long map_domain_page0(struct domain *);
extern unsigned long dom_fw_setup(struct domain *, char *, int);
+
/* this belongs in include/asm, but there doesn't seem to be a suitable place */
/* Stub: per-domain page-table teardown is not implemented on ia64 yet;
 * this only logs a message. */
void free_perdomain_pt(struct domain *d)
{
	printf("free_perdomain_pt: not implemented\n");
	//free_page((unsigned long)d->mm.perdomain_pt);
}
+
/* When non-zero, default_idle() skips halting the CPU. */
int hlt_counter;

/* Forbid the idle loop from halting the CPU (nestable; pairs with
 * enable_hlt()). */
void disable_hlt(void)
{
	hlt_counter++;
}

/* Re-allow halting in the idle loop; pairs with disable_hlt(). */
void enable_hlt(void)
{
	hlt_counter--;
}
+
/*
 * One idle iteration: when halting is allowed and no softirq is pending,
 * halt the CPU until the next interrupt; otherwise return immediately.
 */
static void default_idle(void)
{
	if ( hlt_counter == 0 )
	{
	local_irq_disable();
	    if ( !softirq_pending(smp_processor_id()) )
		safe_halt();
	    //else
		local_irq_enable();
	    /* NOTE(review): with the "else" commented out above,
	     * local_irq_enable() runs on both paths. */
	}
}
+
/*
 * Per-CPU idle loop: spin in default_idle() until a softirq is pending,
 * then schedule and run softirqs; never returns.
 */
void continue_cpu_idle_loop(void)
{
	int cpu = smp_processor_id();
	for ( ; ; )
	{
#ifdef IA64
//        __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
	    irq_stat[cpu].idle_timestamp = jiffies;
#endif
	    while ( !softirq_pending(cpu) )
		default_idle();
	    raise_softirq(SCHEDULE_SOFTIRQ);
	    do_softirq();
	}
}
+
/*
 * First entry into the idle loop for a CPU: kick the scheduler once,
 * publish CPU-setup-done (memory barrier), then fall into the idle loop.
 * Never returns.
 */
void startup_cpu_idle_loop(void)
{
	/* Just some sanity to ensure that the scheduler is set up okay. */
	ASSERT(current->domain == IDLE_DOMAIN_ID);
	raise_softirq(SCHEDULE_SOFTIRQ);
	do_softirq();

	/*
	 * Declares CPU setup done to the boot processor.
	 * Therefore memory barrier to ensure state is visible.
	 */
	smp_mb();
#if 0
//do we have to ensure the idle task has a shared page so that, for example,
//region registers can be loaded from it.  Apparently not...
	idle0_task.shared_info = (void *)alloc_xenheap_page();
	memset(idle0_task.shared_info, 0, PAGE_SIZE);
	/* pin mapping */
	// FIXME: Does this belong here?  Or do only at domain switch time?
	{
		/* WARNING: following must be inlined to avoid nested fault */
		unsigned long psr = ia64_clear_ic();
		ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
		 pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
		 PAGE_SHIFT);
		ia64_set_psr(psr);
		ia64_srlz_i();
	}
#endif

	continue_cpu_idle_loop();
}
+
/* Allocate a vcpu struct co-located with its per-vcpu kernel stack, hence
 * the full stack-order allocation rather than a plain kmalloc. */
struct vcpu *arch_alloc_vcpu_struct(void)
{
	/* Per-vp stack is used here. So we need keep vcpu
	 * same page as per-vp stack */
	return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
}
+
/* Release a vcpu struct allocated by arch_alloc_vcpu_struct(); the order
 * must match the stack-sized allocation above. */
void arch_free_vcpu_struct(struct vcpu *v)
{
	free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
}
+
/*
 * Lay out the initial pt_regs + switch_stack frames at the top of the
 * vcpu's kernel stack so the first context switch to this vcpu "returns"
 * through ia64_ret_from_clone.
 */
static void init_switch_stack(struct vcpu *v)
{
	/* pt_regs sits just below the top of the stack area ... */
	struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
	/* ... with the switch_stack frame immediately below it. */
	struct switch_stack *sw = (struct switch_stack *) regs - 1;
	extern void ia64_ret_from_clone;

	memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
	sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
	sw->b0 = (unsigned long) &ia64_ret_from_clone;
	sw->ar_fpsr = FPSR_DEFAULT;
	/* 16 bytes below the frame: scratch area per ia64 convention. */
	v->arch._thread.ksp = (unsigned long) sw - 16;
	// stay on kernel stack because may get interrupts!
	// ia64_ret_from_clone (which b0 gets in new_thread) switches
	// to user stack
	v->arch._thread.on_ustack = 0;
	/* Clear the 96 saved high floating-point registers. */
	memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
}
+
+void arch_do_createdomain(struct vcpu *v)
+{
+       struct domain *d = v->domain;
+       struct thread_info *ti = alloc_thread_info(v);
+
+       /* Clear thread_info to clear some important fields, like preempt_count 
*/
+       memset(ti, 0, sizeof(struct thread_info));
+       init_switch_stack(v);
+
+       d->shared_info = (void *)alloc_xenheap_page();
+       if (!d->shared_info) {
+               printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
+               while (1);
+       }
+       memset(d->shared_info, 0, PAGE_SIZE);
+       d->shared_info->vcpu_data[0].arch.privregs = 
+                       alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
+       printf("arch_vcpu_info=%p\n", 
d->shared_info->vcpu_data[0].arch.privregs);
+       memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE);
+       v->vcpu_info = &(d->shared_info->vcpu_data[0]);
+
+       d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
+
+#ifdef CONFIG_VTI
+       /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick
+        * to this solution. Maybe it can be deferred until we know created
+        * one as vmx domain */
+       v->arch.vtlb = init_domain_tlb(v);
+#endif
+
+       /* We may also need emulation rid for region4, though it's unlikely
+        * to see guest issue uncacheable access in metaphysical mode. But
+        * keep such info here may be more sane.
+        */
+       if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL)
+        || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL))
+               BUG();
+       VCPU(v, metaphysical_mode) = 1;
+       v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
+       v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
+       v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
+       v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
+#define DOMAIN_RID_BITS_DEFAULT 18
+       if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
+               BUG();
+       v->arch.starting_rid = d->arch.starting_rid;
+       v->arch.ending_rid = d->arch.ending_rid;
+       // the following will eventually need to be negotiated dynamically
+       d->xen_vastart = XEN_START_ADDR;
+       d->xen_vaend = XEN_END_ADDR;
+       d->shared_info_va = SHAREDINFO_ADDR;
+       d->arch.breakimm = 0x1000;
+       v->arch.breakimm = d->arch.breakimm;
+
+       d->arch.mm = xmalloc(struct mm_struct);
+       if (unlikely(!d->arch.mm)) {
+               printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
+               return -ENOMEM;
+       }
+       memset(d->arch.mm, 0, sizeof(*d->arch.mm));
+       d->arch.mm->pgd = pgd_alloc(d->arch.mm);
+       if (unlikely(!d->arch.mm->pgd)) {
+               printk("Can't allocate pgd for domain %d\n",d->domain_id);
+               return -ENOMEM;
+       }
+}
+
/*
 * Export a vcpu's register file, event-channel vector and shared arch
 * state into the guest-context structure c.  The privregs copy-out is
 * currently disabled (#if 0).
 */
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
	/* pt_regs live just below the top of the vcpu's stack area. */
	struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;

	printf("arch_getdomaininfo_ctxt\n");
	c->regs = *regs;
	c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
#if 0
	if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs,
			v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) {
		printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs);
		return -EFAULT;
	}
#endif

	c->shared = v->domain->shared_info->arch;
}
+
+int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
+{
+       struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + 
IA64_STK_OFFSET) - 1;
+       struct domain *d = v->domain;
+       int i, rc, ret;
+       unsigned long progress = 0;
+
+       printf("arch_set_info_guest\n");
+       if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+            return 0;
+
+       if (c->flags & VGCF_VMX_GUEST) {
+           if (!vmx_enabled) {
+               printk("No VMX hardware feature for vmx domain.\n");
+               return -EINVAL;
+           }
+
+           vmx_setup_platform(v, c);
+       }
+
+       *regs = c->regs;
+       new_thread(v, regs->cr_iip, 0, 0);
+
+       v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
+       if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs,
+                          c->vcpu.privregs, sizeof(mapped_regs_t))) {
+           printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", 
c->vcpu.privregs);
+           return -EFAULT;
+       }
+
+       v->arch.domain_itm_last = -1L;
+       d->shared_info->arch = c->shared;
+
+       /* Don't redo final setup */
+       set_bit(_VCPUF_initialised, &v->vcpu_flags);
+       return 0;
+}
+
/*
 * Partial secondary-vcpu bringup: only allocates and zeroes this vcpu's
 * privregs area; the rest is still unimplemented.
 */
void arch_do_boot_vcpu(struct vcpu *v)
{
	struct domain *d = v->domain;
	printf("arch_do_boot_vcpu: not implemented\n");

	d->shared_info->vcpu_data[v->vcpu_id].arch.privregs = 
			alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
	printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs);
	/* PAGE_SIZE is a lower bound of the allocation above (whole pages
	 * of size >= sizeof(mapped_regs_t) were allocated). */
	memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE);
	return;
}
+
/* Stub: releasing a dying domain's resources is not implemented on ia64
 * yet; this only logs a message. */
void domain_relinquish_resources(struct domain *d)
{
	/* FIXME */
	printf("domain_relinquish_resources: not implemented\n");
}
+
+// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
+// and linux/arch/ia64/kernel/process.c:kernel_thread()
+void new_thread(struct vcpu *v,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info)
+{
+       struct domain *d = v->domain;
+       struct pt_regs *regs;
+       struct ia64_boot_param *bp;
+       extern char saved_command_line[];
+
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+       if (d == dom0) start_pc += dom0_start;
+#endif
+
+       regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+       if (VMX_DOMAIN(v)) {
+               /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
+               regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro 
*/
+       } else {
+               regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
+                       | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
+                       & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
+               regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
+       }
+       regs->cr_iip = start_pc;
+       regs->cr_ifs = 1UL << 63; /* or clear? */
+       regs->ar_fpsr = FPSR_DEFAULT;
+
+       if (VMX_DOMAIN(v)) {
+#ifdef CONFIG_VTI
+               vmx_init_all_rr(v);
+               if (d == dom0)
+                   VMX_VPD(v,vgr[12]) = 
dom_fw_setup(d,saved_command_line,256L);
+               /* Virtual processor context setup */
+               VMX_VPD(v, vpsr) = IA64_PSR_BN;
+               VPD_CR(v, dcr) = 0;
+#endif
+       } else {
+               init_all_rr(v);
+               if (d == dom0) 
+                   regs->r28 = dom_fw_setup(d,saved_command_line,256L);
+               else {
+                   regs->ar_rsc |= (2 << 2); /* force PL2/3 */
+                   regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 
console=tty0 root=/dev/hda1",256L);  //FIXME
+               }
+               VCPU(v, banknum) = 1;
+               VCPU(v, metaphysical_mode) = 1;
+               d->shared_info->arch.flags = (d == dom0) ? 
(SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN)
 : 0;
+       }
+}
+
/*
 * dom0 is physically contiguous, so a metaphysical address maps directly
 * to a machine page: just return its page struct.  Hangs deliberately if
 * the address is outside dom0's [dom0_start, dom0_start+dom0_size) range.
 */
static struct page * map_new_domain0_page(unsigned long mpaddr)
{
	if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
		/* NOTE(review): %p with unsigned long arguments. */
		printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr);
printk("map_new_domain0_page: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
		while(1);	/* deliberate hang: unrecoverable */
	}
	return pfn_to_page((mpaddr >> PAGE_SHIFT));
}
+
+/* allocate new page for domain and map it to the specified metaphysical addr 
*/
+struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
+{
+       struct mm_struct *mm = d->arch.mm;
+       struct page *p = (struct page *)0;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+extern unsigned long vhpt_paddr, vhpt_pend;
+
+       if (!mm->pgd) {
+               printk("map_new_domain_page: domain pgd must exist!\n");
+               return(p);
+       }
+       pgd = pgd_offset(mm,mpaddr);
+       if (pgd_none(*pgd))
+               pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
+
+       pud = pud_offset(pgd, mpaddr);
+       if (pud_none(*pud))
+               pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
+
+       pmd = pmd_offset(pud, mpaddr);
+       if (pmd_none(*pmd))
+               pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
+//             pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
+
+       pte = pte_offset_map(pmd, mpaddr);
+       if (pte_none(*pte)) {
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+               if (d == dom0) p = map_new_domain0_page(mpaddr);
+               else
+#endif
+               {
+                       p = alloc_domheap_page(d);
+                       // zero out pages for security reasons
+                       memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+               }
+               if (unlikely(!p)) {
+printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
+                       return(p);
+               }
+if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
+  printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p));
+}
+               set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
+                       __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+       }
+       else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+       return p;
+}
+
/* map a physical address to the specified metaphysical addr */
/*
 * Like map_new_domain_page(), but instead of allocating a fresh page this
 * installs a mapping to an existing machine address 'physaddr'.  The
 * pgd/pud/pmd levels are populated on demand; an already-mapped mpaddr is
 * left untouched (and logged).
 */
void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
{
	struct mm_struct *mm = d->arch.mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (!mm->pgd) {
		printk("map_domain_page: domain pgd must exist!\n");
		return;
	}
	pgd = pgd_offset(mm,mpaddr);
	if (pgd_none(*pgd))
		pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));

	pud = pud_offset(pgd, mpaddr);
	if (pud_none(*pud))
		pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));

	pmd = pmd_offset(pud, mpaddr);
	if (pmd_none(*pmd))
		pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
//		pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));

	pte = pte_offset_map(pmd, mpaddr);
	if (pte_none(*pte)) {
		/* Dirty, PL2, read/write/execute — same protections as
		 * map_new_domain_page() uses. */
		set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
			__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
	}
	else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr);
}
+
+void mpafoo(unsigned long mpaddr)
+{
+       extern unsigned long privop_trace;
+       if (mpaddr == 0x3800)
+               privop_trace = 1;
+}
+
+unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
+{
+       struct mm_struct *mm = d->arch.mm;
+       pgd_t *pgd = pgd_offset(mm, mpaddr);
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+       if (d == dom0) {
+               if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+                       //printk("lookup_domain_mpa: bad dom0 mpaddr 
%p!\n",mpaddr);
+//printk("lookup_domain_mpa: 
start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
+                       mpafoo(mpaddr);
+               }
+               pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
+                       __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+               pte = &pteval;
+               return *(unsigned long *)pte;
+       }
+#endif
+tryagain:
+       if (pgd_present(*pgd)) {
+               pud = pud_offset(pgd,mpaddr);
+               if (pud_present(*pud)) {
+                       pmd = pmd_offset(pud,mpaddr);
+                       if (pmd_present(*pmd)) {
+                               pte = pte_offset_map(pmd,mpaddr);
+                               if (pte_present(*pte)) {
+//printk("lookup_domain_page: found mapping for %lx, 
pte=%lx\n",mpaddr,pte_val(*pte));
+                                       return *(unsigned long *)pte;
+                               }
+                       }
+               }
+       }
+       /* if lookup fails and mpaddr is "legal", "create" the page */
+       if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
+               if (map_new_domain_page(d,mpaddr)) goto tryagain;
+       }
+       printk("lookup_domain_mpa: bad mpa %p (> %p\n",
+               mpaddr,d->max_pages<<PAGE_SHIFT);
+       mpafoo(mpaddr);
+       return 0;
+}
+
+// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
+#ifndef CONFIG_VTI
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+       unsigned long pte = lookup_domain_mpa(d,mpaddr);
+       unsigned long imva;
+
+       pte &= _PAGE_PPN_MASK;
+       imva = __va(pte);
+       imva |= mpaddr & ~PAGE_MASK;
+       return(imva);
+}
+#else // CONFIG_VTI
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+    unsigned long imva = __gpa_to_mpa(d, mpaddr);
+
+    return __va(imva);
+}
+#endif // CONFIG_VTI
+
+// remove following line if not privifying in memory
+//#define HAVE_PRIVIFY_MEMORY
+#ifndef HAVE_PRIVIFY_MEMORY
+#define        privify_memory(x,y) do {} while(0)
+#endif
+
+// see arch/x86/xxx/domain_build.c
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+       return (IS_ELF(*ehdr));
+}
+
+static void copy_memory(void *dst, void *src, int size)
+{
+       int remain;
+
+       if (IS_XEN_ADDRESS(dom0,src)) {
+               memcpy(dst,src,size);
+       }
+       else {
+               printf("About to call __copy_from_user(%p,%p,%d)\n",
+                       dst,src,size);
+               while (remain = __copy_from_user(dst,src,size)) {
+                       printf("incomplete user copy, %d remain of %d\n",
+                               remain,size);
+                       dst += size - remain; src += size - remain;
+                       size -= remain;
+               }
+       }
+}
+
+void loaddomainelfimage(struct domain *d, unsigned long image_start)
+{
+       char *elfbase = image_start;
+       //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
+       Elf_Ehdr ehdr;
+       Elf_Phdr phdr;
+       int h, filesz, memsz, paddr;
+       unsigned long elfaddr, dom_mpaddr, dom_imva;
+       struct page *p;
+       unsigned long pteval;
+  
+       copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
+       for ( h = 0; h < ehdr.e_phnum; h++ ) {
+               copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
+               sizeof(Elf_Phdr));
+           //if ( !is_loadable_phdr(phdr) )
+           if ((phdr.p_type != PT_LOAD)) {
+               continue;
+       }
+       filesz = phdr.p_filesz; memsz = phdr.p_memsz;
+       elfaddr = elfbase + phdr.p_offset;
+       dom_mpaddr = phdr.p_paddr;
+//printf("p_offset: %x, size=%x\n",elfaddr,filesz);
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+       if (d == dom0) {
+               if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) {
+                       printf("Domain0 doesn't fit in allocated space!\n");
+                       while(1);
+               }
+               dom_imva = __va(dom_mpaddr + dom0_start);
+               copy_memory(dom_imva,elfaddr,filesz);
+               if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
+//FIXME: This test for code seems to find a lot more than objdump -x does
+               if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
+       }
+       else
+#endif
+       while (memsz > 0) {
+#ifdef DOMU_AUTO_RESTART
+               pteval = lookup_domain_mpa(d,dom_mpaddr);
+               if (pteval) dom_imva = __va(pteval & _PFN_MASK);
+               else { printf("loaddomainelfimage: BAD!\n"); while(1); }
+#else
+               p = map_new_domain_page(d,dom_mpaddr);
+               if (unlikely(!p)) BUG();
+               dom_imva = __va(page_to_phys(p));
+#endif
+               if (filesz > 0) {
+                       if (filesz >= PAGE_SIZE)
+                               copy_memory(dom_imva,elfaddr,PAGE_SIZE);
+                       else { // copy partial page, zero the rest of page
+                               copy_memory(dom_imva,elfaddr,filesz);
+                               memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
+                       }
+//FIXME: This test for code seems to find a lot more than objdump -x does
+                       if (phdr.p_flags & PF_X)
+                               privify_memory(dom_imva,PAGE_SIZE);
+               }
+               else if (memsz > 0) // always zero out entire page
+                       memset(dom_imva,0,PAGE_SIZE);
+               memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
+               elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
+       }
+       }
+}
+
+int
+parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
+{
+       Elf_Ehdr ehdr;
+
+       copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
+
+       if ( !elf_sanity_check(&ehdr) ) {
+           printk("ELF sanity check failed.\n");
+           return -EINVAL;
+       }
+
+       if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
+       {
+           printk("ELF program headers extend beyond end of image.\n");
+           return -EINVAL;
+       }
+
+       if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
+       {
+           printk("ELF section headers extend beyond end of image.\n");
+           return -EINVAL;
+       }
+
+#if 0
+       /* Find the section-header strings table. */
+       if ( ehdr.e_shstrndx == SHN_UNDEF )
+       {
+           printk("ELF image has no section-header strings table 
(shstrtab).\n");
+           return -EINVAL;
+       }
+#endif
+
+       *entry = ehdr.e_entry;
+printf("parsedomainelfimage: entry point = %p\n",*entry);
+
+       return 0;
+}
+
+
+void alloc_dom0(void)
+{
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+       if (platform_is_hp_ski()) {
+       dom0_size = 128*1024*1024; //FIXME: Should be configurable
+       }
+       printf("alloc_dom0: starting (initializing %d 
MB...)\n",dom0_size/(1024*1024));
+ 
+     /* FIXME: The first trunk (say 256M) should always be assigned to
+      * Dom0, since Dom0's physical == machine address for DMA purpose.
+      * Some old version linux, like 2.4, assumes physical memory existing
+      * in 2nd 64M space.
+      */
+     dom0_start = alloc_boot_pages(
+         dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
+     dom0_start <<= PAGE_SHIFT;
+       if (!dom0_start) {
+       printf("construct_dom0: can't allocate contiguous memory size=%p\n",
+               dom0_size);
+       while(1);
+       }
+       printf("alloc_dom0: dom0_start=%p\n",dom0_start);
+#else
+       dom0_start = 0;
+#endif
+
+}
+
+#ifdef DOMU_BUILD_STAGING
+void alloc_domU_staging(void)
+{
+       domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
+       printf("alloc_domU_staging: starting (initializing %d 
MB...)\n",domU_staging_size/(1024*1024));
+       domU_staging_start = alloc_boot_pages(
+            domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
+        domU_staging_start <<= PAGE_SHIFT;
+       if (!domU_staging_size) {
+               printf("alloc_domU_staging: can't allocate, spinning...\n");
+               while(1);
+       }
+       else domU_staging_area = (unsigned long *)__va(domU_staging_start);
+       printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area);
+
+}
+
+unsigned long
+domU_staging_read_8(unsigned long at)
+{
+       // no way to return errors so just do it
+       return domU_staging_area[at>>3];
+       
+}
+
+unsigned long
+domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
+       unsigned long c, unsigned long d)
+{
+       if (at + 32 > domU_staging_size) return -1;
+       if (at & 0x1f) return -1;
+       at >>= 3;
+       domU_staging_area[at++] = a;
+       domU_staging_area[at++] = b;
+       domU_staging_area[at++] = c;
+       domU_staging_area[at] = d;
+       return 0;
+       
+}
+#endif
+
+/*
+ * Domain 0 has direct access to all devices absolutely. However
+ * the major point of this stub here, is to allow alloc_dom_mem
+ * handled with order > 0 request. Dom0 requires that bit set to
+ * allocate memory for other domains.
+ */
+void physdev_init_dom0(struct domain *d)
+{
+       set_bit(_DOMF_physdev_access, &d->domain_flags);
+}
+
+extern unsigned long running_on_sim;
+unsigned int vmx_dom0 = 0;
+int construct_dom0(struct domain *d, 
+                      unsigned long image_start, unsigned long image_len, 
+                      unsigned long initrd_start, unsigned long initrd_len,
+                      char *cmdline)
+{
+       char *dst;
+       int i, rc;
+       unsigned long pfn, mfn;
+       unsigned long nr_pt_pages;
+       unsigned long count;
+       unsigned long alloc_start, alloc_end;
+       struct pfn_info *page = NULL;
+       start_info_t *si;
+       struct vcpu *v = d->vcpu[0];
+
+       struct domain_setup_info dsi;
+       unsigned long p_start;
+       unsigned long pkern_start;
+       unsigned long pkern_entry;
+       unsigned long pkern_end;
+       unsigned long ret, progress = 0;
+
+//printf("construct_dom0: starting\n");
+       /* Sanity! */
+#ifndef CLONE_DOMAIN0
+       if ( d != dom0 ) 
+           BUG();
+       if ( test_bit(_DOMF_constructed, &d->domain_flags) ) 
+           BUG();
+#endif
+
+       memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+       printk("*** LOADING DOMAIN 0 ***\n");
+
+       alloc_start = dom0_start;
+       alloc_end = dom0_start + dom0_size;
+       d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE;
+       image_start = __va(ia64_boot_param->initrd_start);
+       image_len = ia64_boot_param->initrd_size;
+//printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
+//printk("First word of image: %lx\n",*(unsigned long *)image_start);
+
+//printf("construct_dom0: about to call parseelfimage\n");
+       dsi.image_addr = (unsigned long)image_start;
+       dsi.image_len  = image_len;
+       rc = parseelfimage(&dsi);
+       if ( rc != 0 )
+           return rc;
+
+#ifdef CONFIG_VTI
+       /* Temp workaround */
+       if (running_on_sim)
+           dsi.xen_section_string = (char *)1;
+
+       /* Check whether dom0 is vti domain */
+       if ((!vmx_enabled) && !dsi.xen_section_string) {
+           printk("Lack of hardware support for unmodified vmx dom0\n");
+           panic("");
+       }
+
+       if (vmx_enabled && !dsi.xen_section_string) {
+           printk("Dom0 is vmx domain!\n");
+           vmx_dom0 = 1;
+       }
+#endif
+
+       p_start = dsi.v_start;
+       pkern_start = dsi.v_kernstart;
+       pkern_end = dsi.v_kernend;
+       pkern_entry = dsi.v_kernentry;
+
+//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, 
pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
+
+       if ( (p_start & (PAGE_SIZE-1)) != 0 )
+       {
+           printk("Initial guest OS must load to a page boundary.\n");
+           return -EINVAL;
+       }
+
+       printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
+              " Kernel image:  %lx->%lx\n"
+              " Entry address: %lx\n"
+              " Init. ramdisk:   (NOT IMPLEMENTED YET)\n",
+              pkern_start, pkern_end, pkern_entry);
+
+       if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
+       {
+           printk("Initial guest OS requires too much space\n"
+                  "(%luMB is greater than %luMB limit)\n",
+                  (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
+           return -ENOMEM;
+       }
+
+       // if high 3 bits of pkern start are non-zero, error
+
+       // if pkern end is after end of metaphysical memory, error
+       //  (we should be able to deal with this... later)
+
+
+       //
+
+#if 0
+       strcpy(d->name,"Domain0");
+#endif
+
+       /* Mask all upcalls... */
+       for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+           d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+#ifdef CONFIG_VTI
+       /* Construct a frame-allocation list for the initial domain, since these
+        * pages are allocated by boot allocator and pfns are not set properly
+        */
+       for ( mfn = (alloc_start>>PAGE_SHIFT); 
+             mfn < (alloc_end>>PAGE_SHIFT); 
+             mfn++ )
+       {
+            page = &frame_table[mfn];
+            page_set_owner(page, d);
+            page->u.inuse.type_info = 0;
+            page->count_info        = PGC_allocated | 1;
+            list_add_tail(&page->list, &d->page_list);
+
+           /* Construct 1:1 mapping */
+           machine_to_phys_mapping[mfn] = mfn;
+       }
+
+       /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
+        * for dom0
+        */
+       d->arch.pmt = NULL;
+#endif
+
+       /* Copy the OS image. */
+       loaddomainelfimage(d,image_start);
+
+       /* Copy the initial ramdisk. */
+       //if ( initrd_len != 0 )
+       //    memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+
+       /* Sync d/i cache conservatively */
+       ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+       if (ret != PAL_STATUS_SUCCESS)
+           panic("PAL CACHE FLUSH failed for dom0.\n");
+       printk("Sync i/d cache for dom0 image SUCC\n");
+
+#if 0
+       /* Set up start info area. */
+       //si = (start_info_t *)vstartinfo_start;
+       memset(si, 0, PAGE_SIZE);
+       si->nr_pages     = d->tot_pages;
+       si->shared_info  = virt_to_phys(d->shared_info);
+       si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
+       //si->pt_base      = vpt_start;
+       //si->nr_pt_frames = nr_pt_pages;
+       //si->mfn_list     = vphysmap_start;
+
+       if ( initrd_len != 0 )
+       {
+           //si->mod_start = vinitrd_start;
+           si->mod_len   = initrd_len;
+           printk("Initrd len 0x%lx, start at 0x%08lx\n",
+                  si->mod_len, si->mod_start);
+       }
+
+       dst = si->cmd_line;
+       if ( cmdline != NULL )
+       {
+           for ( i = 0; i < 255; i++ )
+           {
+               if ( cmdline[i] == '\0' )
+                   break;
+               *dst++ = cmdline[i];
+           }
+       }
+       *dst = '\0';
+
+       zap_low_mappings(); /* Do the same for the idle page tables. */
+#endif
+       
+       /* Give up the VGA console if DOM0 is configured to grab it. */
+       if (cmdline != NULL)
+           console_endboot(strstr(cmdline, "tty0") != NULL);
+
+       /* VMX specific construction for Dom0, if hardware supports VMX
+        * and Dom0 is unmodified image
+        */
+       printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
+       if (vmx_dom0)
+           vmx_final_setup_domain(dom0);
+
+       set_bit(_DOMF_constructed, &d->domain_flags);
+
+       new_thread(v, pkern_entry, 0, 0);
+       physdev_init_dom0(d);
+
+       // FIXME: Hack for keyboard input
+#ifdef CLONE_DOMAIN0
+if (d == dom0)
+#endif
+       serial_input_init();
+       if (d == dom0) {
+               VCPU(v, delivery_mask[0]) = -1L;
+               VCPU(v, delivery_mask[1]) = -1L;
+               VCPU(v, delivery_mask[2]) = -1L;
+               VCPU(v, delivery_mask[3]) = -1L;
+       }
+       else __set_bit(0x30, VCPU(v, delivery_mask));
+
+       return 0;
+}
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+int construct_domU(struct domain *d,
+                  unsigned long image_start, unsigned long image_len,
+                  unsigned long initrd_start, unsigned long initrd_len,
+                  char *cmdline)
+{
+       int i, rc;
+       struct vcpu *v = d->vcpu[0];
+       unsigned long pkern_entry;
+
+#ifndef DOMU_AUTO_RESTART
+       if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
+#endif
+
+       printk("*** LOADING DOMAIN %d ***\n",d->domain_id);
+
+       d->max_pages = dom0_size/PAGE_SIZE;     // FIXME: use dom0 size
+       // FIXME: use domain0 command line
+       rc = parsedomainelfimage(image_start, image_len, &pkern_entry);
+       printk("parsedomainelfimage returns %d\n",rc);
+       if ( rc != 0 ) return rc;
+
+       /* Mask all upcalls... */
+       for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+               d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+       /* Copy the OS image. */
+       printk("calling loaddomainelfimage(%p,%p)\n",d,image_start);
+       loaddomainelfimage(d,image_start);
+       printk("loaddomainelfimage returns\n");
+
+       set_bit(_DOMF_constructed, &d->domain_flags);
+
+       printk("calling new_thread, entry=%p\n",pkern_entry);
+#ifdef DOMU_AUTO_RESTART
+       v->domain->arch.image_start = image_start;
+       v->domain->arch.image_len = image_len;
+       v->domain->arch.entry = pkern_entry;
+#endif
+       new_thread(v, pkern_entry, 0, 0);
+       printk("new_thread returns\n");
+       __set_bit(0x30, VCPU(v, delivery_mask));
+
+       return 0;
+}
+
+#ifdef DOMU_AUTO_RESTART
+void reconstruct_domU(struct vcpu *v)
+{
+       /* re-copy the OS image to reset data values to original */
+       printk("reconstruct_domU: restarting domain %d...\n",
+               v->domain->domain_id);
+       loaddomainelfimage(v->domain,v->domain->arch.image_start);
+       new_thread(v, v->domain->arch.entry, 0, 0);
+}
+#endif
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+int launch_domainU(unsigned long size)
+{
+#ifdef CLONE_DOMAIN0
+       static int next = CLONE_DOMAIN0+1;
+#else
+       static int next = 1;
+#endif 
+
+       struct domain *d = do_createdomain(next,0);
+       if (!d) {
+               printf("launch_domainU: couldn't create\n");
+               return 1;
+       }
+       else next++;
+       if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) {
+               printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
+                       d->domain_id,domU_staging_area,size);
+               return 2;
+       }
+       domain_unpause_by_systemcontroller(d);
+}
+
+void machine_restart(char * __unused)
+{
+       if (platform_is_hp_ski()) dummy();
+       printf("machine_restart called: spinning....\n");
+       while(1);
+}
+
+void machine_halt(void)
+{
+       if (platform_is_hp_ski()) dummy();
+       printf("machine_halt called: spinning....\n");
+       while(1);
+}
+
+void dummy_called(char *function)
+{
+       if (platform_is_hp_ski()) asm("break 0;;");
+       printf("dummy called in %s: spinning....\n", function);
+       while(1);
+}
+
+
+#if 0
+void switch_to(struct vcpu *prev, struct vcpu *next)
+{
+       struct vcpu *last;
+
+       __switch_to(prev,next,last);
+       //set_current(next);
+}
+#endif
+
+void domain_pend_keyboard_interrupt(int irq)
+{
+       vcpu_pend_interrupt(dom0->vcpu[0],irq);
+}
+
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+       if ( v->processor == newcpu )
+               return;
+
+       set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+       v->processor = newcpu;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/grant_table.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/grant_table.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1288 @@
+#ifndef CONFIG_VTI
+// temporarily in arch/ia64 until can merge into common/grant_table.c
+/******************************************************************************
+ * common/grant_table.c
+ * 
+ * Mechanism for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ * 
+ * Copyright (c) 2005 Christopher Clark
+ * Copyright (c) 2004 K A Fraser
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define GRANT_DEBUG 0
+#define GRANT_DEBUG_VERBOSE 0
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
+#ifdef __ia64__
+#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok??
+// FIXME-ia64: need to implement real cmpxchg_user on ia64
+//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 
0))
+// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different
+#undef ORDER_GRANT_FRAMES
+//#undef NUM_GRANT_FRAMES
+#define ORDER_GRANT_FRAMES 0
+//#define NUM_GRANT_FRAMES  (1U << ORDER_GRANT_FRAMES)
+#endif
+
+#define PIN_FAIL(_lbl, _rc, _f, _a...)   \
+    do {                           \
+        DPRINTK( _f, ## _a );      \
+        rc = (_rc);                \
+        goto _lbl;                 \
+    } while ( 0 )
+
+static inline int
+get_maptrack_handle(
+    grant_table_t *t)
+{
+    unsigned int h;
+    if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
+        return -1;
+    t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
+    t->map_count++;
+    return h;
+}
+
+static inline void
+put_maptrack_handle(
+    grant_table_t *t, int handle)
+{
+    t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
+    t->maptrack_head = handle;
+    t->map_count--;
+}
+
+static int
+__gnttab_activate_grant_ref(
+    struct domain          *mapping_d,          /* IN */
+    struct vcpu     *mapping_ed,
+    struct domain          *granting_d,
+    grant_ref_t             ref,
+    u16                     dev_hst_ro_flags,
+    unsigned long           host_virt_addr,
+    unsigned long          *pframe )            /* OUT */
+{
+    domid_t               sdom;
+    u16                   sflags;
+    active_grant_entry_t *act;
+    grant_entry_t        *sha;
+    s16                   rc = 1;
+    unsigned long         frame = 0;
+    int                   retries = 0;
+
+    /*
+     * Objectives of this function:
+     * . Make the record ( granting_d, ref ) active, if not already.
+     * . Update shared grant entry of owner, indicating frame is mapped.
+     * . Increment the owner act->pin reference counts.
+     * . get_page on shared frame if new mapping.
+     * . get_page_type if this is first RW mapping of frame.
+     * . Add PTE to virtual address space of mapping_d, if necessary.
+     * Returns:
+     * .  -ve: error
+     * .    1: ok
+     * .    0: ok and TLB invalidate of host_virt_addr needed.
+     *
+     * On success, *pframe contains mfn.
+     */
+
+    /*
+     * We bound the number of times we retry CMPXCHG on memory locations that
+     * we share with a guest OS. The reason is that the guest can modify that
+     * location at a higher rate than we can read-modify-CMPXCHG, so the guest
+     * could cause us to livelock. There are a few cases where it is valid for
+     * the guest to race our updates (e.g., to change the GTF_readonly flag),
+     * so we allow a few retries before failing.
+     */
+
+    act = &granting_d->grant_table->active[ref];
+    sha = &granting_d->grant_table->shared[ref];
+
+    spin_lock(&granting_d->grant_table->lock);
+
+    if ( act->pin == 0 )
+    {
+        /* CASE 1: Activating a previously inactive entry. */
+
+        sflags = sha->flags;
+        sdom   = sha->domid;
+
+        for ( ; ; )
+        {
+            u32 scombo, prev_scombo, new_scombo;
+
+            if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
+                 unlikely(sdom != mapping_d->domain_id) )
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+                        sflags, sdom, mapping_d->domain_id);
+
+            /* Merge two 16-bit values into a 32-bit combined update. */
+            /* NB. Endianness! */
+            prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+            new_scombo = scombo | GTF_reading;
+            if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
+            {
+                new_scombo |= GTF_writing;
+                if ( unlikely(sflags & GTF_readonly) )
+                    PIN_FAIL(unlock_out, GNTST_general_error,
+                             "Attempt to write-pin a r/o grant entry.\n");
+            }
+
+            /* NB. prev_scombo is updated in place to seen value. */
+            if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
+                                       prev_scombo,
+                                       new_scombo)) )
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "Fault while modifying shared flags and domid.\n");
+
+            /* Did the combined update work (did we see what we expected?). */
+            if ( likely(prev_scombo == scombo) )
+                break;
+
+            if ( retries++ == 4 )
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "Shared grant entry is unstable.\n");
+
+            /* Didn't see what we expected. Split out the seen flags & dom. */
+            /* NB. Endianness! */
+            sflags = (u16)prev_scombo;
+            sdom   = (u16)(prev_scombo >> 16);
+        }
+
+        /* rmb(); */ /* not on x86 */
+
+        frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?
+#else
+        if ( unlikely(!pfn_valid(frame)) ||
+             unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
+                        get_page(&frame_table[frame], granting_d) :
+                        get_page_and_type(&frame_table[frame], granting_d,
+                                          PGT_writable_page))) )
+        {
+            clear_bit(_GTF_writing, &sha->flags);
+            clear_bit(_GTF_reading, &sha->flags);
+            PIN_FAIL(unlock_out, GNTST_general_error,
+                     "Could not pin the granted frame (%lx)!\n", frame);
+        }
+#endif
+
+        if ( dev_hst_ro_flags & GNTMAP_device_map )
+            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+                GNTPIN_devr_inc : GNTPIN_devw_inc;
+        if ( dev_hst_ro_flags & GNTMAP_host_map )
+            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+        act->domid = sdom;
+        act->frame = frame;
+    }
+    else 
+    {
+        /* CASE 2: Active modications to an already active entry. */
+
+        /*
+         * A cheesy check for possible pin-count overflow.
+         * A more accurate check cannot be done with a single comparison.
+         */
+        if ( (act->pin & 0x80808080U) != 0 )
+            PIN_FAIL(unlock_out, ENOSPC,
+                     "Risk of counter overflow %08x\n", act->pin);
+
+        frame = act->frame;
+
+        if ( !(dev_hst_ro_flags & GNTMAP_readonly) && 
+             !((sflags = sha->flags) & GTF_writing) )
+        {
+            for ( ; ; )
+            {
+                u16 prev_sflags;
+                
+                if ( unlikely(sflags & GTF_readonly) )
+                    PIN_FAIL(unlock_out, GNTST_general_error,
+                             "Attempt to write-pin a r/o grant entry.\n");
+
+                prev_sflags = sflags;
+
+                /* NB. prev_sflags is updated in place to seen value. */
+                if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, 
+                                           prev_sflags | GTF_writing)) )
+                    PIN_FAIL(unlock_out, GNTST_general_error,
+                         "Fault while modifying shared flags.\n");
+
+                if ( likely(prev_sflags == sflags) )
+                    break;
+
+                if ( retries++ == 4 )
+                    PIN_FAIL(unlock_out, GNTST_general_error,
+                             "Shared grant entry is unstable.\n");
+
+                sflags = prev_sflags;
+            }
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?
+#else
+            if ( unlikely(!get_page_type(&frame_table[frame],
+                                         PGT_writable_page)) )
+            {
+                clear_bit(_GTF_writing, &sha->flags);
+                PIN_FAIL(unlock_out, GNTST_general_error,
+                         "Attempt to write-pin a unwritable page.\n");
+            }
+#endif
+        }
+
+        if ( dev_hst_ro_flags & GNTMAP_device_map )
+            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? 
+                GNTPIN_devr_inc : GNTPIN_devw_inc;
+
+        if ( dev_hst_ro_flags & GNTMAP_host_map )
+            act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+    }
+
+    /*
+     * At this point:
+     * act->pin updated to reflect mapping.
+     * sha->flags updated to indicate to granting domain mapping done.
+     * frame contains the mfn.
+     */
+
+    spin_unlock(&granting_d->grant_table->lock);
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?
+#else
+    if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
+    {
+        /* Write update into the pagetable. */
+        l1_pgentry_t pte;
+        pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | 
_PAGE_DIRTY);
+        if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
+            l1e_add_flags(pte,_PAGE_RW);
+        rc = update_grant_va_mapping( host_virt_addr, pte, 
+                       mapping_d, mapping_ed );
+
+        /*
+         * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
+         * This is done in the outer gnttab_map_grant_ref.
+         */
+
+        if ( rc < 0 )
+        {
+            /* Failure: undo and abort. */
+
+            spin_lock(&granting_d->grant_table->lock);
+
+            if ( dev_hst_ro_flags & GNTMAP_readonly )
+            {
+                act->pin -= GNTPIN_hstr_inc;
+            }
+            else
+            {
+                act->pin -= GNTPIN_hstw_inc;
+                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+                {
+                    clear_bit(_GTF_writing, &sha->flags);
+                    put_page_type(&frame_table[frame]);
+                }
+            }
+
+            if ( act->pin == 0 )
+            {
+                clear_bit(_GTF_reading, &sha->flags);
+                put_page(&frame_table[frame]);
+            }
+
+            spin_unlock(&granting_d->grant_table->lock);
+        }
+
+    }
+#endif
+
+    *pframe = frame;
+    return rc;
+
+ unlock_out:
+    spin_unlock(&granting_d->grant_table->lock);
+    return rc;
+}
+
+/*
+ * Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated.
+ */
+/*
+ * Map a single grant reference for the calling (local) domain.
+ * Returns 0 if the caller must flush/invalidate a TLB entry, in which
+ * case *va is set to the virtual address to invalidate.  The status
+ * (or maptrack handle on success) is written back to uop->handle.
+ */
+static int
+__gnttab_map_grant_ref(
+    gnttab_map_grant_ref_t *uop,
+    unsigned long *va)
+{
+    domid_t               dom;
+    grant_ref_t           ref;
+    struct domain        *ld, *rd;
+    struct vcpu   *led;
+    u16                   dev_hst_ro_flags;
+    int                   handle;
+    unsigned long         frame = 0, host_virt_addr;
+    int                   rc;
+
+    led = current;
+    ld = led->domain;
+
+    /* Bitwise-OR avoids short-circuiting which screws control flow. */
+    if ( unlikely(__get_user(dom, &uop->dom) |
+                  __get_user(ref, &uop->ref) |
+                  __get_user(host_virt_addr, &uop->host_addr) |
+                  __get_user(dev_hst_ro_flags, &uop->flags)) )
+    {
+        DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
+        return -EFAULT; /* don't set status */
+    }
+
+
+    /* A host (pagetable) mapping requires a sane guest virtual address. */
+    if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
+         unlikely(!__addr_ok(host_virt_addr)))
+    {
+        DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
+                host_virt_addr, dev_hst_ro_flags);
+        (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
+        return GNTST_bad_gntref;
+    }
+
+    /* Reference must be in range and at least one map type requested. */
+    if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
+         unlikely((dev_hst_ro_flags &
+                   (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
+    {
+        DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
+        (void)__put_user(GNTST_bad_gntref, &uop->handle);
+        return GNTST_bad_gntref;
+    }
+
+    /* Granting domain must exist and must not be the mapper itself. */
+    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+         unlikely(ld == rd) )
+    {
+        if ( rd != NULL )
+            put_domain(rd);
+        DPRINTK("Could not find domain %d\n", dom);
+        (void)__put_user(GNTST_bad_domain, &uop->handle);
+        return GNTST_bad_domain;
+    }
+
+    /* Get a maptrack handle. */
+    if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
+    {
+        int              i;
+        grant_mapping_t *new_mt;
+        grant_table_t   *lgt      = ld->grant_table;
+
+        /* Grow the maptrack table. */
+        new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
+        if ( new_mt == NULL )
+        {
+            put_domain(rd);
+            DPRINTK("No more map handles available\n");
+            (void)__put_user(GNTST_no_device_space, &uop->handle);
+            return GNTST_no_device_space;
+        }
+
+        /* Copy old entries; thread the new half onto the free list
+         * (free list is encoded in ref_and_flags, cf. grant_table_create). */
+        memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
+        for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
+            new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
+
+        free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
+        lgt->maptrack          = new_mt;
+        lgt->maptrack_order   += 1;
+        lgt->maptrack_limit  <<= 1;
+
+        printk("Doubled maptrack size\n");
+        handle = get_maptrack_handle(ld->grant_table);
+    }
+
+#if GRANT_DEBUG_VERBOSE
+    DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
+            ref, dom, dev_hst_ro_flags);
+#endif
+
+    if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
+                                                  dev_hst_ro_flags,
+                                                  host_virt_addr, &frame)))
+    {
+        /*
+         * Only make the maptrack live _after_ writing the pte, in case we 
+         * overwrite the same frame number, causing a maptrack walk to find it
+         */
+        ld->grant_table->maptrack[handle].domid = dom;
+
+        ld->grant_table->maptrack[handle].ref_and_flags
+            = (ref << MAPTRACK_REF_SHIFT) |
+              (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
+
+        (void)__put_user(frame, &uop->dev_bus_addr);
+
+        /* Host mappings need a TLB flush by the caller (when rc == 0). */
+        if ( dev_hst_ro_flags & GNTMAP_host_map )
+            *va = host_virt_addr;
+
+        (void)__put_user(handle, &uop->handle);
+    }
+    else
+    {
+        /* Activation failed: report status and recycle the handle. */
+        (void)__put_user(rc, &uop->handle);
+        put_maptrack_handle(ld->grant_table, handle);
+    }
+
+    put_domain(rd);
+    return rc;
+}
+
+/*
+ * Batched map hypercall: apply each gnttab_map_grant_ref_t in turn and
+ * perform the necessary TLB maintenance afterwards.  Always returns 0;
+ * per-request status is reported through each uop entry.
+ */
+static long
+gnttab_map_grant_ref(
+    gnttab_map_grant_ref_t *uop, unsigned int count)
+{
+    unsigned long flush_va = 0;
+    int idx, nr_flush;
+
+    /* Apply each request; count how many need a TLB invalidation. */
+    for ( nr_flush = 0, idx = 0; idx < count; idx++ )
+    {
+        if ( __gnttab_map_grant_ref(&uop[idx], &flush_va) == 0 )
+            nr_flush++;
+    }
+
+#ifdef __ia64__
+// FIXME-ia64: probably need to do something here to avoid stale mappings?
+#else
+    /* One new mapping -> targeted invalidation; several -> full flush. */
+    if ( nr_flush == 1 )
+        flush_tlb_one_mask(current->domain->cpumask, flush_va);
+    else if ( nr_flush != 0 )
+        flush_tlb_mask(current->domain->cpumask);
+#endif
+
+    return 0;
+}
+
+/*
+ * Undo a mapping previously set up by __gnttab_map_grant_ref, identified
+ * by the guest-supplied maptrack handle.  Returns 0 when a host
+ * (pagetable) mapping was removed, in which case the caller must flush
+ * the TLB entry for *va; rc starts at 1 so a device-only unmap returns
+ * nonzero and skips the flush.  The final rc is written to uop->status.
+ */
+static int
+__gnttab_unmap_grant_ref(
+    gnttab_unmap_grant_ref_t *uop,
+    unsigned long *va)
+{
+    domid_t        dom;
+    grant_ref_t    ref;
+    u16            handle;
+    struct domain *ld, *rd;
+
+    active_grant_entry_t *act;
+    grant_entry_t *sha;
+    grant_mapping_t *map;
+    u16            flags;
+    s16            rc = 1;
+    unsigned long  frame, virt;
+
+    ld = current->domain;
+
+    /* Bitwise-OR avoids short-circuiting which screws control flow. */
+    if ( unlikely(__get_user(virt, &uop->host_addr) |
+                  __get_user(frame, &uop->dev_bus_addr) |
+                  __get_user(handle, &uop->handle)) )
+    {
+        DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
+        return -EFAULT; /* don't set status */
+    }
+
+    /* Only the address is computed here; ref_and_flags is read only
+     * after the bounds check below (|| short-circuits). */
+    map = &ld->grant_table->maptrack[handle];
+
+    if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
+         unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
+    {
+        DPRINTK("Bad handle (%d).\n", handle);
+        (void)__put_user(GNTST_bad_handle, &uop->status);
+        return GNTST_bad_handle;
+    }
+
+    /* Recover the original mapping parameters from the maptrack entry. */
+    dom   = map->domid;
+    ref   = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+    flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
+
+    if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+         unlikely(ld == rd) )
+    {
+        if ( rd != NULL )
+            put_domain(rd);
+        DPRINTK("Could not find domain %d\n", dom);
+        (void)__put_user(GNTST_bad_domain, &uop->status);
+        return GNTST_bad_domain;
+    }
+
+#if GRANT_DEBUG_VERBOSE
+    DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
+            ref, dom, handle);
+#endif
+
+    act = &rd->grant_table->active[ref];
+    sha = &rd->grant_table->shared[ref];
+
+    spin_lock(&rd->grant_table->lock);
+
+    /* dev_bus_addr == 0 requests a host-only unmap; otherwise it must
+     * match the active frame and the device pin is dropped. */
+    if ( frame == 0 )
+    {
+        frame = act->frame;
+    }
+    else
+    {
+        if ( unlikely(frame != act->frame) )
+            PIN_FAIL(unmap_out, GNTST_general_error,
+                     "Bad frame number doesn't match gntref.\n");
+        if ( flags & GNTMAP_device_map )
+            act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+                                                  : GNTPIN_devw_inc;
+
+        map->ref_and_flags &= ~GNTMAP_device_map;
+        (void)__put_user(0, &uop->dev_bus_addr);
+
+        /* Frame is now unmapped for device access. */
+    }
+
+    if ( (virt != 0) &&
+         (flags & GNTMAP_host_map) &&
+         ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
+    {
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?
+#else
+        l1_pgentry_t   *pl1e;
+        unsigned long   _ol1e;
+
+        pl1e = &linear_pg_table[l1_linear_offset(virt)];
+
+        if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
+        {
+            DPRINTK("Could not find PTE entry for address %lx\n", virt);
+            rc = -EINVAL;
+            goto unmap_out;
+        }
+
+        /*
+         * Check that the virtual address supplied is actually mapped to 
+         * act->frame.
+         */
+        if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
+        {
+            DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+                    _ol1e, virt, frame);
+            rc = -EINVAL;
+            goto unmap_out;
+        }
+
+        /* Delete pagetable entry. */
+        if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
+        {
+            DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
+                    pl1e, virt);
+            rc = -EINVAL;
+            goto unmap_out;
+        }
+#endif
+
+        map->ref_and_flags &= ~GNTMAP_host_map;
+
+        act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
+                                              : GNTPIN_hstw_inc;
+
+        /* Host mapping gone: caller must flush the TLB entry for virt. */
+        rc = 0;
+        *va = virt;
+    }
+
+    /* Recycle the handle once neither mapping type remains. */
+    if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
+    {
+        map->ref_and_flags = 0;
+        put_maptrack_handle(ld->grant_table, handle);
+    }
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?  I think not and then
+//  this can probably be macro-ized into nothingness
+#else
+    /* If just unmapped a writable mapping, mark as dirtied */
+    if ( unlikely(shadow_mode_log_dirty(rd)) &&
+        !( flags & GNTMAP_readonly ) )
+         mark_dirty(rd, frame);
+#endif
+
+    /* If the last writable mapping has been removed, put_page_type */
+    if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
+         ( !( flags & GNTMAP_readonly ) ) )
+    {
+        clear_bit(_GTF_writing, &sha->flags);
+        put_page_type(&frame_table[frame]);
+    }
+
+    if ( act->pin == 0 )
+    {
+        clear_bit(_GTF_reading, &sha->flags);
+        put_page(&frame_table[frame]);
+    }
+
+ unmap_out:
+    (void)__put_user(rc, &uop->status);
+    spin_unlock(&rd->grant_table->lock);
+    put_domain(rd);
+    return rc;
+}
+
+/*
+ * Batched unmap hypercall: process each gnttab_unmap_grant_ref_t and
+ * do the resulting TLB maintenance.  Always returns 0; per-request
+ * status is reported through each uop entry.
+ */
+static long
+gnttab_unmap_grant_ref(
+    gnttab_unmap_grant_ref_t *uop, unsigned int count)
+{
+    unsigned long flush_va = 0;
+    int idx, nr_flush;
+
+    /* Process each request; count removed host mappings. */
+    for ( nr_flush = 0, idx = 0; idx < count; idx++ )
+    {
+        if ( __gnttab_unmap_grant_ref(&uop[idx], &flush_va) == 0 )
+            nr_flush++;
+    }
+
+#ifdef __ia64__
+// FIXME-ia64: probably need to do something here to avoid stale mappings?
+#else
+    /* Single host unmap -> targeted invalidation; several -> full flush. */
+    if ( nr_flush == 1 )
+        flush_tlb_one_mask(current->domain->cpumask, flush_va);
+    else if ( nr_flush != 0 )
+        flush_tlb_mask(current->domain->cpumask);
+#endif
+
+    return 0;
+}
+
+/*
+ * Report the machine frames backing a domain's shared grant table so
+ * the guest can map them.  Status and the frame list are written back
+ * to the guest; the hypercall returns 0 except for argument faults.
+ * Exactly one gnttab_setup_table_t is accepted per call.
+ */
+static long 
+gnttab_setup_table(
+    gnttab_setup_table_t *uop, unsigned int count)
+{
+    gnttab_setup_table_t  op;
+    struct domain        *d;
+    int                   i;
+    unsigned long addr;
+
+    if ( count != 1 )
+        return -EINVAL;
+
+    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+    {
+        DPRINTK("Fault while reading gnttab_setup_table_t.\n");
+        return -EFAULT;
+    }
+
+    if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
+    {
+        DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
+                NR_GRANT_FRAMES);
+        (void)put_user(GNTST_general_error, &uop->status);
+        return 0;
+    }
+
+    /* Only a privileged domain may act on behalf of another domain. */
+    if ( op.dom == DOMID_SELF )
+    {
+        op.dom = current->domain->domain_id;
+    }
+    else if ( unlikely(!IS_PRIV(current->domain)) )
+    {
+        (void)put_user(GNTST_permission_denied, &uop->status);
+        return 0;
+    }
+
+    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+    {
+        DPRINTK("Bad domid %d.\n", op.dom);
+        (void)put_user(GNTST_bad_domain, &uop->status);
+        return 0;
+    }
+
+    /* Always true here given the NR_GRANT_FRAMES check above. */
+    if ( op.nr_frames <= NR_GRANT_FRAMES )
+    {
+        ASSERT(d->grant_table != NULL);
+        (void)put_user(GNTST_okay, &uop->status);
+#ifdef __ia64__
+	if (d == dom0) {
+            for ( i = 0; i < op.nr_frames; i++ )
+                (void)put_user(
+                    (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
+                    &uop->frame_list[i]);
+	} else {
+            /* IA64 hack - need to map it somewhere */
+            addr = (1UL << 40);
+            map_domain_page(d, addr, virt_to_phys(d->grant_table->shared));
+            (void)put_user(addr >> PAGE_SHIFT, &uop->frame_list[0]);
+        }
+#else
+        for ( i = 0; i < op.nr_frames; i++ )
+            (void)put_user(
+                (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
+                &uop->frame_list[i]);
+#endif
+    }
+
+    put_domain(d);
+    return 0;
+}
+
+#if GRANT_DEBUG
+/*
+ * Debug helper: print every non-empty shared, active and maptrack entry
+ * of a domain's grant table.  Shared entries are dumped from copies
+ * taken without the lock; active and maptrack entries are walked under
+ * the grant-table lock.
+ */
+static int
+gnttab_dump_table(gnttab_dump_table_t *uop)
+{
+    grant_table_t        *gt;
+    gnttab_dump_table_t   op;
+    struct domain        *d;
+    u32                   shared_mfn;
+    active_grant_entry_t *act;
+    grant_entry_t         sha_copy;
+    grant_mapping_t      *maptrack;
+    int                   i;
+
+
+    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+    {
+        DPRINTK("Fault while reading gnttab_dump_table_t.\n");
+        return -EFAULT;
+    }
+
+    if ( op.dom == DOMID_SELF )
+    {
+        op.dom = current->domain->domain_id;
+    }
+
+    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+    {
+        DPRINTK("Bad domid %d.\n", op.dom);
+        (void)put_user(GNTST_bad_domain, &uop->status);
+        return 0;
+    }
+
+    ASSERT(d->grant_table != NULL);
+    gt = d->grant_table;
+    (void)put_user(GNTST_okay, &uop->status);
+
+    /* NOTE(review): despite the name, this stores the virt_to_phys()
+     * result directly, i.e. a physical address rather than a frame
+     * number -- cf. the ">> PAGE_SHIFT" used in gnttab_setup_table(). */
+    shared_mfn = virt_to_phys(d->grant_table->shared);
+
+    DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
+            op.dom, shared_mfn);
+
+    ASSERT(d->grant_table->active != NULL);
+    ASSERT(d->grant_table->shared != NULL);
+    ASSERT(d->grant_table->maptrack != NULL);
+
+    /* Dump shared entries with a non-zero flags word. */
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+    {
+        sha_copy =  gt->shared[i];
+
+        if ( sha_copy.flags )
+        {
+            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
+                    "dom:(%hu) frame:(%lx)\n",
+                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
+        }
+    }
+
+    spin_lock(&gt->lock);
+
+    /* Dump active entries that hold at least one pin. */
+    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+    {
+        act = &gt->active[i];
+
+        if ( act->pin )
+        {
+            DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
+                    "dom:(%hu) frame:(%lx)\n",
+                    op.dom, i, act->pin, act->domid, act->frame);
+        }
+    }
+
+    /* Dump in-use maptrack entries. */
+    for ( i = 0; i < gt->maptrack_limit; i++ )
+    {
+        maptrack = &gt->maptrack[i];
+
+        if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
+        {
+            DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
+                    "dom:(%hu)\n",
+                    op.dom, i,
+                    maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
+                    maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
+                    maptrack->domid);
+        }
+    }
+
+    spin_unlock(&gt->lock);
+
+    put_domain(d);
+    return 0;
+}
+#endif
+
+/*
+ * Grant-table hypercall dispatcher.  Batches of at most 512 requests
+ * are accepted, and the guest-supplied array is range-checked before
+ * any sub-operation touches it.
+ */
+long
+do_grant_table_op(
+    unsigned int cmd, void *uop, unsigned int count)
+{
+    long rc = -EFAULT;
+
+    if ( count > 512 )
+        return -EINVAL;
+
+    LOCK_BIGLOCK(current->domain);
+
+    switch ( cmd )
+    {
+    case GNTTABOP_map_grant_ref:
+        /* rc stays -EFAULT if the guest array is not accessible. */
+        if ( array_access_ok(uop, count, sizeof(gnttab_map_grant_ref_t)) )
+            rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
+        break;
+    case GNTTABOP_unmap_grant_ref:
+        if ( array_access_ok(uop, count, sizeof(gnttab_unmap_grant_ref_t)) )
+            rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop,
+                                        count);
+        break;
+    case GNTTABOP_setup_table:
+        rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
+        break;
+#if GRANT_DEBUG
+    case GNTTABOP_dump_table:
+        rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+        break;
+#endif
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    UNLOCK_BIGLOCK(current->domain);
+
+    return rc;
+}
+
+/*
+ * Intercept put_page on a foreign frame: if a grant mapping of @frame
+ * from @rd into @ld exists, drop its host pin (and associated page
+ * references) instead of the plain frame-table reference.
+ * Returns 1 if a matching mapping was found and unpinned, else 0.
+ */
+int
+gnttab_check_unmap(
+    struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
+{
+    /* Called when put_page is invoked on a page belonging to a foreign domain.
+     * Instead of decrementing the frame table ref count, locate the grant
+     * table entry, if any, and if found, decrement that count.
+     * Called a _lot_ at domain creation because pages mapped by priv domains
+     * also traverse this.
+     */
+
+    /* Note: If the same frame is mapped multiple times, and then one of
+     *       the ptes is overwritten, which maptrack handle gets invalidated?
+     * Advice: Don't do it. Explicitly unmap.
+     */
+
+    unsigned int handle, ref, refcount;
+    grant_table_t        *lgt, *rgt;
+    active_grant_entry_t *act;
+    grant_mapping_t      *map;
+    int found = 0;
+
+    lgt = ld->grant_table;
+
+#if GRANT_DEBUG_VERBOSE
+    if ( ld->domain_id != 0 )
+    {
+        DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
+                rd->domain_id, ld->domain_id, frame, readonly);
+    }
+#endif
+
+    /* Fast exit if we're not mapping anything using grant tables */
+    if ( lgt->map_count == 0 )
+        return 0;
+
+    if ( get_domain(rd) == 0 )
+    {
+        DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
+                rd->domain_id);
+        return 0;
+    }
+
+    rgt = rd->grant_table;
+
+    /* Linear scan of the local maptrack for an in-use entry whose
+     * active frame and access mode match the page being released. */
+    for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
+    {
+        map = &lgt->maptrack[handle];
+
+        /* A read-only release matches any mapping; a writable release
+         * matches only writable mappings. */
+        if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
+             ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
+        {
+            ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
+            act = &rgt->active[ref];
+
+            spin_lock(&rgt->lock);
+
+            if ( act->frame != frame )
+            {
+                spin_unlock(&rgt->lock);
+                continue;
+            }
+
+            refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
+                                             : GNTPIN_hstw_mask );
+            if ( refcount == 0 )
+            {
+                spin_unlock(&rgt->lock);
+                continue;
+            }
+
+            /* gotcha */
+            DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+                    rd->domain_id, ld->domain_id, frame, readonly);
+
+            if ( readonly )
+                act->pin -= GNTPIN_hstr_inc;
+            else
+            {
+                act->pin -= GNTPIN_hstw_inc;
+
+                /* any more granted writable mappings? */
+                if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+                {
+                    clear_bit(_GTF_writing, &rgt->shared[ref].flags);
+                    put_page_type(&frame_table[frame]);
+                }
+            }
+
+            if ( act->pin == 0 )
+            {
+                clear_bit(_GTF_reading, &rgt->shared[ref].flags);
+                put_page(&frame_table[frame]);
+            }
+            spin_unlock(&rgt->lock);
+
+            /*
+             * NOTE(review): clear_bit() takes a bit *index*, but
+             * GNTMAP_host_map is used as a bit mask elsewhere in this
+             * file (e.g. "ref_and_flags &= ~GNTMAP_host_map" in the
+             * unmap path) -- confirm the intended bit is cleared here.
+             */
+            clear_bit(GNTMAP_host_map, &map->ref_and_flags);
+
+            if ( !(map->ref_and_flags & GNTMAP_device_map) )
+                put_maptrack_handle(lgt, handle);
+
+            found = 1;
+            break;
+        }
+    }
+    put_domain(rd);
+
+    return found;
+}
+
+/*
+ * Validate that grant entry @ref of @rd is a GTF_accept_transfer entry
+ * naming @ld, and atomically mark it GTF_transfer_committed by cmpxchg
+ * on the combined 16-bit domid/flags pair.  Retries a few times if the
+ * entry keeps changing under us.  Returns 1 on success, 0 on failure.
+ */
+int 
+gnttab_prepare_for_transfer(
+    struct domain *rd, struct domain *ld, grant_ref_t ref)
+{
+    grant_table_t *rgt;
+    grant_entry_t *sha;
+    domid_t        sdom;
+    u16            sflags;
+    u32            scombo, prev_scombo;
+    int            retries = 0;
+    unsigned long  target_pfn;
+
+    DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+            rd->domain_id, ld->domain_id, ref);
+
+    if ( unlikely((rgt = rd->grant_table) == NULL) ||
+         unlikely(ref >= NR_GRANT_ENTRIES) )
+    {
+        DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
+                rd->domain_id, ref);
+        return 0;
+    }
+
+    spin_lock(&rgt->lock);
+
+    sha = &rgt->shared[ref];
+    
+    sflags = sha->flags;
+    sdom   = sha->domid;
+
+    /* cmpxchg retry loop: re-validate and re-attempt until the update
+     * lands on an unchanged (domid,flags) pair or we give up. */
+    for ( ; ; )
+    {
+        target_pfn = sha->frame;
+
+        if ( unlikely(target_pfn >= max_page ) )
+        {
+            DPRINTK("Bad pfn (%lx)\n", target_pfn);
+            goto fail;
+        }
+
+        if ( unlikely(sflags != GTF_accept_transfer) ||
+             unlikely(sdom != ld->domain_id) )
+        {
+            DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+                    sflags, sdom, ld->domain_id);
+            goto fail;
+        }
+
+        /* Merge two 16-bit values into a 32-bit combined update. */
+        /* NB. Endianness! */
+        prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+        /* NB. prev_scombo is updated in place to seen value. */
+        if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, 
+                                   prev_scombo | GTF_transfer_committed)) )
+        {
+            DPRINTK("Fault while modifying shared flags and domid.\n");
+            goto fail;
+        }
+
+        /* Did the combined update work (did we see what we expected?). */
+        if ( likely(prev_scombo == scombo) )
+            break;
+
+        if ( retries++ == 4 )
+        {
+            DPRINTK("Shared grant entry is unstable.\n");
+            goto fail;
+        }
+
+        /* Didn't see what we expected. Split out the seen flags & dom. */
+        /* NB. Endianness! */
+        sflags = (u16)prev_scombo;
+        sdom   = (u16)(prev_scombo >> 16);
+    }
+
+    spin_unlock(&rgt->lock);
+    return 1;
+
+ fail:
+    spin_unlock(&rgt->lock);
+    return 0;
+}
+
+/*
+ * Complete a page transfer into @rd's grant entry @ref: publish the
+ * transferred @frame in the shared entry, then flip its flags to
+ * GTF_accept_transfer|GTF_transfer_completed.  The wmb() orders the
+ * frame/domid update before the flag update so the guest cannot
+ * observe "completed" with stale entry contents.  On x86 the M2P
+ * (and, under translated shadow mode, P2M) mappings are also updated.
+ */
+void 
+gnttab_notify_transfer(
+    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
+{
+    grant_entry_t  *sha;
+    unsigned long   pfn;
+
+    DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+            rd->domain_id, ld->domain_id, ref);
+
+    sha = &rd->grant_table->shared[ref];
+
+    spin_lock(&rd->grant_table->lock);
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here?
+#else
+    pfn = sha->frame;
+
+    if ( unlikely(pfn >= max_page ) )
+        DPRINTK("Bad pfn (%lx)\n", pfn);
+    else
+    {
+        machine_to_phys_mapping[frame] = pfn;
+
+        if ( unlikely(shadow_mode_log_dirty(ld)))
+             mark_dirty(ld, frame);
+
+        if (shadow_mode_translate(ld))
+            __phys_to_machine_mapping[pfn] = frame;
+    }
+#endif
+    sha->frame = __mfn_to_gpfn(rd, frame);
+    sha->domid = rd->domain_id;
+    wmb();
+    sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
+
+    spin_unlock(&rd->grant_table->lock);
+
+    return;
+}
+
+/*
+ * Allocate and initialise @d's grant-table state: the active entry
+ * array, the maptrack table (one page, entries threaded into a free
+ * list via ref_and_flags), and the shared frames.  On any allocation
+ * failure, partial allocations are released and -ENOMEM is returned;
+ * returns 0 on success.
+ */
+int 
+grant_table_create(
+    struct domain *d)
+{
+    grant_table_t *t;
+    int            i;
+
+    if ( (t = xmalloc(grant_table_t)) == NULL )
+        goto no_mem;
+
+    /* Simple stuff. */
+    memset(t, 0, sizeof(*t));
+    spin_lock_init(&t->lock);
+
+    /* Active grant table. */
+    if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
+         == NULL )
+        goto no_mem;
+    memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
+
+    /* Tracking of mapped foreign frames table */
+    if ( (t->maptrack = alloc_xenheap_page()) == NULL )
+        goto no_mem;
+    t->maptrack_order = 0;
+    t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
+    memset(t->maptrack, 0, PAGE_SIZE);
+    /* Thread all entries onto the free list (next index in ref field). */
+    for ( i = 0; i < t->maptrack_limit; i++ )
+        t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
+
+    /* Shared grant table. */
+    t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
+    if ( t->shared == NULL )
+        goto no_mem;
+    memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
+
+#ifdef __ia64__
+// I don't think there's anything to do here on ia64?...
+#else
+    /* Share the frames with the domain and invalidate their M2P slots. */
+    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+    {
+        SHARE_PFN_WITH_DOMAIN(
+            virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
+        machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
+            INVALID_M2P_ENTRY;
+    }
+#endif
+
+    /* Okay, install the structure. */
+    wmb(); /* avoid races with lock-free access to d->grant_table */
+    d->grant_table = t;
+    return 0;
+
+ no_mem:
+    /* Release whatever was allocated before the failure. */
+    if ( t != NULL )
+    {
+        xfree(t->active);
+        if ( t->maptrack != NULL )
+            free_xenheap_page(t->maptrack);
+        xfree(t);
+    }
+    return -ENOMEM;
+}
+
+/*
+ * Drop every outstanding device mapping tracked in @gt: for each
+ * maptrack entry with GNTMAP_device_map set, take the granting
+ * domain's grant-table lock, remove all device pins on the active
+ * entry, and release page-type/page references once nothing remains
+ * pinned.
+ */
+void
+gnttab_release_dev_mappings(grant_table_t *gt)
+{
+    grant_mapping_t        *map;
+    domid_t                 dom;
+    grant_ref_t             ref;
+    u16                     handle;
+    struct domain          *ld, *rd;
+    unsigned long           frame;
+    active_grant_entry_t   *act;
+    grant_entry_t          *sha;
+
+    ld = current->domain;
+
+    for ( handle = 0; handle < gt->maptrack_limit; handle++ )
+    {
+        map = &gt->maptrack[handle];
+
+        if ( map->ref_and_flags & GNTMAP_device_map )
+        {
+            dom = map->domid;
+            ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+
+            DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
+                    handle, ref,
+                    map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
+
+            /* The granting domain may already be gone; skip if so. */
+            if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+                 unlikely(ld == rd) )
+            {
+                if ( rd != NULL )
+                    put_domain(rd);
+
+                printk(KERN_WARNING "Grant release: No dom%d\n", dom);
+                continue;
+            }
+
+            act = &rd->grant_table->active[ref];
+            sha = &rd->grant_table->shared[ref];
+
+            spin_lock(&rd->grant_table->lock);
+
+            if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
+            {
+                frame = act->frame;
+
+                /* Last writable pin is a device one: drop write status. */
+                if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
+                     ( (act->pin & GNTPIN_devw_mask) >  0 ) )
+                {
+                    clear_bit(_GTF_writing, &sha->flags);
+                    put_page_type(&frame_table[frame]);
+                }
+
+                act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
+
+                if ( act->pin == 0 )
+                {
+                    clear_bit(_GTF_reading, &sha->flags);
+                    map->ref_and_flags = 0;
+                    put_page(&frame_table[frame]);
+                }
+                else
+                    map->ref_and_flags &= ~GNTMAP_device_map;
+            }
+
+            spin_unlock(&rd->grant_table->lock);
+
+            put_domain(rd);
+        }
+    }
+}
+
+
+/*
+ * Tear down and free all grant-table state belonging to @d.
+ * Safe to call when no grant table was ever created.
+ */
+void
+grant_table_destroy(
+    struct domain *d)
+{
+    grant_table_t *gt = d->grant_table;
+
+    if ( gt == NULL )
+        return;
+
+    /* Unhook first, then release each piece of backing memory. */
+    d->grant_table = NULL;
+    free_xenheap_pages(gt->shared, ORDER_GRANT_FRAMES);
+    free_xenheap_page(gt->maptrack);
+    xfree(gt->active);
+    xfree(gt);
+}
+
+/*
+ * Global grant-table subsystem initialisation hook.  Intentionally a
+ * no-op: all state is per-domain and set up by grant_table_create().
+ */
+void
+grant_table_init(
+    void)
+{
+    /* Nothing. */
+}
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hpsimserial.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hpsimserial.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,23 @@
+/*
+ * HP Ski simulator serial I/O
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <xen/serial.h>
+#include "hpsim_ssc.h"
+
+/* Emit one character through the Ski simulator's SSC console service. */
+static void hp_ski_putc(struct serial_port *port, char c)
+{
+	ia64_ssc(c, 0, 0, 0, SSC_PUTCHAR);
+}
+
+/* Transmit-only UART driver backed by the simulator (no getc). */
+static struct uart_driver hp_ski = {
+	.putc = hp_ski_putc,
+};
+
+/* Register the simulator console as serial port 0. */
+void hpsim_serial_init(void)
+{
+	serial_register_uart(0, &hp_ski, 0);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hypercall.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hypercall.c     Thu Sep  1 18:46:28 2005
@@ -0,0 +1,182 @@
+/*
+ * Hypercall implementations
+ * 
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h>   /* FOR struct ia64_sal_retval */
+
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64);
+extern struct ia64_sal_retval 
sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
+unsigned long idle_when_pending = 0;
+unsigned long pal_halt_light_count = 0;
+
+int
+ia64_hypercall (struct pt_regs *regs)
+{
+       struct vcpu *v = (struct domain *) current;
+       struct ia64_sal_retval x;
+       struct ia64_pal_retval y;
+       unsigned long *tv, *tc;
+       int pi;
+
+       switch (regs->r2) {
+           case FW_HYPERCALL_PAL_CALL:
+               //printf("*** PAL hypercall: index=%d\n",regs->r28);
+               //FIXME: This should call a C routine
+#if 0
+               // This is very conservative, but avoids a possible
+               // (and deadly) freeze in paravirtualized domains due
+               // to a yet-to-be-found bug where pending_interruption
+               // is zero when it shouldn't be. Since PAL is called
+               // in the idle loop, this should resolve it
+               VCPU(v,pending_interruption) = 1;
+#endif
+               if (regs->r28 == PAL_HALT_LIGHT) {
+#define SPURIOUS_VECTOR 15
+                       pi = vcpu_check_pending_interrupts(v);
+                       if (pi != SPURIOUS_VECTOR) {
+                               if (!VCPU(v,pending_interruption))
+                                       idle_when_pending++;
+                               vcpu_pend_unspecified_interrupt(v);
+//printf("idle w/int#%d pending!\n",pi);
+//this shouldn't happen, but it apparently does quite a bit!  so don't
+//allow it to happen... i.e. if a domain has an interrupt pending and
+//it tries to halt itself because it thinks it is idle, just return here
+//as deliver_pending_interrupt is called on the way out and will deliver it
+                       }
+                       else {
+                               pal_halt_light_count++;
+                               do_sched_op(SCHEDOP_yield);
+                       }
+                       //break;
+               }
+               else if (regs->r28 >= PAL_COPY_PAL) {   /* FIXME */
+                       printf("stacked PAL hypercalls not supported\n");
+                       regs->r8 = -1;
+                       break;
+               }
+               else y = xen_pal_emulator(regs->r28,regs->r29,
+                                               regs->r30,regs->r31);
+               regs->r8 = y.status; regs->r9 = y.v0;
+               regs->r10 = y.v1; regs->r11 = y.v2;
+               break;
+           case FW_HYPERCALL_SAL_CALL:
+               x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33),
+                       vcpu_get_gr(v,34),vcpu_get_gr(v,35),
+                       vcpu_get_gr(v,36),vcpu_get_gr(v,37),
+                       vcpu_get_gr(v,38),vcpu_get_gr(v,39));
+               regs->r8 = x.status; regs->r9 = x.v0;
+               regs->r10 = x.v1; regs->r11 = x.v2;
+               break;
+           case FW_HYPERCALL_EFI_RESET_SYSTEM:
+               printf("efi.reset_system called ");
+               if (current->domain == dom0) {
+                       printf("(by dom0)\n ");
+                       (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+               }
+#ifdef DOMU_AUTO_RESTART
+               else {
+                       reconstruct_domU(current);
+                       return 0;  // don't increment ip!
+               }
+#else  
+               printf("(not supported for non-0 domain)\n");
+               regs->r8 = EFI_UNSUPPORTED;
+#endif
+               break;
+           case FW_HYPERCALL_EFI_GET_TIME:
+               tv = vcpu_get_gr(v,32);
+               tc = vcpu_get_gr(v,33);
+               //printf("efi_get_time(%p,%p) called...",tv,tc);
+               tv = __va(translate_domain_mpaddr(tv));
+               if (tc) tc = __va(translate_domain_mpaddr(tc));
+               regs->r8 = (*efi.get_time)(tv,tc);
+               //printf("and returns %lx\n",regs->r8);
+               break;
+           case FW_HYPERCALL_EFI_SET_TIME:
+           case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+           case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+               // FIXME: need fixes in efi.h from 2.6.9
+           case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+               // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+               // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS 
+               // POINTER ARGUMENTS WILL BE VIRTUAL!!
+           case FW_HYPERCALL_EFI_GET_VARIABLE:
+               // FIXME: need fixes in efi.h from 2.6.9
+           case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+           case FW_HYPERCALL_EFI_SET_VARIABLE:
+           case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+               // FIXME: need fixes in efi.h from 2.6.9
+               regs->r8 = EFI_UNSUPPORTED;
+               break;
+           case 0xffff: // test dummy hypercall
+               regs->r8 = dump_privop_counts_to_user(
+                       vcpu_get_gr(v,32),
+                       vcpu_get_gr(v,33));
+               break;
+           case 0xfffe: // test dummy hypercall
+               regs->r8 = zero_privop_counts_to_user(
+                       vcpu_get_gr(v,32),
+                       vcpu_get_gr(v,33));
+               break;
+           case 0xfffd: // test dummy hypercall
+               regs->r8 = launch_domainU(
+                       vcpu_get_gr(v,32));
+               break;
+           case 0xfffc: // test dummy hypercall
+               regs->r8 = domU_staging_write_32(
+                       vcpu_get_gr(v,32),
+                       vcpu_get_gr(v,33),
+                       vcpu_get_gr(v,34),
+                       vcpu_get_gr(v,35),
+                       vcpu_get_gr(v,36));
+               break;
+           case 0xfffb: // test dummy hypercall
+               regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32));
+               break;
+
+           case __HYPERVISOR_dom0_op:
+               regs->r8 = do_dom0_op(regs->r14);
+               break;
+
+           case __HYPERVISOR_dom_mem_op:
+#ifdef CONFIG_VTI
+               regs->r8 = do_dom_mem_op(regs->r14, regs->r15, regs->r16, 
regs->r17, regs->r18); 
+#else
+               /* we don't handle reservations; just return success */
+               regs->r8 = regs->r16;
+#endif
+               break;
+
+           case __HYPERVISOR_event_channel_op:
+               regs->r8 = do_event_channel_op(regs->r14);
+               break;
+
+#ifndef CONFIG_VTI
+           case __HYPERVISOR_grant_table_op:
+               regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16);
+               break;
+#endif
+
+           case __HYPERVISOR_console_io:
+               regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16);
+               break;
+
+           default:
+               printf("unknown hypercall %x\n", regs->r2);
+               regs->r8 = (unsigned long)-1;
+       }
+       return 1;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hyperprivop.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hyperprivop.S   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1592 @@
+/*
+ * arch/ia64/kernel/hyperprivop.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <public/arch-ia64.h>
+
+#if 1   // change to 0 to turn off all fast paths
+#define FAST_HYPERPRIVOPS
+#define FAST_HYPERPRIVOP_CNT
+#define FAST_REFLECT_CNT
+//#define FAST_TICK
+#define FAST_BREAK
+#define FAST_ACCESS_REFLECT
+#define FAST_RFI
+#define FAST_SSM_I
+#define FAST_PTC_GA
+#undef RFI_TO_INTERRUPT // not working yet
+#endif
+
+#ifdef CONFIG_SMP
+#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
+#undef FAST_PTC_GA
+#endif
+
+// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
+//#define HANDLE_AR_UNAT
+
+// FIXME: This is defined in include/asm-ia64/hw_irq.h but this
+// doesn't appear to be include'able from assembly?
+#define IA64_TIMER_VECTOR 0xef
+
+// Should be included from common header file (also in process.c)
+//  NO PSR_CLR IS DIFFERENT! (CPL)
+#define IA64_PSR_CPL1  (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
+#define IA64_PSR_CPL0  (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
+// note IA64_PSR_PK removed from following, why is this necessary?
+#define        DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
+                       IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
+                       IA64_PSR_IT | IA64_PSR_BN)
+
+#define        DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
+                       IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI |       \
+                       IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
+                       IA64_PSR_MC | IA64_PSR_IS | \
+                       IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
+                       IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
+
+// Note: not hand-scheduled for now
+//  Registers at entry
+//     r16 == cr.isr
+//     r17 == cr.iim
+//     r18 == XSI_PSR_IC_OFS
+//     r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+//     r31 == pr
+GLOBAL_ENTRY(fast_hyperprivop)
+#ifndef FAST_HYPERPRIVOPS // see beginning of file
+       br.sptk.many dispatch_break_fault ;;
+#endif
+       // HYPERPRIVOP_SSM_I?
+       // assumes domain interrupts pending, so just do it
+       cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+(p7)   br.sptk.many hyper_ssm_i;;
+
+       // FIXME. This algorithm gives up (goes to the slow path) if there
+       // are ANY interrupts pending, even if they are currently
+       // undeliverable.  This should be improved later...
+       adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r20=[r20] ;;
+       cmp.eq p7,p0=r0,r20
+(p7)   br.cond.sptk.many 1f
+       movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r20=[r20];;
+       adds r21=IA64_VCPU_IRR0_OFFSET,r20;
+       adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
+       ld8 r23=[r21],16; ld8 r24=[r22],16;;
+       ld8 r21=[r21]; ld8 r22=[r22];;
+       or r23=r23,r24; or r21=r21,r22;;
+       or r20=r23,r21;;
+1:     // when we get to here r20=~=interrupts pending
+
+       // HYPERPRIVOP_RFI?
+       cmp.eq p7,p6=XEN_HYPER_RFI,r17
+(p7)   br.sptk.many hyper_rfi;;
+
+       // HYPERPRIVOP_GET_IVR?
+       cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
+(p7)   br.sptk.many hyper_get_ivr;;
+
+       cmp.ne p7,p0=r20,r0
+(p7)   br.spnt.many dispatch_break_fault ;;
+
+       // HYPERPRIVOP_COVER?
+       cmp.eq p7,p6=XEN_HYPER_COVER,r17
+(p7)   br.sptk.many hyper_cover;;
+
+       // HYPERPRIVOP_SSM_DT?
+       cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+(p7)   br.sptk.many hyper_ssm_dt;;
+
+       // HYPERPRIVOP_RSM_DT?
+       cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+(p7)   br.sptk.many hyper_rsm_dt;;
+
+       // HYPERPRIVOP_GET_TPR?
+       cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
+(p7)   br.sptk.many hyper_get_tpr;;
+
+       // HYPERPRIVOP_SET_TPR?
+       cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
+(p7)   br.sptk.many hyper_set_tpr;;
+
+       // HYPERPRIVOP_EOI?
+       cmp.eq p7,p6=XEN_HYPER_EOI,r17
+(p7)   br.sptk.many hyper_eoi;;
+
+       // HYPERPRIVOP_SET_ITM?
+       cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
+(p7)   br.sptk.many hyper_set_itm;;
+
+       // HYPERPRIVOP_SET_RR?
+       cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
+(p7)   br.sptk.many hyper_set_rr;;
+
+       // HYPERPRIVOP_GET_RR?
+       cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
+(p7)   br.sptk.many hyper_get_rr;;
+
+       // HYPERPRIVOP_PTC_GA?
+       cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
+(p7)   br.sptk.many hyper_ptc_ga;;
+
+       // HYPERPRIVOP_ITC_D?
+       cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
+(p7)   br.sptk.many hyper_itc_d;;
+
+       // HYPERPRIVOP_ITC_I?
+       cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
+(p7)   br.sptk.many hyper_itc_i;;
+
+       // HYPERPRIVOP_THASH?
+       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7)   br.sptk.many hyper_thash;;
+
+       // if not one of the above, give up for now and do it the slow way
+       br.sptk.many dispatch_break_fault ;;
+
+
+// give up for now if: ipsr.be==1, ipsr.pp==1
+// from reflect_interruption, don't need to:
+//  - printf first extint (debug only)
+//  - check for interrupt collection enabled (routine will force on)
+//  - set ifa (not valid for extint)
+//  - set iha (not valid for extint)
+//  - set itir (not valid for extint)
+// DO need to
+//  - increment the HYPER_SSM_I fast_hyperprivop counter
+//  - set shared_mem iip to instruction after HYPER_SSM_I
+//  - set cr.iip to guest iva+0x3000
+//  - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
+//     be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
+//     i = shared_mem interrupt_delivery_enabled
+//     ic = shared_mem interrupt_collection_enabled
+//     ri = instruction after HYPER_SSM_I
+//     all other bits unchanged from real cr.ipsr
+//  - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
+//  - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
+//     and isr.ri to cr.isr.ri (all other bits zero)
+//  - cover and set shared_mem precover_ifs to cr.ifs
+//             ^^^ MISSED THIS FOR fast_break??
+//  - set shared_mem ifs and incomplete_regframe to 0
+//  - set shared_mem interrupt_delivery_enabled to 0
+//  - set shared_mem interrupt_collection_enabled to 0
+//  - set r31 to SHAREDINFO_ADDR
+//  - virtual bank switch 0
+// maybe implement later
+//  - verify that there really IS a deliverable interrupt pending
+//  - set shared_mem iva
+// needs to be done but not implemented (in reflect_interruption)
+//  - set shared_mem iipa
+// don't know for sure
+//  - set shared_mem unat
+//     r16 == cr.isr
+//     r17 == cr.iim
+//     r18 == XSI_PSR_IC
+//     r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+//     r31 == pr
+ENTRY(hyper_ssm_i)
+#ifndef FAST_SSM_I
+       br.spnt.few dispatch_break_fault ;;
+#endif
+       // give up for now if: ipsr.be==1, ipsr.pp==1
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.sptk.many dispatch_break_fault ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.sptk.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       // set shared_mem iip to instruction after HYPER_SSM_I
+       extr.u r20=r30,41,2 ;;
+       cmp.eq p6,p7=2,r20 ;;
+(p6)   mov r20=0
+(p6)   adds r29=16,r29
+(p7)   adds r20=1,r20 ;;
+       dep r30=r20,r30,41,2;;  // adjust cr.ipsr.ri but don't save yet
+       adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r29 ;;
+       // set shared_mem isr
+       extr.u r16=r16,38,1;;   // grab cr.isr.ir bit
+       dep r16=r16,r0,38,1 ;;  // insert into cr.isr (rest of bits zero)
+       dep r16=r20,r16,41,2 ;; // deposit cr.isr.ri
+       adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r16 ;;
+       // set cr.ipsr
+       mov r29=r30 ;;
+       movl r28=DELIVER_PSR_SET;;
+       movl r27=~DELIVER_PSR_CLR;;
+       or r29=r29,r28;;
+       and r29=r29,r27;;
+       mov cr.ipsr=r29;;
+       // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+       extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
+       cmp.eq p6,p7=3,r29;;
+(p6)   dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7)   dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+       ;;
+       // FOR SSM_I ONLY, also turn on psr.i and psr.ic
+       movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
+       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+       or r30=r30,r28;;
+       and r30=r30,r27;;
+       adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r30 ;;
+       // set shared_mem interrupt_delivery_enabled to 0
+       // set shared_mem interrupt_collection_enabled to 0
+       st8 [r18]=r0;;
+       // cover and set shared_mem precover_ifs to cr.ifs
+       // set shared_mem ifs and incomplete_regframe to 0
+       cover ;;
+       mov r20=cr.ifs;;
+       adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r21]=r0 ;;
+       adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r0 ;;
+       adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r20 ;;
+       // leave cr.ifs alone for later rfi
+       // set iip to go to domain IVA break instruction vector
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22=[r22];;
+       adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+       ld8 r23=[r22];;
+       movl r24=0x3000;;
+       add r24=r24,r23;;
+       mov cr.iip=r24;;
+       // OK, now all set to go except for switch to virtual bank0
+       mov r30=r2; mov r29=r3;;
+       adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+       adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       bsw.1;;
+       // FIXME?: ar.unat is not really handled correctly,
+       // but may not matter if the OS is NaT-clean
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+       movl r31=XSI_IPSR;;
+       bsw.0 ;;
+       mov r2=r30; mov r3=r29;;
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r0 ;;
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+
+// reflect domain clock interrupt
+//     r31 == pr
+//     r30 == cr.ivr
+//     r29 == rp
+GLOBAL_ENTRY(fast_tick_reflect)
+#ifndef FAST_TICK // see beginning of file
+       br.cond.sptk.many rp;;
+#endif
+       mov r28=IA64_TIMER_VECTOR;;
+       cmp.ne p6,p0=r28,r30
+(p6)   br.cond.spnt.few rp;;
+       movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+       ld8 r26=[r20];;
+       mov r27=ar.itc;;
+       adds r27=200,r27;;      // safety margin
+       cmp.ltu p6,p0=r26,r27
+(p6)   br.cond.spnt.few rp;;
+       mov r17=cr.ipsr;;
+       // slow path if: ipsr.be==1, ipsr.pp==1
+       extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p6,p0=r21,r0
+(p6)   br.cond.spnt.few rp;;
+       extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p6,p0=r21,r0
+(p6)   br.cond.spnt.few rp;;
+       // definitely have a domain tick
+       mov cr.eoi=r0;;
+       mov rp=r29;;
+       mov cr.itm=r26;;        // ensure next tick
+#ifdef FAST_REFLECT_CNT
+       movl r20=fast_reflect_count+((0x3000>>8)*8);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       // vcpu_pend_timer(current)
+       movl r18=XSI_PSR_IC;;
+       adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r20];;
+       cmp.eq p6,p0=r20,r0     // if cr.itv==0 done
+(p6)   br.cond.spnt.few fast_tick_reflect_done;;
+       tbit.nz p6,p0=r20,16;;  // check itv.m (discard) bit
+(p6)   br.cond.spnt.few fast_tick_reflect_done;;
+       extr.u r27=r20,0,6      // r27 has low 6 bits of itv.vector
+       extr.u r26=r20,6,2;;    // r26 has irr index of itv.vector
+       movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r19=[r19];;
+       adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
+       adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
+       ld8 r24=[r22];;
+       ld8 r23=[r23];;
+       cmp.eq p6,p0=r23,r24    // skip if this tick already delivered
+(p6)   br.cond.spnt.few fast_tick_reflect_done;;
+       // set irr bit
+       adds r21=IA64_VCPU_IRR0_OFFSET,r19;
+       shl r26=r26,3;;
+       add r21=r21,r26;;
+       mov r25=1;;
+       shl r22=r25,r27;;
+       ld8 r23=[r21];;
+       or r22=r22,r23;;
+       st8 [r21]=r22;;
+       // set PSCB(pending_interruption)!
+       adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r25;;
+       
+       // if interrupted at pl0, we're done
+       extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
+       cmp.eq p6,p0=r16,r0;;
+(p6)   br.cond.spnt.few fast_tick_reflect_done;;
+       // if guest vpsr.i is off, we're done
+       adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r21=[r21];;
+       cmp.eq p6,p0=r21,r0
+(p6)   br.cond.spnt.few fast_tick_reflect_done;;
+
+       // OK, we have a clock tick to deliver to the active domain!
+       // so deliver to iva+0x3000
+       //      r17 == cr.ipsr
+       //      r18 == XSI_PSR_IC
+       //      r19 == IA64_KR(CURRENT)
+       //      r31 == pr
+       mov r16=cr.isr;;
+       mov r29=cr.iip;;
+       adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r29 ;;
+       // set shared_mem isr
+       extr.u r16=r16,38,1;;   // grab cr.isr.ir bit
+       dep r16=r16,r0,38,1 ;;  // insert into cr.isr (rest of bits zero)
+       extr.u r20=r17,41,2 ;;  // get ipsr.ri
+       dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
+       adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r16 ;;
+       // set cr.ipsr (make sure cpl==2!)
+       mov r29=r17 ;;
+       movl r28=DELIVER_PSR_SET;;
+       movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+       or r29=r29,r28;;
+       and r29=r29,r27;;
+       mov cr.ipsr=r29;;
+       // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
+       extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
+       cmp.eq p6,p7=3,r29;;
+(p6)   dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
+(p7)   dep r17=0,r17,IA64_PSR_CPL0_BIT,2
+       ;;
+       movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
+       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
+       dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
+       or r17=r17,r28;;
+       and r17=r17,r27;;
+       ld4 r16=[r18],4;;
+       cmp.ne p6,p0=r16,r0;;
+(p6)   dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
+       ld4 r16=[r18],-4;;
+       cmp.ne p6,p0=r16,r0;;
+(p6)   dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
+       adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r17 ;;
+       // set shared_mem interrupt_delivery_enabled to 0
+       // set shared_mem interrupt_collection_enabled to 0
+       st8 [r18]=r0;;
+       // cover and set shared_mem precover_ifs to cr.ifs
+       // set shared_mem ifs and incomplete_regframe to 0
+       cover ;;
+       mov r20=cr.ifs;;
+       adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r21]=r0 ;;
+       adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r0 ;;
+       adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r20 ;;
+       // leave cr.ifs alone for later rfi
+       // set iip to go to domain IVA break instruction vector
+       adds r22=IA64_VCPU_IVA_OFFSET,r19;;
+       ld8 r23=[r22];;
+       movl r24=0x3000;;
+       add r24=r24,r23;;
+       mov cr.iip=r24;;
+       // OK, now all set to go except for switch to virtual bank0
+       mov r30=r2; mov r29=r3;;
+#ifdef HANDLE_AR_UNAT
+       mov r28=ar.unat;
+#endif
+       adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+       adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       bsw.1;;
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+#ifdef HANDLE_AR_UNAT
+       // bank0 regs have no NaT bit, so ensure they are NaT clean
+       mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
+       mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
+       mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
+       mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
+#endif
+       bsw.0 ;;
+       mov r2=r30; mov r3=r29;;
+#ifdef HANDLE_AR_UNAT
+       mov ar.unat=r28;
+#endif
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r0 ;;
+fast_tick_reflect_done:
+       mov pr=r31,-1 ;;
+       rfi
+END(fast_tick_reflect)
+
+// reflect domain breaks directly to domain
+//     r16 == cr.isr
+//     r17 == cr.iim
+//     r18 == XSI_PSR_IC
+//     r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+//     r31 == pr
+GLOBAL_ENTRY(fast_break_reflect)
+#ifndef FAST_BREAK // see beginning of file
+       br.sptk.many dispatch_break_fault ;;
+#endif
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0 ;;
+(p7)   br.spnt.few dispatch_break_fault ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0 ;;
+(p7)   br.spnt.few dispatch_break_fault ;;
+#if 1 /* special handling in case running on simulator */
+       movl r20=first_break;;
+       ld4 r23=[r20];;
+       movl r21=0x80001;
+       movl r22=0x80002;;
+       cmp.ne p7,p0=r23,r0;;
+(p7)   br.spnt.few dispatch_break_fault ;;
+       cmp.eq p7,p0=r21,r17;
+(p7)   br.spnt.few dispatch_break_fault ;;
+       cmp.eq p7,p0=r22,r17;
+(p7)   br.spnt.few dispatch_break_fault ;;
+#endif
+       movl r20=0x2c00;
+       // save iim in shared_info
+       adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r17;;
+       // fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+//     r16 == cr.isr
+//     r18 == XSI_PSR_IC
+//     r20 == offset into ivt
+//     r29 == iip
+//     r30 == ipsr
+//     r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+       movl r22=fast_reflect_count;
+       shr r23=r20,5;;
+       add r22=r22,r23;;
+       ld8 r21=[r22];;
+       adds r21=1,r21;;
+       st8 [r22]=r21;;
+#endif
+       // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
+       adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r29;;
+       // set shared_mem isr
+       adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r16 ;;
+       // set cr.ipsr
+       mov r29=r30 ;;
+       movl r28=DELIVER_PSR_SET;;
+       movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+       or r29=r29,r28;;
+       and r29=r29,r27;;
+       mov cr.ipsr=r29;;
+       // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+       extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
+       cmp.eq p6,p7=3,r29;;
+(p6)   dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7)   dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+       ;;
+       movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
+       movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+       or r30=r30,r28;;
+       and r30=r30,r27;;
+       // also set shared_mem ipsr.i and ipsr.ic appropriately
+       ld8 r24=[r18];;
+       extr.u r22=r24,32,32
+       cmp4.eq p6,p7=r24,r0;;
+(p6)   dep r30=0,r30,IA64_PSR_IC_BIT,1
+(p7)   dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
+       cmp4.eq p6,p7=r22,r0;;
+(p6)   dep r30=0,r30,IA64_PSR_I_BIT,1
+(p7)   dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
+       adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r30 ;;
+       // set shared_mem interrupt_delivery_enabled to 0
+       // set shared_mem interrupt_collection_enabled to 0
+       st8 [r18]=r0;;
+       // cover and set shared_mem precover_ifs to cr.ifs
+       // set shared_mem ifs and incomplete_regframe to 0
+       cover ;;
+       mov r24=cr.ifs;;
+       adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r21]=r0 ;;
+       adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r0 ;;
+       adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r21]=r24 ;;
+       // vpsr.i = vpsr.ic = 0 on delivery of interruption
+       st8 [r18]=r0;;
+       // FIXME: need to save iipa and isr to be arch-compliant
+       // set iip to go to domain IVA break instruction vector
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22=[r22];;
+       adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+       ld8 r23=[r22];;
+       add r20=r20,r23;;
+       mov cr.iip=r20;;
+       // OK, now all set to go except for switch to virtual bank0
+       mov r30=r2; mov r29=r3;;
+#ifdef HANDLE_AR_UNAT
+       mov r28=ar.unat;
+#endif
+       adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+       adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       bsw.1;;
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+#ifdef HANDLE_AR_UNAT
+       // bank0 regs have no NaT bit, so ensure they are NaT clean
+       mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
+       mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
+       mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
+       mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
+#endif
+       movl r31=XSI_IPSR;;
+       bsw.0 ;;
+       mov r2=r30; mov r3=r29;;
+#ifdef HANDLE_AR_UNAT
+       mov ar.unat=r28;
+#endif
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r0 ;;
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+//     r16 == isr
+//     r17 == ifa
+//     r19 == reflect number (only pass-thru to dispatch_reflection)
+//     r20 == offset into ivt
+//     r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+#ifndef FAST_ACCESS_REFLECT // see beginning of file
+       br.spnt.few dispatch_reflection ;;
+#endif
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+       cmp.eq p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       movl r18=XSI_PSR_IC;;
+       ld8 r21=[r18];;
+       cmp.eq p7,p0=r0,r21
+(p7)   br.spnt.few dispatch_reflection ;;
+       // set shared_mem ifa, FIXME: should we validate it?
+       mov r17=cr.ifa;;
+       adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r17 ;;
+       // get rr[ifa] and save to itir in shared memory (extra bits ignored)
+       shr.u r22=r17,61
+       adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 
+       adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shladd r22=r22,3,r21;;
+       ld8 r22=[r22];;
+       st8 [r23]=r22;;
+       br.cond.sptk.many fast_reflect;;
+
+
+// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
+ENTRY(hyper_rfi)
+#ifndef FAST_RFI
+       br.spnt.few dispatch_break_fault ;;
+#endif
+       // if no interrupts pending, proceed
+       mov r30=r0
+       cmp.eq p7,p0=r20,r0
+(p7)   br.sptk.many 1f
+       ;;
+       adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r21=[r20];;         // r21 = vcr.ipsr
+       extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
+       mov r30=r22     
+       // r30 determines whether we might deliver an immediate extint
+1:
+       adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r21=[r20];;         // r21 = vcr.ipsr
+       extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
+       // if turning on psr.be, give up for now and do it the slow way
+       cmp.ne p7,p0=r22,r0
+(p7)   br.spnt.few dispatch_break_fault ;;
+       // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
+       movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
+       and r22=r20,r21
+       ;;
+       cmp.ne p7,p0=r22,r20
+(p7)   br.spnt.few dispatch_break_fault ;;
+       // if was in metaphys mode, do it the slow way (FIXME later?)
+       adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r20=[r20];;
+       cmp.ne p7,p0=r20,r0
+(p7)   br.spnt.few dispatch_break_fault ;;
+       // if domain hasn't already done virtual bank switch
+       //  do it the slow way (FIXME later?)
+#if 0
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r20=[r20];;
+       cmp.eq p7,p0=r20,r0
+(p7)   br.spnt.few dispatch_break_fault ;;
+#endif
+       // validate vcr.iip, if in Xen range, do it the slow way
+       adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r22=[r20];;
+       movl r23=XEN_VIRT_SPACE_LOW
+       movl r24=XEN_VIRT_SPACE_HIGH ;;
+       cmp.ltu p0,p7=r22,r23 ;;        // if !(iip<low) &&
+(p7)   cmp.geu p0,p7=r22,r24 ;;        //    !(iip>=high)
+(p7)   br.spnt.few dispatch_break_fault ;;
+#ifndef RFI_TO_INTERRUPT // see beginning of file
+       cmp.ne p6,p0=r30,r0
+(p6)   br.cond.spnt.few dispatch_break_fault ;;
+#endif
+
+1:     // OK now, let's do an rfi.
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
+       ld8 r23=[r20];;
+       adds r23=1,r23;;
+       st8 [r20]=r23;;
+#endif
+#ifdef RFI_TO_INTERRUPT
+       // maybe do an immediate interrupt delivery?
+       cmp.ne p6,p0=r30,r0
+(p6)   br.cond.spnt.few rfi_check_extint;;
+#endif
+
+just_do_rfi:
+       // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
+       mov cr.iip=r22;;
+       adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r0 ;;
+       adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r20];;
+       dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
+       mov cr.ifs=r20 ;;
+       // ipsr.cpl == (vcr.ipsr.cpl == 0) 2 : 3;
+       dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
+       // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
+       mov r19=r0 ;;
+       extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
+       cmp.ne p7,p6=r23,r0 ;;
+       // not done yet
+(p7)   dep r19=-1,r19,32,1
+       extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
+       cmp.ne p7,p6=r23,r0 ;;
+(p7)   dep r19=-1,r19,0,1 ;;
+       st8 [r18]=r19 ;;
+       // force on psr.ic, i, dt, rt, it, bn
+       movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
+       ;;
+       or r21=r21,r20
+       ;;
+       mov cr.ipsr=r21
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r21=[r20];;
+       cmp.ne p7,p0=r21,r0     // domain already did "bank 1 switch?"
+(p7)   br.cond.spnt.few 1f;
+       // OK, now all set to go except for switch to virtual bank1
+       mov r22=1;; st4 [r20]=r22;
+       mov r30=r2; mov r29=r3;;
+       adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+       adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       bsw.1;;
+       // FIXME?: ar.unat is not really handled correctly,
+       // but may not matter if the OS is NaT-clean
+       .mem.offset 0,0; ld8.fill r16=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
+       .mem.offset 0,0; ld8.fill r18=[r2],16 ;
+       .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r20=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r22=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r24=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r26=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r28=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
+       .mem.offset 8,0; ld8.fill r30=[r2],16 ;
+       .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
+       bsw.0 ;;
+       mov r2=r30; mov r3=r29;;
+1:     mov pr=r31,-1
+       ;;
+       rfi
+       ;;
+
+#ifdef RFI_TO_INTERRUPT
+GLOBAL_ENTRY(rfi_check_extint)
+       //br.sptk.many dispatch_break_fault ;;
+
+       // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
+       // make sure none of these get trashed in case going to just_do_rfi
+       movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r30=[r30];;
+       adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
+       mov r25=192
+       adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
+       ld8 r23=[r16];;
+       cmp.eq p6,p0=r23,r0;;
+(p6)   adds r16=-8,r16;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r16];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r16=-8,r16;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r16];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r16=-8,r16;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r16];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+       cmp.eq p6,p0=r23,r0
+(p6)   br.cond.spnt.few just_do_rfi;   // this is actually an error
+       // r16 points to non-zero element of irr, r23 has value
+       // r24 points to corr element of insvc, r25 has elt*64
+       ld8 r26=[r24];;
+       cmp.geu p6,p0=r26,r23
+(p6)   br.cond.spnt.many just_do_rfi;
+
+       // not masked by insvc, get vector number
+       shr.u r26=r23,1;;
+       or r26=r23,r26;;
+       shr.u r27=r26,2;;
+       or r26=r26,r27;;
+       shr.u r27=r26,4;;
+       or r26=r26,r27;;
+       shr.u r27=r26,8;;
+       or r26=r26,r27;;
+       shr.u r27=r26,16;;
+       or r26=r26,r27;;
+       shr.u r27=r26,32;;
+       or r26=r26,r27;;
+       andcm r26=0xffffffffffffffff,r26;;
+       popcnt r26=r26;;
+       sub r26=63,r26;;
+       // r26 now contains the bit index (mod 64)
+       mov r27=1;;
+       shl r27=r27,r26;;
+       // r27 now contains the (within the proper word) bit mask 
+       add r26=r25,r26
+       // r26 now contains the vector [0..255]
+       adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r20] ;;
+       extr.u r28=r20,16,1
+       extr.u r29=r20,4,4 ;;
+       cmp.ne p6,p0=r28,r0     // if tpr.mmi is set, just rfi
+(p6)   br.cond.spnt.few just_do_rfi;;
+       shl r29=r29,4;;
+       adds r29=15,r29;;
+       cmp.ge p6,p0=r29,r26    // if tpr masks interrupt, just rfi
+(p6)   br.cond.spnt.few just_do_rfi;;
+
+// this doesn't work yet (dies early after getting to user mode)
+// but happens relatively infrequently, so fix it later.
+// NOTE that these will be counted incorrectly for now (for privcnt output)
+GLOBAL_ENTRY(rfi_with_interrupt)
+#if 1
+       br.sptk.many dispatch_break_fault ;;
+#endif
+
+       // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
+       //      r18 == XSI_PSR_IC
+       //      r21 == vipsr (ipsr in shared_mem)
+       //      r30 == IA64_KR(CURRENT)
+       //      r31 == pr
+       mov r17=cr.ipsr;;
+       mov r16=cr.isr;;
+       // set shared_mem isr
+       extr.u r16=r16,38,1;;   // grab cr.isr.ir bit
+       dep r16=r16,r0,38,1 ;;  // insert into cr.isr (rest of bits zero)
+       extr.u r20=r21,41,2 ;;  // get v(!)psr.ri
+       dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
+       adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r22]=r16 ;;
+       // set cr.ipsr (make sure cpl==2!)
+       mov r29=r17 ;;
+       movl r28=DELIVER_PSR_SET;;
+       movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+       or r29=r29,r28;;
+       and r29=r29,r27;;
+       mov cr.ipsr=r29;;
+       // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
+       // set shared_mem interrupt_delivery_enabled to 0
+       // set shared_mem interrupt_collection_enabled to 0
+       st8 [r18]=r0;;
+       // cover and set shared_mem precover_ifs to cr.ifs
+       // set shared_mem ifs and incomplete_regframe to 0
+#if 0
+       cover ;;
+       mov r20=cr.ifs;;
+       adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r22]=r0 ;;
+       adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r22]=r0 ;;
+       adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r22]=r20 ;;
+       // leave cr.ifs alone for later rfi
+#else
+       adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r22]=r0 ;;
+       adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r22];;
+       st8 [r22]=r0 ;;
+       adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st8 [r22]=r20 ;;
+#endif
+       // set iip to go to domain IVA break instruction vector
+       adds r22=IA64_VCPU_IVA_OFFSET,r30;;
+       ld8 r23=[r22];;
+       movl r24=0x3000;;
+       add r24=r24,r23;;
+       mov cr.iip=r24;;
+#if 0
+       // OK, now all set to go except for switch to virtual bank0
+       mov r30=r2; mov r29=r3;;
+       adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+       adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+       bsw.1;;
+       // FIXME: need to handle ar.unat!
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+       movl r31=XSI_IPSR;;
+       bsw.0 ;;
+       mov r2=r30; mov r3=r29;;
+#else
+       bsw.1;;
+       movl r31=XSI_IPSR;;
+       bsw.0 ;;
+#endif
+       adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+       st4 [r20]=r0 ;;
+       mov pr=r31,-1 ;;
+       rfi
+#endif // RFI_TO_INTERRUPT
+
+ENTRY(hyper_cover)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
+       cover ;;
+       adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+       mov r30=cr.ifs;;
+       adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
+       ld4 r21=[r20] ;;
+       cmp.eq p6,p7=r21,r0 ;;
+(p6)   st8 [r22]=r30;;
+(p7)   st4 [r20]=r0;;
+       mov cr.ifs=r0;;
+       // adjust return address to skip over break instruction
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+
+// return from metaphysical mode (meta=1) to virtual mode (meta=0)
+ENTRY(hyper_ssm_dt)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r21=[r20];;
+       cmp.eq p7,p0=r21,r0     // meta==0?
+(p7)   br.spnt.many    1f ;;   // already in virtual mode
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22=[r22];;
+       adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
+       ld4 r23=[r22];;
+       mov rr[r0]=r23;;
+       srlz.i;;
+       st4 [r20]=r0 ;;
+       // adjust return address to skip over break instruction
+1:     extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+
+// go to metaphysical mode (meta=1) from virtual mode (meta=0)
+ENTRY(hyper_rsm_dt)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld4 r21=[r20];;
+       cmp.ne p7,p0=r21,r0     // meta==0?
+(p7)   br.spnt.many    1f ;;   // already in metaphysical mode
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22=[r22];;
+       adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
+       ld4 r23=[r22];;
+       mov rr[r0]=r23;;
+       srlz.i;;
+       adds r21=1,r0 ;;
+       st4 [r20]=r21 ;;
+       // adjust return address to skip over break instruction
+1:     extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+
+ENTRY(hyper_get_tpr)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r8=[r20];;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_get_tpr)
+
+// if we get to here, there are no interrupts pending so we
+// can change virtual tpr to any value without fear of provoking
+// (or accidentally missing) delivering an interrupt
+ENTRY(hyper_set_tpr)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       movl r27=0xff00;;
+       adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       andcm r8=r8,r27;;
+       st8 [r20]=r8;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_set_tpr)
+
+ENTRY(hyper_get_ivr)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
+       ld8 r21=[r22];;
+       adds r21=1,r21;;
+       st8 [r22]=r21;;
+#endif
+       mov r8=15;;
+       // when we get to here r20=~=interrupts pending
+       cmp.eq p7,p0=r20,r0;;
+(p7)   adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+(p7)   st4 [r20]=r0;;
+(p7)   br.spnt.many 1f ;;
+       movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r30=[r30];;
+       adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
+       mov r25=192
+       adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
+       ld8 r23=[r22];;
+       cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   adds r24=-8,r24;;
+(p6)   adds r25=-64,r25;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+       cmp.eq p6,p0=r23,r0
+(p6)   br.cond.spnt.few 1f;    // this is actually an error
+       // r22 points to non-zero element of irr, r23 has value
+       // r24 points to corr element of insvc, r25 has elt*64
+       ld8 r26=[r24];;
+       cmp.geu p6,p0=r26,r23
+(p6)   br.cond.spnt.many 1f;
+       // not masked by insvc, get vector number
+       shr.u r26=r23,1;;
+       or r26=r23,r26;;
+       shr.u r27=r26,2;;
+       or r26=r26,r27;;
+       shr.u r27=r26,4;;
+       or r26=r26,r27;;
+       shr.u r27=r26,8;;
+       or r26=r26,r27;;
+       shr.u r27=r26,16;;
+       or r26=r26,r27;;
+       shr.u r27=r26,32;;
+       or r26=r26,r27;;
+       andcm r26=0xffffffffffffffff,r26;;
+       popcnt r26=r26;;
+       sub r26=63,r26;;
+       // r26 now contains the bit index (mod 64)
+       mov r27=1;;
+       shl r27=r27,r26;;
+       // r27 now contains the (within the proper word) bit mask 
+       add r26=r25,r26
+       // r26 now contains the vector [0..255]
+       adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r20] ;;
+       extr.u r28=r20,16,1
+       extr.u r29=r20,4,4 ;;
+       cmp.ne p6,p0=r28,r0     // if tpr.mmi is set, return SPURIOUS
+(p6)   br.cond.spnt.few 1f;
+       shl r29=r29,4;;
+       adds r29=15,r29;;
+       cmp.ge p6,p0=r29,r26
+(p6)   br.cond.spnt.few 1f;
+       // OK, have an unmasked vector to process/return
+       ld8 r25=[r24];;
+       or r25=r25,r27;;
+       st8 [r24]=r25;;
+       ld8 r25=[r22];;
+       andcm r25=r25,r27;;
+       st8 [r22]=r25;;
+       mov r8=r26;;
+       // if its a clock tick, remember itm to avoid delivering it twice
+       adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
+       ld8 r20=[r20];;
+       extr.u r20=r20,0,8;;
+       cmp.eq p6,p0=r20,r8
+       adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
+       adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
+       ld8 r23=[r23];;
+(p6)   st8 [r22]=r23;;
+       // all done
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_get_ivr)
+
+ENTRY(hyper_eoi)
+       // when we get to here r20=~=interrupts pending
+       cmp.ne p7,p0=r20,r0
+(p7)   br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22=[r22];;
+       adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
+       ld8 r23=[r22];;
+       cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+(p6)   adds r22=-8,r22;;
+(p6)   ld8 r23=[r22];;
+(p6)   cmp.eq p6,p0=r23,r0;;
+       cmp.eq p6,p0=r23,r0
+(p6)   br.cond.spnt.few 1f;    // this is actually an error
+       // r22 points to non-zero element of insvc, r23 has value
+       shr.u r24=r23,1;;
+       or r24=r23,r24;;
+       shr.u r25=r24,2;;
+       or r24=r24,r25;;
+       shr.u r25=r24,4;;
+       or r24=r24,r25;;
+       shr.u r25=r24,8;;
+       or r24=r24,r25;;
+       shr.u r25=r24,16;;
+       or r24=r24,r25;;
+       shr.u r25=r24,32;;
+       or r24=r24,r25;;
+       andcm r24=0xffffffffffffffff,r24;;
+       popcnt r24=r24;;
+       sub r24=63,r24;;
+       // r24 now contains the bit index
+       mov r25=1;;
+       shl r25=r25,r24;;
+       andcm r23=r23,r25;;
+       st8 [r22]=r23;;
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_eoi)
+
+ENTRY(hyper_set_itm)
+       // when we get to here r20=~=interrupts pending
+       cmp.ne p7,p0=r20,r0
+(p7)   br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+       ld8 r21=[r20];;
+       movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r20=[r20];;
+       adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
+       st8 [r20]=r8;;
+       cmp.geu p6,p0=r21,r8;;
+(p6)   mov r21=r8;;
+       // now "safe set" cr.itm=r21
+       mov r23=100;;
+2:     mov cr.itm=r21;;
+       srlz.d;;
+       mov r22=ar.itc ;;
+       cmp.leu p6,p0=r21,r22;;
+       add r21=r21,r23;;
+       shl r23=r23,1;;
+(p6)   br.cond.spnt.few 2b;;
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_set_itm)
+
+ENTRY(hyper_get_rr)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       extr.u r25=r8,61,3;;
+       adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shl r25=r25,3;;
+       add r20=r20,r25;;
+       ld8 r8=[r20];;
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_get_rr)
+
+ENTRY(hyper_set_rr)
+       extr.u r25=r8,61,3;;
+       cmp.leu p7,p0=7,r25     // punt on setting rr7
+(p7)   br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       extr.u r26=r9,8,24      // r26 = r9.rid
+       movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r20=[r20];;
+       adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
+       ld4 r22=[r21];;
+       adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
+       ld4 r23=[r21];;
+       adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
+       add r22=r26,r22;;
+       cmp.geu p6,p0=r22,r23   // if r9.rid + starting_rid >= ending_rid
+(p6)   br.cond.spnt.few 1f;    // this is an error, but just ignore/return
+       // r21=starting_rid
+       adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shl r25=r25,3;;
+       add r20=r20,r25;;
+       st8 [r20]=r9;;          // store away exactly what was passed
+       // but adjust value actually placed in rr[r8]
+       // r22 contains adjusted rid, "mangle" it (see regionreg.c)
+       // and set ps to PAGE_SHIFT and ve to 1
+       extr.u r27=r22,0,8
+       extr.u r28=r22,8,8
+       extr.u r29=r22,16,8;;
+       dep.z r23=PAGE_SHIFT,2,6;;
+       dep r23=-1,r23,0,1;;    // mangling is swapping bytes 1 & 3
+       dep r23=r27,r23,24,8;;
+       dep r23=r28,r23,16,8;;
+       dep r23=r29,r23,8,8
+       cmp.eq p6,p0=r25,r0;;   // if rr0, save for metaphysical
+(p6)   st4 [r24]=r23
+       mov rr[r8]=r23;;
+       // done, mosey on back
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_set_rr)
+
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+//     r18 == XSI_PSR_IC
+//     r31 == pr
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       shr.u r20 = r8, 61
+       addl r25 = 1, r0
+       movl r17 = 0xe000000000000000
+       ;;
+       and r21 = r17, r8               // VHPT_Addr1
+       ;;
+       shladd r28 = r20, 3, r18
+       adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+       ;;
+       adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+       addl r28 = 32767, r0
+       ld8 r24 = [r19]                 // pta
+       ;;
+       ld8 r23 = [r27]                 // rrs[vadr>>61]
+       extr.u r26 = r24, 2, 6
+       ;;
+       extr.u r22 = r23, 2, 6
+       shl r30 = r25, r26
+       ;;
+       shr.u r19 = r8, r22
+       shr.u r29 = r24, 15
+       ;;
+       adds r17 = -1, r30
+       ;;
+       shladd r27 = r19, 3, r0
+       extr.u r26 = r17, 15, 46
+       ;;
+       andcm r24 = r29, r26
+       and r19 = r28, r27
+       shr.u r25 = r27, 15
+       ;;
+       and r23 = r26, r25
+       ;;
+       or r22 = r24, r23
+       ;;
+       dep.z r20 = r22, 15, 46
+       ;;
+       or r16 = r20, r21
+       ;;
+       or r8 = r19, r16
+       // done, update iip/ipsr to next instruction
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_thash)
+
+ENTRY(hyper_ptc_ga)
+#ifndef FAST_PTC_GA
+       br.spnt.few dispatch_break_fault ;;
+#endif
+       // FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r28=r8
+       extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
+       mov r20=1
+       shr.u r24=r8,61
+       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
+       movl r26=0x8000000000000000     // INVALID_TI_TAG
+       mov r30=ar.lc
+       ;;
+       shl r19=r20,r19
+       cmp.eq p7,p0=7,r24
+(p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
+       ;;
+       cmp.le p7,p0=r19,r0             // skip flush if size<=0
+(p7)   br.cond.dpnt 2f ;;
+       extr.u r24=r19,0,PAGE_SHIFT
+       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
+       cmp.ne p7,p0=r24,r0 ;;
+(p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
+       mov ar.lc=r23
+       movl r29=PAGE_SIZE;;
+1:
+       thash r25=r28 ;;
+       adds r25=16,r25 ;;
+       ld8 r24=[r25] ;;
+       // FIXME: should check if tag matches, not just blow it away
+       or r24=r26,r24 ;;               // vhpt_entry->ti_tag = 1
+       st8 [r25]=r24
+       ptc.ga r28,r27 ;;
+       srlz.i ;;
+       add r28=r29,r28
+       br.cloop.sptk.few 1b
+       ;;
+2:
+       mov ar.lc=r30 ;;
+       mov r29=cr.ipsr
+       mov r30=cr.iip;;
+       movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r27=[r27];;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r27
+       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       ld8 r24=[r25]
+       ld8 r27=[r26] ;;
+       and r24=-2,r24
+       and r27=-2,r27 ;;
+       st8 [r25]=r24                   // set 1-entry i/dtlb as not present
+       st8 [r26]=r27 ;;
+       // increment to point to next instruction
+       extr.u r26=r29,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r30=16,r30
+(p7)   adds r26=1,r26
+       ;;
+       dep r29=r26,r29,41,2
+       ;;
+       mov cr.ipsr=r29
+       mov cr.iip=r30
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_ptc_ga)
+
+ENTRY(hyper_itc_d)
+       br.spnt.many dispatch_break_fault ;;
+END(hyper_itc_d)
+
+ENTRY(hyper_itc_i)
+       br.spnt.many dispatch_break_fault ;;
+END(hyper_itc_i)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/idle0_task.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/idle0_task.c    Thu Sep  1 18:46:28 2005
@@ -0,0 +1,58 @@
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/desc.h>
+
+#define INIT_MM(name) \
+{                                                              \
+       .pgd            = swapper_pg_dir,                       \
+       .mm_users       = ATOMIC_INIT(2),                       \
+       .mm_count       = ATOMIC_INIT(1),                       \
+       .page_table_lock =  SPIN_LOCK_UNLOCKED,                 \
+       .mmlist         = LIST_HEAD_INIT(name.mmlist),          \
+}
+
+#define IDLE0_EXEC_DOMAIN(_ed,_d)    \
+{                                    \
+    processor:   0,                  \
+    mm:          0,                  \
+    thread:      INIT_THREAD,        \
+    domain:      (_d)                \
+}
+
+#define IDLE0_DOMAIN(_t)             \
+{                                    \
+    domain_id:   IDLE_DOMAIN_ID,     \
+    domain_flags:DOMF_idle_domain,   \
+    refcnt:      ATOMIC_INIT(1)      \
+}
+
+struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_SYMBOL(init_mm);
+
+struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);
+#if 0
+struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
+                                                         &idle0_domain);
+#endif
+
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data.init_task" section...
+ */
+union {
+       struct {
+               struct domain task;
+       } s;
+       unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));
+// = {{
+       ;
+//.task =              IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
+//};
+//};
+
+EXPORT_SYMBOL(init_task);
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/irq.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/irq.c   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1503 @@
+/*
+ *     linux/arch/ia64/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@xxxxxxxxx>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ *                     migration without losing interrupts for iosapic
+ *                     architecture.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/signal.h>
+#endif
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#ifndef XEN
+#include <linux/random.h>
+#include <linux/cpu.h>
+#endif
+#include <linux/ctype.h>
+#ifndef XEN
+#include <linux/smp_lock.h>
+#endif
+#include <linux/init.h>
+#ifndef XEN
+#include <linux/kernel_stat.h>
+#endif
+#include <linux/irq.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+#include <linux/seq_file.h>
+#ifndef XEN
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#endif
+
+#include <asm/atomic.h>
+#ifndef XEN
+#include <asm/cpu.h>
+#endif
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#ifndef XEN
+#include <asm/tlbflush.h>
+#endif
+#include <asm/delay.h>
+#include <asm/irq.h>
+
+#ifdef XEN
+#include <xen/event.h>
+#define _irq_desc irq_desc
+#define irq_descp(irq) &irq_desc[irq]
+#define apicid_to_phys_cpu_present(x)  1
+#endif
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = {
+       [0 ... NR_IRQS-1] = {
+               .status = IRQ_DISABLED,
+               .handler = &no_irq_type,
+               .lock = SPIN_LOCK_UNLOCKED
+       }
+};
+
+/*
+ * This is updated when the user sets irq affinity via /proc
+ */
+cpumask_t    __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+
+#ifdef CONFIG_IA64_GENERIC
+irq_desc_t * __ia64_irq_desc (unsigned int irq)
+{
+       return _irq_desc + irq;
+}
+
+ia64_vector __ia64_irq_to_vector (unsigned int irq)
+{
+       return (ia64_vector) irq;
+}
+
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
+{
+       return (unsigned int) vec;
+}
+#endif
+
+static void register_irq_proc (unsigned int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+#ifdef XEN
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+#else
+irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
+{ return IRQ_NONE; }
+#endif
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesn't deserve
+ * a generic callback i think.
+ */
+#ifdef CONFIG_X86
+       printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        */
+       ack_APIC_irq();
+#endif
+#endif
+#ifdef CONFIG_IA64
+       printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none  disable_none
+#define end_none       enable_none
+
+struct hw_interrupt_type no_irq_type = {
+       "none",
+       startup_none,
+       shutdown_none,
+       enable_none,
+       disable_none,
+       ack_none,
+       end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+#ifndef XEN
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int j, i = *(loff_t *) v;
+       struct irqaction * action;
+       irq_desc_t *idesc;
+       unsigned long flags;
+
+       if (i == 0) {
+               seq_puts(p, "           ");
+               for (j=0; j<NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "CPU%d       ",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               idesc = irq_descp(i);
+               spin_lock_irqsave(&idesc->lock, flags);
+               action = idesc->action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+               seq_printf(p, " %14s", idesc->handler->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
+               seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&idesc->lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_puts(p, "NMI: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", nmi_count(j));
+               seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+               seq_puts(p, "LOC: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+               seq_putc(p, '\n');
+#endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+       }
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+inline void synchronize_irq(unsigned int irq)
+{
+#ifndef XEN
+       struct irq_desc *desc = irq_desc + irq;
+
+       while (desc->status & IRQ_INPROGRESS)
+               cpu_relax();
+#endif
+}
+EXPORT_SYMBOL(synchronize_irq);
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq,
+               struct pt_regs *regs, struct irqaction *action)
+{
+       int status = 1; /* Force the "do bottom halves" bit */
+       int retval = 0;
+
+#ifndef XEN
+       if (!(action->flags & SA_INTERRUPT))
+#endif
+               local_irq_enable();
+
+#ifdef XEN
+               action->handler(irq, action->dev_id, regs);
+#else
+       do {
+               status |= action->flags;
+               retval |= action->handler(irq, action->dev_id, regs);
+               action = action->next;
+       } while (action);
+       if (status & SA_SAMPLE_RANDOM)
+               add_interrupt_randomness(irq);
+#endif
+       local_irq_disable();
+       return retval;
+}
+
+#ifndef XEN
+static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+       struct irqaction *action;
+
+       if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
+               printk(KERN_ERR "irq event %d: bogus return value %x\n",
+                               irq, action_ret);
+       } else {
+               printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+       }
+       dump_stack();
+       printk(KERN_ERR "handlers:\n");
+       action = desc->action;
+       do {
+               printk(KERN_ERR "[<%p>]", action->handler);
+               print_symbol(" (%s)",
+                       (unsigned long)action->handler);
+               printk("\n");
+               action = action->next;
+       } while (action);
+}
+
+static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+       static int count = 100;
+
+       if (count) {
+               count--;
+               __report_bad_irq(irq, desc, action_ret);
+       }
+}
+#endif
+
+static int noirqdebug;
+
+static int __init noirqdebug_setup(char *str)
+{
+       noirqdebug = 1;
+       printk("IRQ lockup detection disabled\n");
+       return 1;
+}
+
+__setup("noirqdebug", noirqdebug_setup);
+
+/*
+ * If 99,900 of the previous 100,000 interrupts have not been handled then
+ * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
+ * turn the IRQ off.
+ *
+ * (The other 100-of-100,000 interrupts may have been a correctly-functioning
+ *  device sharing an IRQ with the failing one)
+ *
+ * Called under desc->lock
+ */
+#ifndef XEN
+static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+       if (action_ret != IRQ_HANDLED) {
+               desc->irqs_unhandled++;
+               if (action_ret != IRQ_NONE)
+                       report_bad_irq(irq, desc, action_ret);
+       }
+
+       desc->irq_count++;
+       if (desc->irq_count < 100000)
+               return;
+
+       desc->irq_count = 0;
+       if (desc->irqs_unhandled > 99900) {
+               /*
+                * The interrupt is stuck
+                */
+               __report_bad_irq(irq, desc, action_ret);
+               /*
+                * Now kill the IRQ
+                */
+               printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
+               desc->status |= IRQ_DISABLED;
+               desc->handler->disable(irq);
+       }
+       desc->irqs_unhandled = 0;
+}
+#endif
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ *     disable_irq_nosync - disable an irq without waiting
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Disables and Enables are
+ *     nested.
+ *     Unlike disable_irq(), this function does not ensure existing
+ *     instances of the IRQ handler have completed before returning.
+ *
+ *     This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+       irq_desc_t *desc = irq_descp(irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       if (!desc->depth++) {
+               desc->status |= IRQ_DISABLED;
+               desc->handler->disable(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(disable_irq_nosync);
+
+/**
+ *     disable_irq - disable an irq and wait for completion
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Enables and Disables are
+ *     nested.
+ *     This function waits for any pending IRQ handlers for this interrupt
+ *     to complete before returning. If you use this function while
+ *     holding a resource the IRQ handler may need you will deadlock.
+ *
+ *     This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+       irq_desc_t *desc = irq_descp(irq);
+
+       disable_irq_nosync(irq);
+       if (desc->action)
+               synchronize_irq(irq);
+}
+EXPORT_SYMBOL(disable_irq);
+
+/**
+ *     enable_irq - enable handling of an irq
+ *     @irq: Interrupt to enable
+ *
+ *     Undoes the effect of one call to disable_irq().  If this
+ *     matches the last disable, processing of interrupts on this
+ *     IRQ line is re-enabled.
+ *
+ *     This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+       irq_desc_t *desc = irq_descp(irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       switch (desc->depth) {
+       case 1: {
+               unsigned int status = desc->status & ~IRQ_DISABLED;
+               desc->status = status;
+#ifndef XEN
+               if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+                       desc->status = status | IRQ_REPLAY;
+                       hw_resend_irq(desc->handler,irq);
+               }
+#endif
+               desc->handler->enable(irq);
+               /* fall-through */
+       }
+       default:
+               desc->depth--;
+               break;
+       case 0:
+               printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n",
+                      irq, (void *) __builtin_return_address(0));
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(enable_irq);
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+       irq_desc_t *desc = irq_desc + irq;
+       struct irqaction * action;
+       unsigned int status;
+
+#ifndef XEN
+       kstat_this_cpu.irqs[irq]++;
+#endif
+       if (desc->status & IRQ_PER_CPU) {
+               irqreturn_t action_ret;
+
+               /*
+                * No locking required for CPU-local interrupts:
+                */
+               desc->handler->ack(irq);
+               action_ret = handle_IRQ_event(irq, regs, desc->action);
+#ifndef XEN
+               if (!noirqdebug)
+                       note_interrupt(irq, desc, action_ret);
+#endif
+               desc->handler->end(irq);
+               return 1;
+       }
+
+       spin_lock(&desc->lock);
+       desc->handler->ack(irq);
+       /*
+        * REPLAY is when Linux resends an IRQ that was dropped earlier
+        * WAITING is used by probe to mark irqs that are being tested
+        */
+#ifdef XEN
+       status = desc->status & ~IRQ_REPLAY;
+#else
+       status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+#endif
+       status |= IRQ_PENDING; /* we _want_ to handle it */
+
+       /*
+        * If the IRQ is disabled for whatever reason, we cannot
+        * use the action we have.
+        */
+       action = NULL;
+       if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
+               action = desc->action;
+               status &= ~IRQ_PENDING; /* we commit to handling */
+               status |= IRQ_INPROGRESS; /* we are handling it */
+       }
+       desc->status = status;
+
+       /*
+        * If there is no IRQ handler or it was disabled, exit early.
+        * Since we set PENDING, if another processor is handling
+        * a different instance of this same irq, the other processor
+        * will take care of it.
+        */
+       if (unlikely(!action))
+               goto out;
+
+       /*
+        * Edge triggered interrupts need to remember
+        * pending events.
+        * This applies to any hw interrupts that allow a second
+        * instance of the same irq to arrive while we are in do_IRQ
+        * or in the handler. But the code here only handles the _second_
+        * instance of the irq, not the third or fourth. So it is mostly
+        * useful for irq hardware that does not mask cleanly in an
+        * SMP environment.
+        */
+       for (;;) {
+               irqreturn_t action_ret;
+
+               spin_unlock(&desc->lock);
+
+               action_ret = handle_IRQ_event(irq, regs, action);
+
+               spin_lock(&desc->lock);
+#ifndef XEN
+               if (!noirqdebug)
+                       note_interrupt(irq, desc, action_ret);
+#endif
+               if (likely(!(desc->status & IRQ_PENDING)))
+                       break;
+               desc->status &= ~IRQ_PENDING;
+       }
+       desc->status &= ~IRQ_INPROGRESS;
+
+out:
+       /*
+        * The ->end() handler has to deal with interrupts which got
+        * disabled while the handler was running.
+        */
+       desc->handler->end(irq);
+       spin_unlock(&desc->lock);
+
+       return 1;
+}
+
+/**
+ *     request_irq - allocate an interrupt line
+ *     @irq: Interrupt line to allocate
+ *     @handler: Function to be called when the IRQ occurs
+ *     @irqflags: Interrupt type flags
+ *     @devname: An ascii name for the claiming device
+ *     @dev_id: A cookie passed back to the handler function
+ *
+ *     This call allocates interrupt resources and enables the
+ *     interrupt line and IRQ handling. From the point this
+ *     call is made your handler function may be invoked. Since
+ *     your handler function must clear any interrupt the board 
+ *     raises, you must take care both to initialise your hardware
+ *     and to set up the interrupt handler in the right order.
+ *
+ *     Dev_id must be globally unique. Normally the address of the
+ *     device data structure is used as the cookie. Since the handler
+ *     receives this value it makes sense to use it.
+ *
+ *     If your interrupt is shared you must pass a non NULL dev_id
+ *     as this is required when freeing the interrupt.
+ *
+ *     Flags:
+ *
+ *     SA_SHIRQ                Interrupt is shared
+ *
+ *     SA_INTERRUPT            Disable local interrupts while processing
+ *
+ *     SA_SAMPLE_RANDOM        The interrupt can be used for entropy
+ *
+ */
+
+int request_irq(unsigned int irq,
+               irqreturn_t (*handler)(int, void *, struct pt_regs *),
+               unsigned long irqflags,
+               const char * devname,
+               void *dev_id)
+{
+       int retval;
+       struct irqaction * action;
+
+#if 1
+       /*
+        * Sanity-check: shared interrupts should REALLY pass in
+        * a real dev-ID, otherwise we'll have trouble later trying
+        * to figure out which interrupt is which (messes up the
+        * interrupt freeing logic etc).
+        */
+       if (irqflags & SA_SHIRQ) {
+               if (!dev_id)
+                       printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname);
+       }
+#endif
+
+       if (irq >= NR_IRQS)
+               return -EINVAL;
+       if (!handler)
+               return -EINVAL;
+
+       action = xmalloc(struct irqaction);
+       if (!action)
+               return -ENOMEM;
+
+       action->handler = handler;
+#ifndef XEN
+       action->flags = irqflags;
+       action->mask = 0;
+#endif
+       action->name = devname;
+#ifndef XEN
+       action->next = NULL;
+#endif
+       action->dev_id = dev_id;
+
+       retval = setup_irq(irq, action);
+       if (retval)
+               xfree(action);
+       return retval;
+}
+
+EXPORT_SYMBOL(request_irq);
+
+/**
+ *     free_irq - free an interrupt
+ *     @irq: Interrupt line to free
+ *     @dev_id: Device identity to free
+ *
+ *     Remove an interrupt handler. The handler is removed and if the
+ *     interrupt line is no longer in use by any driver it is disabled.
+ *     On a shared IRQ the caller must ensure the interrupt is disabled
+ *     on the card it drives before calling this function. The function
+ *     does not return until any executing interrupts for this IRQ
+ *     have completed.
+ *
+ *     This function must not be called from interrupt context.
+ */
+
+#ifdef XEN
+void free_irq(unsigned int irq)
+#else
+void free_irq(unsigned int irq, void *dev_id)
+#endif
+{
+       irq_desc_t *desc;
+       struct irqaction **p;
+       unsigned long flags;
+
+       if (irq >= NR_IRQS)
+               return;
+
+       desc = irq_descp(irq);
+       spin_lock_irqsave(&desc->lock,flags);
+#ifdef XEN
+       if (desc->action) {
+               struct irqaction * action = desc->action;
+               desc->action = NULL;
+#else
+       p = &desc->action;
+       for (;;) {
+               struct irqaction * action = *p;
+               if (action) {
+                       struct irqaction **pp = p;
+                       p = &action->next;
+                       if (action->dev_id != dev_id)
+                               continue;
+
+                       /* Found it - now remove it from the list of entries */
+                       *pp = action->next;
+                       if (!desc->action) {
+#endif
+                               desc->status |= IRQ_DISABLED;
+                               desc->handler->shutdown(irq);
+#ifndef XEN
+                       }
+#endif
+                       spin_unlock_irqrestore(&desc->lock,flags);
+
+                       /* Wait to make sure it's not being used on another CPU */
+                       synchronize_irq(irq);
+                       xfree(action);
+                       return;
+               }
+               printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
+               spin_unlock_irqrestore(&desc->lock,flags);
+#ifndef XEN
+               return;
+       }
+#endif
+}
+
+EXPORT_SYMBOL(free_irq);
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ *     probe_irq_on    - begin an interrupt autodetect
+ *
+ *     Commence probing for an interrupt. The interrupts are scanned
+ *     and a mask of potential interrupt lines is returned.
+ *
+ */
+
+#ifndef XEN
+unsigned long probe_irq_on(void)
+{
+       unsigned int i;
+       irq_desc_t *desc;
+       unsigned long val;
+       unsigned long delay;
+
+       down(&probe_sem);
+       /*
+        * something may have generated an irq long ago and we want to
+        * flush such a longstanding irq before considering it as spurious.
+        */
+       for (i = NR_IRQS-1; i > 0; i--)  {
+               desc = irq_descp(i);
+
+               spin_lock_irq(&desc->lock);
+               if (!desc->action)
+                       desc->handler->startup(i);
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /* Wait for longstanding interrupts to trigger. */
+       for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+               /* about 20ms delay */ barrier();
+
+       /*
+        * enable any unassigned irqs
+        * (we must startup again here because if a longstanding irq
+        * happened in the previous stage, it may have masked itself)
+        */
+       for (i = NR_IRQS-1; i > 0; i--) {
+               desc = irq_descp(i);
+
+               spin_lock_irq(&desc->lock);
+               if (!desc->action) {
+                       desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+                       if (desc->handler->startup(i))
+                               desc->status |= IRQ_PENDING;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /*
+        * Wait for spurious interrupts to trigger
+        */
+       for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+               /* about 100ms delay */ barrier();
+
+       /*
+        * Now filter out any obviously spurious interrupts
+        */
+       val = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_descp(i);
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       /* It triggered already - consider it spurious. */
+                       if (!(status & IRQ_WAITING)) {
+                               desc->status = status & ~IRQ_AUTODETECT;
+                               desc->handler->shutdown(i);
+                       } else
+                               if (i < 32)
+                                       val |= 1 << i;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       return val;
+}
+
+EXPORT_SYMBOL(probe_irq_on);
+
+/**
+ *     probe_irq_mask - scan a bitmap of interrupt lines
+ *     @val:   mask of interrupts to consider
+ *
+ *     Scan the ISA bus interrupt lines and return a bitmap of
+ *     active interrupts. The interrupt probe logic state is then
+ *     returned to its previous value.
+ *
+ *     Note: we need to scan all the irq's even though we will
+ *     only return ISA irq numbers - just so that we reset them
+ *     all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+       int i;
+       unsigned int mask;
+
+       mask = 0;
+       for (i = 0; i < 16; i++) {
+               irq_desc_t *desc = irq_descp(i);
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (!(status & IRQ_WAITING))
+                               mask |= 1 << i;
+
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       return mask & val;
+}
+EXPORT_SYMBOL(probe_irq_mask);
+
+/**
+ *     probe_irq_off   - end an interrupt autodetect
+ *     @val: mask of potential interrupts (unused)
+ *
+ *     Scans the unused interrupt lines and returns the line which
+ *     appears to have triggered the interrupt. If no interrupt was
+ *     found then zero is returned. If more than one interrupt is
+ *     found then minus the first candidate is returned to indicate
+ *     there is doubt.
+ *
+ *     The interrupt probe logic state is returned to its previous
+ *     value.
+ *
+ *     BUGS: When used in a module (which arguably shouldn't happen)
+ *     nothing prevents two IRQ probe callers from overlapping. The
+ *     results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+       int i, irq_found, nr_irqs;
+
+       nr_irqs = 0;
+       irq_found = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_descp(i);
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (!(status & IRQ_WAITING)) {
+                               if (!nr_irqs)
+                                       irq_found = i;
+                               nr_irqs++;
+                       }
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       if (nr_irqs > 1)
+               irq_found = -irq_found;
+       return irq_found;
+}
+
+EXPORT_SYMBOL(probe_irq_off);
+#endif
+
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+       int shared = 0;
+       unsigned long flags;
+       struct irqaction *old, **p;
+       irq_desc_t *desc = irq_descp(irq);
+
+#ifndef XEN
+       if (desc->handler == &no_irq_type)
+               return -ENOSYS;
+       /*
+        * Some drivers like serial.c use request_irq() heavily,
+        * so we have to be careful not to interfere with a
+        * running system.
+        */
+       if (new->flags & SA_SAMPLE_RANDOM) {
+               /*
+                * This function might sleep, we want to call it first,
+                * outside of the atomic block.
+                * Yes, this might clear the entropy pool if the wrong
+                * driver is attempted to be loaded, without actually
+                * installing a new handler, but is this really a problem,
+                * only the sysadmin is able to do this.
+                */
+               rand_initialize_irq(irq);
+       }
+
+       if (new->flags & SA_PERCPU_IRQ) {
+               desc->status |= IRQ_PER_CPU;
+               desc->handler = &irq_type_ia64_lsapic;
+       }
+#endif
+
+       /*
+        * The following block of code has to be executed atomically
+        */
+       spin_lock_irqsave(&desc->lock,flags);
+       p = &desc->action;
+       if ((old = *p) != NULL) {
+#ifdef XEN
+               if (1) {
+               /* Can't share interrupts unless both agree to */
+#else
+               if (!(old->flags & new->flags & SA_SHIRQ)) {
+#endif
+                       spin_unlock_irqrestore(&desc->lock,flags);
+                       return -EBUSY;
+               }
+
+#ifndef XEN
+               /* add new interrupt at end of irq queue */
+               do {
+                       p = &old->next;
+                       old = *p;
+               } while (old);
+               shared = 1;
+#endif
+       }
+
+       *p = new;
+
+#ifndef XEN
+       if (!shared) {
+#else
+       {
+#endif
+               desc->depth = 0;
+#ifdef XEN
+               desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
+#else
+               desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
+#endif
+               desc->handler->startup(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifndef XEN
+       register_irq_proc(irq);
+#endif
+       return 0;
+}
+
+#ifndef XEN
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#ifdef CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
+
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+       cpumask_t mask = CPU_MASK_NONE;
+
+       cpu_set(cpu_logical_id(hwid), mask);
+
+       if (irq < NR_IRQS) {
+               irq_affinity[irq] = mask;
+               irq_redir[irq] = (char) (redir & 0xff);
+       }
+}
+
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       int len = sprintf(page, "%s", irq_redir[(long)data] ? "r " : "");
+
+       len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]);
+       if (count - len < 2)
+               return -EINVAL;
+       len += sprintf(page + len, "\n");
+       return len;
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+                                   unsigned long count, void *data)
+{
+       unsigned int irq = (unsigned long) data;
+       int full_count = count, err;
+       cpumask_t new_value, tmp;
+#      define R_PREFIX_LEN 16
+       char rbuf[R_PREFIX_LEN];
+       int rlen;
+       int prelen;
+       irq_desc_t *desc = irq_descp(irq);
+       unsigned long flags;
+
+       if (!desc->handler->set_affinity)
+               return -EIO;
+
+       /*
+        * If string being written starts with a prefix of 'r' or 'R'
+        * and some limited number of spaces, set IA64_IRQ_REDIRECTED.
+        * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't
+        * all be trimmed as part of prelen, the untrimmed spaces will
+        * cause the hex parsing to fail, and this write() syscall will
+        * fail with EINVAL.
+        */
+
+       if (!count)
+               return -EINVAL;
+       rlen = min(sizeof(rbuf)-1, count);
+       if (copy_from_user(rbuf, buffer, rlen))
+               return -EFAULT;
+       rbuf[rlen] = 0;
+       prelen = 0;
+       if (tolower(*rbuf) == 'r') {
+               prelen = strspn(rbuf, "Rr ");
+               irq |= IA64_IRQ_REDIRECTED;
+       }
+
+       err = cpumask_parse(buffer+prelen, count-prelen, new_value);
+       if (err)
+               return err;
+
+       /*
+        * Do not allow disabling IRQs completely - it's a too easy
+        * way to make the system unusable accidentally :-) At least
+        * one online CPU still has to be targeted.
+        */
+       cpus_and(tmp, new_value, cpu_online_map);
+       if (cpus_empty(tmp))
+               return -EINVAL;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       pending_irq_cpumask[irq] = new_value;
+       spin_unlock_irqrestore(&desc->lock, flags);
+
+       return full_count;
+}
+
+void move_irq(int irq)
+{
+       /* note - we hold desc->lock */
+       cpumask_t tmp;
+       irq_desc_t *desc = irq_descp(irq);
+
+       if (!cpus_empty(pending_irq_cpumask[irq])) {
+               cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+               if (unlikely(!cpus_empty(tmp))) {
+                       desc->handler->set_affinity(irq, 
pending_irq_cpumask[irq]);
+               }
+               cpus_clear(pending_irq_cpumask[irq]);
+       }
+}
+
+
+#endif /* CONFIG_SMP */
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_map is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+       cpumask_t       mask;
+       irq_desc_t *desc;
+       int             irq, new_cpu;
+
+       for (irq=0; irq < NR_IRQS; irq++) {
+               desc = irq_descp(irq);
+
+               /*
+                * No handling for now.
+                * TBD: Implement a disable function so we can now
+                * tell CPU not to respond to these local intr sources.
+                * such as ITV,CPEI,MCA etc.
+                */
+               if (desc->status == IRQ_PER_CPU)
+                       continue;
+
+               cpus_and(mask, irq_affinity[irq], cpu_online_map);
+               if (any_online_cpu(mask) == NR_CPUS) {
+                       /*
+                        * Save it for phase 2 processing
+                        */
+                       vectors_in_migration[irq] = irq;
+
+                       new_cpu = any_online_cpu(cpu_online_map);
+                       mask = cpumask_of_cpu(new_cpu);
+
+                       /*
+                        * All three are essential, currently WARN_ON.. maybe 
panic?
+                        */
+                       if (desc->handler && desc->handler->disable &&
+                               desc->handler->enable && 
desc->handler->set_affinity) {
+                               desc->handler->disable(irq);
+                               desc->handler->set_affinity(irq, mask);
+                               desc->handler->enable(irq);
+                       } else {
+                               WARN_ON((!(desc->handler) || 
!(desc->handler->disable) ||
+                                               !(desc->handler->enable) ||
+                                               
!(desc->handler->set_affinity)));
+                       }
+               }
+       }
+}
+
+void fixup_irqs(void)
+{
+       unsigned int irq;
+       extern void ia64_process_pending_intr(void);
+
+       ia64_set_itv(1<<16);
+       /*
+        * Phase 1: Locate irq's bound to this cpu and
+        * relocate them for cpu removal.
+        */
+       migrate_irqs();
+
+       /*
+        * Phase 2: Perform interrupt processing for all entries reported in
+        * local APIC.
+        */
+       ia64_process_pending_intr();
+
+       /*
+        * Phase 3: Now handle any interrupts not captured in local APIC.
+        * This is to account for cases where the device interrupted during the time 
the
+        * rte was being disabled and re-programmed.
+        */
+       for (irq=0; irq < NR_IRQS; irq++) {
+               if (vectors_in_migration[irq]) {
+                       vectors_in_migration[irq]=0;
+                       do_IRQ(irq, NULL);
+               }
+       }
+
+       /*
+        * Now let processor die. We do irq disable and max_xtp() to
+        * ensure there are no more interrupts routed to this processor.
+        * But the local timer interrupt can have 1 pending which we
+        * take care in timer_interrupt().
+        */
+       max_xtp();
+       local_irq_disable();
+}
+#endif
+
+#ifndef XEN
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+       if (count - len < 2)
+               return -EINVAL;
+       len += sprintf(page + len, "\n");
+       return len;
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       cpumask_t *mask = (cpumask_t *)data;
+       unsigned long full_count = count, err;
+       cpumask_t new_value;
+
+       err = cpumask_parse(buffer, count, new_value);
+       if (err)
+               return err;
+
+       *mask = new_value;
+       return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+       char name [MAX_NAMELEN];
+
+       if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || 
irq_dir[irq])
+               return;
+
+       memset(name, 0, MAX_NAMELEN);
+       sprintf(name, "%d", irq);
+
+       /* create /proc/irq/1234 */
+       irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#ifdef CONFIG_SMP
+       {
+               struct proc_dir_entry *entry;
+
+               /* create /proc/irq/1234/smp_affinity */
+               entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+               if (entry) {
+                       entry->nlink = 1;
+                       entry->data = (void *)(long)irq;
+                       entry->read_proc = irq_affinity_read_proc;
+                       entry->write_proc = irq_affinity_write_proc;
+               }
+
+               smp_affinity_entry[irq] = entry;
+       }
+#endif
+}
+
+cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+
+void init_irq_proc (void)
+{
+       struct proc_dir_entry *entry;
+       int i;
+
+       /* create /proc/irq */
+       root_irq_dir = proc_mkdir("irq", 0);
+
+       /* create /proc/irq/prof_cpu_mask */
+       entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+       if (!entry)
+               return;
+
+       entry->nlink = 1;
+       entry->data = (void *)&prof_cpu_mask;
+       entry->read_proc = prof_cpu_mask_read_proc;
+       entry->write_proc = prof_cpu_mask_write_proc;
+
+       /*
+        * Create entries for all existing IRQs.
+        */
+       for (i = 0; i < NR_IRQS; i++) {
+               if (irq_descp(i)->handler == &no_irq_type)
+                       continue;
+               register_irq_proc(i);
+       }
+}
+#endif
+
+
+#ifdef XEN
+/*
+ * HANDLING OF GUEST-BOUND PHYSICAL IRQS
+ */
+
+#define IRQ_MAX_GUESTS 7
+typedef struct {
+    u8 nr_guests;
+    u8 in_flight;
+    u8 shareable;
+    struct domain *guest[IRQ_MAX_GUESTS];
+} irq_guest_action_t;
+
+static void __do_IRQ_guest(int irq)
+{
+    irq_desc_t         *desc = &irq_desc[irq];
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    struct domain      *d;
+    int                 i;
+
+    for ( i = 0; i < action->nr_guests; i++ )
+    {
+        d = action->guest[i];
+        if ( !test_and_set_bit(irq, &d->pirq_mask) )
+            action->in_flight++;
+        send_guest_pirq(d, irq);
+    }
+}
+
+int pirq_guest_unmask(struct domain *d)
+{
+    irq_desc_t    *desc;
+    int            i, j, pirq;
+    u32            m;
+    shared_info_t *s = d->shared_info;
+
+    for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ )
+    {
+        m = d->pirq_mask[i];
+        while ( (j = ffs(m)) != 0 )
+        {
+            m &= ~(1 << --j);
+            pirq = (i << 5) + j;
+            desc = &irq_desc[pirq];
+            spin_lock_irq(&desc->lock);
+            if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
+                 test_and_clear_bit(pirq, &d->pirq_mask) &&
+                 (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
+                desc->handler->end(pirq);
+            spin_unlock_irq(&desc->lock);
+        }
+    }
+
+    return 0;
+}
+
+int pirq_guest_bind(struct vcpu *d, int irq, int will_share)
+{
+    irq_desc_t         *desc = &irq_desc[irq];
+    irq_guest_action_t *action;
+    unsigned long       flags;
+    int                 rc = 0;
+
+    if ( !IS_CAPABLE_PHYSDEV(d->domain) )
+        return -EPERM;
+
+    spin_lock_irqsave(&desc->lock, flags);
+
+    action = (irq_guest_action_t *)desc->action;
+
+    if ( !(desc->status & IRQ_GUEST) )
+    {
+        if ( desc->action != NULL )
+        {
+            DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
+                    irq, desc->action->name);
+            rc = -EBUSY;
+            goto out;
+        }
+
+        action = xmalloc(irq_guest_action_t);
+        if ( (desc->action = (struct irqaction *)action) == NULL )
+        {
+            DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
+            rc = -ENOMEM;
+            goto out;
+        }
+
+        action->nr_guests = 0;
+        action->in_flight = 0;
+        action->shareable = will_share;
+        
+        desc->depth = 0;
+        desc->status |= IRQ_GUEST;
+        desc->status &= ~IRQ_DISABLED;
+        desc->handler->startup(irq);
+
+        /* Attempt to bind the interrupt target to the correct CPU. */
+#if 0 /* FIXME CONFIG_SMP ??? */
+        if ( desc->handler->set_affinity != NULL )
+            desc->handler->set_affinity(
+                irq, apicid_to_phys_cpu_present(d->processor));
+#endif
+    }
+    else if ( !will_share || !action->shareable )
+    {
+        DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
+                irq);
+        rc = -EBUSY;
+        goto out;
+    }
+
+    if ( action->nr_guests == IRQ_MAX_GUESTS )
+    {
+        DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
+        rc = -EBUSY;
+        goto out;
+    }
+
+    action->guest[action->nr_guests++] = d;
+
+ out:
+    spin_unlock_irqrestore(&desc->lock, flags);
+    return rc;
+}
+
+int pirq_guest_unbind(struct domain *d, int irq)
+{
+    irq_desc_t         *desc = &irq_desc[irq];
+    irq_guest_action_t *action;
+    unsigned long       flags;
+    int                 i;
+
+    spin_lock_irqsave(&desc->lock, flags);
+
+    action = (irq_guest_action_t *)desc->action;
+
+    if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+         (--action->in_flight == 0) )
+        desc->handler->end(irq);
+
+    if ( action->nr_guests == 1 )
+    {
+        desc->action = NULL;
+        xfree(action);
+        desc->depth   = 1;
+        desc->status |= IRQ_DISABLED;
+        desc->status &= ~IRQ_GUEST;
+        desc->handler->shutdown(irq);
+    }
+    else
+    {
+        i = 0;
+        while ( action->guest[i] != d )
+            i++;
+        memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+        action->nr_guests--;
+    }
+
+    spin_unlock_irqrestore(&desc->lock, flags);    
+    return 0;
+}
+
+#endif
+
+#ifdef XEN
+#ifdef IA64
+// this is a temporary hack until real console input is implemented
+irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs 
*regs)
+{
+       domain_pend_keyboard_interrupt(irq);
+}
+
+void serial_input_init(void)
+{
+       int retval;
+       int irq = 0x30; // FIXME
+
+       retval = 
request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL);
+       if (retval) {
+               printk("serial_input_init: broken request_irq call\n");
+               while(1);
+       }
+}
+#endif
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/ivt.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/ivt.S   Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1975 @@
+
+#ifdef XEN
+//#define CONFIG_DISABLE_VHPT  // FIXME: change when VHPT is enabled??
+// these are all hacked out for now as the entire IVT
+// will eventually be replaced... just want to use it
+// for startup code to handle TLB misses
+//#define ia64_leave_kernel 0
+//#define ia64_ret_from_syscall 0
+//#define ia64_handle_irq 0
+//#define ia64_fault 0
+#define ia64_illegal_op_fault 0
+#define ia64_prepare_handle_unaligned 0
+#define ia64_bad_break 0
+#define ia64_trace_syscall 0
+#define sys_call_table 0
+#define sys_ni_syscall 0
+#include <asm/vhpt.h>
+#endif
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *     Stephane Eranian <eranian@xxxxxxxxxx>
+ *     David Mosberger <davidm@xxxxxxxxxx>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *     Asit Mallick <asit.k.mallick@xxxxxxxxx>
+ *      Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ *      Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
+ *      Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now 
uses virtual PT.
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *             // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ *  entry offset ----/     /         /                  /          /
+ *  entry number ---------/         /                  /          /
+ *  size of the entry -------------/                  /          /
+ *  vector name -------------------------------------/          /
+ *  interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#if 1
+# define PSR_DEFAULT_BITS      psr.ac
+#else
+# define PSR_DEFAULT_BITS      0
+#endif
+
+#if 0
+  /*
+   * This lets you track the last eight faults that occurred on the CPU.  Make 
sure ar.k2 isn't
+   * needed for something else before enabling this...
+   */
+# define DBG_FAULT(i)  mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov 
ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#define MINSTATE_VIRT  /* needed by minstate.h */
+#include "minstate.h"
+
+#define FAULT(n)                                                               
        \
+       mov r31=pr;                                                             
        \
+       mov r19=n;;                     /* prepare to save predicates */        
        \
+       br.sptk.many dispatch_to_fault_handler
+
+#ifdef XEN
+#define REFLECT(n)                                                             
        \
+       mov r31=pr;                                                             
        \
+       mov r19=n;;                     /* prepare to save predicates */        
        \
+       br.sptk.many dispatch_reflection
+#endif
+
+       .section .text.ivt,"ax"
+
+       .align 32768    // align on 32KB boundary
+       .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+       DBG_FAULT(0)
+       /*
+        * The VHPT vector is invoked when the TLB entry for the virtual page 
table
+        * is missing.  This happens only as a result of a previous
+        * (the "original") TLB miss, which may either be caused by an 
instruction
+        * fetch or a data access (or non-access).
+        *
+        * What we do here is normal TLB miss handling for the _original_ miss, 
followed
+        * by inserting the TLB entry for the virtual page table page that the 
VHPT
+        * walker was attempting to access.  The latter gets inserted as long
+        * as both L1 and L2 have valid mappings for the faulting address.
+        * The TLB entry for the original miss gets inserted only if
+        * the L3 entry indicates that the page is present.
+        *
+        * do_page_fault gets invoked in the following cases:
+        *      - the faulting virtual address uses unimplemented address bits
+        *      - the faulting virtual address has no L1, L2, or L3 mapping
+        */
+       mov r16=cr.ifa                          // get address that caused the 
TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+       movl r18=PAGE_SHIFT
+       mov r25=cr.itir
+#endif
+       ;;
+       rsm psr.dt                              // use physical addressing for 
data
+       mov r31=pr                              // save the predicate registers
+#ifdef XEN
+       movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
+       mov r19=IA64_KR(PT_BASE)                // get page table base address
+#endif
+       shl r21=r16,3                           // shift bit 60 into sign bit
+       shr.u r17=r16,61                        // get the region number into 
r17
+       ;;
+       shr r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+       extr.u r26=r25,2,6
+       ;;
+       cmp.ne p8,p0=r18,r26
+       sub r27=r26,r18
+       ;;
+(p8)   dep r25=r18,r25,2,6
+(p8)   shr r22=r22,r27
+#endif
+       ;;
+       cmp.eq p6,p7=5,r17                      // is IFA pointing into to 
region 5?
+       shr.u r18=r22,PGDIR_SHIFT               // get bits 33-63 of the 
faulting address
+       ;;
+(p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
+
+       srlz.d
+       LOAD_PHYSICAL(p6, r19, swapper_pg_dir)  // region 5 is rooted at 
swapper_pg_dir
+
+       .pred.rel "mutex", p6, p7
+(p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+       ;;
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+       cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
+       shr.u r18=r22,PMD_SHIFT                 // shift L2 index into position
+       ;;
+       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
+       ;;
+(p7)   ld8 r20=[r17]                           // fetch the L2 entry (may be 0)
+       shr.u r19=r22,PAGE_SHIFT                // shift L3 index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r20,r0            // was L2 entry NULL?
+       dep r21=r19,r20,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
+       ;;
+(p7)   ld8 r18=[r21]                           // read the L3 PTE
+       mov r19=cr.isr                          // cr.isr bit 0 tells us if 
this is an insn miss
+       ;;
+(p7)   tbit.z p6,p7=r18,_PAGE_P_BIT            // page present bit cleared?
+       mov r22=cr.iha                          // get the VHPT address that 
caused the TLB miss
+       ;;                                      // avoid RAW on p7
+(p7)   tbit.nz.unc p10,p11=r19,32              // is it an instruction TLB 
miss?
+       dep r23=0,r20,0,PAGE_SHIFT              // clear low bits to get page 
address
+       ;;
+(p10)  itc.i r18                               // insert the instruction TLB 
entry
+(p11)  itc.d r18                               // insert the data TLB entry
+(p6)   br.cond.spnt.many page_fault            // handle bad address/page not 
present (page fault)
+       mov cr.ifa=r22
+
+#ifdef CONFIG_HUGETLB_PAGE
+(p8)   mov cr.itir=r25                         // change to default page-size 
for VHPT
+#endif
+
+       /*
+        * Now compute and insert the TLB entry for the virtual page table.  We 
never
+        * execute in a page table page so there is no need to set the 
exception deferral
+        * bit.
+        */
+       adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+       ;;
+(p7)   itc.d r24
+       ;;
+#ifdef CONFIG_SMP
+       /*
+        * Tell the assembler's dependency-violation checker that the above 
"itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+
+       /*
+        * Re-check L2 and L3 pagetable.  If they changed, we may have received 
a ptc.g
+        * between reading the pagetable and the "itc".  If so, flush the entry 
we
+        * inserted and retry.
+        */
+       ld8 r25=[r21]                           // read L3 PTE again
+       ld8 r26=[r17]                           // read L2 entry again
+       ;;
+       cmp.ne p6,p7=r26,r20                    // did L2 entry change
+       mov r27=PAGE_SHIFT<<2
+       ;;
+(p6)   ptc.l r22,r27                           // purge PTE page translation
+(p7)   cmp.ne.or.andcm p6,p7=r25,r18           // did L3 PTE change
+       ;;
+(p6)   ptc.l r16,r27                           // purge translation
+#endif
+
+       mov pr=r31,-1                           // restore predicate registers
+       rfi
+END(vhpt_miss)
+
+       .org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+       DBG_FAULT(1)
+#ifdef XEN
+       VHPT_CCHAIN_LOOKUP(itlb_miss,i)
+#ifdef VHPT_GLOBAL
+       br.cond.sptk page_fault
+       ;;
+#endif
+#endif
+       /*
+        * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+        * page table.  If a nested TLB miss occurs, we switch into physical
+        * mode, walk the page table, and then re-execute the L3 PTE read
+        * and go on normally after that.
+        */
+       mov r16=cr.ifa                          // get virtual address
+       mov r29=b0                              // save b0
+       mov r31=pr                              // save predicates
+.itlb_fault:
+       mov r17=cr.iha                          // get virtual address of L3 PTE
+       movl r30=1f                             // load nested fault 
continuation point
+       ;;
+1:     ld8 r18=[r17]                           // read L3 PTE
+       ;;
+       mov b0=r29
+       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
+(p6)   br.cond.spnt page_fault
+       ;;
+       itc.i r18
+       ;;
+#ifdef CONFIG_SMP
+       /*
+        * Tell the assembler's dependency-violation checker that the above 
"itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+
+       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       mov r20=PAGE_SHIFT<<2                   // setup page size for purge
+       ;;
+       cmp.ne p7,p0=r18,r19
+       ;;
+(p7)   ptc.l r16,r20
+#endif
+       mov pr=r31,-1
+       rfi
+END(itlb_miss)
+
+       .org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+       DBG_FAULT(2)
+#ifdef XEN
+       VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
+#ifdef VHPT_GLOBAL
+       br.cond.sptk page_fault
+       ;;
+#endif
+#endif
+       /*
+        * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+        * page table.  If a nested TLB miss occurs, we switch into physical
+        * mode, walk the page table, and then re-execute the L3 PTE read
+        * and go on normally after that.
+        */
+       mov r16=cr.ifa                          // get virtual address
+       mov r29=b0                              // save b0
+       mov r31=pr                              // save predicates
+dtlb_fault:
+       mov r17=cr.iha                          // get virtual address of L3 PTE
+       movl r30=1f                             // load nested fault 
continuation point
+       ;;
+1:     ld8 r18=[r17]                           // read L3 PTE
+       ;;
+       mov b0=r29
+       tbit.z p6,p0=r18,_PAGE_P_BIT            // page present bit cleared?
+(p6)   br.cond.spnt page_fault
+       ;;
+       itc.d r18
+       ;;
+#ifdef CONFIG_SMP
+       /*
+        * Tell the assembler's dependency-violation checker that the above 
"itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+
+       ld8 r19=[r17]                           // read L3 PTE again and see if 
same
+       mov r20=PAGE_SHIFT<<2                   // setup page size for purge
+       ;;
+       cmp.ne p7,p0=r18,r19
+       ;;
+(p7)   ptc.l r16,r20
+#endif
+       mov pr=r31,-1
+       rfi
+END(dtlb_miss)
+
+       .org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+       DBG_FAULT(3)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+//     VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
+//     br.cond.sptk page_fault
+//     ;;
+//#endif
+#endif
+#ifdef XEN
+       mov r31=pr
+       mov r16=cr.ifa          // get address that caused the TLB miss
+       ;;
+late_alt_itlb_miss:
+       movl r17=PAGE_KERNEL
+       mov r21=cr.ipsr
+       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+       ;;
+#else
+       mov r16=cr.ifa          // get address that caused the TLB miss
+       movl r17=PAGE_KERNEL
+       mov r21=cr.ipsr
+       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+       mov r31=pr
+       ;;
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+       shr.u r22=r16,61                        // get the region number into 
r21
+       ;;
+       cmp.gt p8,p0=6,r22                      // user mode
+       ;;
+(p8)   thash r17=r16
+       ;;
+(p8)   mov cr.iha=r17
+(p8)   mov r29=b0                              // save b0
+(p8)   br.cond.dptk .itlb_fault
+#endif
+       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
+       and r19=r19,r16         // clear ed, reserved bits, and PTE control bits
+#ifdef XEN
+       shr.u r18=r16,55        // move address bit 59 to bit 4
+       ;;
+       and r18=0x10,r18        // bit 4=address-bit(59)
+#else
+       shr.u r18=r16,57        // move address bit 61 to bit 4
+       ;;
+       andcm r18=0x10,r18      // bit 4=~address-bit(61)
+#endif
+       cmp.ne p8,p0=r0,r23     // psr.cpl != 0?
+       or r19=r17,r19          // insert PTE control bits into r19
+       ;;
+       or r19=r19,r18          // set bit 4 (uncached) if the access was to 
region 6
+(p8)   br.cond.spnt page_fault
+       ;;
+       itc.i r19               // insert the TLB entry
+       mov pr=r31,-1
+       rfi
+END(alt_itlb_miss)
+
+       .org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+       DBG_FAULT(4)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+//     VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
+//     br.cond.sptk page_fault
+//     ;;
+//#endif
+#endif
+#ifdef XEN
+       mov r31=pr
+       mov r16=cr.ifa          // get address that caused the TLB miss
+       ;;
+late_alt_dtlb_miss:
+       movl r17=PAGE_KERNEL
+       mov r20=cr.isr
+       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+       mov r21=cr.ipsr
+       ;;
+#else
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+       shr.u r22=r16,61                        // get the region number into 
r21
+       ;;
+       cmp.gt p8,p0=6,r22                      // access to region 0-5
+       ;;
+(p8)   thash r17=r16
+       ;;
+(p8)   mov cr.iha=r17
+(p8)   mov r29=b0                              // save b0
+(p8)   br.cond.dptk dtlb_fault
+#endif
+       extr.u r23=r21,IA64_PSR_CPL0_BIT,2      // extract psr.cpl
+       and r22=IA64_ISR_CODE_MASK,r20          // get the isr.code field
+       tbit.nz p6,p7=r20,IA64_ISR_SP_BIT       // is speculation bit on?
+#ifdef XEN
+       shr.u r18=r16,55                        // move address bit 59 to bit 4
+       and r19=r19,r16                         // clear ed, reserved bits, and 
PTE control bits
+       tbit.nz p9,p0=r20,IA64_ISR_NA_BIT       // is non-access bit on?
+       ;;
+       and r18=0x10,r18        // bit 4=address-bit(59)
+#else
+       shr.u r18=r16,57                        // move address bit 61 to bit 4
+       and r19=r19,r16                         // clear ed, reserved bits, and 
PTE control bits
+       tbit.nz p9,p0=r20,IA64_ISR_NA_BIT       // is non-access bit on?
+       ;;
+       andcm r18=0x10,r18      // bit 4=~address-bit(61)
+#endif
+       cmp.ne p8,p0=r0,r23
+(p9)   cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22  // check isr.code field
+(p8)   br.cond.spnt page_fault
+#ifdef XEN
+       ;;
+       // Test for Xen address, if not handle via page_fault
+       // note that 0xf000 (cached) and 0xe800 (uncached) addresses
+       // should be OK.
+       extr.u r22=r16,59,5;;
+       cmp.eq p8,p0=0x1e,r22
+(p8)   br.cond.spnt 1f;;
+       cmp.ne p8,p0=0x1d,r22
+(p8)   br.cond.sptk page_fault ;;
+1:
+#endif
+
+       dep r21=-1,r21,IA64_PSR_ED_BIT,1
+       or r19=r19,r17          // insert PTE control bits into r19
+       ;;
+       or r19=r19,r18          // set bit 4 (uncached) if the access was to 
region 6
+(p6)   mov cr.ipsr=r21
+       ;;
+(p7)   itc.d r19               // insert the TLB entry
+       mov pr=r31,-1
+       rfi
+END(alt_dtlb_miss)
+
+       .org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+       /*
+        * In the absence of kernel bugs, we get here when the virtually mapped 
linear
+        * page table is accessed non-speculatively (e.g., in the Dirty-bit, 
Instruction
+        * Access-bit, or Data Access-bit faults).  If the DTLB entry for the 
virtual page
+        * table is missing, a nested TLB miss fault is triggered and control is
+        * transferred to this point.  When this happens, we lookup the pte for 
the
+        * faulting address by walking the page table in physical mode and 
return to the
+        * continuation point passed in register r30 (or call page_fault if the 
address is
+        * not mapped).
+        *
+        * Input:       r16:    faulting address
+        *              r29:    saved b0
+        *              r30:    continuation address
+        *              r31:    saved pr
+        *
+        * Output:      r17:    physical address of L3 PTE of faulting address
+        *              r29:    saved b0
+        *              r30:    continuation address
+        *              r31:    saved pr
+        *
+        * Clobbered:   b0, r18, r19, r21, psr.dt (cleared)
+        */
+       rsm psr.dt                              // switch to using physical 
data addressing
+#ifdef XEN
+       movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
+       mov r19=IA64_KR(PT_BASE)                // get the page table base 
address
+#endif
+       shl r21=r16,3                           // shift bit 60 into sign bit
+       ;;
+       shr.u r17=r16,61                        // get the region number into 
r17
+       ;;
+       cmp.eq p6,p7=5,r17                      // is faulting address in 
region 5?
+       shr.u r18=r16,PGDIR_SHIFT               // get bits 33-63 of faulting 
address
+       ;;
+(p7)   dep r17=r17,r19,(PAGE_SHIFT-3),3        // put region number bits in 
place
+
+       srlz.d
+       LOAD_PHYSICAL(p6, r19, swapper_pg_dir)  // region 5 is rooted at 
swapper_pg_dir
+
+       .pred.rel "mutex", p6, p7
+(p6)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)   shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+       ;;
+(p6)   dep r17=r18,r19,3,(PAGE_SHIFT-3)        // r17=PTA + IFA(33,42)*8
+(p7)   dep r17=r18,r17,3,(PAGE_SHIFT-6)        // r17=PTA + (((IFA(61,63) << 
7) | IFA(33,39))*8)
+       cmp.eq p7,p6=0,r21                      // unused address bits all 
zeroes?
+       shr.u r18=r16,PMD_SHIFT                 // shift L2 index into position
+       ;;
+       ld8 r17=[r17]                           // fetch the L1 entry (may be 0)
+       ;;
+(p7)   cmp.eq p6,p7=r17,r0                     // was L1 entry NULL?
+       dep r17=r18,r17,3,(PAGE_SHIFT-3)        // compute address of L2 page 
table entry
+       ;;
+(p7)   ld8 r17=[r17]                           // fetch the L2 entry (may be 0)
+       shr.u r19=r16,PAGE_SHIFT                // shift L3 index into position
+       ;;
+(p7)   cmp.eq.or.andcm p6,p7=r17,r0            // was L2 entry NULL?
+       dep r17=r19,r17,3,(PAGE_SHIFT-3)        // compute address of L3 page 
table entry
+(p6)   br.cond.spnt page_fault
+       mov b0=r30
+       br.sptk.many b0                         // return to continuation point
+END(nested_dtlb_miss)
+
+       .org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+#ifdef XEN
+       REFLECT(6)
+#endif
+       DBG_FAULT(6)
+       FAULT(6)
+END(ikey_miss)
+
+       //-----------------------------------------------------------------------------------
+       // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+       ssm psr.dt
+       ;;
+       srlz.i
+       ;;
+       SAVE_MIN_WITH_COVER
+#ifdef XEN
+       // Xen passes one extra argument (cr.itir as out3) to its fault handler
+       alloc r15=ar.pfs,0,0,4,0
+       mov out0=cr.ifa
+       mov out1=cr.isr
+       mov out3=cr.itir
+#else
+       alloc r15=ar.pfs,0,0,3,0
+       mov out0=cr.ifa
+       mov out1=cr.isr
+#endif
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       movl r14=ia64_leave_kernel
+       ;;
+       SAVE_REST
+       mov rp=r14
+       ;;
+       adds out2=16,r12                        // out2 = pointer to pt_regs
+       br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+END(page_fault)
+
+       .org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+#ifdef XEN
+       // Xen: presumably hands this fault to the guest (REFLECT macro is
+       // defined elsewhere in this patch) -- TODO confirm against its definition
+       REFLECT(7)
+#endif
+       DBG_FAULT(7)
+       FAULT(7)
+END(dkey_miss)
+
+       .org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+#ifdef XEN
+       REFLECT(8)
+#endif
+       DBG_FAULT(8)
+       /*
+        * What we do here is to simply turn on the dirty bit in the PTE.  We need to
+        * update both the page-table and the TLB entry.  To efficiently access the PTE,
+        * we address it through the virtual page table.  Most likely, the TLB entry for
+        * the relevant virtual page table page is still present in the TLB so we can
+        * normally do this without additional TLB misses.  In case the necessary virtual
+        * page table TLB entry isn't present, we take a nested TLB miss hit where we look
+        * up the physical address of the L3 PTE and then continue at label 1 below.
+        */
+       mov r16=cr.ifa                          // get the address that caused the fault
+       movl r30=1f                             // load continuation point in case of nested fault
+       ;;
+       thash r17=r16                           // compute virtual address of L3 PTE
+       mov r29=b0                              // save b0 in case of nested fault
+       mov r31=pr                              // save pr
+#ifdef CONFIG_SMP
+       mov r28=ar.ccv                          // save ar.ccv
+       ;;
+1:     ld8 r18=[r17]
+       ;;                                      // avoid RAW on r18
+       mov ar.ccv=r18                          // set compare value for cmpxchg
+       or r25=_PAGE_D|_PAGE_A,r18              // set the dirty and accessed bits
+       ;;
+       // atomic update so a racing CPU cannot lose its own PTE update
+       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       mov r24=PAGE_SHIFT<<2
+       ;;
+       cmp.eq p6,p7=r26,r18
+       ;;
+(p6)   itc.d r25                               // install updated PTE
+       ;;
+       /*
+        * Tell the assembler's dependency-violation checker that the above "itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+
+       ld8 r18=[r17]                           // read PTE again
+       ;;
+       cmp.eq p6,p7=r18,r25                    // is it same as the newly installed
+       ;;
+(p7)   ptc.l r16,r24                           // PTE changed under us: purge the stale TC entry
+       mov b0=r29                              // restore b0
+       mov ar.ccv=r28
+#else
+       ;;
+1:     ld8 r18=[r17]
+       ;;                                      // avoid RAW on r18
+       or r18=_PAGE_D|_PAGE_A,r18              // set the dirty and accessed bits
+       mov b0=r29                              // restore b0
+       ;;
+       st8 [r17]=r18                           // store back updated PTE
+       itc.d r18                               // install updated PTE
+#endif
+       mov pr=r31,-1                           // restore pr
+       rfi
+END(dirty_bit)
+
+       .org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+#ifdef XEN
+       // Xen: reflect via the fast path; r19/r20 carry vector number and IVT offset
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=9
+       movl r20=0x2400
+       br.sptk.many fast_access_reflect;;
+#endif
+       DBG_FAULT(9)
+       // Like Entry 8, except for instruction access
+       mov r16=cr.ifa                          // get the address that caused the fault
+       movl r30=1f                             // load continuation point in case of nested fault
+       mov r31=pr                              // save predicates
+#ifdef CONFIG_ITANIUM
+       /*
+        * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+        */
+       mov r17=cr.ipsr
+       ;;
+       mov r18=cr.iip
+       tbit.z p6,p0=r17,IA64_PSR_IS_BIT        // IA64 instruction set?
+       ;;
+(p6)   mov r16=r18                             // if so, use cr.iip instead of cr.ifa
+#endif /* CONFIG_ITANIUM */
+       ;;
+       thash r17=r16                           // compute virtual address of L3 PTE
+       mov r29=b0                              // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+       mov r28=ar.ccv                          // save ar.ccv
+       ;;
+1:     ld8 r18=[r17]
+       ;;
+       mov ar.ccv=r18                          // set compare value for cmpxchg
+       or r25=_PAGE_A,r18                      // set the accessed bit
+       ;;
+       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       mov r24=PAGE_SHIFT<<2
+       ;;
+       cmp.eq p6,p7=r26,r18
+       ;;
+(p6)   itc.i r25                               // install updated PTE
+       ;;
+       /*
+        * Tell the assembler's dependency-violation checker that the above "itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+
+       ld8 r18=[r17]                           // read PTE again
+       ;;
+       cmp.eq p6,p7=r18,r25                    // is it same as the newly installed
+       ;;
+(p7)   ptc.l r16,r24                           // PTE changed under us: purge the stale TC entry
+       mov b0=r29                              // restore b0
+       mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+       ;;
+1:     ld8 r18=[r17]
+       ;;
+       or r18=_PAGE_A,r18                      // set the accessed bit
+       mov b0=r29                              // restore b0
+       ;;
+       st8 [r17]=r18                           // store back updated PTE
+       itc.i r18                               // install updated PTE
+#endif /* !CONFIG_SMP */
+       mov pr=r31,-1
+       rfi
+END(iaccess_bit)
+
+       .org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+#ifdef XEN
+       // Xen: reflect via the fast path; r19/r20 carry vector number and IVT offset
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=10
+       movl r20=0x2800
+       br.sptk.many fast_access_reflect;;
+#endif
+       DBG_FAULT(10)
+       // Like Entry 8, except for data access
+       mov r16=cr.ifa                          // get the address that caused the fault
+       movl r30=1f                             // load continuation point in case of nested fault
+       ;;
+       thash r17=r16                           // compute virtual address of L3 PTE
+       mov r31=pr
+       mov r29=b0                              // save b0 in case of nested fault
+#ifdef CONFIG_SMP
+       mov r28=ar.ccv                          // save ar.ccv
+       ;;
+1:     ld8 r18=[r17]
+       ;;                                      // avoid RAW on r18
+       mov ar.ccv=r18                          // set compare value for cmpxchg
+       or r25=_PAGE_A,r18                      // set the accessed bit
+       ;;
+       cmpxchg8.acq r26=[r17],r25,ar.ccv
+       mov r24=PAGE_SHIFT<<2
+       ;;
+       cmp.eq p6,p7=r26,r18
+       ;;
+(p6)   itc.d r25                               // install updated PTE
+       /*
+        * Tell the assembler's dependency-violation checker that the above "itc" instructions
+        * cannot possibly affect the following loads:
+        */
+       dv_serialize_data
+       ;;
+       ld8 r18=[r17]                           // read PTE again
+       ;;
+       cmp.eq p6,p7=r18,r25                    // is it same as the newly installed
+       ;;
+(p7)   ptc.l r16,r24                           // PTE changed under us: purge the stale TC entry
+       mov ar.ccv=r28
+#else
+       ;;
+1:     ld8 r18=[r17]
+       ;;                                      // avoid RAW on r18
+       or r18=_PAGE_A,r18                      // set the accessed bit
+       ;;
+       st8 [r17]=r18                           // store back updated PTE
+       itc.d r18                               // install updated PTE
+#endif
+       mov b0=r29                              // restore b0
+       mov pr=r31,-1
+       rfi
+END(daccess_bit)
+
+       .org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+       /*
+        * The streamlined system call entry/exit paths only save/restore the initial part
+        * of pt_regs.  This implies that the callers of system-calls must adhere to the
+        * normal procedure calling conventions.
+        *
+        *   Registers to be saved & restored:
+        *      CR registers: cr.ipsr, cr.iip, cr.ifs
+        *      AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+        *      others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+        *   Registers to be restored only:
+        *      r8-r11: output value from the system call.
+        *
+        * During system call exit, scratch registers (including r15) are modified/cleared
+        * to prevent leaking bits from kernel to user level.
+        */
+       DBG_FAULT(11)
+#ifdef XEN
+       // Xen triage: pseudo-cover -> privop dispatch; guest psr.ic off -> hyperprivop;
+       // Xen's own break immediate -> full break dispatch; anything else is
+       // reflected to the guest via the fast path.
+       mov r16=cr.isr
+       mov r17=cr.iim
+       mov r31=pr
+       ;;
+       movl r18=XSI_PSR_IC
+       ;;
+       ld8 r19=[r18]
+       ;;
+       cmp.eq p7,p0=r0,r17                     // is this a pseudo-cover?
+(p7)   br.spnt.many dispatch_privop_fault
+       ;;
+       // if vpsr.ic is off, we have a hyperprivop
+       // A hyperprivop is hand-coded assembly with psr.ic off
+       // which means no calls, no use of r1-r15 and no memory accesses
+       // except to pinned addresses!
+       cmp4.eq p7,p0=r0,r19
+(p7)   br.sptk.many fast_hyperprivop
+       ;;
+       movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r22 = [r22]
+       ;;
+       adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;;
+       ld4 r23=[r22];;
+       cmp4.eq p6,p7=r23,r17                   // Xen-reserved breakimm?
+(p6)   br.spnt.many dispatch_break_fault
+       ;;
+       br.sptk.many fast_break_reflect
+       ;;
+#endif
+       movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+       ld8 r16=[r16]
+       mov r17=cr.iim
+       mov r18=__IA64_BREAK_SYSCALL
+       mov r21=ar.fpsr
+       mov r29=cr.ipsr
+       mov r19=b6
+       mov r25=ar.unat
+       mov r27=ar.rsc
+       mov r26=ar.pfs
+       mov r28=cr.iip
+#ifndef XEN
+       mov r31=pr                              // prepare to save predicates
+#endif
+       mov r20=r1
+       ;;
+       adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+       cmp.eq p0,p7=r18,r17                    // is this a system call? (p7 <- false, if so)
+(p7)   br.cond.spnt non_syscall
+       ;;
+       ld1 r17=[r16]                           // load current->thread.on_ustack flag
+       st1 [r16]=r0                            // clear current->thread.on_ustack flag
+       add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16   // set r1 for MINSTATE_START_SAVE_MIN_VIRT
+       ;;
+       invala
+
+       /* adjust return address so we skip over the break instruction: */
+
+       extr.u r8=r29,41,2                      // extract ei field from cr.ipsr
+       ;;
+       cmp.eq p6,p7=2,r8                       // isr.ei==2?
+       mov r2=r1                               // setup r2 for ia64_syscall_setup
+       ;;
+(p6)   mov r8=0                                // clear ei to 0
+(p6)   adds r28=16,r28                         // switch cr.iip to next bundle cr.ipsr.ei wrapped
+(p7)   adds r8=1,r8                            // increment ei to next slot
+       ;;
+       cmp.eq pKStk,pUStk=r0,r17               // are we in kernel mode already?
+       dep r29=r8,r29,41,2                     // insert new ei into cr.ipsr
+       ;;
+
+       // switch from user to kernel RBS:
+       MINSTATE_START_SAVE_MIN_VIRT
+       br.call.sptk.many b7=ia64_syscall_setup
+       ;;
+       MINSTATE_END_SAVE_MIN_VIRT              // switch to bank 1
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       mov r3=NR_syscalls - 1
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       // p10==true means out registers are more than 8 or r15's Nat is true
+(p10)  br.cond.spnt.many ia64_ret_from_syscall
+       ;;
+       movl r16=sys_call_table
+
+       adds r15=-1024,r15                      // r15 contains the syscall number---subtract 1024
+       movl r2=ia64_ret_from_syscall
+       ;;
+       shladd r20=r15,3,r16                    // r20 = sys_call_table + 8*(syscall-1024)
+       cmp.leu p6,p7=r15,r3                    // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
+       mov rp=r2                               // set the real return addr
+       ;;
+(p6)   ld8 r20=[r20]                           // load address of syscall entry point
+(p7)   movl r20=sys_ni_syscall                 // out-of-range syscall number
+
+       add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+       ;;
+       ld4 r2=[r2]                             // r2 = current_thread_info()->flags
+       ;;
+       and r2=_TIF_SYSCALL_TRACEAUDIT,r2       // mask trace or audit
+       ;;
+       cmp.eq p8,p0=r2,r0
+       mov b6=r20
+       ;;
+(p8)   br.call.sptk.many b6=b6                 // ignore this return addr
+       br.cond.sptk ia64_trace_syscall
+       // NOT REACHED
+END(break_fault)
+
+       .org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+       DBG_FAULT(12)
+       mov r31=pr              // prepare to save predicates
+       ;;
+#ifdef XEN
+       mov r30=cr.ivr          // pass cr.ivr as first arg
+       // FIXME: this is a hack... use cpuinfo.ksoftirqd because it's
+       // not used anywhere else and we need a place to stash ivr and
+       // there's no registers available unused by SAVE_MIN/REST
+       movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+       st8 [r29]=r30;;
+       movl r28=slow_interrupt;;
+       mov r29=rp;;
+       mov rp=r28;;
+       br.cond.sptk.many fast_tick_reflect
+       ;;
+slow_interrupt:                                // fast_tick_reflect returns here when it punts
+       mov rp=r29;;
+#endif
+       SAVE_MIN_WITH_COVER     // uses r31; defines r2 and r3
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       adds r3=8,r2            // set up second base pointer for SAVE_REST
+       srlz.i                  // ensure everybody knows psr.ic is back on
+       ;;
+       SAVE_REST
+       ;;
+       alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef XEN
+       // retrieve the ivr value stashed above (SAVE_MIN/REST clobbered the register)
+       movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+       ld8 out0=[out0];;
+#else
+       mov out0=cr.ivr         // pass cr.ivr as first arg
+#endif
+       add out1=16,sp          // pass pointer to pt_regs as second arg
+       ;;
+       srlz.d                  // make sure we see the effect of cr.ivr
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=ia64_handle_irq
+END(interrupt)
+
+       .org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+       DBG_FAULT(13)
+       FAULT(13)
+
+#ifdef XEN
+       // There is no particular reason for this code to be here, other than that
+       // there happens to be space here that would go unused otherwise.  If this
+       // fault ever gets "unreserved", simply move the following code to a more
+       // suitable spot...
+
+       // Slow-path break dispatcher: save full state and hand off to the C handler.
+GLOBAL_ENTRY(dispatch_break_fault)
+       SAVE_MIN_WITH_COVER
+       ;;
+dispatch_break_fault_post_save:
+       alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+       mov out0=cr.ifa
+       adds out1=16,sp
+       mov out2=cr.isr         // FIXME: pity to make this slow access twice
+       mov out3=cr.iim         // FIXME: pity to make this slow access twice
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.sptk.many ia64_prepare_handle_break
+END(dispatch_break_fault)
+#endif
+
+       .org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+       DBG_FAULT(14)
+       FAULT(14)
+
+       /*
+        * There is no particular reason for this code to be here, other than that
+        * there happens to be space here that would go unused otherwise.  If this
+        * fault ever gets "unreserved", simply move the following code to a more
+        * suitable spot...
+        *
+        * ia64_syscall_setup() is a separate subroutine so that it can
+        *      allocate stacked registers so it can safely demine any
+        *      potential NaT values from the input registers.
+        *
+        * On entry:
+        *      - executing on bank 0 or bank 1 register set (doesn't matter)
+        *      -  r1: stack pointer
+        *      -  r2: current task pointer
+        *      -  r3: preserved
+        *      - r11: original contents (saved ar.pfs to be saved)
+        *      - r12: original contents (sp to be saved)
+        *      - r13: original contents (tp to be saved)
+        *      - r15: original contents (syscall # to be saved)
+        *      - r18: saved bsp (after switching to kernel stack)
+        *      - r19: saved b6
+        *      - r20: saved r1 (gp)
+        *      - r21: saved ar.fpsr
+        *      - r22: kernel's register backing store base (krbs_base)
+        *      - r23: saved ar.bspstore
+        *      - r24: saved ar.rnat
+        *      - r25: saved ar.unat
+        *      - r26: saved ar.pfs
+        *      - r27: saved ar.rsc
+        *      - r28: saved cr.iip
+        *      - r29: saved cr.ipsr
+        *      - r31: saved pr
+        *      -  b0: original contents (to be saved)
+        * On exit:
+        *      - executing on bank 1 registers
+        *      - psr.ic enabled, interrupts restored
+        *      -  p10: TRUE if syscall is invoked with more than 8 out
+        *              registers or r15's Nat is true
+        *      -  r1: kernel's gp
+        *      -  r3: preserved (same as on entry)
+        *      -  r8: -EINVAL if p10 is true
+        *      - r12: points to kernel stack
+        *      - r13: points to current task
+        *      - p15: TRUE if interrupts need to be re-enabled
+        *      - ar.fpsr: set to kernel settings
+        */
+GLOBAL_ENTRY(ia64_syscall_setup)
+#ifndef XEN
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+#endif
+       st8 [r1]=r19                            // save b6
+       add r16=PT(CR_IPSR),r1                  // initialize first base pointer
+       add r17=PT(R11),r1                      // initialize second base pointer
+       ;;
+       alloc r19=ar.pfs,8,0,0,0                // ensure in0-in7 are writable
+       st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR)    // save cr.ipsr
+       tnat.nz p8,p0=in0
+
+       st8.spill [r17]=r11,PT(CR_IIP)-PT(R11)  // save r11
+       tnat.nz p9,p0=in1
+(pKStk)        mov r18=r0                              // make sure r18 isn't NaT
+       ;;
+
+       st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS)     // save ar.pfs
+       st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP)    // save cr.iip
+       mov r28=b0                              // save b0 (2 cyc)
+       ;;
+
+       st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT)    // save ar.unat
+       dep r19=0,r19,38,26                     // clear all bits but 0..37 [I0]
+(p8)   mov in0=-1                              // demine NaT'd input register
+       ;;
+
+       st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS)    // store ar.pfs.pfm in cr.ifs
+       extr.u r11=r19,7,7      // I0           // get sol of ar.pfs
+       and r8=0x7f,r19         // A            // get sof of ar.pfs
+
+       st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+       tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9)   mov in1=-1                              // demine NaT'd input register
+       ;;
+
+(pUStk) sub r18=r18,r22                                // r18=RSE.ndirty*8
+       tnat.nz p10,p0=in2
+       add r11=8,r11
+       ;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16                // skip over ar_rnat field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17    // skip over ar_bspstore field
+       tnat.nz p11,p0=in3
+       ;;
+(p10)  mov in2=-1
+       tnat.nz p12,p0=in4                              // [I0]
+(p11)  mov in3=-1
+       ;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT)       // save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE)   // save ar.bspstore
+       shl r18=r18,16                          // compute ar.rsc to be used for "loadrs"
+       ;;
+       st8 [r16]=r31,PT(LOADRS)-PT(PR)         // save predicates
+       st8 [r17]=r28,PT(R1)-PT(B0)             // save b0
+       tnat.nz p13,p0=in5                              // [I0]
+       ;;
+       st8 [r16]=r18,PT(R12)-PT(LOADRS)        // save ar.rsc value for "loadrs"
+       st8.spill [r17]=r20,PT(R13)-PT(R1)      // save original r1
+(p12)  mov in4=-1
+       ;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12)       // save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13)           // save r13
+(p13)  mov in5=-1
+       ;;
+       st8 [r16]=r21,PT(R8)-PT(AR_FPSR)        // save ar.fpsr
+       tnat.nz p14,p0=in6
+       cmp.lt p10,p9=r11,r8    // frame size can't be more than local+8
+       ;;
+       stf8 [r16]=f1           // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p9)   tnat.nz p10,p0=r15
+       adds r12=-16,r1         // switch to kernel memory stack (with 16 bytes of scratch)
+
+       st8.spill [r17]=r15                     // save r15
+       tnat.nz p8,p0=in7
+       nop.i 0
+
+       mov r13=r2                              // establish `current'
+       movl r1=__gp                            // establish kernel global pointer
+       ;;
+(p14)  mov in6=-1
+(p8)   mov in7=-1
+       nop.i 0
+
+       cmp.eq pSys,pNonSys=r0,r0               // set pSys=1, pNonSys=0
+       movl r17=FPSR_DEFAULT
+       ;;
+       mov.m ar.fpsr=r17                       // set ar.fpsr to kernel default value
+(p10)  mov r8=-EINVAL
+       br.ret.sptk.many b7
+END(ia64_syscall_setup)
+
+       .org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+       DBG_FAULT(15)
+       FAULT(15)
+
+       /*
+        * Squatting in this space ...
+        *
+        * This special case dispatcher for illegal operation faults allows preserved
+        * registers to be modified through a callback function (asm only) that is handed
+        * back from the fault handler in r8. Up to three arguments can be passed to the
+        * callback function by returning an aggregate with the callback as its first
+        * element, followed by the arguments.
+        */
+ENTRY(dispatch_illegal_op_fault)
+       SAVE_MIN_WITH_COVER
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i          // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i       // restore psr.i
+       adds r3=8,r2    // set up second base pointer for SAVE_REST
+       ;;
+       alloc r14=ar.pfs,0,0,1,0        // must be first in insn group
+       mov out0=ar.ec
+       ;;
+       SAVE_REST
+       ;;
+       br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+       alloc r14=ar.pfs,0,0,3,0        // must be first in insn group
+       mov out0=r9                     // callback arguments returned by the handler
+       mov out1=r10
+       mov out2=r11
+       movl r15=ia64_leave_kernel
+       ;;
+       mov rp=r15
+       mov b6=r8                       // r8 = callback function (0 if none)
+       ;;
+       cmp.ne p6,p0=0,r8
+(p6)   br.call.dpnt.many b6=b6         // call returns to ia64_leave_kernel
+       br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
+       .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+       DBG_FAULT(16)
+       FAULT(16)
+
+#ifdef XEN
+       // There is no particular reason for this code to be here, other than that
+       // there happens to be space here that would go unused otherwise.  If this
+       // fault ever gets "unreserved", simply move the following code to a more
+       // suitable spot...
+
+       // Slow-path privileged-operation dispatcher: save full state, call C handler.
+ENTRY(dispatch_privop_fault)
+       SAVE_MIN_WITH_COVER
+       ;;
+       alloc r14=ar.pfs,0,0,4,0                // now it's safe (must be first in insn group!)
+       mov out0=cr.ifa
+       adds out1=16,sp
+       mov out2=cr.isr         // FIXME: pity to make this slow access twice
+       mov out3=cr.itir
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.sptk.many ia64_prepare_handle_privop
+END(dispatch_privop_fault)
+#endif
+
+
+       .org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+       DBG_FAULT(17)
+       FAULT(17)
+
+// Break fault whose immediate is not the syscall break: report it as a bad break.
+ENTRY(non_syscall)
+       SAVE_MIN_WITH_COVER
+
+       // There is no particular reason for this code to be here, other than that
+       // there happens to be space here that would go unused otherwise.  If this
+       // fault ever gets "unreserved", simply move the following code to a more
+       // suitable spot...
+
+       alloc r14=ar.pfs,0,0,2,0
+       mov out0=cr.iim
+       add out1=16,sp
+       adds r3=8,r2                    // set up second base pointer for SAVE_REST
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                          // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                       // restore psr.i
+       movl r15=ia64_leave_kernel
+       ;;
+       SAVE_REST
+       mov rp=r15
+       ;;
+       br.call.sptk.many b6=ia64_bad_break     // avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+       .org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+       DBG_FAULT(18)
+       FAULT(18)
+
+       /*
+        * There is no particular reason for this code to be here, other than that
+        * there happens to be space here that would go unused otherwise.  If this
+        * fault ever gets "unreserved", simply move the following code to a more
+        * suitable spot...
+        */
+
+// Slow-path unaligned-reference dispatcher: save full state, call C handler.
+ENTRY(dispatch_unaligned_handler)
+       SAVE_MIN_WITH_COVER
+       ;;
+       alloc r14=ar.pfs,0,0,2,0                // now it's safe (must be first in insn group!)
+       mov out0=cr.ifa
+       adds out1=16,sp
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+       .org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+       DBG_FAULT(19)
+       FAULT(19)
+
+       /*
+        * There is no particular reason for this code to be here, other than that
+        * there happens to be space here that would go unused otherwise.  If this
+        * fault ever gets "unreserved", simply move the following code to a more
+        * suitable spot...
+        */
+
+// Generic dispatcher used by the short entries below: saves state and calls ia64_fault.
+ENTRY(dispatch_to_fault_handler)
+       /*
+        * Input:
+        *      psr.ic: off
+        *      r19:    fault vector number (e.g., 24 for General Exception)
+        *      r31:    contains saved predicates (pr)
+        */
+       SAVE_MIN_WITH_COVER_R19
+       alloc r14=ar.pfs,0,0,5,0
+       mov out0=r15
+       mov out1=cr.isr
+       mov out2=cr.ifa
+       mov out3=cr.iim
+       mov out4=cr.itir
+       ;;
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer for SAVE_REST
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+       .org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+#ifdef XEN
+       REFLECT(20)
+#endif
+       DBG_FAULT(20)
+       mov r16=cr.ifa
+       rsm psr.dt
+       /*
+        * The Linux page fault handler doesn't expect non-present pages to be in
+        * the TLB.  Flush the existing entry now, so we meet that expectation.
+        */
+       mov r17=PAGE_SHIFT<<2
+       ;;
+       ptc.l r16,r17
+       ;;
+       mov r31=pr
+       srlz.d
+       br.sptk.many page_fault
+END(page_not_present)
+
+       .org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+#ifdef XEN
+       REFLECT(21)
+#endif
+       DBG_FAULT(21)
+       mov r16=cr.ifa
+       rsm psr.dt
+       mov r31=pr
+       ;;
+       srlz.d
+       br.sptk.many page_fault
+END(key_permission)
+
+       .org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+#ifdef XEN
+       REFLECT(22)
+#endif
+       DBG_FAULT(22)
+       mov r16=cr.ifa
+       rsm psr.dt
+       mov r31=pr
+       ;;
+       srlz.d
+       br.sptk.many page_fault
+END(iaccess_rights)
+
+       .org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+#ifdef XEN
+       // Xen: reflect via the fast path; r19/r20 carry vector number and IVT offset
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=23
+       movl r20=0x5300
+       br.sptk.many fast_access_reflect;;
+#endif
+       DBG_FAULT(23)
+       mov r16=cr.ifa
+       rsm psr.dt
+       mov r31=pr
+       ;;
+       srlz.d
+       br.sptk.many page_fault
+END(daccess_rights)
+
+       .org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+       DBG_FAULT(24)
+       mov r16=cr.isr
+       mov r31=pr
+       ;;
+#ifdef XEN
+       cmp4.ge p6,p0=0x20,r16
+(p6)   br.sptk.many dispatch_privop_fault
+#else
+       cmp4.eq p6,p0=0,r16
+(p6)   br.sptk.many dispatch_illegal_op_fault
+#endif
+       ;;
+       mov r19=24              // fault number
+       br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+       .org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+#ifdef XEN
+       REFLECT(25)
+#endif
+       DBG_FAULT(25)
+       rsm psr.dfh             // ensure we can access fph
+       ;;
+       srlz.d
+       mov r31=pr
+       mov r19=25
+       br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+       .org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+#ifdef XEN
+       REFLECT(26)
+#endif
+       DBG_FAULT(26)
+       FAULT(26)
+END(nat_consumption)
+
+       .org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+#ifdef XEN
+       // this probably need not reflect...
+       REFLECT(27)
+#endif
+       DBG_FAULT(27)
+       /*
+        * A [f]chk.[as] instruction needs to take the branch to the recovery 
code but
+        * this part of the architecture is not implemented in hardware on some 
CPUs, such
+        * as Itanium.  Thus, in general we need to emulate the behavior.  IIM 
contains
+        * the relative target (not yet sign extended).  So after sign 
extending it we
+        * simply add it to IIP.  We also need to reset the EI field of the 
IPSR to zero,
+        * i.e., the slot to restart into.
+        *
+        * cr.imm contains zero_ext(imm21)
+        */
+       mov r18=cr.iim
+       ;;
+       mov r17=cr.iip
+       shl r18=r18,43                  // put sign bit in position (43=64-21)
+       ;;
+
+       mov r16=cr.ipsr
+       shr r18=r18,39                  // sign extend (39=43-4)
+       ;;
+
+       add r17=r17,r18                 // now add the offset
+       ;;
+       mov cr.iip=r17
+       dep r16=0,r16,41,2              // clear EI
+       ;;
+
+       mov cr.ipsr=r16
+       ;;
+
+       rfi                             // and go back
+END(speculation_vector)
+
+       .org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+       DBG_FAULT(28)
+       FAULT(28)
+
+       .org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+#ifdef XEN
+       REFLECT(29)
+#endif
+       DBG_FAULT(29)
+       FAULT(29)
+END(debug_vector)
+
+       .org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+#ifdef XEN
+       REFLECT(30)
+#endif
+       DBG_FAULT(30)
+       mov r16=cr.ipsr
+       mov r31=pr              // prepare to save predicates
+       ;;
+       br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+       .org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+#ifdef XEN
+       REFLECT(31)
+#endif
+       DBG_FAULT(31)
+       FAULT(31)
+END(unsupported_data_reference)
+
+       .org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+#ifdef XEN
+       REFLECT(32)
+#endif
+       DBG_FAULT(32)
+       FAULT(32)
+END(floating_point_fault)
+
+       .org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+#ifdef XEN
+       REFLECT(33)
+#endif
+       DBG_FAULT(33)
+       FAULT(33)
+END(floating_point_trap)
+
+       .org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+#ifdef XEN
+       REFLECT(34)
+#endif
+       DBG_FAULT(34)
+       FAULT(34)
+END(lower_privilege_trap)
+
+       .org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+#ifdef XEN
+       REFLECT(35)
+#endif
+       DBG_FAULT(35)
+       FAULT(35)
+END(taken_branch_trap)
+
+       .org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+#ifdef XEN
+       REFLECT(36)
+#endif
+       DBG_FAULT(36)
+       FAULT(36)
+END(single_step_trap)
+
+       .org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+       DBG_FAULT(37)
+       FAULT(37)
+
+       .org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+       DBG_FAULT(38)
+       FAULT(38)
+
+       .org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+       DBG_FAULT(39)
+       FAULT(39)
+
+       .org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+       DBG_FAULT(40)
+       FAULT(40)
+
+       .org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+       DBG_FAULT(41)
+       FAULT(41)
+
+       .org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+       DBG_FAULT(42)
+       FAULT(42)
+
+       .org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+       DBG_FAULT(43)
+       FAULT(43)
+
+       .org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+       DBG_FAULT(44)
+       FAULT(44)
+
+       .org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 
(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+#ifdef XEN
+       REFLECT(45)
+#endif
+       DBG_FAULT(45)
+       FAULT(45)
+END(ia32_exception)
+
+       .org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(ia32_intercept)
+#ifdef XEN
+       REFLECT(46)
+#endif
+       DBG_FAULT(46)
+#ifdef CONFIG_IA32_SUPPORT
+       mov r31=pr
+       mov r16=cr.isr
+       ;;
+       extr.u r17=r16,16,8     // get ISR.code
+       mov r18=ar.eflag
+       mov r19=cr.iim          // old eflag value
+       ;;
+       cmp.ne p6,p0=2,r17
+(p6)   br.cond.spnt 1f         // not a system flag fault
+       xor r16=r18,r19
+       ;;
+       extr.u r17=r16,18,1     // get the eflags.ac bit
+       ;;
+       cmp.eq p6,p0=0,r17
+(p6)   br.cond.spnt 1f         // eflags.ac bit didn't change
+       ;;
+       mov pr=r31,-1           // restore predicate registers
+       rfi
+
+1:
+#endif // CONFIG_IA32_SUPPORT
+       FAULT(46)
+END(ia32_intercept)
+
+       .org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
+ENTRY(ia32_interrupt)
+#ifdef XEN
+       REFLECT(47)
+#endif
+       DBG_FAULT(47)
+#ifdef CONFIG_IA32_SUPPORT
+       mov r31=pr
+       br.sptk.many dispatch_to_ia32_handler
+#else
+       FAULT(47)
+#endif
+END(ia32_interrupt)
+
+       .org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+       DBG_FAULT(48)
+       FAULT(48)
+
+       .org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+       DBG_FAULT(49)
+       FAULT(49)
+
+       .org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+       DBG_FAULT(50)
+       FAULT(50)
+
+       .org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+       DBG_FAULT(51)
+       FAULT(51)
+
+       .org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+       DBG_FAULT(52)
+       FAULT(52)
+
+       .org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+       DBG_FAULT(53)
+       FAULT(53)
+
+       .org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+       DBG_FAULT(54)
+       FAULT(54)
+
+       .org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+       DBG_FAULT(55)
+       FAULT(55)
+
+       .org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+       DBG_FAULT(56)
+       FAULT(56)
+
+       .org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+       DBG_FAULT(57)
+       FAULT(57)
+
+       .org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+       DBG_FAULT(58)
+       FAULT(58)
+
+       .org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+       DBG_FAULT(59)
+       FAULT(59)
+
+       .org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+       DBG_FAULT(60)
+       FAULT(60)
+
+       .org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+       DBG_FAULT(61)
+       FAULT(61)
+
+       .org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+       DBG_FAULT(62)
+       FAULT(62)
+
+       .org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+       DBG_FAULT(63)
+       FAULT(63)
+
+       .org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+       DBG_FAULT(64)
+       FAULT(64)
+
+       .org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+       DBG_FAULT(65)
+       FAULT(65)
+
+       .org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+       DBG_FAULT(66)
+       FAULT(66)
+
+       .org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+       DBG_FAULT(67)
+       FAULT(67)
+
+#ifdef XEN
+       .org ia64_ivt+0x8000
+GLOBAL_ENTRY(dispatch_reflection)
+       /*
+        * Input:
+        *      psr.ic: off
+        *      r19:    intr type (offset into ivt, see ia64_int.h)
+        *      r31:    contains saved predicates (pr)
+        */
+       SAVE_MIN_WITH_COVER_R19
+       alloc r14=ar.pfs,0,0,5,0
+       mov out4=r15
+       mov out0=cr.ifa
+       adds out1=16,sp
+       mov out2=cr.isr
+       mov out3=cr.iim
+//     mov out3=cr.itir
+
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption 
collection is on
+       ;;
+(p15)  ssm psr.i                               // restore psr.i
+       adds r3=8,r2                            // set up second base pointer
+       ;;
+       SAVE_REST
+       movl r14=ia64_leave_kernel
+       ;;
+       mov rp=r14
+       br.sptk.many ia64_prepare_handle_reflection
+END(dispatch_reflection)
+
+#define SAVE_MIN_COVER_DONE    DO_SAVE_MIN(,mov r30=cr.ifs,)
+
+// same as dispatch_break_fault except cover has already been done
+GLOBAL_ENTRY(dispatch_slow_hyperprivop)
+       SAVE_MIN_COVER_DONE
+       ;;
+       br.sptk.many dispatch_break_fault_post_save
+END(dispatch_slow_hyperprivop)
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+
+       /*
+        * There is no particular reason for this code to be here, other than 
that
+        * there happens to be space here that would go unused otherwise.  If 
this
+        * fault ever gets "unreserved", simply moved the following code to a 
more
+        * suitable spot...
+        */
+
+       // IA32 interrupt entry point
+
+ENTRY(dispatch_to_ia32_handler)
+       SAVE_MIN
+       ;;
+       mov r14=cr.isr
+       ssm psr.ic | PSR_DEFAULT_BITS
+       ;;
+       srlz.i                                  // guarantee that interruption 
collection is on
+       ;;
+(p15)  ssm psr.i
+       adds r3=8,r2            // Base pointer for SAVE_REST
+       ;;
+       SAVE_REST
+       ;;
+       mov r15=0x80
+       shr r14=r14,16          // Get interrupt number
+       ;;
+       cmp.ne p6,p0=r14,r15
+(p6)   br.call.dpnt.many b6=non_ia32_syscall
+
+       adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW 
conventions
+       adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+       ;;
+       cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+       ld8 r8=[r14]            // get r8
+       ;;
+       st8 [r15]=r8            // save original EAX in r1 (IA32 procs don't 
use the GP)
+       ;;
+       alloc r15=ar.pfs,0,0,6,0        // must first in an insn group
+       ;;
+       ld4 r8=[r14],8          // r8 == eax (syscall number)
+       mov r15=IA32_NR_syscalls
+       ;;
+       cmp.ltu.unc p6,p7=r8,r15
+       ld4 out1=[r14],8        // r9 == ecx
+       ;;
+       ld4 out2=[r14],8        // r10 == edx
+       ;;
+       ld4 out0=[r14]          // r11 == ebx
+       adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+       ;;
+       ld4 out5=[r14],PT(R14)-PT(R13)  // r13 == ebp
+       ;;
+       ld4 out3=[r14],PT(R15)-PT(R14)  // r14 == esi
+       adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+       ;;
+       ld4 out4=[r14]          // r15 == edi
+       movl r16=ia32_syscall_table
+       ;;
+(p6)   shladd r16=r8,3,r16     // force ni_syscall if not valid syscall number
+       ld4 r2=[r2]             // r2 = current_thread_info()->flags
+       ;;
+       ld8 r16=[r16]
+       and r2=_TIF_SYSCALL_TRACEAUDIT,r2       // mask trace or audit
+       ;;
+       mov b6=r16
+       movl r15=ia32_ret_from_syscall
+       cmp.eq p8,p0=r2,r0
+       ;;
+       mov rp=r15
+(p8)   br.call.sptk.many b6=b6
+       br.cond.sptk ia32_trace_syscall
+
+non_ia32_syscall:
+       alloc r15=ar.pfs,0,0,2,0
+       mov out0=r14                            // interrupt #
+       add out1=16,sp                          // pointer to pt_regs
+       ;;                      // avoid WAW on CFM
+       br.call.sptk.many rp=ia32_bad_interrupt
+.ret1: movl r15=ia64_leave_kernel
+       ;;
+       mov rp=r15
+       br.ret.sptk.many rp
+END(dispatch_to_ia32_handler)
+
+#endif /* CONFIG_IA32_SUPPORT */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/mm_init.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/mm_init.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,549 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#ifdef XEN
+#include <xen/sched.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/personality.h>
+#endif
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+
+#ifndef XEN
+#include <asm/a.out.h>
+#endif
+#include <asm/bitops.h>
+#include <asm/dma.h>
+#ifndef XEN
+#include <asm/ia32.h>
+#endif
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/numa.h>
+#include <asm/patch.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+#ifndef XEN
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+#endif
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long vmalloc_end = VMALLOC_END_INIT;
+EXPORT_SYMBOL(vmalloc_end);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+static int pgt_cache_water[2] = { 25, 50 };
+
+struct page *zero_page_memmap_ptr;             /* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+
+#ifdef XEN
+void *high_memory;
+EXPORT_SYMBOL(high_memory);
+
+/////////////////////////////////////////////
+// following from linux-2.6.7/mm/mmap.c
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware.  The expected
+ * behavior is in parens:
+ *
+ * map_type    prot
+ *             PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
+ * MAP_SHARED  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
+ *             w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
+ *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
+ *             
+ * MAP_PRIVATE r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
+ *             w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
+ *             x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+       __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+       __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
+
+void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+       printf("insert_vm_struct: called, not implemented yet\n");
+}
+
+/////////////////////////////////////////////
+//following from linux/mm/memory.c
+
+#ifndef __ARCH_HAS_4LEVEL_HACK
+/*
+ * Allocate page upper directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level or three-level page table, this ends up actually being
+ * entirely optimized away.
+ */
+pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long 
address)
+{
+       pud_t *new;
+
+       spin_unlock(&mm->page_table_lock);
+       new = pud_alloc_one(mm, address);
+       spin_lock(&mm->page_table_lock);
+       if (!new)
+               return NULL;
+
+       /*
+        * Because we dropped the lock, we should re-check the
+        * entry, as somebody else could have populated it..
+        */
+       if (pgd_present(*pgd)) {
+               pud_free(new);
+               goto out;
+       }
+       pgd_populate(mm, pgd, new);
+ out:
+       return pud_offset(pgd, address);
+}
+
+/*
+ * Allocate page middle directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level page table, this ends up actually being entirely
+ * optimized away.
+ */
+pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
+{
+       pmd_t *new;
+
+       spin_unlock(&mm->page_table_lock);
+       new = pmd_alloc_one(mm, address);
+       spin_lock(&mm->page_table_lock);
+       if (!new)
+               return NULL;
+
+       /*
+        * Because we dropped the lock, we should re-check the
+        * entry, as somebody else could have populated it..
+        */
+       if (pud_present(*pud)) {
+               pmd_free(new);
+               goto out;
+       }
+       pud_populate(mm, pud, new);
+ out:
+       return pmd_offset(pud, address);
+}
+#endif
+
+pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long 
address)
+{
+       if (!pmd_present(*pmd)) {
+               struct page *new;
+
+               spin_unlock(&mm->page_table_lock);
+               new = pte_alloc_one(mm, address);
+               spin_lock(&mm->page_table_lock);
+               if (!new)
+                       return NULL;
+
+               /*
+                * Because we dropped the lock, we should re-check the
+                * entry, as somebody else could have populated it..
+                */
+               if (pmd_present(*pmd)) {
+                       pte_free(new);
+                       goto out;
+               }
+               inc_page_state(nr_page_table_pages);
+               pmd_populate(mm, pmd, new);
+       }
+out:
+       return pte_offset_map(pmd, address);
+}
+/////////////////////////////////////////////
+#endif /* XEN */
+
+#if 0
+void
+update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+{
+       unsigned long addr;
+       struct page *page;
+
+       if (!pte_exec(pte))
+               return;                         /* not an executable page... */
+
+       page = pte_page(pte);
+       /* don't use VADDR: it may not be mapped on this CPU (or may have just 
been flushed): */
+       addr = (unsigned long) page_address(page);
+
+       if (test_bit(PG_arch_1, &page->flags))
+               return;                         /* i-cache is already coherent 
with d-cache */
+
+       flush_icache_range(addr, addr + PAGE_SIZE);
+       set_bit(PG_arch_1, &page->flags);       /* mark page as clean */
+}
+#endif
+
+inline void
+ia64_set_rbs_bot (void)
+{
+#ifdef XEN
+       unsigned stack_size = MAX_USER_STACK_SIZE;
+#else
+       unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16;
+#endif
+
+       if (stack_size > MAX_USER_STACK_SIZE)
+               stack_size = MAX_USER_STACK_SIZE;
+       current->arch._thread.rbs_bot = STACK_TOP - stack_size;
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+#ifdef XEN
+printf("ia64_init_addr_space: called, not implemented\n");
+#else
+       struct vm_area_struct *vma;
+
+       ia64_set_rbs_bot();
+
+       /*
+        * If we're out of memory and kmem_cache_alloc() returns NULL, we 
simply ignore
+        * the problem.  When the process attempts to write to the register 
backing store
+        * for the first time, it will get a SEGFAULT in this case.
+        */
+       vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (vma) {
+               memset(vma, 0, sizeof(*vma));
+               vma->vm_mm = current->mm;
+               vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK;
+               vma->vm_end = vma->vm_start + PAGE_SIZE;
+               vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
+               vma->vm_flags = 
VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
+               insert_vm_struct(current->mm, vma);
+       }
+
+       /* map NaT-page at address zero to speed up speculative dereferencing 
of NULL: */
+       if (!(current->personality & MMAP_PAGE_ZERO)) {
+               vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+               if (vma) {
+                       memset(vma, 0, sizeof(*vma));
+                       vma->vm_mm = current->mm;
+                       vma->vm_end = PAGE_SIZE;
+                       vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) 
| _PAGE_MA_NAT);
+                       vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | 
VM_RESERVED;
+                       insert_vm_struct(current->mm, vma);
+               }
+       }
+#endif
+}
+
+setup_gate (void)
+{
+       printk("setup_gate not-implemented.\n");
+}
+
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+       unsigned long psr, pta, impl_va_bits;
+       extern void __devinit tlb_init (void);
+       int cpu;
+
+#ifdef CONFIG_DISABLE_VHPT
+#      define VHPT_ENABLE_BIT  0
+#else
+#      define VHPT_ENABLE_BIT  1
+#endif
+
+       /* Pin mapping for percpu area into TLB */
+       psr = ia64_clear_ic();
+       ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
+                pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
+                PERCPU_PAGE_SHIFT);
+
+       ia64_set_psr(psr);
+       ia64_srlz_i();
+
+       /*
+        * Check if the virtually mapped linear page table (VMLPT) overlaps 
with a mapped
+        * address space.  The IA-64 architecture guarantees that at least 50 
bits of
+        * virtual address space are implemented but if we pick a large enough 
page size
+        * (e.g., 64KB), the mapped address space is big enough that it will 
overlap with
+        * VMLPT.  I assume that once we run on machines big enough to warrant 
64KB pages,
+        * IMPL_VA_MSB will be significantly bigger, so this is unlikely to 
become a
+        * problem in practice.  Alternatively, we could truncate the top of 
the mapped
+        * address space to not permit mappings that would overlap with the 
VMLPT.
+        * --davidm 00/12/06
+        */
+#      define pte_bits                 3
+#      define mapped_space_bits        (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+       /*
+        * The virtual page table has to cover the entire implemented address 
space within
+        * a region even though not all of this space may be mappable.  The 
reason for
+        * this is that the Access bit and Dirty bit fault handlers perform
+        * non-speculative accesses to the virtual page table, so the address 
range of the
+        * virtual page table itself needs to be covered by virtual page table.
+        */
+#      define vmlpt_bits               (impl_va_bits - PAGE_SHIFT + pte_bits)
+#      define POW2(n)                  (1ULL << (n))
+
+       impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+       if (impl_va_bits < 51 || impl_va_bits > 61)
+               panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits 
- 1);
+
+#ifdef XEN
+       vhpt_init();
+#endif
+#if 0
+       /* place the VMLPT at the end of each page-table mapped region: */
+       pta = POW2(61) - POW2(vmlpt_bits);
+
+       if (POW2(mapped_space_bits) >= pta)
+               panic("mm/init: overlap between virtually mapped linear page 
table and "
+                     "mapped kernel space!");
+       /*
+        * Set the (virtually mapped linear) page table address.  Bit
+        * 8 selects between the short and long format, bits 2-7 the
+        * size of the table, and bit 0 whether the VHPT walker is
+        * enabled.
+        */
+       ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+#endif
+       ia64_tlb_init();
+
+#ifdef CONFIG_HUGETLB_PAGE
+       ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+       ia64_srlz_d();
+#endif
+
+       cpu = smp_processor_id();
+
+#ifndef XEN
+       /* mca handler uses cr.lid as key to pick the right entry */
+       ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
+
+       /* insert this percpu data information into our list for MCA recovery 
purposes */
+       ia64_mca_tlb_list[cpu].percpu_paddr = 
pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
+       /* Also save per-cpu tlb flush recipe for use in physical mode mca 
handler */
+       ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
+       ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
+       ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
+       ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
+       ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
+#endif
+}
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+
+int
+create_mem_map_page_table (u64 start, u64 end, void *arg)
+{
+       unsigned long address, start_page, end_page;
+       struct page *map_start, *map_end;
+       int node;
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+       map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+       start_page = (unsigned long) map_start & PAGE_MASK;
+       end_page = PAGE_ALIGN((unsigned long) map_end);
+       node = paddr_to_nid(__pa(start));
+
+       for (address = start_page; address < end_page; address += PAGE_SIZE) {
+               pgd = pgd_offset_k(address);
+               if (pgd_none(*pgd))
+                       pgd_populate(&init_mm, pgd, 
alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+               pmd = pmd_offset(pgd, address);
+
+               if (pmd_none(*pmd))
+                       pmd_populate_kernel(&init_mm, pmd, 
alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+               pte = pte_offset_kernel(pmd, address);
+
+               if (pte_none(*pte))
+                       set_pte(pte, 
pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> 
PAGE_SHIFT,
+                                            PAGE_KERNEL));
+       }
+       return 0;
+}
+
+struct memmap_init_callback_data {
+       struct page *start;
+       struct page *end;
+       int nid;
+       unsigned long zone;
+};
+
+static int
+virtual_memmap_init (u64 start, u64 end, void *arg)
+{
+       struct memmap_init_callback_data *args;
+       struct page *map_start, *map_end;
+
+       args = (struct memmap_init_callback_data *) arg;
+
+       map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+       map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+       if (map_start < args->start)
+               map_start = args->start;
+       if (map_end > args->end)
+               map_end = args->end;
+
+       /*
+        * We have to initialize "out of bounds" struct page elements that fit 
completely
+        * on the same pages that were allocated for the "in bounds" elements 
because they
+        * may be referenced later (and found to be "reserved").
+        */
+       map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / 
sizeof(struct page);
+       map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) 
map_end)
+                   / sizeof(struct page));
+
+       if (map_start < map_end)
+               memmap_init_zone(map_start, (unsigned long) (map_end - 
map_start),
+                                args->nid, args->zone, page_to_pfn(map_start));
+       return 0;
+}
+
+void
+memmap_init (struct page *start, unsigned long size, int nid,
+            unsigned long zone, unsigned long start_pfn)
+{
+       if (!vmem_map)
+               memmap_init_zone(start, size, nid, zone, start_pfn);
+       else {
+               struct memmap_init_callback_data args;
+
+               args.start = start;
+               args.end = start + size;
+               args.nid = nid;
+               args.zone = zone;
+
+               efi_memmap_walk(virtual_memmap_init, &args);
+       }
+}
+
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+       char byte;
+       struct page *pg = pfn_to_page(pfn);
+
+       return     (__get_user(byte, (char *) pg) == 0)
+               && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+                       || (__get_user(byte, (char *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+
+int
+find_largest_hole (u64 start, u64 end, void *arg)
+{
+       u64 *max_gap = arg;
+
+       static u64 last_end = PAGE_OFFSET;
+
+       /* NOTE: this algorithm assumes efi memmap table is ordered */
+
+#ifdef XEN
+//printf("find_largest_hole: 
start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg);
+#endif
+       if (*max_gap < (start - last_end))
+               *max_gap = start - last_end;
+       last_end = end;
+#ifdef XEN
+//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end);
+#endif
+       return 0;
+}
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+static int
+count_reserved_pages (u64 start, u64 end, void *arg)
+{
+       unsigned long num_reserved = 0;
+       unsigned long *count = arg;
+
+       for (; start < end; start += PAGE_SIZE)
+               if (PageReserved(virt_to_page(start)))
+                       ++num_reserved;
+       *count += num_reserved;
+       return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any 
light-weight
+ * system call handler.  When this option is in effect, all fsyscalls will end 
up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler. 
 This is
+ * useful for performance testing, but conceivably could also come in handy 
for debugging
+ * purposes.
+ */
+
+static int nolwsys;
+
+static int __init
+nolwsys_setup (char *s)
+{
+       nolwsys = 1;
+       return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+void
+mem_init (void)
+{
+#ifdef CONFIG_PCI
+       /*
+        * This needs to be called _after_ the command line has been parsed but 
_before_
+        * any drivers that may need the PCI DMA interface are initialized or 
bootmem has
+        * been freed.
+        */
+       platform_dma_init();
+#endif
+
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/pcdp.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/pcdp.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,120 @@
+/*
+ * Parse the EFI PCDP table to locate the console device.
+ *
+ * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P.
+ *     Khalid Aziz <khalid.aziz@xxxxxx>
+ *     Alex Williamson <alex.williamson@xxxxxx>
+ *     Bjorn Helgaas <bjorn.helgaas@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/console.h>
+#include <linux/efi.h>
+#include <linux/serial.h>
+#ifdef XEN
+#include <linux/errno.h>
+#endif
+#include "pcdp.h"
+
+static int __init
+setup_serial_console(struct pcdp_uart *uart)
+{
+#ifdef XEN
+       extern struct ns16550_defaults ns16550_com1;
+       ns16550_com1.baud = uart->baud;
+       ns16550_com1.io_base = uart->addr.address;
+       if (uart->bits)
+               ns16550_com1.data_bits = uart->bits;
+       return 0;
+#else
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+       int mmio;
+       static char options[64];
+
+       mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY);
+       snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d",
+               mmio ? "mmio" : "io", uart->addr.address, uart->baud,
+               uart->bits ? uart->bits : 8);
+
+       return early_serial_console_init(options);
+#else
+       return -ENODEV;
+#endif
+#endif
+}
+
+#ifndef XEN
+static int __init
+setup_vga_console(struct pcdp_vga *vga)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+       if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) {
+               printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n");
+               return -ENODEV;
+       }
+
+       conswitchp = &vga_con;
+       printk(KERN_INFO "PCDP: VGA console\n");
+       return 0;
+#else
+       return -ENODEV;
+#endif
+}
+#endif
+
+int __init
+efi_setup_pcdp_console(char *cmdline)
+{
+       struct pcdp *pcdp;
+       struct pcdp_uart *uart;
+       struct pcdp_device *dev, *end;
+       int i, serial = 0;
+
+       pcdp = efi.hcdp;
+       if (!pcdp)
+               return -ENODEV;
+
+#ifndef XEN
+       printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp));
+#endif
+
+       if (strstr(cmdline, "console=hcdp")) {
+               if (pcdp->rev < 3)
+                       serial = 1;
+       } else if (strstr(cmdline, "console=")) {
+#ifndef XEN
+               printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n");
+#endif
+               return -ENODEV;
+       }
+
+       if (pcdp->rev < 3 && efi_uart_console_only())
+               serial = 1;
+
+       for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) {
+               if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) {
+                       if (uart->type == PCDP_CONSOLE_UART) {
+                               return setup_serial_console(uart);
+                       }
+               }
+       }
+
+#ifndef XEN
+       end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length);
+       for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts);
+            dev < end;
+            dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) {
+               if (dev->flags & PCDP_PRIMARY_CONSOLE) {
+                       if (dev->type == PCDP_CONSOLE_VGA) {
+                               return setup_vga_console((struct pcdp_vga *) dev);
+                       }
+               }
+       }
+#endif
+
+       return -ENODEV;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/privop.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/privop.c        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1130 @@
+/*
+ * Privileged operation "API" handling functions.
+ * 
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <asm/privop.h>
+#include <asm/vcpu.h>
+#include <asm/processor.h>
+#include <asm/delay.h> // Debug only
+//#include <debug.h>
+
+long priv_verbose=0;
+
+/**************************************************************************
+Hypercall bundle creation
+**************************************************************************/
+
+
+void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret)
+{
+       INST64_A5 slot0;
+       INST64_I19 slot1;
+       INST64_B4 slot2;
+       IA64_BUNDLE bundle;
+
+       // slot1: mov r2 = hypnum (low 20 bits)
+       slot0.inst = 0;
+       slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9;
+       slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7;
+       slot0.imm5c = hypnum >> 16; slot0.s = 0;
+       // slot1: break brkimm
+       slot1.inst = 0;
+       slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0;
+       slot1.imm20 = brkimm; slot1.i = brkimm >> 20;
+       // if ret slot2: br.ret.sptk.many rp
+       // else slot2: br.cond.sptk.many rp
+       slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0;
+       slot2.wh = 0; slot2.d = 0; slot2.major = 0x0;
+       if (ret) {
+               slot2.btype = 4; slot2.x6 = 0x21;
+       }
+       else {
+               slot2.btype = 0; slot2.x6 = 0x20;
+       }
+       
+       bundle.i64[0] = 0; bundle.i64[1] = 0;
+       bundle.template = 0x11;
+       bundle.slot0 = slot0.inst; bundle.slot2 = slot2.inst;
+       bundle.slot1a = slot1.inst; bundle.slot1b = slot1.inst >> 18;
+       
+       *imva++ = bundle.i64[0]; *imva = bundle.i64[1];
+}
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT priv_rfi(VCPU *vcpu, INST64 inst)
+{
+       return vcpu_rfi(vcpu);
+}
+
+IA64FAULT priv_bsw0(VCPU *vcpu, INST64 inst)
+{
+       return vcpu_bsw0(vcpu);
+}
+
+IA64FAULT priv_bsw1(VCPU *vcpu, INST64 inst)
+{
+       return vcpu_bsw1(vcpu);
+}
+
+IA64FAULT priv_cover(VCPU *vcpu, INST64 inst)
+{
+       return vcpu_cover(vcpu);
+}
+
+IA64FAULT priv_ptc_l(VCPU *vcpu, INST64 inst)
+{
+       UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+       UINT64 addr_range;
+
+       addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+       return vcpu_ptc_l(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptc_e(VCPU *vcpu, INST64 inst)
+{
+       UINT src = inst.M28.r3;
+
+       // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64)
+       if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64)));
+       return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src));
+}
+
+IA64FAULT priv_ptc_g(VCPU *vcpu, INST64 inst)
+{
+       UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+       UINT64 addr_range;
+
+       addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+       return vcpu_ptc_g(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptc_ga(VCPU *vcpu, INST64 inst)
+{
+       UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+       UINT64 addr_range;
+
+       addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+       return vcpu_ptc_ga(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptr_d(VCPU *vcpu, INST64 inst)
+{
+       UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+       UINT64 addr_range;
+
+       addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+       return vcpu_ptr_d(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptr_i(VCPU *vcpu, INST64 inst)
+{
+       UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+       UINT64 addr_range;
+
+       addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+       return vcpu_ptr_i(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_tpa(VCPU *vcpu, INST64 inst)
+{
+       UINT64 padr;
+       UINT fault;
+       UINT src = inst.M46.r3;
+
+       // NOTE: tpa with source gr > 63 is emulated as a ttag rx=r(y-64)
+       if (src > 63)
+               fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr);
+       else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr);
+       if (fault == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, inst.M46.r1, padr);
+       else return fault;
+}
+
+IA64FAULT priv_tak(VCPU *vcpu, INST64 inst)
+{
+       UINT64 key;
+       UINT fault;
+       UINT src = inst.M46.r3;
+
+       // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64)
+       if (src > 63)
+               fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key);
+       else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key);
+       if (fault == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, inst.M46.r1, key);
+       else return fault;
+}
+
+/************************************
+ * Insert translation register/cache
+************************************/
+
+IA64FAULT priv_itr_d(VCPU *vcpu, INST64 inst)
+{
+       UINT64 fault, itir, ifa, pte, slot;
+
+       //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       pte = vcpu_get_gr(vcpu,inst.M42.r2);
+       slot = vcpu_get_gr(vcpu,inst.M42.r3);
+
+       return (vcpu_itr_d(vcpu,slot,pte,itir,ifa));
+}
+
+IA64FAULT priv_itr_i(VCPU *vcpu, INST64 inst)
+{
+       UINT64 fault, itir, ifa, pte, slot;
+
+       //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       pte = vcpu_get_gr(vcpu,inst.M42.r2);
+       slot = vcpu_get_gr(vcpu,inst.M42.r3);
+
+       return (vcpu_itr_i(vcpu,slot,pte,itir,ifa));
+}
+
+IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst)
+{
+       UINT64 fault, itir, ifa, pte;
+
+       //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       pte = vcpu_get_gr(vcpu,inst.M41.r2);
+
+       return (vcpu_itc_d(vcpu,pte,itir,ifa));
+}
+
+IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst)
+{
+       UINT64 fault, itir, ifa, pte;
+
+       //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+               return(IA64_ILLOP_FAULT);
+       pte = vcpu_get_gr(vcpu,inst.M41.r2);
+
+       return (vcpu_itc_i(vcpu,pte,itir,ifa));
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+*************************************/
+
+IA64FAULT priv_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
+{
+       // I27 and M30 are identical for these fields
+       UINT64 ar3 = inst.M30.ar3;
+       UINT64 imm = vcpu_get_gr(vcpu,inst.M30.imm);
+       return (vcpu_set_ar(vcpu,ar3,imm));
+}
+
+IA64FAULT priv_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
+{
+       // I26 and M29 are identical for these fields
+       UINT64 ar3 = inst.M29.ar3;
+
+       if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr
+               UINT64 val;
+               if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT)
+                       return vcpu_set_gr(vcpu, inst.M29.r2-64, val);
+               else return IA64_ILLOP_FAULT;
+       }
+       else {
+               UINT64 r2 = vcpu_get_gr(vcpu,inst.M29.r2);
+               return (vcpu_set_ar(vcpu,ar3,r2));
+       }
+}
+
+/********************************
+ * Moves to privileged registers
+********************************/
+
+IA64FAULT priv_mov_to_pkr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_pkr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_rr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_rr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_dbr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_dbr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_ibr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_ibr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_pmc(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_pmc(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_pmd(VCPU *vcpu, INST64 inst)
+{
+       UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+       UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+       return (vcpu_set_pmd(vcpu,r3,r2));
+}
+
+unsigned long to_cr_cnt[128] = { 0 };
+
+IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2);
+       to_cr_cnt[inst.M32.cr3]++;
+       switch (inst.M32.cr3) {
+           case 0: return vcpu_set_dcr(vcpu,val);
+           case 1: return vcpu_set_itm(vcpu,val);
+           case 2: return vcpu_set_iva(vcpu,val);
+           case 8: return vcpu_set_pta(vcpu,val);
+           case 16:return vcpu_set_ipsr(vcpu,val);
+           case 17:return vcpu_set_isr(vcpu,val);
+           case 19:return vcpu_set_iip(vcpu,val);
+           case 20:return vcpu_set_ifa(vcpu,val);
+           case 21:return vcpu_set_itir(vcpu,val);
+           case 22:return vcpu_set_iipa(vcpu,val);
+           case 23:return vcpu_set_ifs(vcpu,val);
+           case 24:return vcpu_set_iim(vcpu,val);
+           case 25:return vcpu_set_iha(vcpu,val);
+           case 64:return vcpu_set_lid(vcpu,val);
+           case 65:return IA64_ILLOP_FAULT;
+           case 66:return vcpu_set_tpr(vcpu,val);
+           case 67:return vcpu_set_eoi(vcpu,val);
+           case 68:return IA64_ILLOP_FAULT;
+           case 69:return IA64_ILLOP_FAULT;
+           case 70:return IA64_ILLOP_FAULT;
+           case 71:return IA64_ILLOP_FAULT;
+           case 72:return vcpu_set_itv(vcpu,val);
+           case 73:return vcpu_set_pmv(vcpu,val);
+           case 74:return vcpu_set_cmcv(vcpu,val);
+           case 80:return vcpu_set_lrr0(vcpu,val);
+           case 81:return vcpu_set_lrr1(vcpu,val);
+           default: return IA64_ILLOP_FAULT;
+       }
+}
+
+IA64FAULT priv_rsm(VCPU *vcpu, INST64 inst)
+{
+       UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+       return vcpu_reset_psr_sm(vcpu,imm24);
+}
+
+IA64FAULT priv_ssm(VCPU *vcpu, INST64 inst)
+{
+       UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+       return vcpu_set_psr_sm(vcpu,imm24);
+}
+
+/**
+ * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
+ */
+IA64FAULT priv_mov_to_psr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val = vcpu_get_gr(vcpu, inst.M35.r2);
+       return vcpu_set_psr_l(vcpu,val);
+}
+
+/**********************************
+ * Moves from privileged registers
+ **********************************/
+
+IA64FAULT priv_mov_from_rr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val;
+       IA64FAULT fault;
+       
+       if (inst.M43.r1 > 63) { // privified mov from cpuid
+               fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+               if (fault == IA64_NO_FAULT)
+                       return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
+       }
+       else {
+               fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+               if (fault == IA64_NO_FAULT)
+                       return vcpu_set_gr(vcpu, inst.M43.r1, val);
+       }
+       return fault;
+}
+
+IA64FAULT priv_mov_from_pkr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val;
+       IA64FAULT fault;
+       
+       fault = vcpu_get_pkr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+       if (fault == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, inst.M43.r1, val);
+       else return fault;
+}
+
+IA64FAULT priv_mov_from_dbr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val;
+       IA64FAULT fault;
+       
+       fault = vcpu_get_dbr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+       if (fault == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, inst.M43.r1, val);
+       else return fault;
+}
+
+IA64FAULT priv_mov_from_ibr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val;
+       IA64FAULT fault;
+       
+       fault = vcpu_get_ibr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+       if (fault == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, inst.M43.r1, val);
+       else return fault;
+}
+
+IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst)
+{
+       UINT64 val;
+       IA64FAULT fault;
+       
+       if (inst.M43.r1 > 63) { // privified mov from pmd
+               fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+               if (fault == IA64_NO_FAULT)
+                       return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
+       }
+       else {
+               fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+               if (fault == IA64_NO_FAULT)
+                       return vcpu_set_gr(vcpu, inst.M43.r1, val);
+       }
+       return fault;
+}
+
+unsigned long from_cr_cnt[128] = { 0 };
+
+#define cr_get(cr) \
+       ((fault = vcpu_get_##cr(vcpu,&val)) == IA64_NO_FAULT) ? \
+               vcpu_set_gr(vcpu, tgt, val) : fault;
+       
+IA64FAULT priv_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 tgt = inst.M33.r1;
+       UINT64 val;
+       IA64FAULT fault;
+
+       from_cr_cnt[inst.M33.cr3]++;
+       switch (inst.M33.cr3) {
+           case 0: return cr_get(dcr);
+           case 1: return cr_get(itm);
+           case 2: return cr_get(iva);
+           case 8: return cr_get(pta);
+           case 16:return cr_get(ipsr);
+           case 17:return cr_get(isr);
+           case 19:return cr_get(iip);
+           case 20:return cr_get(ifa);
+           case 21:return cr_get(itir);
+           case 22:return cr_get(iipa);
+           case 23:return cr_get(ifs);
+           case 24:return cr_get(iim);
+           case 25:return cr_get(iha);
+           case 64:return cr_get(lid);
+           case 65:return cr_get(ivr);
+           case 66:return cr_get(tpr);
+           case 67:return vcpu_set_gr(vcpu,tgt,0L);
+           case 68:return cr_get(irr0);
+           case 69:return cr_get(irr1);
+           case 70:return cr_get(irr2);
+           case 71:return cr_get(irr3);
+           case 72:return cr_get(itv);
+           case 73:return cr_get(pmv);
+           case 74:return cr_get(cmcv);
+           case 80:return cr_get(lrr0);
+           case 81:return cr_get(lrr1);
+           default: return IA64_ILLOP_FAULT;
+       }
+       return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT priv_mov_from_psr(VCPU *vcpu, INST64 inst)
+{
+       UINT64 tgt = inst.M33.r1;
+       UINT64 val;
+       IA64FAULT fault;
+
+       if ((fault = vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
+               return vcpu_set_gr(vcpu, tgt, val);
+       else return fault;
+}
+
+/**************************************************************************
+Privileged operation decode and dispatch routines
+**************************************************************************/
+
+IA64_SLOT_TYPE slot_types[0x20][3] = {
+       {M, I, I}, {M, I, I}, {M, I, I}, {M, I, I},
+       {M, I, ILLEGAL}, {M, I, ILLEGAL},
+       {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+       {M, M, I}, {M, M, I}, {M, M, I}, {M, M, I},
+       {M, F, I}, {M, F, I},
+       {M, M, F}, {M, M, F},
+       {M, I, B}, {M, I, B},
+       {M, B, B}, {M, B, B},
+       {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+       {B, B, B}, {B, B, B},
+       {M, M, B}, {M, M, B},
+       {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+       {M, F, B}, {M, F, B},
+       {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}
+};
+
+// pointer to privileged emulation function
+typedef IA64FAULT (*PPEFCN)(VCPU *vcpu, INST64 inst);
+
+PPEFCN Mpriv_funcs[64] = {
+  priv_mov_to_rr, priv_mov_to_dbr, priv_mov_to_ibr, priv_mov_to_pkr,
+  priv_mov_to_pmc, priv_mov_to_pmd, 0, 0,
+  0, priv_ptc_l, priv_ptc_g, priv_ptc_ga,
+  priv_ptr_d, priv_ptr_i, priv_itr_d, priv_itr_i,
+  priv_mov_from_rr, priv_mov_from_dbr, priv_mov_from_ibr, priv_mov_from_pkr,
+  priv_mov_from_pmc, 0, 0, 0,
+  0, 0, 0, 0,
+  0, 0, priv_tpa, priv_tak,
+  0, 0, 0, 0,
+  priv_mov_from_cr, priv_mov_from_psr, 0, 0,
+  0, 0, 0, 0,
+  priv_mov_to_cr, priv_mov_to_psr, priv_itc_d, priv_itc_i,
+  0, 0, 0, 0,
+  priv_ptc_e, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0
+};
+
+struct {
+       unsigned long mov_to_ar_imm;
+       unsigned long mov_to_ar_reg;
+       unsigned long mov_from_ar;
+       unsigned long ssm;
+       unsigned long rsm;
+       unsigned long rfi;
+       unsigned long bsw0;
+       unsigned long bsw1;
+       unsigned long cover;
+       unsigned long fc;
+       unsigned long cpuid;
+       unsigned long Mpriv_cnt[64];
+} privcnt = { 0 };
+
+unsigned long privop_trace = 0;
+
+IA64FAULT
+priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl)
+{
+       IA64_BUNDLE bundle;
+       IA64_BUNDLE __get_domain_bundle(UINT64);
+       int slot;
+       IA64_SLOT_TYPE slot_type;
+       INST64 inst;
+       PPEFCN pfunc;
+       unsigned long ipsr = regs->cr_ipsr;
+       UINT64 iip = regs->cr_iip;
+       int x6;
+       
+       // make a local copy of the bundle containing the privop
+#if 1
+       bundle = __get_domain_bundle(iip);
+       if (!bundle.i64[0] && !bundle.i64[1])
+#else
+       if (__copy_from_user(&bundle,iip,sizeof(bundle)))
+#endif
+       {
+//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip);
+               return vcpu_force_data_miss(vcpu,regs->cr_iip);
+       }
+#if 0
+       if (iip==0xa000000100001820) {
+               static int firstpagefault = 1;
+               if (firstpagefault) {
+                       printf("*** First time to domain page fault!\n");
+                       firstpagefault=0;
+               }
+       }
+#endif
+       if (privop_trace) {
+               static long i = 400;
+               //if (i > 0) printf("privop @%p\n",iip);
+               if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
+                       iip,ia64_get_itc(),ia64_get_itm());
+               i--;
+       }
+       slot = ((struct ia64_psr *)&ipsr)->ri;
+       if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41;
+       else if (slot == 1)
+               inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41;
+       else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41; 
+       else printf("priv_handle_op: illegal slot: %d\n", slot);
+
+       slot_type = slot_types[bundle.template][slot];
+       if (priv_verbose) {
+               printf("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
+                iip, (UINT64)inst.inst, slot, slot_type);
+       }
+       if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) {
+               // break instr for privified cover
+       }
+       else if (privlvl != 2) return (IA64_ILLOP_FAULT);
+       switch (slot_type) {
+           case M:
+               if (inst.generic.major == 0) {
+#if 0
+                       if (inst.M29.x6 == 0 && inst.M29.x3 == 0) {
+                               privcnt.cover++;
+                               return priv_cover(vcpu,inst);
+                       }
+#endif
+                       if (inst.M29.x3 != 0) break;
+                       if (inst.M30.x4 == 8 && inst.M30.x2 == 2) {
+                               privcnt.mov_to_ar_imm++;
+                               return priv_mov_to_ar_imm(vcpu,inst);
+                       }
+                       if (inst.M44.x4 == 6) {
+                               privcnt.ssm++;
+                               return priv_ssm(vcpu,inst);
+                       }
+                       if (inst.M44.x4 == 7) {
+                               privcnt.rsm++;
+                               return priv_rsm(vcpu,inst);
+                       }
+                       break;
+               }
+               else if (inst.generic.major != 1) break;
+               x6 = inst.M29.x6;
+               if (x6 == 0x2a) {
+                       if (inst.M29.r2 > 63 && inst.M29.ar3 < 8)
+                               privcnt.mov_from_ar++; // privified mov from kr
+                       else privcnt.mov_to_ar_reg++;
+                       return priv_mov_to_ar_reg(vcpu,inst);
+               }
+               if (inst.M29.x3 != 0) break;
+               if (!(pfunc = Mpriv_funcs[x6])) break;
+               if (x6 == 0x1e || x6 == 0x1f)  { // tpa or tak are "special"
+                       if (inst.M46.r3 > 63) {
+                               if (x6 == 0x1e) x6 = 0x1b;
+                               else x6 = 0x1a;
+                       }
+               }
+               if (x6 == 52 && inst.M28.r3 > 63)
+                       privcnt.fc++;
+               else if (x6 == 16 && inst.M43.r3 > 63)
+                       privcnt.cpuid++;
+               else privcnt.Mpriv_cnt[x6]++;
+               return (*pfunc)(vcpu,inst);
+               break;
+           case B:
+               if (inst.generic.major != 0) break;
+               if (inst.B8.x6 == 0x08) {
+                       IA64FAULT fault;
+                       privcnt.rfi++;
+                       fault = priv_rfi(vcpu,inst);
+                       if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS;
+                       return fault;
+               }
+               if (inst.B8.x6 == 0x0c) {
+                       privcnt.bsw0++;
+                       return priv_bsw0(vcpu,inst);
+               }
+               if (inst.B8.x6 == 0x0d) {
+                       privcnt.bsw1++;
+                       return priv_bsw1(vcpu,inst);
+               }
+               if (inst.B8.x6 == 0x0) { // break instr for privified cover
+                       privcnt.cover++;
+                       return priv_cover(vcpu,inst);
+               }
+               break;
+           case I:
+               if (inst.generic.major != 0) break;
+#if 0
+               if (inst.I26.x6 == 0 && inst.I26.x3 == 0) {
+                       privcnt.cover++;
+                       return priv_cover(vcpu,inst);
+               }
+#endif
+               if (inst.I26.x3 != 0) break;  // I26.x3 == I27.x3
+               if (inst.I26.x6 == 0x2a) {
+                       if (inst.I26.r2 > 63 && inst.I26.ar3 < 8)
+                               privcnt.mov_from_ar++; // privified mov from kr
+                       else privcnt.mov_to_ar_reg++;
+                       return priv_mov_to_ar_reg(vcpu,inst);
+               }
+               if (inst.I27.x6 == 0x0a) {
+                       privcnt.mov_to_ar_imm++;
+                       return priv_mov_to_ar_imm(vcpu,inst);
+               }
+               break;
+           default:
+               break;
+       }
+        //printf("We who are about do die salute you\n");
+       printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=%p\n",
+                iip, (UINT64)inst.inst, slot, slot_type, ipsr);
+        //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
+        //thread_mozambique("privop fault\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+/** Emulate a privileged operation.
+ *
+ * This should probably return 0 on success and the "trap number"
+ * (e.g. illegal operation for bad register, priv op for an
+ * instruction that isn't allowed, etc.) on "failure"
+ *
+ * @param vcpu virtual cpu
+ * @param isrcode interrupt service routine code
+ * @return fault
+ */
+IA64FAULT
+priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr)
+{
+       IA64FAULT fault;
+       UINT64 ipsr = regs->cr_ipsr;
+       UINT64 isrcode = (isr >> 4) & 0xf;
+       int privlvl;
+
+       // handle privops masked as illops? and breaks (6)
+       if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) {
+               printf("priv_emulate: isrcode != 0 or 1 or 2\n");
+               printf("priv_emulate: returning ILLOP, not implemented!\n");
+               while (1);
+               return IA64_ILLOP_FAULT;
+       }
+       //if (isrcode != 1 && isrcode != 2) return 0;
+       vcpu_set_regs(vcpu,regs);
+       privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT;
+       // its OK for a privified-cover to be executed in user-land
+       fault = priv_handle_op(vcpu,regs,privlvl);
+       if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!!
+               // update iip/ipsr to point to the next instruction
+               (void)vcpu_increment_iip(vcpu);
+       }
+       if (fault == IA64_ILLOP_FAULT)
+               printf("priv_emulate: priv_handle_op fails, isr=%p\n",isr);
+       return fault;
+}
+
+
+// FIXME: Move these to include/public/arch-ia64?
+#define HYPERPRIVOP_RFI                        0x1
+#define HYPERPRIVOP_RSM_DT             0x2
+#define HYPERPRIVOP_SSM_DT             0x3
+#define HYPERPRIVOP_COVER              0x4
+#define HYPERPRIVOP_ITC_D              0x5
+#define HYPERPRIVOP_ITC_I              0x6
+#define HYPERPRIVOP_SSM_I              0x7
+#define HYPERPRIVOP_GET_IVR            0x8
+#define HYPERPRIVOP_GET_TPR            0x9
+#define HYPERPRIVOP_SET_TPR            0xa
+#define HYPERPRIVOP_EOI                        0xb
+#define HYPERPRIVOP_SET_ITM            0xc
+#define HYPERPRIVOP_THASH              0xd
+#define HYPERPRIVOP_PTC_GA             0xe
+#define HYPERPRIVOP_ITR_D              0xf
+#define HYPERPRIVOP_GET_RR             0x10
+#define HYPERPRIVOP_SET_RR             0x11
+#define HYPERPRIVOP_MAX                        0x11
+
+char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
+       0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
+       "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d",
+       "=rr", "rr=",
+       0
+};
+
+unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+
+/* hyperprivops are generally executed in assembly (with physical psr.ic off)
+ * so this code is primarily used for debugging them */
+// C-path dispatcher for one hyperprivop (opcode number in iim) acting on the
+// current vcpu.  Returns 1 when the caller should advance iip past the
+// faulting instruction, 0 when not (rfi redirects iip itself; an opcode that
+// falls through the switch also returns 0).
+int
+ia64_hyperprivop(unsigned long iim, REGS *regs)
+{
+       struct vcpu *v = (struct vcpu *) current; // was miscast to (struct domain *)
+       UINT64 val;
+       UINT64 itir, ifa;
+
+// FIXME: Handle faults appropriately for these
+       if (!iim || iim > HYPERPRIVOP_MAX) {
+               printf("bad hyperprivop; ignored\n");
+               printf("iim=%ld, iip=%p\n",iim,regs->cr_iip); // %ld: iim is unsigned long (was %d)
+               return 1;
+       }
+       slow_hyperpriv_cnt[iim]++;
+       switch(iim) {
+           case HYPERPRIVOP_RFI:
+               (void)vcpu_rfi(v);
+               return 0;       // don't update iip
+           case HYPERPRIVOP_RSM_DT:
+               (void)vcpu_reset_psr_dt(v);
+               return 1;
+           case HYPERPRIVOP_SSM_DT:
+               (void)vcpu_set_psr_dt(v);
+               return 1;
+           case HYPERPRIVOP_COVER:
+               (void)vcpu_cover(v);
+               return 1;
+           case HYPERPRIVOP_ITC_D:
+               (void)vcpu_get_itir(v,&itir);
+               (void)vcpu_get_ifa(v,&ifa);
+               (void)vcpu_itc_d(v,regs->r8,itir,ifa);
+               return 1;
+           case HYPERPRIVOP_ITC_I:
+               (void)vcpu_get_itir(v,&itir);
+               (void)vcpu_get_ifa(v,&ifa);
+               (void)vcpu_itc_i(v,regs->r8,itir,ifa);
+               return 1;
+           case HYPERPRIVOP_SSM_I:
+               (void)vcpu_set_psr_i(v);
+               return 1;
+           case HYPERPRIVOP_GET_IVR:
+               (void)vcpu_get_ivr(v,&val);
+               regs->r8 = val;
+               return 1;
+           case HYPERPRIVOP_GET_TPR:
+               (void)vcpu_get_tpr(v,&val);
+               regs->r8 = val;
+               return 1;
+           case HYPERPRIVOP_SET_TPR:
+               (void)vcpu_set_tpr(v,regs->r8);
+               return 1;
+           case HYPERPRIVOP_EOI:
+               (void)vcpu_set_eoi(v,0L);
+               return 1;
+           case HYPERPRIVOP_SET_ITM:
+               (void)vcpu_set_itm(v,regs->r8);
+               return 1;
+           case HYPERPRIVOP_THASH:
+               (void)vcpu_thash(v,regs->r8,&val);
+               regs->r8 = val;
+               return 1;
+           case HYPERPRIVOP_PTC_GA:
+               // address in r8; page-size exponent encoded in r9 bits 7:2
+               (void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2)));
+               return 1;
+           case HYPERPRIVOP_ITR_D:
+               (void)vcpu_get_itir(v,&itir);
+               (void)vcpu_get_ifa(v,&ifa);
+               (void)vcpu_itr_d(v,regs->r8,regs->r9,itir,ifa);
+               return 1;
+           case HYPERPRIVOP_GET_RR:
+               (void)vcpu_get_rr(v,regs->r8,&val);
+               regs->r8 = val;
+               return 1;
+           case HYPERPRIVOP_SET_RR:
+               (void)vcpu_set_rr(v,regs->r8,regs->r9);
+               return 1;
+       }
+       return 0;
+}
+
+
+/**************************************************************************
+Privileged operation instrumentation routines
+**************************************************************************/
+
+// Names for the 64 M-unit privileged-operation slots (index = opcode
+// extension); "<0xNN>" entries mark slots with no assigned operation.
+char *Mpriv_str[64] = {
+  "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr",
+  "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>",
+  "<0x08>", "ptc_l", "ptc_g", "ptc_ga",
+  "ptr_d", "ptr_i", "itr_d", "itr_i",
+  "mov_from_rr", "mov_from_dbr", "mov_from_ibr", "mov_from_pkr",
+  "mov_from_pmc", "<0x15>", "<0x16>", "<0x17>",
+  "<0x18>", "<0x19>", "privified-thash", "privified-ttag",
+  "<0x1c>", "<0x1d>", "tpa", "tak",
+  "<0x20>", "<0x21>", "<0x22>", "<0x23>",
+  "mov_from_cr", "mov_from_psr", "<0x26>", "<0x27>",
+  "<0x28>", "<0x29>", "<0x2a>", "<0x2b>",
+  "mov_to_cr", "mov_to_psr", "itc_d", "itc_i",
+  "<0x30>", "<0x31>", "<0x32>", "<0x33>",
+  "ptc_e", "<0x35>", "<0x36>", "<0x37>",
+  "<0x38>", "<0x39>", "<0x3a>", "<0x3b>",
+  "<0x3c>", "<0x3d>", "<0x3e>", "<0x3f>"
+};
+
+#define RS "Rsvd"
+// Names for the 128 control registers, indexed by CR number; RS marks
+// reserved registers.
+char *cr_str[128] = {
+  "dcr","itm","iva",RS,RS,RS,RS,RS,
+  "pta",RS,RS,RS,RS,RS,RS,RS,
+  "ipsr","isr",RS,"iip","ifa","itir","iipa","ifs",
+  "iim","iha",RS,RS,RS,RS,RS,RS,
+  RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+  RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+  "lid","ivr","tpr","eoi","irr0","irr1","irr2","irr3",
+  "itv","pmv","cmcv",RS,RS,RS,RS,RS,
+  "lrr0","lrr1",RS,RS,RS,RS,RS,RS,
+  RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+  RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+  RS,RS,RS,RS,RS,RS,RS,RS
+};
+
+// FIXME: should use snprintf to ensure no buffer overflow
+// Format every privop counter (with percent-of-total) into buf; returns
+// the number of bytes written.
+int dump_privop_counts(char *buf)
+{
+       int i, j;
+       UINT64 sum = 0;
+       char *s = buf;
+
+       // this is ugly and should probably produce sorted output
+       // but it will have to do for now
+       // NOTE(review): sum omits mov_from_ar, fc, and cpuid, yet their
+       // percentages below divide by sum — those lines can exceed 100%.
+       sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg;
+       sum += privcnt.ssm; sum += privcnt.rsm;
+       sum += privcnt.rfi; sum += privcnt.bsw0;
+       sum += privcnt.bsw1; sum += privcnt.cover;
+       for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i];
+       s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum);
+       if (privcnt.mov_to_ar_imm)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_to_ar_imm,
+                       "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum);
+       if (privcnt.mov_to_ar_reg)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_to_ar_reg,
+                       "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum);
+       if (privcnt.mov_from_ar)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.mov_from_ar,
+                       "privified-mov_from_ar", 
(privcnt.mov_from_ar*100L)/sum);
+       if (privcnt.ssm)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.ssm,
+                       "ssm", (privcnt.ssm*100L)/sum);
+       if (privcnt.rsm)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.rsm,
+                       "rsm", (privcnt.rsm*100L)/sum);
+       if (privcnt.rfi)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.rfi,
+                       "rfi", (privcnt.rfi*100L)/sum);
+       if (privcnt.bsw0)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.bsw0,
+                       "bsw0", (privcnt.bsw0*100L)/sum);
+       if (privcnt.bsw1)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.bsw1,
+                       "bsw1", (privcnt.bsw1*100L)/sum);
+       if (privcnt.cover)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.cover,
+                       "cover", (privcnt.cover*100L)/sum);
+       if (privcnt.fc)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.fc,
+                       "privified-fc", (privcnt.fc*100L)/sum);
+       if (privcnt.cpuid)
+               s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.cpuid,
+                       "privified-getcpuid", (privcnt.cpuid*100L)/sum);
+       for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) {
+               if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n");
+               else s += sprintf(s,"%10d  %s [%d%%]\n", privcnt.Mpriv_cnt[i],
+                       Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum);
+               if (i == 0x24) { // mov from CR
+                       s += sprintf(s,"            [");
+                       // NOTE(review): no 'else' after the NULL check, so a
+                       // NULL cr_str entry would still be dereferenced below.
+                       for (j=0; j < 128; j++) if (from_cr_cnt[j]) {
+                               if (!cr_str[j])
+                                       s += sprintf(s,"PRIVSTRING NULL!!\n");
+                               s += 
sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]);
+                       }
+                       s += sprintf(s,"]\n");
+               }
+               else if (i == 0x2c) { // mov to CR
+                       s += sprintf(s,"            [");
+                       // NOTE(review): same missing 'else' as the 0x24 case.
+                       for (j=0; j < 128; j++) if (to_cr_cnt[j]) {
+                               if (!cr_str[j])
+                                       s += sprintf(s,"PRIVSTRING NULL!!\n");
+                               s += 
sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]);
+                       }
+                       s += sprintf(s,"]\n");
+               }
+       }
+       return s - buf;
+}
+
+// Reset every privop statistic — the privcnt struct, the 64 M-unit slot
+// counters, and both CR-access tables — then emit a confirmation line.
+// Returns the number of bytes written into buf.
+int zero_privop_counts(char *buf)
+{
+       int idx;
+       char *p = buf;
+
+       privcnt.mov_to_ar_imm = 0;
+       privcnt.mov_to_ar_reg = 0;
+       privcnt.mov_from_ar = 0;
+       privcnt.ssm = 0;
+       privcnt.rsm = 0;
+       privcnt.rfi = 0;
+       privcnt.bsw0 = 0;
+       privcnt.bsw1 = 0;
+       privcnt.cover = 0;
+       privcnt.fc = 0;
+       privcnt.cpuid = 0;
+       for (idx = 0; idx < 64; idx++)
+               privcnt.Mpriv_cnt[idx] = 0;
+       for (idx = 0; idx < 128; idx++) {
+               from_cr_cnt[idx] = 0;
+               to_cr_cnt[idx] = 0;
+       }
+       p += sprintf(p,"All privop statistics zeroed\n");
+       return p - buf;
+}
+
+#ifdef PRIVOP_ADDR_COUNT
+
+extern struct privop_addr_count privop_addr_counter[];
+
+// Record one occurrence of privop class 'inst' at address 'iip'.  Each class
+// keeps a small table of distinct addresses; once the table is full, hits at
+// new addresses are lumped into the overflow counter.
+void privop_count_addr(unsigned long iip, int inst)
+{
+       struct privop_addr_count *v = &privop_addr_counter[inst];
+       int i;
+
+       for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) {
+               if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
+               else if (v->addr[i] == iip)  { v->count[i]++; return; }
+       }
+       v->overflow++;  // was "v->overflow++;;" (stray empty statement)
+}
+
+// Format the per-instruction privop address tables into buf; returns the
+// number of bytes written.  Stops listing a class at the first empty slot
+// (slots are filled in order by privop_count_addr).
+int dump_privop_addrs(char *buf)
+{
+       int i,j;
+       char *s = buf;
+       s += sprintf(s,"Privop addresses:\n");
+       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
+               struct privop_addr_count *v = &privop_addr_counter[i];
+               s += sprintf(s,"%s:\n",v->instname);
+               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) {
+                       if (!v->addr[j]) break;
+                       s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]);
+               }
+               if (v->overflow) 
+                       s += sprintf(s," other #%ld\n",v->overflow);
+       }
+       return s - buf;
+}
+
+// Clear every recorded privop address, its hit count, and the per-class
+// overflow tally.
+void zero_privop_addrs(void)
+{
+       int a, b;
+       for (a = 0; a < PRIVOP_COUNT_NINSTS; a++) {
+               struct privop_addr_count *entry = &privop_addr_counter[a];
+               for (b = 0; b < PRIVOP_COUNT_NADDRS; b++) {
+                       entry->addr[b] = 0;
+                       entry->count[b] = 0;
+               }
+               entry->overflow = 0;
+       }
+}
+#endif
+
+extern unsigned long dtlb_translate_count;
+extern unsigned long tr_translate_count;
+extern unsigned long phys_translate_count;
+extern unsigned long vhpt_translate_count;
+extern unsigned long lazy_cover_count;
+extern unsigned long idle_when_pending;
+extern unsigned long pal_halt_light_count;
+extern unsigned long context_switch_count;
+
+// Format the miscellaneous event counters into buf; returns bytes written.
+// All counters are unsigned long, so print with %lu — the previous %d is
+// wrong for 64-bit longs on ia64 (also rejoined a line the mail archiver
+// had wrapped mid-literal).
+int dump_misc_stats(char *buf)
+{
+       char *s = buf;
+       s += sprintf(s,"Virtual TR translations: %lu\n",tr_translate_count);
+       s += sprintf(s,"Virtual VHPT translations: %lu\n",vhpt_translate_count);
+       s += sprintf(s,"Virtual DTLB translations: %lu\n",dtlb_translate_count);
+       s += sprintf(s,"Physical translations: %lu\n",phys_translate_count);
+       s += sprintf(s,"Idle when pending: %lu\n",idle_when_pending);
+       s += sprintf(s,"PAL_HALT_LIGHT (no pending): %lu\n",pal_halt_light_count);
+       s += sprintf(s,"context switches: %lu\n",context_switch_count);
+       s += sprintf(s,"Lazy covers: %lu\n",lazy_cover_count);
+       return s - buf;
+}
+
+// Reset all of the miscellaneous event counters to zero.
+void zero_misc_stats(void)
+{
+       tr_translate_count = 0;
+       vhpt_translate_count = 0;
+       dtlb_translate_count = 0;
+       phys_translate_count = 0;
+       idle_when_pending = 0;
+       pal_halt_light_count = 0;
+       context_switch_count = 0;
+       lazy_cover_count = 0;
+}
+
+// Format the slow- and fast-path hyperprivop counters into buf; returns
+// bytes written.  Counters and totals are unsigned long, so print with
+// %lu (was %d, wrong for 64-bit longs on ia64).
+int dump_hyperprivop_counts(char *buf)
+{
+       int i;
+       char *s = buf;
+       unsigned long total = 0;
+       for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i];
+       s += sprintf(s,"Slow hyperprivops (total %lu):\n",total);
+       for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+               if (slow_hyperpriv_cnt[i])
+                       s += sprintf(s,"%10lu %s\n",
+                               slow_hyperpriv_cnt[i], hyperpriv_str[i]);
+       total = 0;
+       for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i];
+       s += sprintf(s,"Fast hyperprivops (total %lu):\n",total);
+       for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+               if (fast_hyperpriv_cnt[i])
+                       s += sprintf(s,"%10lu %s\n",
+                               fast_hyperpriv_cnt[i], hyperpriv_str[i]);
+       return s - buf;
+}
+
+// Reset both the slow- and fast-path hyperprivop counters in one pass.
+void zero_hyperprivop_counts(void)
+{
+       int n;
+       for (n = 0; n <= HYPERPRIVOP_MAX; n++) {
+               slow_hyperpriv_cnt[n] = 0;
+               fast_hyperpriv_cnt[n] = 0;
+       }
+}
+
+#define TMPBUFLEN 8*1024
+// Collect all statistics dumps into a stack buffer and copy the result to
+// the caller's user buffer.  Returns bytes copied, or -1 when the user
+// buffer is too small (it must hold TMPBUFLEN bytes) or the copy faults.
+int dump_privop_counts_to_user(char __user *ubuf, int len)
+{
+       char buf[TMPBUFLEN];
+       int n;
+
+       // Reject an undersized buffer before doing any formatting work
+       // (previously this was checked only after everything was gathered).
+       if (len < TMPBUFLEN) return -1;
+       n = dump_privop_counts(buf);
+       n += dump_hyperprivop_counts(buf + n);
+       n += dump_reflect_counts(buf + n);
+#ifdef PRIVOP_ADDR_COUNT
+       n += dump_privop_addrs(buf + n);
+#endif
+       n += dump_misc_stats(buf + n);
+       if (__copy_to_user(ubuf,buf,n)) return -1;
+       return n;
+}
+
+// Zero every statistics family and copy the confirmation text to the
+// caller's user buffer.  Returns bytes copied, or -1 when the user buffer
+// is too small (it must hold TMPBUFLEN bytes) or the copy faults.
+int zero_privop_counts_to_user(char __user *ubuf, int len)
+{
+       char buf[TMPBUFLEN];
+       int n;
+
+       // Reject an undersized buffer up front; previously the counters were
+       // already zeroed by the time the failing length check ran.
+       if (len < TMPBUFLEN) return -1;
+       n = zero_privop_counts(buf);
+       zero_hyperprivop_counts();
+#ifdef PRIVOP_ADDR_COUNT
+       zero_privop_addrs();
+#endif
+       zero_misc_stats();
+       zero_reflect_counts();
+       if (__copy_to_user(ubuf,buf,n)) return -1;
+       return n;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/process.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/process.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,749 @@
+/*
+ * Miscellaneous process/domain related routines
+ * 
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h>   /* FOR struct ia64_sal_retval */
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/regionreg.h>
+#include <asm/privop.h>
+#include <asm/vcpu.h>
+#include <asm/ia64_int.h>
+#include <asm/dom_fw.h>
+#include "hpsim_ssc.h"
+
+extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval 
sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
+extern unsigned long dom0_start, dom0_size;
+
+#define IA64_PSR_CPL1  (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
+// note IA64_PSR_PK removed from following, why is this necessary?
+// psr bits forced ON in cr.ipsr when reflecting an interruption to the
+// guest (see reflect_interruption below).
+#define        DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
+                       IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
+                       IA64_PSR_IT | IA64_PSR_BN)
+
+// psr bits forced OFF in cr.ipsr when reflecting an interruption.
+#define        DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
+                       IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI |       \
+                       IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
+                       IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \
+                       IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
+                       IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
+
+// Shorthand accessors: PSCB goes through the VCPU() accessor macro,
+// PSCBX reads the field directly from the vcpu's arch struct.
+#define PSCB(x,y)      VCPU(x,y)
+#define PSCBX(x,y)     x->arch.y
+
+extern unsigned long vcpu_verbose;
+
+// iopl hypercall stub for ia64: calls the dummy() placeholder and always
+// reports success.
+long do_iopl(domid_t domain, unsigned int new_io_pl)
+{
+       dummy();
+       return 0;
+}
+
+// Finish a context switch into the incoming vcpu by reloading its region
+// registers.  NOTE(review): operates on 'current' rather than the 'next'
+// argument — presumably 'current' has already been updated by this point;
+// confirm against the context-switch path.
+void schedule_tail(struct vcpu *next)
+{
+       unsigned long rr7;
+       //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
+       //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
+#ifdef CONFIG_VTI
+       /* rr7 will be postponed to last point when resuming back to guest */
+       vmx_load_all_rr(current);
+#else // CONFIG_VTI
+       // NOTE(review): deliberate assignment-in-condition — a changed rr7
+       // is detected but not yet handled.  rr7 itself is otherwise unused.
+       if (rr7 = load_region_regs(current)) {
+               printk("schedule_tail: change to rr7 not yet implemented\n");
+       }
+#endif // CONFIG_VTI
+}
+
+// Empty function invoked on the error paths below — NOTE(review): presumably
+// kept as a convenient breakpoint anchor; confirm before removing.
+void tdpfoo(void) { }
+
+// given a domain virtual address, pte and pagesize, extract the metaphysical
+// address, convert the pte for a physical address for (possibly different)
+// Xen PAGE_SIZE and return modified pte.  (NOTE: TLB insert should use
+// PAGE_SIZE!)
+unsigned long translate_domain_pte(unsigned long pteval,
+       unsigned long address, unsigned long itir)
+{
+       struct domain *d = current->domain;
+       unsigned long mask, pteval2, mpaddr;
+       unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+       extern struct domain *dom0;
+       extern unsigned long dom0_start, dom0_size;
+
+       // FIXME address had better be pre-validated on insert
+       // mask = offset-within-page bits, per the size encoded in itir[7:2]
+       mask = (1L << ((itir >> 2) & 0x3f)) - 1;
+       mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
+       if (d == dom0) {
+               if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+                       //printk("translate_domain_pte: out-of-bounds dom0 
mpaddr %p! itc=%lx...\n",mpaddr,ia64_get_itc());
+                       tdpfoo();
+               }
+       }
+       else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) {
+               // NOTE(review): '>' lets page number == max_pages through;
+               // '>=' looks intended — confirm against lookup_domain_mpa.
+               printf("translate_domain_pte: bad mpa=%p (> 
%p),vadr=%p,pteval=%p,itir=%p\n",
+                       mpaddr,d->max_pages<<PAGE_SHIFT,address,pteval,itir);
+               tdpfoo();
+       }
+       pteval2 = lookup_domain_mpa(d,mpaddr);
+       pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
+       pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
+       pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
+       return pteval2;
+}
+
+// given a current domain metaphysical address, return the physical address
+// NOTE(review): for dom0 an out-of-bounds address only warns; the lookup
+// still proceeds.
+unsigned long translate_domain_mpaddr(unsigned long mpaddr)
+{
+       extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+       unsigned long pteval;
+
+       if (current->domain == dom0) {
+               if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+                       printk("translate_domain_mpaddr: out-of-bounds dom0 
mpaddr %p! continuing...\n",mpaddr);
+                       tdpfoo();
+               }
+       }
+       pteval = lookup_domain_mpa(current->domain,mpaddr);
+       // combine the page frame from the pte with the page offset
+       return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
+}
+
+// Per-vector counts of interruptions reflected into the guest via the slow
+// (C) path and the fast (assembly) path; indexed by vector >> 8.
+unsigned long slow_reflect_count[0x80] = { 0 };
+unsigned long fast_reflect_count[0x80] = { 0 };
+
+// NOTE(review): trailing semicolon inside the macro body — harmless at the
+// current call sites, but fragile inside if/else without braces.
+#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++;
+
+// Reset both reflection counters for every vector in a single pass.
+void zero_reflect_counts(void)
+{
+       int vec;
+       for (vec = 0; vec < 0x80; vec++) {
+               slow_reflect_count[vec] = 0;
+               fast_reflect_count[vec] = 0;
+       }
+}
+
+// Format the per-vector slow/fast reflection counters into buf, four
+// entries per line; returns the number of bytes written.
+int dump_reflect_counts(char *buf)
+{
+       int i,j;
+       unsigned long cnt;      // counters are unsigned long; was int (truncating)
+       char *s = buf;
+
+       s += sprintf(s,"Slow reflections by vector:\n");
+       for (i = 0, j = 0; i < 0x80; i++) {
+               if ((cnt = slow_reflect_count[i]) != 0) { // explicit test, not bare assignment
+                       s += sprintf(s,"0x%02x00:%10lu, ",i,cnt);
+                       if ((j++ & 3) == 3) s += sprintf(s,"\n");
+               }
+       }
+       if (j & 3) s += sprintf(s,"\n");
+       s += sprintf(s,"Fast reflections by vector:\n");
+       for (i = 0, j = 0; i < 0x80; i++) {
+               if ((cnt = fast_reflect_count[i]) != 0) {
+                       s += sprintf(s,"0x%02x00:%10lu, ",i,cnt);
+                       if ((j++ & 3) == 3) s += sprintf(s,"\n");
+               }
+       }
+       if (j & 3) s += sprintf(s,"\n");
+       return s - buf;
+}
+
+// Reflect an interruption into the guest: stash the fault state (ifa, isr,
+// itir/iim, iip, ipsr, ifs) into the vcpu's shadow registers, then redirect
+// iip into the guest's IVT (iva + vector) with psr bits adjusted by
+// DELIVER_PSR_SET/CLR.  'itiriim' carries iim for break/speculation vectors
+// and is otherwise unused (itir is recomputed via vcpu_get_itir_on_fault).
+void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long 
itiriim, struct pt_regs *regs, unsigned long vector)
+{
+       unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long);
+       unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long);
+       struct domain *d = current->domain;
+       struct vcpu *v = current;
+
+       if (vector == IA64_EXTINT_VECTOR) {
+               
+               extern unsigned long vcpu_verbose, privop_trace;
+               // NOTE(review): implicit int ('static first_extint') — should
+               // be 'static int first_extint'.
+               static first_extint = 1;
+               if (first_extint) {
+                       printf("Delivering first extint to domain: ifa=%p, 
isr=%p, itir=%p, iip=%p\n",ifa,isr,itiriim,regs->cr_iip);
+                       //privop_trace = 1; vcpu_verbose = 1;
+                       first_extint = 0;
+               }
+       }
+       // Guest has psr.ic off: only a nested data-TLB-class fault may be
+       // delivered; anything else is fatal for the domain.
+       if (!PSCB(v,interrupt_collection_enabled)) {
+               if (!(PSCB(v,ipsr) & IA64_PSR_DT)) {
+                       panic_domain(regs,"psr.dt off, trying to deliver nested 
dtlb!\n");
+               }
+               vector &= ~0xf;
+               if (vector != IA64_DATA_TLB_VECTOR &&
+                   vector != IA64_ALT_DATA_TLB_VECTOR &&
+                   vector != IA64_VHPT_TRANS_VECTOR) {
+panic_domain(regs,"psr.ic off, delivering 
fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
+       vector,regs->cr_ipsr,regs->cr_iip,ifa,isr,PSCB(v,iip));
+                       
+               }
+//printf("Delivering NESTED DATA TLB fault\n");
+               vector = IA64_DATA_NESTED_TLB_VECTOR;
+               regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & 
~0xffUL;
+               regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | 
DELIVER_PSR_SET;
+// NOTE: nested trap must NOT pass PSCB address
+               //regs->r31 = (unsigned long) &PSCB(v);
+               inc_slow_reflect_count(vector);
+               return;
+
+       }
+       // Low nibble of 'vector' carries flags; IA64_FORCED_IFA substitutes
+       // a caller-provided ifa stashed in tmp[0].
+       if ((vector & 0xf) == IA64_FORCED_IFA)
+               ifa = PSCB(v,tmp[0]);
+       vector &= ~0xf;
+       PSCB(v,ifa) = ifa;
+       if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt 
TLB miss */
+               vcpu_thash(v,ifa,&PSCB(current,iha));
+       PSCB(v,unat) = regs->ar_unat;  // not sure if this is really needed?
+       PSCB(v,precover_ifs) = regs->cr_ifs;
+       vcpu_bsw0(v);
+       PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr);
+       if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+               PSCB(v,iim) = itiriim;
+       else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa);
+       PSCB(v,isr) = isr; // this is unnecessary except for interrupts!
+       PSCB(v,iip) = regs->cr_iip;
+       PSCB(v,ifs) = 0;
+       PSCB(v,incomplete_regframe) = 0;
+
+       // Redirect execution to the guest's handler for this vector.
+       regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
+       regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
+#ifdef CONFIG_SMP
+#warning "SMP FIXME: sharedinfo doesn't handle smp yet, need page per vcpu"
+#endif
+       regs->r31 = &(((mapped_regs_t *)SHARED_ARCHINFO_ADDR)->ipsr);
+
+       PSCB(v,interrupt_delivery_enabled) = 0;
+       PSCB(v,interrupt_collection_enabled) = 0;
+
+       inc_slow_reflect_count(vector);
+}
+
+// Empty function — NOTE(review): presumably a breakpoint anchor like
+// tdpfoo(); confirm before removing.
+void foodpi(void) {}
+
+// Count of times an interruption was flagged pending but turned out not
+// to be deliverable.
+unsigned long pending_false_positive = 0;
+
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+// If the current (non-idle, user-mode) vcpu has a deliverable interrupt,
+// reflect an external-interrupt vector into it.
+void deliver_pending_interrupt(struct pt_regs *regs)
+{
+       struct domain *d = current->domain;
+       struct vcpu *v = current;
+       // FIXME: Will this work properly if doing an RFI???
+       if (!is_idle_task(d) && user_mode(regs)) {
+               //vcpu_poke_timer(v);
+               if (vcpu_deliverable_interrupts(v)) {
+                       unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
+                       if (vcpu_timer_pending_early(v))
+printf("*#*#*#* about to deliver early timer to domain 
%d!!!\n",v->domain->domain_id);
+                       reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
+               }
+               else if (PSCB(v,pending_interruption))
+                       ++pending_false_positive;
+       }
+}
+unsigned long lazy_cover_count = 0;
+
+// Emulate a lazy "cover" when the guest had interrupt collection disabled:
+// stash cr.ifs for the guest, mark the register frame incomplete, and clear
+// cr.ifs so the faulting instruction can be retried.  Returns 1 when the
+// caller should retry, 0 when the fault should be handled normally.
+int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
+{
+       if (PSCB(v,interrupt_collection_enabled))
+               return 0;       // collection on: nothing to emulate
+       PSCB(v,ifs) = regs->cr_ifs;
+       PSCB(v,incomplete_regframe) = 1;
+       regs->cr_ifs = 0;
+       lazy_cover_count++;
+       return 1;       // retry same instruction with cr.ifs off
+}
+
+// Page-fault handler: try a lazy cover, short-circuit speculative
+// loads/lfetch, translate and insert the mapping on success, recover via
+// the exception table for faults inside Xen itself, and otherwise reflect
+// the fault to the guest.
+void ia64_do_page_fault (unsigned long address, unsigned long isr, struct 
pt_regs *regs, unsigned long itir)
+{
+       unsigned long iip = regs->cr_iip;
+       // FIXME should validate address here
+       unsigned long pteval;
+       unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
+       IA64FAULT fault;
+
+       if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) 
return;
+       if ((isr & IA64_ISR_SP)
+           || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH))
+       {
+               /*
+                * This fault was due to a speculative load or lfetch.fault, 
set the "ed"
+                * bit in the psr to ensure forward progress.  (Target register 
will get a
+                * NaT for ld.s, lfetch will be canceled.)
+                */
+               ia64_psr(regs)->ed = 1;
+               return;
+       }
+
+       fault = vcpu_translate(current,address,is_data,&pteval,&itir);
+       if (fault == IA64_NO_FAULT)
+       {
+               // Successful translation: convert the guest pte and insert
+               // it into the TLB (2 = data, 1 = instruction).
+               pteval = translate_domain_pte(pteval,address,itir);
+               
vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f);
+               return;
+       }
+       else if (IS_VMM_ADDRESS(iip))
+       {
+               if (!ia64_done_with_exception(regs)) {
+                       // should never happen.  If it does, region 0 addr may
+                       // indicate a bad xen pointer
+                       printk("*** xen_handle_domain_access: exception table"
+                              " lookup failed, iip=%p, addr=%p, spinning...\n",
+                               iip,address);
+                       panic_domain(regs,"*** xen_handle_domain_access: 
exception table"
+                              " lookup failed, iip=%p, addr=%p, spinning...\n",
+                               iip,address);
+               }
+               return;
+       }
+
+       reflect_interruption(address, isr, 0, regs, fault);
+}
+
+void
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+           unsigned long iim, unsigned long itir, unsigned long arg5,
+           unsigned long arg6, unsigned long arg7, unsigned long stack)
+{
+       struct pt_regs *regs = (struct pt_regs *) &stack;
+       unsigned long code, error = isr;
+       char buf[128];
+       int result, sig;
+       static const char *reason[] = {
+               "IA-64 Illegal Operation fault",
+               "IA-64 Privileged Operation fault",
+               "IA-64 Privileged Register fault",
+               "IA-64 Reserved Register/Field fault",
+               "Disabled Instruction Set Transition fault",
+               "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", 
"Illegal Hazard fault",
+               "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", 
"Unknown fault 12",
+               "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+       };
+#if 0
+printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
+ vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
+#endif
+
+       if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == 
IA64_ISR_CODE_LFETCH)) {
+               /*
+                * This fault was due to lfetch.fault, set "ed" bit in the psr 
to cancel
+                * the lfetch.
+                */
+               ia64_psr(regs)->ed = 1;
+               printf("ia64_fault: handled lfetch.fault\n");
+               return;
+       }
+
+       switch (vector) {
+             case 24: /* General Exception */
+               code = (isr >> 4) & 0xf;
+               sprintf(buf, "General Exception: %s%s", reason[code],
+                       (code == 3) ? ((isr & (1UL << 37))
+                                      ? " (RSE access)" : " (data access)") : 
"");
+               if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+                       printk("%s[%d]: possible hazard @ ip=%016lx (pr = 
%016lx)\n",
+                              current->comm, current->pid, regs->cr_iip + 
ia64_psr(regs)->ri,
+                              regs->pr);
+# endif
+                       printf("ia64_fault: returning on hazard\n");
+                       return;
+               }
+               break;
+
+             case 25: /* Disabled FP-Register */
+               if (isr & 2) {
+                       //disabled_fph_fault(regs);
+                       //return;
+               }
+               sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+               break;
+
+             case 26: /* NaT Consumption */
+               if (user_mode(regs)) {
+                       void *addr;
+
+                       if (((isr >> 4) & 0xf) == 2) {
+                               /* NaT page consumption */
+                               //sig = SIGSEGV;
+                               //code = SEGV_ACCERR;
+                               addr = (void *) ifa;
+                       } else {
+                               /* register NaT consumption */
+                               //sig = SIGILL;
+                               //code = ILL_ILLOPN;
+                               addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
+                       }
+                       //siginfo.si_signo = sig;
+                       //siginfo.si_code = code;
+                       //siginfo.si_errno = 0;
+                       //siginfo.si_addr = addr;
+                       //siginfo.si_imm = vector;
+                       //siginfo.si_flags = __ISR_VALID;
+                       //siginfo.si_isr = isr;
+                       //force_sig_info(sig, &siginfo, current);
+                       //return;
+               } //else if (ia64_done_with_exception(regs))
+                       //return;
+               sprintf(buf, "NaT consumption");
+               break;
+
+             case 31: /* Unsupported Data Reference */
+               if (user_mode(regs)) {
+                       //siginfo.si_signo = SIGILL;
+                       //siginfo.si_code = ILL_ILLOPN;
+                       //siginfo.si_errno = 0;
+                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
+                       //siginfo.si_imm = vector;
+                       //siginfo.si_flags = __ISR_VALID;
+                       //siginfo.si_isr = isr;
+                       //force_sig_info(SIGILL, &siginfo, current);
+                       //return;
+               }
+               sprintf(buf, "Unsupported data reference");
+               break;
+
+             case 29: /* Debug */
+             case 35: /* Taken Branch Trap */
+             case 36: /* Single Step Trap */
+               //if (fsys_mode(current, regs)) {}
+               switch (vector) {
+                     case 29:
+                       //siginfo.si_code = TRAP_HWBKPT;
+#ifdef CONFIG_ITANIUM
+                       /*
+                        * Erratum 10 (IFA may contain incorrect address) now 
has
+                        * "NoFix" status.  There are no plans for fixing this.
+                        */
+                       if (ia64_psr(regs)->is == 0)
+                         ifa = regs->cr_iip;
+#endif
+                       break;
+                     case 35: ifa = 0; break;
+                     case 36: ifa = 0; break;
+                     //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
+                     //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+               }
+               //siginfo.si_signo = SIGTRAP;
+               //siginfo.si_errno = 0;
+               //siginfo.si_addr  = (void *) ifa;
+               //siginfo.si_imm   = 0;
+               //siginfo.si_flags = __ISR_VALID;
+               //siginfo.si_isr   = isr;
+               //force_sig_info(SIGTRAP, &siginfo, current);
+               //return;
+
+             case 32: /* fp fault */
+             case 33: /* fp trap */
+               //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
+               //if ((result < 0) || (current->thread.flags & 
IA64_THREAD_FPEMU_SIGFPE)) {
+                       //siginfo.si_signo = SIGFPE;
+                       //siginfo.si_errno = 0;
+                       //siginfo.si_code = FPE_FLTINV;
+                       //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
+                       //siginfo.si_flags = __ISR_VALID;
+                       //siginfo.si_isr = isr;
+                       //siginfo.si_imm = 0;
+                       //force_sig_info(SIGFPE, &siginfo, current);
+               //}
+               //return;
+               sprintf(buf, "FP fault/trap");
+               break;
+
+             case 34:
+               if (isr & 0x2) {
+                       /* Lower-Privilege Transfer Trap */
+                       /*
+                        * Just clear PSR.lp and then return immediately: all 
the
+                        * interesting work (e.g., signal delivery is done in 
the kernel
+                        * exit path).
+                        */
+                       //ia64_psr(regs)->lp = 0;
+                       //return;
+                       sprintf(buf, "Lower-Privilege Transfer trap");
+               } else {
+                       /* Unimplemented Instr. Address Trap */
+                       if (user_mode(regs)) {
+                               //siginfo.si_signo = SIGILL;
+                               //siginfo.si_code = ILL_BADIADDR;
+                               //siginfo.si_errno = 0;
+                               //siginfo.si_flags = 0;
+                               //siginfo.si_isr = 0;
+                               //siginfo.si_imm = 0;
+                               //siginfo.si_addr = (void *) (regs->cr_iip + 
ia64_psr(regs)->ri);
+                               //force_sig_info(SIGILL, &siginfo, current);
+                               //return;
+                       }
+                       sprintf(buf, "Unimplemented Instruction Address fault");
+               }
+               break;
+
+             case 45:
+               printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
+               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
+                      regs->cr_iip, ifa, isr);
+               //force_sig(SIGSEGV, current);
+               break;
+
+             case 46:
+               printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
+               printk(KERN_ERR "  iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 
0x%lx\n",
+                      regs->cr_iip, ifa, isr, iim);
+               //force_sig(SIGSEGV, current);
+               return;
+
+             case 47:
+               sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+               break;
+
+             default:
+               sprintf(buf, "Fault %lu", vector);
+               break;
+       }
+       //die_if_kernel(buf, regs, error);
+printk("ia64_fault: %s: reflecting\n",buf);
+reflect_interruption(ifa,isr,iim,regs,IA64_GENEX_VECTOR);
+//while(1);
+       //force_sig(SIGILL, current);
+}
+
+unsigned long running_on_sim = 0;
+
+void
+do_ssc(unsigned long ssc, struct pt_regs *regs)
+{
+       extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+       unsigned long arg0, arg1, arg2, arg3, retval;
+       char buf[2];
+/**/   static int last_fd, last_count; // FIXME FIXME FIXME
+/**/                                   // BROKEN FOR MULTIPLE DOMAINS & SMP
+/**/   struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat;
+       extern unsigned long vcpu_verbose, privop_trace;
+
+       arg0 = vcpu_get_gr(current,32);
+       switch(ssc) {
+           case SSC_PUTCHAR:
+               buf[0] = arg0;
+               buf[1] = '\0';
+               printf(buf);
+               break;
+           case SSC_GETCHAR:
+               retval = ia64_ssc(0,0,0,0,ssc);
+               vcpu_set_gr(current,8,retval);
+               break;
+           case SSC_WAIT_COMPLETION:
+               if (arg0) {     // metaphysical address
+
+                       arg0 = translate_domain_mpaddr(arg0);
+/**/                   stat = (struct ssc_disk_stat *)__va(arg0);
+///**/                 if (stat->fd == last_fd) stat->count = last_count;
+/**/                   stat->count = last_count;
+//if (last_count >= PAGE_SIZE) printf("ssc_wait: 
stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count);
+///**/                 retval = ia64_ssc(arg0,0,0,0,ssc);
+/**/                   retval = 0;
+               }
+               else retval = -1L;
+               vcpu_set_gr(current,8,retval);
+               break;
+           case SSC_OPEN:
+               arg1 = vcpu_get_gr(current,33); // access rights
+if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware.  
(ignoring...)\n"); arg0 = 0; }
+               if (arg0) {     // metaphysical address
+                       arg0 = translate_domain_mpaddr(arg0);
+                       retval = ia64_ssc(arg0,arg1,0,0,ssc);
+               }
+               else retval = -1L;
+               vcpu_set_gr(current,8,retval);
+               break;
+           case SSC_WRITE:
+           case SSC_READ:
+//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n");
+               arg1 = vcpu_get_gr(current,33);
+               arg2 = vcpu_get_gr(current,34);
+               arg3 = vcpu_get_gr(current,35);
+               if (arg2) {     // metaphysical address of descriptor
+                       struct ssc_disk_req *req;
+                       unsigned long mpaddr, paddr;
+                       long len;
+
+                       arg2 = translate_domain_mpaddr(arg2);
+                       req = (struct disk_req *)__va(arg2);
+                       req->len &= 0xffffffffL;        // avoid strange bug
+                       len = req->len;
+/**/                   last_fd = arg1;
+/**/                   last_count = len;
+                       mpaddr = req->addr;
+//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx 
",last_fd,mpaddr,len);
+                       retval = 0;
+                       if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & 
PAGE_MASK)) {
+                               // do partial page first
+                               req->addr = translate_domain_mpaddr(mpaddr);
+                               req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK);
+                               len -= req->len; mpaddr += req->len;
+                               retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+                               arg3 += req->len; // file offset
+/**/                           last_stat.fd = last_fd;
+/**/                           
(void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
+//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x 
",req->addr,req->len,retval);
+                       }
+                       if (retval >= 0) while (len > 0) {
+                               req->addr = translate_domain_mpaddr(mpaddr);
+                               req->len = (len > PAGE_SIZE) ? PAGE_SIZE : len;
+                               len -= PAGE_SIZE; mpaddr += PAGE_SIZE;
+                               retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+                               arg3 += req->len; // file offset
+// TEMP REMOVED AGAIN                          arg3 += req->len; // file offset
+/**/                           last_stat.fd = last_fd;
+/**/                           
(void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
+//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x 
",req->addr,req->len,retval);
+                       }
+                       // set it back to the original value
+                       req->len = last_count;
+               }
+               else retval = -1L;
+               vcpu_set_gr(current,8,retval);
+//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval);
+               break;
+           case SSC_CONNECT_INTERRUPT:
+               arg1 = vcpu_get_gr(current,33);
+               arg2 = vcpu_get_gr(current,34);
+               arg3 = vcpu_get_gr(current,35);
+               if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not 
implemented on hardware.  (ignoring...)\n"); break; }
+               (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+               break;
+           case SSC_NETDEV_PROBE:
+               vcpu_set_gr(current,8,-1L);
+               break;
+           default:
+               printf("ia64_handle_break: bad ssc code %lx, iip=%p, b0=%p... 
spinning\n",ssc,regs->cr_iip,regs->b0);
+               while(1);
+               break;
+       }
+       vcpu_increment_iip(current);
+}
+
+int first_break = 1;
+
+void
+ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, 
unsigned long iim)
+{
+       struct domain *d = (struct domain *) current->domain;
+       struct vcpu *v = (struct domain *) current;
+       extern unsigned long running_on_sim;
+
+       if (first_break) {
+               if (platform_is_hp_ski()) running_on_sim = 1;
+               else running_on_sim = 0;
+               first_break = 0;
+       }
+       if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
+               if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
+               else do_ssc(vcpu_get_gr(current,36), regs);
+       }
+       else if (iim == d->arch.breakimm) {
+               if (ia64_hypercall(regs))
+                       vcpu_increment_iip(current);
+       }
+       else if (!PSCB(v,interrupt_collection_enabled)) {
+               if (ia64_hyperprivop(iim,regs))
+                       vcpu_increment_iip(current);
+       }
+       else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR);
+}
+
+void
+ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long 
isr, unsigned long itir)
+{
+       IA64FAULT vector;
+       struct domain *d = current->domain;
+       struct vcpu *v = current;
+       // FIXME: no need to pass itir in to this routine as we need to
+       // compute the virtual itir anyway (based on domain's RR.ps)
+       // AND ACTUALLY reflect_interruption doesn't use it anyway!
+       itir = vcpu_get_itir_on_fault(v,ifa);
+       vector = priv_emulate(current,regs,isr);
+       if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) {
+               reflect_interruption(ifa,isr,itir,regs,vector);
+       }
+}
+
+#define INTR_TYPE_MAX  10
+UINT64 int_counts[INTR_TYPE_MAX];
+
+void
+ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long 
isr, unsigned long iim, unsigned long vector)
+{
+       struct domain *d = (struct domain *) current->domain;
+       struct vcpu *v = (struct domain *) current;
+       unsigned long check_lazy_cover = 0;
+       unsigned long psr = regs->cr_ipsr;
+       unsigned long itir = vcpu_get_itir_on_fault(v,ifa);
+
+       if (!(psr & IA64_PSR_CPL)) {
+               printk("ia64_handle_reflection: reflecting with priv=0!!\n");
+       }
+       // FIXME: no need to pass itir in to this routine as we need to
+       // compute the virtual itir anyway (based on domain's RR.ps)
+       // AND ACTUALLY reflect_interruption doesn't use it anyway!
+       itir = vcpu_get_itir_on_fault(v,ifa);
+       switch(vector) {
+           case 8:
+               vector = IA64_DIRTY_BIT_VECTOR; break;
+           case 9:
+               vector = IA64_INST_ACCESS_BIT_VECTOR; break;
+           case 10:
+               check_lazy_cover = 1;
+               vector = IA64_DATA_ACCESS_BIT_VECTOR; break;
+           case 20:
+               check_lazy_cover = 1;
+               vector = IA64_PAGE_NOT_PRESENT_VECTOR; break;
+           case 22:
+               vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break;
+           case 23:
+               check_lazy_cover = 1;
+               vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break;
+           case 25:
+               vector = IA64_DISABLED_FPREG_VECTOR;
+               break;
+           case 26:
+printf("*** NaT fault... attempting to handle as privop\n");
+printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr);
+               vector = priv_emulate(v,regs,isr);
+               if (vector == IA64_NO_FAULT) {
+printf("*** Handled privop masquerading as NaT fault\n");
+                       return;
+               }
+               vector = IA64_NAT_CONSUMPTION_VECTOR; break;
+           case 27:
+//printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc());
+               itir = iim;
+               vector = IA64_SPECULATION_VECTOR; break;
+           case 30:
+               // FIXME: Should we handle unaligned refs in Xen??
+               vector = IA64_UNALIGNED_REF_VECTOR; break;
+           default:
+               printf("ia64_handle_reflection: unhandled 
vector=0x%lx\n",vector);
+               while(vector);
+               return;
+       }
+       if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, 
isr, regs)) return;
+       reflect_interruption(ifa,isr,itir,regs,vector);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/regionreg.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/regionreg.c     Thu Sep  1 18:46:28 2005
@@ -0,0 +1,376 @@
+/*
+ * Region register and region id management
+ *
+ * Copyright (C) 2001-2004 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *     Bret Mckee (bret.mckee@xxxxxx)
+ *
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/regionreg.h>
+#include <asm/vhpt.h>
+#include <asm/vcpu.h>
+extern void ia64_new_rr7(unsigned long rid,void *shared_info, void 
*shared_arch_info);
+
+
+#define        IA64_MIN_IMPL_RID_BITS  (IA64_MIN_IMPL_RID_MSB+1)
+#define        IA64_MAX_IMPL_RID_BITS  24
+
+#define MIN_RIDS       (1 << IA64_MIN_IMPL_RID_BITS)
+#define        MIN_RID_MAX     (MIN_RIDS - 1)
+#define        MIN_RID_MASK    (MIN_RIDS - 1)
+#define        MAX_RIDS        (1 << (IA64_MAX_IMPL_RID_BITS))
+#define        MAX_RID         (MAX_RIDS - 1)
+#define        MAX_RID_BLOCKS  (1 << 
(IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
+#define RIDS_PER_RIDBLOCK MIN_RIDS
+
+#if 0
+// following already defined in include/asm-ia64/gcc_intrin.h
+// it should probably be ifdef'd out from there to ensure all region
+// register usage is encapsulated in this file
+static inline unsigned long
+ia64_get_rr (unsigned long rr)
+{
+           unsigned long r;
+           __asm__ __volatile__ (";;mov %0=rr[%1];;":"=r"(r):"r"(rr):"memory");
+           return r;
+}
+
+static inline void
+ia64_set_rr (unsigned long rr, unsigned long rrv)
+{
+           __asm__ __volatile__ (";;mov 
rr[%0]=%1;;"::"r"(rr),"r"(rrv):"memory");
+}
+#endif
+
+// use this to allocate a rid out of the "Xen reserved rid block"
+unsigned long allocate_reserved_rid(void)
+{
+       static unsigned long currentrid = XEN_DEFAULT_RID;
+       unsigned long t = currentrid;
+
+       unsigned long max = RIDS_PER_RIDBLOCK;
+
+       if (++currentrid >= max) return(-1UL);
+       return t;
+}
+
+
+// returns -1 if none available
+unsigned long allocate_metaphysical_rr(void)
+{
+       ia64_rr rrv;
+
+       rrv.rid = allocate_reserved_rid();
+       rrv.ps = PAGE_SHIFT;
+       rrv.ve = 0;
+       return rrv.rrval;
+}
+
+int deallocate_metaphysical_rid(unsigned long rid)
+{
+       // fix this when the increment allocation mechanism is fixed.
+       return 1;
+}
+
+/*************************************
+  Region Block setup/management
+*************************************/
+
+static int implemented_rid_bits = 0;
+static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 };
+
+void get_impl_rid_bits(void)
+{
+       // FIXME (call PAL)
+//#ifdef CONFIG_MCKINLEY
+       implemented_rid_bits = IA64_MAX_IMPL_RID_BITS;
+//#else
+//#error "rid ranges won't work on Merced"
+//#endif
+       if (implemented_rid_bits <= IA64_MIN_IMPL_RID_BITS ||
+           implemented_rid_bits > IA64_MAX_IMPL_RID_BITS)
+               BUG();
+}
+
+
+/*
+ * Allocate a power-of-two-sized chunk of region id space -- one or more
+ *  "rid blocks"
+ */
+int allocate_rid_range(struct domain *d, unsigned long ridbits)
+{
+       int i, j, n_rid_blocks;
+
+       if (implemented_rid_bits == 0) get_impl_rid_bits();
+       
+       if (ridbits >= IA64_MAX_IMPL_RID_BITS)
+       ridbits = IA64_MAX_IMPL_RID_BITS - 1;
+       
+       if (ridbits < IA64_MIN_IMPL_RID_BITS)
+       ridbits = IA64_MIN_IMPL_RID_BITS;
+
+       // convert to rid_blocks and find one
+       n_rid_blocks = ridbits - IA64_MIN_IMPL_RID_BITS + 1;
+       
+       // skip over block 0, reserved for "meta-physical mappings (and Xen)"
+       for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) {
+               if (ridblock_owner[i] == NULL) {
+                       for (j = i; j < i + n_rid_blocks; ++j) {
+                               if (ridblock_owner[j]) break;
+                       }
+                       if (ridblock_owner[j] == NULL) break;
+               }
+       }
+       
+       if (i >= MAX_RID_BLOCKS) return 0;
+       
+       // found an unused block:
+       //   (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits)
+       // mark this block as owned
+       for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d;
+       
+       // setup domain struct
+       d->arch.rid_bits = ridbits;
+       d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS; d->arch.ending_rid 
= (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
+printf("###allocating rid_range, domain %p: starting_rid=%lx, 
ending_rid=%lx\n",
+d,d->arch.starting_rid, d->arch.ending_rid);
+       
+       return 1;
+}
+
+
+int deallocate_rid_range(struct domain *d)
+{
+       int i;
+       int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS;
+       int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS;
+
+       return 1;  // KLUDGE ALERT
+       //
+       // not all domains will have allocated RIDs (physical mode loaders for 
instance)
+       //
+       if (d->arch.rid_bits == 0) return 1;
+
+#ifdef DEBUG
+       for (i = rid_block_start; i < rid_block_end; ++i) {
+               ASSERT(ridblock_owner[i] == d);
+           }
+#endif
+       
+       for (i = rid_block_start; i < rid_block_end; ++i)
+       ridblock_owner[i] = NULL;
+       
+       d->arch.rid_bits = 0;
+       d->arch.starting_rid = 0;
+       d->arch.ending_rid = 0;
+       return 1;
+}
+
+
+static inline void
+set_rr_no_srlz(unsigned long rr, unsigned long rrval)
+{
+       ia64_set_rr(rr, vmMangleRID(rrval));
+}
+
+void
+set_rr(unsigned long rr, unsigned long rrval)
+{
+       ia64_set_rr(rr, vmMangleRID(rrval));
+       ia64_srlz_d();
+}
+
+unsigned long
+get_rr(unsigned long rr)
+{
+       return vmUnmangleRID(ia64_get_rr(rr));
+}
+
+static inline int validate_page_size(unsigned long ps)
+{
+       switch(ps) {
+           case 12: case 13: case 14: case 16: case 18:
+           case 20: case 22: case 24: case 26: case 28:
+               return 1;
+           default:
+               return 0;
+       }
+}
+
+// validates and changes a single region register
+// in the currently executing domain
+// Passing a value of -1 is a (successful) no-op
+// NOTE: DOES NOT SET VCPU's rrs[x] value!!
+int set_one_rr(unsigned long rr, unsigned long val)
+{
+       struct vcpu *v = current;
+       unsigned long rreg = REGION_NUMBER(rr);
+       ia64_rr rrv, newrrv, memrrv;
+       unsigned long newrid;
+
+       if (val == -1) return 1;
+
+       rrv.rrval = val;
+       newrrv.rrval = 0;
+       newrid = v->arch.starting_rid + rrv.rid;
+
+       if (newrid > v->arch.ending_rid) {
+               printk("can't set rr%d to %lx, starting_rid=%lx,"
+                       "ending_rid=%lx, val=%lx\n", rreg, newrid,
+                       v->arch.starting_rid,v->arch.ending_rid,val);
+               return 0;
+       }
+
+#ifdef CONFIG_VTI
+       memrrv.rrval = rrv.rrval;
+       if (rreg == 7) {
+               newrrv.rid = newrid;
+               newrrv.ve = VHPT_ENABLED_REGION_7;
+               newrrv.ps = IA64_GRANULE_SHIFT;
+               ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
+                               v->vcpu_info->arch.privregs);
+       }
+       else {
+               newrrv.rid = newrid;
+               // FIXME? region 6 needs to be uncached for EFI to work
+               if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7;
+               else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6;
+               newrrv.ps = PAGE_SHIFT;
+               if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
+               set_rr(rr,newrrv.rrval);
+       }
+#else
+       memrrv.rrval = rrv.rrval;
+       newrrv.rid = newrid;
+       newrrv.ve = 1;  // VHPT now enabled for region 7!!
+       newrrv.ps = PAGE_SHIFT;
+       if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
+       if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
+                               v->vcpu_info->arch.privregs);
+       else set_rr(rr,newrrv.rrval);
+#endif
+       return 1;
+}
+
+// set rr0 to the passed rid (for metaphysical mode so don't use domain offset
+int set_metaphysical_rr0(void)
+{
+       struct vcpu *v = current;
+       ia64_rr rrv;
+       
+//     rrv.ve = 1;     FIXME: TURN ME BACK ON WHEN VHPT IS WORKING
+       set_rr(0,v->arch.metaphysical_rr0);
+}
+
+// validates/changes region registers 0-6 in the currently executing domain
+// Note that this is the one and only SP API (other than executing a privop)
+// for a domain to use to change region registers
+int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3,
+                    u64 rr4, u64 rr5, u64 rr6, u64 rr7)
+{
+       if (!set_one_rr(0x0000000000000000L, rr0)) return 0;
+       if (!set_one_rr(0x2000000000000000L, rr1)) return 0;
+       if (!set_one_rr(0x4000000000000000L, rr2)) return 0;
+       if (!set_one_rr(0x6000000000000000L, rr3)) return 0;
+       if (!set_one_rr(0x8000000000000000L, rr4)) return 0;
+       if (!set_one_rr(0xa000000000000000L, rr5)) return 0;
+       if (!set_one_rr(0xc000000000000000L, rr6)) return 0;
+       if (!set_one_rr(0xe000000000000000L, rr7)) return 0;
+       return 1;
+}
+
+void init_all_rr(struct vcpu *v)
+{
+       ia64_rr rrv;
+
+       rrv.rrval = 0;
+       rrv.rrval = v->domain->arch.metaphysical_rr0;
+       rrv.ps = PAGE_SHIFT;
+       rrv.ve = 1;
+if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); }
+       VCPU(v,rrs[0]) = -1;
+       VCPU(v,rrs[1]) = rrv.rrval;
+       VCPU(v,rrs[2]) = rrv.rrval;
+       VCPU(v,rrs[3]) = rrv.rrval;
+       VCPU(v,rrs[4]) = rrv.rrval;
+       VCPU(v,rrs[5]) = rrv.rrval;
+       rrv.ve = 0; 
+       VCPU(v,rrs[6]) = rrv.rrval;
+//     v->shared_info->arch.rrs[7] = rrv.rrval;
+}
+
+
+/* XEN/ia64 INTERNAL ROUTINES */
+
+unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval)
+{
+       ia64_rr rrv;
+           
+       rrv.rrval = rrval;
+       rrv.rid += v->arch.starting_rid;
+       return rrv.rrval;
+}
+
+unsigned long
+virtualize_rid(struct vcpu *v, unsigned long rrval)
+{
+       ia64_rr rrv;
+           
+       rrv.rrval = rrval;
+       rrv.rid -= v->arch.starting_rid;
+       return rrv.rrval;
+}
+
+// loads a thread's region register (0-6) state into
+// the real physical region registers.  Returns the
+// (possibly mangled) bits to store into rr7
+// iff it is different than what is currently in physical
+// rr7 (because we have to go to assembly and physical mode
+// to change rr7).  If no change to rr7 is required, returns 0.
+//
+unsigned long load_region_regs(struct vcpu *v)
+{
+       unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7;
+       // TODO: These probably should be validated
+       unsigned long bad = 0;
+
+       if (VCPU(v,metaphysical_mode)) {
+               ia64_rr rrv;
+
+               rrv.rrval = 0;
+               rrv.rid = v->domain->arch.metaphysical_rr0;
+               rrv.ps = PAGE_SHIFT;
+               rrv.ve = 1;
+               rr0 = rrv.rrval;
+               set_rr_no_srlz(0x0000000000000000L, rr0);
+               ia64_srlz_d();
+       }
+       else {
+               rr0 =  VCPU(v,rrs[0]);
+               if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1;
+       }
+       rr1 =  VCPU(v,rrs[1]);
+       rr2 =  VCPU(v,rrs[2]);
+       rr3 =  VCPU(v,rrs[3]);
+       rr4 =  VCPU(v,rrs[4]);
+       rr5 =  VCPU(v,rrs[5]);
+       rr6 =  VCPU(v,rrs[6]);
+       rr7 =  VCPU(v,rrs[7]);
+       if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2;
+       if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4;
+       if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8;
+       if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10;
+       if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20;
+       if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40;
+       if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80;
+       if (bad) {
+               panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad);
+       }
+       return 0;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/sn_console.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/sn_console.c    Thu Sep  1 18:46:28 2005
@@ -0,0 +1,84 @@
+/*
+ * C-Brick Serial Port (and console) driver for SGI Altix machines.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <asm/acpi.h>
+#include <asm/sn/sn_sal.h>
+#include <xen/serial.h>
+
+void sn_putc(struct serial_port *, char);
+
+static struct uart_driver sn_sal_console = {
+       .putc = sn_putc,
+};
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * pulled from arch/ia64/sn/kernel/setup.c
+ */
+static void __init early_sn_setup(void)
+{
+       efi_system_table_t *efi_systab;
+       efi_config_table_t *config_tables;
+       struct ia64_sal_systab *sal_systab;
+       struct ia64_sal_desc_entry_point *ep;
+       char *p;
+       int i, j;
+
+       /*
+        * Parse enough of the SAL tables to locate the SAL entry point. Since
console
+        * IO on SN2 is done via SAL calls, early_printk won't work without 
this.
+        *
+        * This code duplicates some of the ACPI table parsing that is in efi.c 
& sal.c.
+        * Any changes to those files may have to be made here as well.
+        */
+       efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+       config_tables = __va(efi_systab->tables);
+       for (i = 0; i < efi_systab->nr_tables; i++) {
+               if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+                   0) {
+                       sal_systab = __va(config_tables[i].table);
+                       p = (char *)(sal_systab + 1);
+                       for (j = 0; j < sal_systab->entry_count; j++) {
+                               if (*p == SAL_DESC_ENTRY_POINT) {
+                                       ep = (struct ia64_sal_desc_entry_point
+                                             *)p;
+                                       ia64_sal_handler_init(__va
+                                                             (ep->sal_proc),
+                                                             __va(ep->gp));
+                                       return;
+                               }
+                               p += SAL_DESC_SIZE(*p);
+                       }
+               }
+       }
+       /* Uh-oh, SAL not available?? */
+       printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+/**
+ * sn_serial_console_early_setup - Sets up early console output support
+ *
+ * pulled from drivers/serial/sn_console.c
+ */
+int __init sn_serial_console_early_setup(void)
+{
+       if (strcmp("sn2",acpi_get_sysname()))
+               return -1;
+
+       early_sn_setup();       /* Find SAL entry points */
+       serial_register_uart(0, &sn_sal_console, NULL);
+
+       return 0;
+}
+
+/*
+ * sn_putc - Send a character to the console, polled or interrupt mode
+ */
+void sn_putc(struct serial_port *port, char c)
+{
+       return ia64_sn_console_putc(c);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vcpu.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/vcpu.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,1843 @@
+/*
+ * Virtualized CPU functions
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/vmx_vcpu.h>
+
+typedef        union {
+       struct ia64_psr ia64_psr;
+       unsigned long i64;
+} PSR;
+
+//typedef      struct pt_regs  REGS;
+//typedef struct domain VCPU;
+
+// this def for vcpu_regs won't work if kernel stack is present
+#define        vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs)
+#define        PSCB(x,y)       VCPU(x,y)
+#define        PSCBX(x,y)      x->arch.y
+
+#define        TRUE    1
+#define        FALSE   0
+#define        IA64_PTA_SZ_BIT         2
+#define        IA64_PTA_VF_BIT         8
+#define        IA64_PTA_BASE_BIT       15
+#define        IA64_PTA_LFMT           (1UL << IA64_PTA_VF_BIT)
+#define        IA64_PTA_SZ(x)  (x##UL << IA64_PTA_SZ_BIT)
+
+#define STATIC
+
+#ifdef PRIVOP_ADDR_COUNT
+struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = {
+       { "=ifa", { 0 }, { 0 }, 0 },
+       { "thash", { 0 }, { 0 }, 0 },
+       0
+};
+extern void privop_count_addr(unsigned long addr, int inst);
+#define        PRIVOP_COUNT_ADDR(regs,inst) 
privop_count_addr(regs->cr_iip,inst)
+#else
+#define        PRIVOP_COUNT_ADDR(x,y) do {} while (0)
+#endif
+
+unsigned long dtlb_translate_count = 0;
+unsigned long tr_translate_count = 0;
+unsigned long phys_translate_count = 0;
+
+unsigned long vcpu_verbose = 0;
+#define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
+
+extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa);
+extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa);
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+
+UINT64
+vcpu_get_gr(VCPU *vcpu, unsigned reg)
+{
+       REGS *regs = vcpu_regs(vcpu);
+       UINT64 val;
+
+       if (!reg) return 0;
+       getreg(reg,&val,0,regs);        // FIXME: handle NATs later
+       return val;
+}
+
+// returns:
+//   IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+//   IA64_NO_FAULT otherwise
+IA64FAULT
+vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value)
+{
+       REGS *regs = vcpu_regs(vcpu);
+       long sof = (regs->cr_ifs) & 0x7f;
+
+       if (!reg) return IA64_ILLOP_FAULT;
+       if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+       setreg(reg,value,0,regs);       // FIXME: handle NATs later
+       return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU privileged application register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       if (reg == 44) return (vcpu_set_itc(vcpu,val));
+       else if (reg == 27) return (IA64_ILLOP_FAULT);
+       else if (reg == 24)
+           printf("warning: setting ar.eflg is a no-op; no IA-32 support\n");
+       else if (reg > 7) return (IA64_ILLOP_FAULT);
+       else PSCB(vcpu,krs[reg]) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
+{
+       if (reg == 24)
+           printf("warning: getting ar.eflg is a no-op; no IA-32 support\n");
+       else if (reg > 7) return (IA64_ILLOP_FAULT);
+       else *val = PSCB(vcpu,krs[reg]);
+       return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU processor status register access routines
+**************************************************************************/
+
+void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode)
+{
+       /* only do something if mode changes */
+       if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) {
+               if (newmode) set_metaphysical_rr0();
+               else if (PSCB(vcpu,rrs[0]) != -1)
+                       set_one_rr(0, PSCB(vcpu,rrs[0]));
+               PSCB(vcpu,metaphysical_mode) = newmode;
+       }
+}
+
+IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu)
+{
+       vcpu_set_metaphysical_mode(vcpu,TRUE);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+       struct ia64_psr psr, imm, *ipsr;
+       REGS *regs = vcpu_regs(vcpu);
+
+       //PRIVOP_COUNT_ADDR(regs,_RSM);
+       // TODO: All of these bits need to be virtualized
+       // TODO: Only allowed for current vcpu
+       __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+       ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+       imm = *(struct ia64_psr *)&imm24;
+       // interrupt flag
+       if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0;
+       if (imm.ic)  PSCB(vcpu,interrupt_collection_enabled) = 0;
+       // interrupt collection flag
+       //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0;
+       // just handle psr.up and psr.pp for now
+       if (imm24 & ~(IA64_PSR_BE | IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP
+               | IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT
+               | IA64_PSR_DFL | IA64_PSR_DFH))
+                       return (IA64_ILLOP_FAULT);
+       if (imm.dfh) ipsr->dfh = 0;
+       if (imm.dfl) ipsr->dfl = 0;
+       if (imm.pp) { ipsr->pp = 0; psr.pp = 0; }
+       if (imm.up) { ipsr->up = 0; psr.up = 0; }
+       if (imm.sp) { ipsr->sp = 0; psr.sp = 0; }
+       if (imm.be) ipsr->be = 0;
+       if (imm.dt) vcpu_set_metaphysical_mode(vcpu,TRUE);
+       __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+       return IA64_NO_FAULT;
+}
+
+extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu);
+#define SPURIOUS_VECTOR 0xf
+
+IA64FAULT vcpu_set_psr_dt(VCPU *vcpu)
+{
+       vcpu_set_metaphysical_mode(vcpu,FALSE);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_i(VCPU *vcpu)
+{
+       PSCB(vcpu,interrupt_delivery_enabled) = 1;
+       PSCB(vcpu,interrupt_collection_enabled) = 1;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+       struct ia64_psr psr, imm, *ipsr;
+       REGS *regs = vcpu_regs(vcpu);
+       UINT64 mask, enabling_interrupts = 0;
+
+       //PRIVOP_COUNT_ADDR(regs,_SSM);
+       // TODO: All of these bits need to be virtualized
+       __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+       imm = *(struct ia64_psr *)&imm24;
+       ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+       // just handle psr.sp,pp and psr.i,ic (and user mask) for now
+       mask = IA64_PSR_PP|IA64_PSR_SP|IA64_PSR_I|IA64_PSR_IC|IA64_PSR_UM |
+               IA64_PSR_DT|IA64_PSR_DFL|IA64_PSR_DFH;
+       if (imm24 & ~mask) return (IA64_ILLOP_FAULT);
+       if (imm.dfh) ipsr->dfh = 1;
+       if (imm.dfl) ipsr->dfl = 1;
+       if (imm.pp) { ipsr->pp = 1; psr.pp = 1; }
+       if (imm.sp) { ipsr->sp = 1; psr.sp = 1; }
+       if (imm.i) {
+               if (!PSCB(vcpu,interrupt_delivery_enabled)) {
+//printf("vcpu_set_psr_sm: psr.ic 0->1 ");
+                       enabling_interrupts = 1;
+               }
+               PSCB(vcpu,interrupt_delivery_enabled) = 1;
+       }
+       if (imm.ic)  PSCB(vcpu,interrupt_collection_enabled) = 1;
+       // TODO: do this faster
+       if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
+       if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
+       if (imm.ac) { ipsr->ac = 1; psr.ac = 1; }
+       if (imm.up) { ipsr->up = 1; psr.up = 1; }
+       if (imm.be) {
+               printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE);
+       __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+#if 0 // now done with deliver_pending_interrupts
+       if (enabling_interrupts) {
+               if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) {
+//printf("with interrupts pending\n");
+                       return IA64_EXTINT_VECTOR;
+               }
+//else printf("but nothing pending\n");
+       }
+#endif
+       if (enabling_interrupts &&
+               vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+                       PSCB(vcpu,pending_interruption) = 1;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+       struct ia64_psr psr, newpsr, *ipsr;
+       REGS *regs = vcpu_regs(vcpu);
+       UINT64 enabling_interrupts = 0;
+
+       // TODO: All of these bits need to be virtualized
+       __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+       newpsr = *(struct ia64_psr *)&val;
+       ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+       // just handle psr.up and psr.pp for now
+       //if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP)) return 
(IA64_ILLOP_FAULT);
+       // however trying to set other bits can't be an error as it is in ssm
+       if (newpsr.dfh) ipsr->dfh = 1;
+       if (newpsr.dfl) ipsr->dfl = 1;
+       if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; }
+       if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
+       if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; }
+       if (newpsr.i) {
+               if (!PSCB(vcpu,interrupt_delivery_enabled))
+                       enabling_interrupts = 1;
+               PSCB(vcpu,interrupt_delivery_enabled) = 1;
+       }
+       if (newpsr.ic)  PSCB(vcpu,interrupt_collection_enabled) = 1;
+       if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
+       if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
+       if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; }
+       if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
+       if (newpsr.dt && newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE);
+       else vcpu_set_metaphysical_mode(vcpu,TRUE);
+       if (newpsr.be) {
+               printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       //__asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+#if 0 // now done with deliver_pending_interrupts
+       if (enabling_interrupts) {
+               if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+                       return IA64_EXTINT_VECTOR;
+       }
+#endif
+       if (enabling_interrupts &&
+               vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+                       PSCB(vcpu,pending_interruption) = 1;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval)
+{
+       UINT64 psr;
+       struct ia64_psr newpsr;
+
+       // TODO: This needs to return a "filtered" view of
+       // the psr, not the actual psr.  Probably the psr needs
+       // to be a field in regs (in addition to ipsr).
+       __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+       newpsr = *(struct ia64_psr *)&psr;
+       if (newpsr.cpl == 2) newpsr.cpl = 0;
+       if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1;
+       else newpsr.i = 0;
+       if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1;
+       else newpsr.ic = 0;
+       *pval = *(unsigned long *)&newpsr;
+       return IA64_NO_FAULT;
+}
+
+BOOLEAN vcpu_get_psr_ic(VCPU *vcpu)
+{
+       return !!PSCB(vcpu,interrupt_collection_enabled);
+}
+
+BOOLEAN vcpu_get_psr_i(VCPU *vcpu)
+{
+       return !!PSCB(vcpu,interrupt_delivery_enabled);
+}
+
+UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
+{
+       UINT64 dcr = PSCBX(vcpu,dcr);
+       PSR psr = {0};
+
+       //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr);
+       psr.i64 = prevpsr;
+       psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
+       psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1;
+       psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled);
+       psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled);
+       psr.ia64_psr.bn = PSCB(vcpu,banknum);
+       psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1;
+       if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! fool domain
+       // psr.pk = 1;
+       //printf("returns 0x%016lx...",psr.i64);
+       return psr.i64;
+}
+
+/**************************************************************************
+ VCPU control register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval)
+{
+extern unsigned long privop_trace;
+//privop_trace=0;
+//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip));
+       // Reads of cr.dcr on Xen always have the sign bit set, so
+       // a domain can differentiate whether it is running on SP or not
+       *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCBX(vcpu,iva) & ~0x7fffL;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,pta);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval)
+{
+       //REGS *regs = vcpu_regs(vcpu);
+       //*pval = regs->cr_ipsr;
+       *pval = PSCB(vcpu,ipsr);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,isr);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval)
+{
+       //REGS *regs = vcpu_regs(vcpu);
+       //*pval = regs->cr_iip;
+       *pval = PSCB(vcpu,iip);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval)
+{
+       UINT64 val = PSCB(vcpu,ifa);
+       REGS *regs = vcpu_regs(vcpu);
+       PRIVOP_COUNT_ADDR(regs,_GET_IFA);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr)
+{
+       ia64_rr rr;
+
+       rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+       return(rr.ps);
+}
+
+unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
+{
+       ia64_rr rr;
+
+       rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+       return(rr.rid);
+}
+
+unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa)
+{
+       ia64_rr rr;
+
+       rr.rrval = 0;
+       rr.ps = vcpu_get_rr_ps(vcpu,ifa);
+       rr.rid = vcpu_get_rr_rid(vcpu,ifa);
+       return (rr.rrval);
+}
+
+
+IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
+{
+       UINT64 val = PSCB(vcpu,itir);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
+{
+       UINT64 val = PSCB(vcpu,iipa);
+       // SP entry code does not save iipa yet nor does it get
+       //  properly delivered in the pscb
+       printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
+{
+       //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs;
+       //*pval = PSCB(vcpu,regs).cr_ifs;
+       *pval = PSCB(vcpu,ifs);
+       PSCB(vcpu,incomplete_regframe) = 0;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval)
+{
+       UINT64 val = PSCB(vcpu,iim);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval)
+{
+       //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval);
+       UINT64 val = PSCB(vcpu,iha);
+       REGS *regs = vcpu_regs(vcpu);
+       PRIVOP_COUNT_ADDR(regs,_THASH);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+       // Reads of cr.dcr on SP always have the sign bit set, so
+       // a domain can differentiate whether it is running on SP or not
+       // Thus, writes of DCR should ignore the sign bit
+//verbose("vcpu_set_dcr: called\n");
+       PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
+{
+       PSCBX(vcpu,iva) = val & ~0x7fffL;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val)
+{
+       if (val & IA64_PTA_LFMT) {
+               printf("*** No support for VHPT long format yet!!\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT;
+       if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT;
+       PSCB(vcpu,pta) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,ipsr) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,isr) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,iip) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_increment_iip(VCPU *vcpu)
+{
+       REGS *regs = vcpu_regs(vcpu);
+       struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+       if (ipsr->ri == 2) { ipsr->ri=0; regs->cr_iip += 16; }
+       else ipsr->ri++;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,ifa) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,itir) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val)
+{
+       // SP entry code does not save iipa yet nor does it get
+       //  properly delivered in the pscb
+       printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
+       PSCB(vcpu,iipa) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val)
+{
+       //REGS *regs = vcpu_regs(vcpu);
+       PSCB(vcpu,ifs) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,iim) = val;
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val)
+{
+       PSCB(vcpu,iha) = val;
+       return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU interrupt control register access routines
+**************************************************************************/
+
+void vcpu_pend_unspecified_interrupt(VCPU *vcpu)
+{
+       PSCB(vcpu,pending_interruption) = 1;
+}
+
+void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+{
+       if (vector & ~0xff) {
+               printf("vcpu_pend_interrupt: bad vector\n");
+               return;
+       }
+#ifdef CONFIG_VTI
+    if ( VMX_DOMAIN(vcpu) ) {
+           set_bit(vector,VPD_CR(vcpu,irr));
+    } else
+#endif // CONFIG_VTI
+    {
+       /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */
+       if (test_bit(vector,PSCBX(vcpu,irr))) {
+//printf("vcpu_pend_interrupt: overrun\n");
+       }
+       set_bit(vector,PSCBX(vcpu,irr));
+       PSCB(vcpu,pending_interruption) = 1;
+    }
+
+#if 0
+    /* Keir: I think you should unblock when an interrupt is pending. */
+    {
+        int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
+        vcpu_unblock(vcpu);
+        if ( running )
+            smp_send_event_check_cpu(vcpu->processor);
+    }
+#endif
+}
+
+void early_tick(VCPU *vcpu)
+{
+       UINT64 *p = &PSCBX(vcpu,irr[3]);
+       printf("vcpu_check_pending: about to deliver early tick\n");
+       printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p);
+}
+
+#define        IA64_TPR_MMI    0x10000
+#define        IA64_TPR_MIC    0x000f0
+
+/* checks to see if a VCPU has any unmasked pending interrupts
+ * if so, returns the highest, else returns SPURIOUS_VECTOR */
+/* NOTE: Since this gets called from vcpu_get_ivr() and the
+ * semantics of "mov rx=cr.ivr" ignore the setting of the psr.i bit,
+ * this routine also ignores pscb.interrupt_delivery_enabled
+ * and this must be checked independently; see vcpu_deliverable_interrupts() */
+UINT64 vcpu_check_pending_interrupts(VCPU *vcpu)
+{
+       UINT64 *p, *q, *r, bits, bitnum, mask, i, vector;
+
+       p = &PSCBX(vcpu,irr[3]);
+       /* q = &PSCB(vcpu,delivery_mask[3]); */
+       r = &PSCBX(vcpu,insvc[3]);
+       for (i = 3; ; p--, q--, r--, i--) {
+               bits = *p /* & *q */;
+               if (bits) break; // got a potential interrupt
+               if (*r) {
+                       // nothing in this word which is pending+inservice
+                       // but there is one inservice which masks lower
+                       return SPURIOUS_VECTOR;
+               }
+               if (i == 0) {
+               // checked all bits... nothing pending+inservice
+                       return SPURIOUS_VECTOR;
+               }
+       }
+       // have a pending,deliverable interrupt... see if it is masked
+       bitnum = ia64_fls(bits);
+//printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...",bitnum);
+       vector = bitnum+(i*64);
+       mask = 1L << bitnum;
+//printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...",vector);
+       if (*r >= mask) {
+               // masked by equal inservice
+//printf("but masked by equal inservice\n");
+               return SPURIOUS_VECTOR;
+       }
+       if (PSCB(vcpu,tpr) & IA64_TPR_MMI) {
+               // tpr.mmi is set
+//printf("but masked by tpr.mmi\n");
+               return SPURIOUS_VECTOR;
+       }
+       if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) {
+               //tpr.mic masks class
+//printf("but masked by tpr.mic\n");
+               return SPURIOUS_VECTOR;
+       }
+
+//printf("returned to caller\n");
+#if 0
+if (vector == (PSCB(vcpu,itv) & 0xff)) {
+       UINT64 now = ia64_get_itc();
+       UINT64 itm = PSCBX(vcpu,domain_itm);
+       if (now < itm) early_tick(vcpu);
+
+}
+#endif
+       return vector;
+}
+
+UINT64 vcpu_deliverable_interrupts(VCPU *vcpu)
+{
+       return (vcpu_get_psr_i(vcpu) &&
+               vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR);
+}
+
+UINT64 vcpu_deliverable_timer(VCPU *vcpu)
+{
+       return (vcpu_get_psr_i(vcpu) &&
+               vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv));
+}
+
+IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+       //TODO: Implement this
+       printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n");
+       //*pval = 0;
+       *pval = ia64_getreg(_IA64_REG_CR_LID);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
+{
+       int i;
+       UINT64 vector, mask;
+
+#define HEARTBEAT_FREQ 16      // period in seconds
+#ifdef HEARTBEAT_FREQ
+#define N_DOMS 16      // max number of domains tracked for heartbeat stats
+       static long count[N_DOMS] = { 0 };
+       static long nonclockcount[N_DOMS] = { 0 };
+       REGS *regs = vcpu_regs(vcpu);
+       unsigned domid = vcpu->domain->domain_id;
+#endif
+#ifdef IRQ_DEBUG
+       static char firstivr = 1;
+       static char firsttime[256];
+       if (firstivr) {
+               int i;
+               for (i=0;i<256;i++) firsttime[i]=1;
+               firstivr=0;
+       }
+#endif
+
+       vector = vcpu_check_pending_interrupts(vcpu);
+       if (vector == SPURIOUS_VECTOR) {
+               PSCB(vcpu,pending_interruption) = 0;
+               *pval = vector;
+               return IA64_NO_FAULT;
+       }
+#ifdef HEARTBEAT_FREQ
+       if (domid >= N_DOMS) domid = N_DOMS-1;
+       if (vector == (PSCB(vcpu,itv) & 0xff)) {
+           if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) {
+               printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
+                       domid, count[domid], nonclockcount[domid]);
+               //count[domid] = 0;
+               //dump_runq();
+           }
+       }
+       else nonclockcount[domid]++;
+#endif
+       // now have an unmasked, pending, deliverable vector!
+       // getting ivr has "side effects"
+#ifdef IRQ_DEBUG
+       if (firsttime[vector]) {
+               printf("*** First get_ivr on vector=%d,itc=%lx\n",
+                       vector,ia64_get_itc());
+               firsttime[vector]=0;
+       }
+#endif
+       i = vector >> 6;
+       mask = 1L << (vector & 0x3f);
+//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector);
+       PSCBX(vcpu,insvc[i]) |= mask;
+       PSCBX(vcpu,irr[i]) &= ~mask;
+       //PSCB(vcpu,pending_interruption)--;
+       *pval = vector;
+       // if delivering a timer interrupt, remember domain_itm
+       if (vector == (PSCB(vcpu,itv) & 0xff)) {
+               PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm);
+       }
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,tpr);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = 0L;  // reads of eoi always return 0
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+       printk("vcpu_get_irr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+       *pval = vcpu->irr[0];
+       return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+       printk("vcpu_get_irr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+       *pval = vcpu->irr[1];
+       return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+       printk("vcpu_get_irr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+       *pval = vcpu->irr[2];
+       return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+       printk("vcpu_get_irr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+       *pval = vcpu->irr[3];
+       return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,itv);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,pmv);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
+{
+       *pval = PSCB(vcpu,cmcv);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval)
+{
+       // fix this when setting values other than m-bit is supported
+       printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n");
+       *pval = (1L << 16);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval)
+{
+       // fix this when setting values other than m-bit is supported
+       printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n");
+       *pval = (1L << 16);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val)
+{
+       printf("vcpu_set_lid: Setting cr.lid is unsupported\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val)
+{
+       if (val & 0xff00) return IA64_RSVDREG_FAULT;
+       PSCB(vcpu,tpr) = val;
+       if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+               PSCB(vcpu,pending_interruption) = 1;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
+{
+       UINT64 *p, bits, vec, bitnum;
+       int i;
+
+       p = &PSCBX(vcpu,insvc[3]);
+       for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
+       if (i < 0) {
+               printf("Trying to EOI interrupt when none are in-service.\r\n");
+               return;
+       }
+       bitnum = ia64_fls(bits);
+       vec = bitnum + (i*64);
+       /* clear the correct bit */
+       bits &= ~(1L << bitnum);
+       *p = bits;
+       /* clearing an eoi bit may unmask another pending interrupt... */
+       if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled...
+               // worry about this later... Linux only calls eoi
+               // with interrupts disabled
+               printf("Trying to EOI interrupt with interrupts enabled\r\n");
+       }
+       if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+               PSCB(vcpu,pending_interruption) = 1;
+//printf("YYYYY vcpu_set_eoi: Successful\n");
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val)
+{
+       if (!(val & (1L << 16))) {
+               printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       // no place to save this state but nothing to do anyway
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val)
+{
+       if (!(val & (1L << 16))) {
+               printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       // no place to save this state but nothing to do anyway
+       return (IA64_NO_FAULT);
+}
+
+// parameter is a time interval specified in cycles
+void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles)
+{
+    PSCBX(vcpu,xen_timer_interval) = cycles;
+    vcpu_set_next_timer(vcpu);
+    printf("vcpu_enable_timer(%d): interval set to %d cycles\n",
+             PSCBX(vcpu,xen_timer_interval));
+    __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask));
+}
+
+IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+       if (val & 0xef00) return (IA64_ILLOP_FAULT);
+       PSCB(vcpu,itv) = val;
+       if (val & 0x10000) {
+printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 
0\n",val,PSCBX(vcpu,domain_itm));
+               PSCBX(vcpu,domain_itm) = 0;
+       }
+       else vcpu_enable_timer(vcpu,1000000L);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val)
+{
+       if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
+       PSCB(vcpu,pmv) = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val)
+{
+       if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
+       PSCB(vcpu,cmcv) = val;
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU temporary register access routines
+**************************************************************************/
+UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index)
+{
+       if (index > 7) return 0;
+       return PSCB(vcpu,tmp[index]);
+}
+
+void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val)
+{
+       if (index <= 7) PSCB(vcpu,tmp[index]) = val;
+}
+
+/**************************************************************************
+Interval timer routines
+**************************************************************************/
+
+BOOLEAN vcpu_timer_disabled(VCPU *vcpu)
+{
+       UINT64 itv = PSCB(vcpu,itv);
+       return(!itv || !!(itv & 0x10000));
+}
+
+BOOLEAN vcpu_timer_inservice(VCPU *vcpu)
+{
+       UINT64 itv = PSCB(vcpu,itv);
+       return (test_bit(itv, PSCBX(vcpu,insvc)));
+}
+
+BOOLEAN vcpu_timer_expired(VCPU *vcpu)
+{
+       unsigned long domain_itm = PSCBX(vcpu,domain_itm);
+       unsigned long now = ia64_get_itc();
+
+       if (!domain_itm) return FALSE;
+       if (now < domain_itm) return FALSE;
+       if (vcpu_timer_disabled(vcpu)) return FALSE;
+       return TRUE;
+}
+
+void vcpu_safe_set_itm(unsigned long val)
+{
+       unsigned long epsilon = 100;
+       UINT64 now = ia64_get_itc();
+
+       local_irq_disable();
+       while (1) {
+//printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now);
+               ia64_set_itm(val);
+               if (val > (now = ia64_get_itc())) break;
+               val = now + epsilon;
+               epsilon <<= 1;
+       }
+       local_irq_enable();
+}
+
+void vcpu_set_next_timer(VCPU *vcpu)
+{
+       UINT64 d = PSCBX(vcpu,domain_itm);
+       //UINT64 s = PSCBX(vcpu,xen_itm);
+       UINT64 s = local_cpu_data->itm_next;
+       UINT64 now = ia64_get_itc();
+       //UINT64 interval = PSCBX(vcpu,xen_timer_interval);
+
+       /* gloss over the wraparound problem for now... we know it exists
+        * but it doesn't matter right now */
+
+#if 0
+       /* ensure at least next SP tick is in the future */
+       if (!interval) PSCBX(vcpu,xen_itm) = now +
+#if 0
+               (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE :
+                                       DEFAULT_CLOCK_RATE);
+#else
+       3000000;
+//printf("vcpu_set_next_timer: HACK!\n");
+#endif
+#if 0
+       if (PSCBX(vcpu,xen_itm) < now)
+               while (PSCBX(vcpu,xen_itm) < now + (interval>>1))
+                       PSCBX(vcpu,xen_itm) += interval;
+#endif
+#endif
+
+       if (is_idle_task(vcpu->domain)) {
+               printf("****** vcpu_set_next_timer called during idle!!\n");
+       }
+       //s = PSCBX(vcpu,xen_itm);
+       if (d && (d > now) && (d < s)) {
+               vcpu_safe_set_itm(d);
+               //using_domain_as_itm++;
+       }
+       else {
+               vcpu_safe_set_itm(s);
+               //using_xen_as_itm++;
+       }
+}
+
+IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val)
+{
+       UINT now = ia64_get_itc();
+
+       //if (val < now) val = now + 1000;
+//printf("*** vcpu_set_itm: called with %lx\n",val);
+       PSCBX(vcpu,domain_itm) = val;
+       vcpu_set_next_timer(vcpu);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
+{
+
+       UINT64 oldnow = ia64_get_itc();
+       UINT64 olditm = PSCBX(vcpu,domain_itm);
+       unsigned long d = olditm - oldnow;
+       unsigned long x = local_cpu_data->itm_next - oldnow;
+
+       UINT64 newnow = val, min_delta;
+
+#define DISALLOW_SETTING_ITC_FOR_NOW
+#ifdef DISALLOW_SETTING_ITC_FOR_NOW
+printf("vcpu_set_itc: Setting ar.itc is currently disabled\n");
+#else
+       local_irq_disable();
+       if (olditm) {
+printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
+               PSCBX(vcpu,domain_itm) = newnow + d;
+       }
+       local_cpu_data->itm_next = newnow + x;
+       d = PSCBX(vcpu,domain_itm);
+       x = local_cpu_data->itm_next;
+
+       ia64_set_itc(newnow);
+       if (d && (d > newnow) && (d < x)) {
+               vcpu_safe_set_itm(d);
+               //using_domain_as_itm++;
+       }
+       else {
+               vcpu_safe_set_itm(x);
+               //using_xen_as_itm++;
+       }
+       local_irq_enable();
+#endif
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval)
+{
+       //FIXME: Implement this
+       printf("vcpu_get_itm: Getting cr.itm is unsupported... continuing\n");
+       return (IA64_NO_FAULT);
+       //return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval)
+{
+       //TODO: Implement this
+       printf("vcpu_get_itc: Getting ar.itc is unsupported\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+void vcpu_pend_timer(VCPU *vcpu)
+{
+       UINT64 itv = PSCB(vcpu,itv) & 0xff;
+
+       if (vcpu_timer_disabled(vcpu)) return;
+       //if (vcpu_timer_inservice(vcpu)) return;
+       if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
+               // already delivered an interrupt for this so
+               // don't deliver another
+               return;
+       }
+#if 0
+       // attempt to flag "timer tick before its due" source
+       {
+       UINT64 itm = PSCBX(vcpu,domain_itm);
+       UINT64 now = ia64_get_itc();
+       if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n");
+       }
+#endif
+       vcpu_pend_interrupt(vcpu, itv);
+}
+
+// returns true if ready to deliver a timer interrupt too early
+UINT64 vcpu_timer_pending_early(VCPU *vcpu)
+{
+       UINT64 now = ia64_get_itc();
+       UINT64 itm = PSCBX(vcpu,domain_itm);
+
+       if (vcpu_timer_disabled(vcpu)) return 0;
+       if (!itm) return 0;
+       return (vcpu_deliverable_timer(vcpu) && (now < itm));
+}
+
+//FIXME: This is a hack because everything dies if a timer tick is lost
+void vcpu_poke_timer(VCPU *vcpu)
+{
+       UINT64 itv = PSCB(vcpu,itv) & 0xff;
+       UINT64 now = ia64_get_itc();
+       UINT64 itm = PSCBX(vcpu,domain_itm);
+       UINT64 irr;
+
+       if (vcpu_timer_disabled(vcpu)) return;
+       if (!itm) return;
+       if (itv != 0xefL) {
+               printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv);
+               while(1);
+       }
+       // using 0xef instead of itv so can get real irr
+       if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) {
+               if (!test_bit(0xefL,PSCBX(vcpu,irr))) {
+                       irr = ia64_getreg(_IA64_REG_CR_IRR3);
+                       if (irr & (1L<<(0xef-0xc0))) return;
+if (now-itm>0x800000)
+printf("*** poking timer: 
now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm());
+                       vcpu_pend_timer(vcpu);
+               }
+       }
+}
+
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa)
+{
+       PSCB(vcpu,tmp[0]) = ifa;        // save ifa in vcpu structure, then 
specify IA64_FORCED_IFA
+       return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : 
IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA;
+}
+
+
+IA64FAULT vcpu_rfi(VCPU *vcpu)
+{
+       // TODO: Only allowed for current vcpu
+       PSR psr;
+       UINT64 int_enable, regspsr = 0;
+       UINT64 ifs;
+       REGS *regs = vcpu_regs(vcpu);
+       extern void dorfirfi(void);
+
+       psr.i64 = PSCB(vcpu,ipsr);
+       if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2;
+       if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1;
+       int_enable = psr.ia64_psr.i;
+       if (psr.ia64_psr.ic)  PSCB(vcpu,interrupt_collection_enabled) = 1;
+       if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) 
vcpu_set_metaphysical_mode(vcpu,FALSE);
+       else vcpu_set_metaphysical_mode(vcpu,TRUE);
+       psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1;
+       psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1;
+       psr.ia64_psr.bn = 1;
+       //psr.pk = 1;  // checking pkeys shouldn't be a problem but seems broken
+       if (psr.ia64_psr.be) {
+               printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+               return (IA64_ILLOP_FAULT);
+       }
+       PSCB(vcpu,incomplete_regframe) = 0; // is this necessary?
+       ifs = PSCB(vcpu,ifs);
+       //if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != 
regs->cr_ifs) {
+       //if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
+       if (ifs & regs->cr_ifs & 0x8000000000000000L) {
+               // TODO: validate PSCB(vcpu,iip)
+               // TODO: PSCB(vcpu,ipsr) = psr;
+               PSCB(vcpu,ipsr) = psr.i64;
+               // now set up the trampoline
+               regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!!
+               __asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory");
+               regs->cr_ipsr = regspsr & ~(IA64_PSR_I | IA64_PSR_IC | 
IA64_PSR_BN);
+       }
+       else {
+               regs->cr_ipsr = psr.i64;
+               regs->cr_iip = PSCB(vcpu,iip);
+       }
+       PSCB(vcpu,interrupt_collection_enabled) = 1;
+       vcpu_bsw1(vcpu);
+       PSCB(vcpu,interrupt_delivery_enabled) = int_enable;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_cover(VCPU *vcpu)
+{
+       // TODO: Only allowed for current vcpu
+       REGS *regs = vcpu_regs(vcpu);
+
+       if (!PSCB(vcpu,interrupt_collection_enabled)) {
+               if (!PSCB(vcpu,incomplete_regframe))
+                       PSCB(vcpu,ifs) = regs->cr_ifs;
+               else PSCB(vcpu,incomplete_regframe) = 0;
+       }
+       regs->cr_ifs = 0;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+       UINT64 pta = PSCB(vcpu,pta);
+       UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT;
+       UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1);
+       UINT64 Mask = (1L << pta_sz) - 1;
+       UINT64 Mask_60_15 = (Mask >> 15) & 0x3fffffffffff;
+       UINT64 compMask_60_15 = ~Mask_60_15;
+       //UINT64 rr_ps = RR_TO_PS(get_rr(vadr));
+       UINT64 rr_ps = vcpu_get_rr_ps(vcpu,vadr);
+       UINT64 VHPT_offset = (vadr >> rr_ps) << 3;
+       UINT64 VHPT_addr1 = vadr & 0xe000000000000000L;
+       UINT64 VHPT_addr2a =
+               ((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15;
+       UINT64 VHPT_addr2b =
+               ((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;;
+       UINT64 VHPT_addr3 = VHPT_offset & 0x7fff;
+       UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) |
+                       VHPT_addr3;
+
+#if 0
+       if (VHPT_addr1 == 0xe000000000000000L) {
+           printf("vcpu_thash: thash unsupported with rr7 @%lx\n",
+               PSCB(vcpu,iip));
+           return (IA64_ILLOP_FAULT);
+       }
+#endif
+//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr);
+       *pval = VHPT_addr;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+       printf("vcpu_ttag: ttag instruction unsupported\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+#define itir_ps(itir)  ((itir >> 2) & 0x3f)
+#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1))
+
+unsigned long vhpt_translate_count = 0;
+
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 
*pteval, UINT64 *itir)
+{
+       unsigned long pta, pta_mask, iha, pte, ps;
+       TR_ENTRY *trp;
+       ia64_rr rr;
+
+       if (!(address >> 61)) {
+               if (!PSCB(vcpu,metaphysical_mode)) {
+                       REGS *regs = vcpu_regs(vcpu);
+                       unsigned long viip = PSCB(vcpu,iip);
+                       unsigned long vipsr = PSCB(vcpu,ipsr);
+                       unsigned long iip = regs->cr_iip;
+                       unsigned long ipsr = regs->cr_ipsr;
+                       printk("vcpu_translate: bad address %p, viip=%p, 
vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr);
+               }
+
+               *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | 
_PAGE_PL_2 | _PAGE_AR_RWX;
+               *itir = PAGE_SHIFT << 2;
+               phys_translate_count++;
+               return IA64_NO_FAULT;
+       }
+
+       /* check translation registers */
+       if ((trp = match_tr(vcpu,address))) {
+                       tr_translate_count++;
+               *pteval = trp->page_flags;
+               *itir = trp->itir;
+               return IA64_NO_FAULT;
+       }
+
+       /* check 1-entry TLB */
+       if ((trp = match_dtlb(vcpu,address))) {
+               dtlb_translate_count++;
+               *pteval = trp->page_flags;
+               *itir = trp->itir;
+               return IA64_NO_FAULT;
+       }
+
+       /* check guest VHPT */
+       pta = PSCB(vcpu,pta);
+       rr.rrval = PSCB(vcpu,rrs)[address>>61];
+       if (rr.ve && (pta & IA64_PTA_VE))
+       {
+               if (pta & IA64_PTA_VF)
+               {
+                       /* long format VHPT - not implemented */
+                       return (is_data ? IA64_DATA_TLB_VECTOR : 
IA64_INST_TLB_VECTOR);
+               }
+               else
+               {
+                       /* short format VHPT */
+
+                       /* avoid recursively walking VHPT */
+                       pta_mask = (itir_mask(pta) << 3) >> 3;
+                       if (((address ^ pta) & pta_mask) == 0)
+                               return (is_data ? IA64_DATA_TLB_VECTOR : 
IA64_INST_TLB_VECTOR);
+
+                       vcpu_thash(vcpu, address, &iha);
+                       if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 
0)
+                               return IA64_VHPT_TRANS_VECTOR;
+
+                       /* 
+                        * Optimisation: this VHPT walker aborts on not-present 
pages
+                        * instead of inserting a not-present translation, this 
allows
+                        * vectoring directly to the miss handler.
+       \                */
+                       if (pte & _PAGE_P)
+                       {
+                               *pteval = pte;
+                               *itir = vcpu_get_itir_on_fault(vcpu,address);
+                               vhpt_translate_count++;
+                               return IA64_NO_FAULT;
+                       }
+                       return (is_data ? IA64_DATA_TLB_VECTOR : 
IA64_INST_TLB_VECTOR);
+               }
+       }
+       return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR);
+}
+
+IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+       UINT64 pteval, itir, mask;
+       IA64FAULT fault;
+
+       fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir);
+       if (fault == IA64_NO_FAULT)
+       {
+               mask = itir_mask(itir);
+               *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
+               return (IA64_NO_FAULT);
+       }
+       else
+       {
+               PSCB(vcpu,tmp[0]) = vadr;       // save ifa in vcpu structure, 
then specify IA64_FORCED_IFA
+               return (fault | IA64_FORCED_IFA);
+       }
+}
+
+IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+{
+       printf("vcpu_tak: tak instruction unsupported\n");
+       return (IA64_ILLOP_FAULT);
+       // HACK ALERT: tak does a thash for now
+       //return vcpu_thash(vcpu,vadr,key);
+}
+
+/**************************************************************************
+ VCPU debug breakpoint register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       // TODO: unimplemented DBRs return a reserved register fault
+       // TODO: Should set Logical CPU state, not just physical
+       ia64_set_dbr(reg,val);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       // TODO: unimplemented IBRs return a reserved register fault
+       // TODO: Should set Logical CPU state, not just physical
+       ia64_set_ibr(reg,val);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       // TODO: unimplemented DBRs return a reserved register fault
+       UINT64 val = ia64_get_dbr(reg);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       // TODO: unimplemented IBRs return a reserved register fault
+       UINT64 val = ia64_get_ibr(reg);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU performance monitor register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       // TODO: Should set Logical CPU state, not just physical
+       // NOTE: Writes to unimplemented PMC registers are discarded
+       ia64_set_pmc(reg,val);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       // TODO: Should set Logical CPU state, not just physical
+       // NOTE: Writes to unimplemented PMD registers are discarded
+       ia64_set_pmd(reg,val);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       // NOTE: Reads from unimplemented PMC registers return zero
+       UINT64 val = (UINT64)ia64_get_pmc(reg);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       // NOTE: Reads from unimplemented PMD registers return zero
+       UINT64 val = (UINT64)ia64_get_pmd(reg);
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU banked general register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_bsw0(VCPU *vcpu)
+{
+       // TODO: Only allowed for current vcpu
+       REGS *regs = vcpu_regs(vcpu);
+       unsigned long *r = &regs->r16;
+       unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+       unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
+       int i;
+
+       if (PSCB(vcpu,banknum)) {
+               for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; }
+               PSCB(vcpu,banknum) = 0;
+       }
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_bsw1(VCPU *vcpu)
+{
+       // TODO: Only allowed for current vcpu
+       REGS *regs = vcpu_regs(vcpu);
+       unsigned long *r = &regs->r16;
+       unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+       unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
+       int i;
+
+       if (!PSCB(vcpu,banknum)) {
+               for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; }
+               PSCB(vcpu,banknum) = 1;
+       }
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU cpuid access routines
+**************************************************************************/
+
+
+IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       // FIXME: This could get called as a result of a rsvd-reg fault
+       // if reg > 3
+       switch(reg) {
+           case 0:
+               memcpy(pval,"Xen/ia64",8);
+               break;
+           case 1:
+               *pval = 0;
+               break;
+           case 2:
+               *pval = 0;
+               break;
+           case 3:
+               *pval = ia64_get_cpuid(3);
+               break;
+           case 4:
+               *pval = ia64_get_cpuid(4);
+               break;
+           default:
+               if (reg > (ia64_get_cpuid(3) & 0xff))
+                       return IA64_RSVDREG_FAULT;
+               *pval = ia64_get_cpuid(reg);
+               break;
+       }
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU region register access routines
+**************************************************************************/
+
+unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr)
+{
+       ia64_rr rr;
+
+       rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+       return(rr.ve);
+}
+
+IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+       PSCB(vcpu,rrs)[reg>>61] = val;
+       // warning: set_one_rr() does it "live"
+       set_one_rr(reg,val);
+       return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+       UINT val = PSCB(vcpu,rrs)[reg>>61];
+       *pval = val;
+       return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+#ifndef PKR_USE_FIXED
+       printk("vcpu_get_pkr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+       UINT64 val = (UINT64)ia64_get_pkr(reg);
+       *pval = val;
+       return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+#ifndef PKR_USE_FIXED
+       printk("vcpu_set_pkr: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+#else
+//     if (reg >= NPKRS) return (IA64_ILLOP_FAULT);
+       vcpu->pkrs[reg] = val;
+       ia64_set_pkr(reg,val);
+       return (IA64_NO_FAULT);
+#endif
+}
+
+/**************************************************************************
+ VCPU translation register access routines
+**************************************************************************/
+
+static void vcpu_purge_tr_entry(TR_ENTRY *trp)
+{
+       trp->p = 0;
+}
+
+static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 
ifa)
+{
+       UINT64 ps;
+
+       trp->itir = itir;
+       trp->rid = virtualize_rid(current, get_rr(ifa) & RR_RID_MASK);
+       trp->p = 1;
+       ps = trp->ps;
+       trp->page_flags = pte;
+       if (trp->pl < 2) trp->pl = 2;
+       trp->vadr = ifa & ~0xfff;
+       if (ps > 12) { // "ignore" relevant low-order bits
+               trp->ppn &= ~((1UL<<(ps-12))-1);
+               trp->vadr &= ~((1UL<<ps)-1);
+       }
+}
+
+TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count)
+{
+       unsigned long rid = (get_rr(ifa) & RR_RID_MASK);
+       int i;
+
+       for (i = 0; i < count; i++, trp++) {
+               if (!trp->p) continue;
+               if (physicalize_rid(vcpu,trp->rid) != rid) continue;
+               if (ifa < trp->vadr) continue;
+               if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
+               //if (trp->key && !match_pkr(vcpu,trp->key)) continue;
+               return trp;
+       }
+       return 0;
+}
+
+TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa)
+{
+       TR_ENTRY *trp;
+
+       trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS);
+       if (trp) return trp;
+       trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS);
+       if (trp) return trp;
+       return 0;
+}
+
+IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte,
+               UINT64 itir, UINT64 ifa)
+{
+       TR_ENTRY *trp;
+
+       if (slot >= NDTRS) return IA64_RSVDREG_FAULT;
+       trp = &PSCBX(vcpu,dtrs[slot]);
+//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
+       vcpu_set_tr_entry(trp,pte,itir,ifa);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte,
+               UINT64 itir, UINT64 ifa)
+{
+       TR_ENTRY *trp;
+
+       if (slot >= NITRS) return IA64_RSVDREG_FAULT;
+       trp = &PSCBX(vcpu,itrs[slot]);
+//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
+       vcpu_set_tr_entry(trp,pte,itir,ifa);
+       return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU translation cache access routines
+**************************************************************************/
+
+void foobar(void) { /*vcpu_verbose = 1;*/ }
+
+extern struct domain *dom0;
+
+void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, 
UINT64 mp_pte, UINT64 logps)
+{
+       unsigned long psr;
+       unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
+
+       // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
+       // FIXME, must be inlined or potential for nested fault here!
+       if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) {
+               printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n");
+               //FIXME: kill domain here
+               while(1);
+       }
+       psr = ia64_clear_ic();
+       ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
+       ia64_set_psr(psr);
+       // ia64_srlz_i(); // no srls req'd, will rfi later
+#ifdef VHPT_GLOBAL
+       if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) {
+               // FIXME: this is dangerous... vhpt_flush_address ensures these
+               // addresses never get flushed.  More work needed if this
+               // ever happens.
+//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
+               if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps);
+               else vhpt_insert(vaddr,pte,logps<<2);
+       }
+       // even if domain pagesize is larger than PAGE_SIZE, just put
+       // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
+       else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
+#endif
+       if (IorD & 0x4) return;  // don't place in 1-entry TLB
+       if (IorD & 0x1) {
+               vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
+               PSCBX(vcpu,itlb_pte) = mp_pte;
+       }
+       if (IorD & 0x2) {
+               vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
+               PSCBX(vcpu,dtlb_pte) = mp_pte;
+       }
+}
+
+// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check
+// the physical address contained for correctness
+TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa)
+{
+       TR_ENTRY *trp;
+
+       if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1))
+               return (&vcpu->arch.dtlb);
+       return 0UL;
+}
+
+IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+       unsigned long pteval, logps = (itir >> 2) & 0x3f;
+       unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
+
+       if (logps < PAGE_SHIFT) {
+               printf("vcpu_itc_d: domain trying to use smaller page size!\n");
+               //FIXME: kill domain here
+               while(1);
+       }
+       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       pteval = translate_domain_pte(pte,ifa,itir);
+       if (!pteval) return IA64_ILLOP_FAULT;
+       vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+       unsigned long pteval, logps = (itir >> 2) & 0x3f;
+       unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
+
+       // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
+       if (logps < PAGE_SHIFT) {
+               printf("vcpu_itc_i: domain trying to use smaller page size!\n");
+               //FIXME: kill domain here
+               while(1);
+       }
+       //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+       pteval = translate_domain_pte(pte,ifa,itir);
+       // FIXME: what to do if bad physical address? (machine check?)
+       if (!pteval) return IA64_ILLOP_FAULT;
+       vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
+{
+       printk("vcpu_ptc_l: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+}
+
+// At privlvl=0, fc performs no access rights or protection key checks, while
+// at privlvl!=0, fc performs access rights checks as if it were a 1-byte
+// read but no protection key check.  Thus in order to avoid an unexpected
+// access rights fault, we have to translate the virtual address to a
+// physical address (possibly via a metaphysical address) and do the fc
+// on the physical address, which is guaranteed to flush the same cache line
+IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr)
+{
+       // TODO: Only allowed for current vcpu
+       UINT64 mpaddr, paddr;
+       IA64FAULT fault;
+       unsigned long translate_domain_mpaddr(unsigned long);
+       IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *);
+
+       fault = vcpu_tpa(vcpu, vadr, &mpaddr);
+       if (fault == IA64_NO_FAULT) {
+               paddr = translate_domain_mpaddr(mpaddr);
+               ia64_fc(__va(paddr));
+       }
+       return fault;
+}
+
+int ptce_count = 0;
+IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
+{
+       // Note that this only needs to be called once, i.e. the
+       // architected loop to purge the entire TLB, should use
+       //  base = stride1 = stride2 = 0, count0 = count 1 = 1
+
+#ifdef VHPT_GLOBAL
+       vhpt_flush();   // FIXME: This is overdoing it
+#endif
+       local_flush_tlb_all();
+       // just invalidate the "whole" tlb
+       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
+{
+       printk("vcpu_ptc_g: called, not implemented yet\n");
+       return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+       extern ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
+       // FIXME: validate not flushing Xen addresses
+       // if (Xen address) return(IA64_ILLOP_FAULT);
+       // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
+//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
+#ifdef VHPT_GLOBAL
+       vhpt_flush_address(vadr,addr_range);
+#endif
+       ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
+       vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+       vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+       return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+       printf("vcpu_ptr_d: Purging TLB is unsupported\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+       printf("vcpu_ptr_i: Purging TLB is unsupported\n");
+       return (IA64_ILLOP_FAULT);
+}
+
+void vcpu_set_regs(VCPU *vcpu, REGS *regs)
+{
+       vcpu->arch.regs = regs;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vhpt.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/vhpt.c  Thu Sep  1 18:46:28 2005
@@ -0,0 +1,151 @@
+/*
+ * Initialize VHPT support.
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/vhpt.h>
+
+unsigned long vhpt_paddr, vhpt_pend, vhpt_pte;
+
+void vhpt_flush(void)
+{
+       struct vhpt_lf_entry *v = (void *)VHPT_ADDR;
+       int i, cnt = 0;
+#if 0
+static int firsttime = 2;
+
+if (firsttime) firsttime--;
+else {
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+}
+#endif
+       for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
+               v->itir = 0;
+               v->CChain = 0;
+               v->page_flags = 0;
+               v->ti_tag = INVALID_TI_TAG;
+       }
+       // initialize cache too???
+}
+
+#ifdef VHPT_GLOBAL
+void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
+{
+       unsigned long ps;
+       struct vhpt_lf_entry *vlfe;
+
+       if ((vadr >> 61) == 7) {
+               // no vhpt for region 7 yet, see vcpu_itc_no_srlz
+               printf("vhpt_flush_address: region 7, spinning...\n");
+               while(1);
+       }
+#if 0
+       // this only seems to occur at shutdown, but it does occur
+       if ((!addr_range) || addr_range & (addr_range - 1)) {
+               printf("vhpt_flush_address: weird range, spinning...\n");
+               while(1);
+       }
+//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
+#endif
+       while ((long)addr_range > 0) {
+               vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
+               // FIXME: for now, just blow it away even if it belongs to
+               // another domain.  Later, use ttag to check for match
+//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
+//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
+//}
+               vlfe->ti_tag |= INVALID_TI_TAG;
+               addr_range -= PAGE_SIZE;
+               vadr += PAGE_SIZE;
+       }
+}
+#endif
+
+void vhpt_map(void)
+{
+       unsigned long psr;
+
+       psr = ia64_clear_ic();
+       ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2);
+       ia64_set_psr(psr);
+       ia64_srlz_i();
+}
+
+void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned 
long logps)
+{
+       unsigned long mask = (1L << logps) - 1;
+       extern long running_on_sim;
+       int i;
+
+       if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
+               // if this happens, we may want to revisit this algorithm
+               printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
+               while(1);
+       }
+       if (logps-PAGE_SHIFT > 2) {
+               // FIXME: Should add counter here to see how often this
+               //  happens (e.g. for 16MB pages!) and determine if it
+               //  is a performance problem.  On a quick look, it takes
+               //  about 39000 instrs for a 16MB page and it seems to occur
+               //  only a few times/second, so OK for now.
+               //  An alternate solution would be to just insert the one
+               //  16KB in the vhpt (but with the full mapping)?
+               //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
+                       //"va=%p, pa=%p, pa-masked=%p\n",
+                       //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
+                       //(pte&_PFN_MASK)&~mask);
+       }
+       vaddr &= ~mask;
+       pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
+       for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) {
+               vhpt_insert(vaddr,pte,logps<<2);
+               vaddr += PAGE_SIZE;
+       }
+}
+
+void vhpt_init(void)
+{
+       unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva;
+#if !VHPT_ENABLED
+       return;
+#endif
+       // allocate a huge chunk of physical memory.... how???
+       vhpt_total_size = 1 << VHPT_SIZE_LOG2;  // 4MB, 16MB, 64MB, or 256MB
+       vhpt_alignment = 1 << VHPT_SIZE_LOG2;   // 4MB, 16MB, 64MB, or 256MB
+       printf("vhpt_init: vhpt size=%p, 
align=%p\n",vhpt_total_size,vhpt_alignment);
+       /* This allocation only holds true if vhpt table is unique for
+        * all domains. Or else later new vhpt table should be allocated
+        * from domain heap when each domain is created. Assume xen buddy
+        * allocator can provide natural aligned page by order?
+        */
+       vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT);
+       if (!vhpt_imva) {
+               printf("vhpt_init: can't allocate VHPT!\n");
+               while(1);
+       }
+       vhpt_paddr = __pa(vhpt_imva);
+       vhpt_pend = vhpt_paddr + vhpt_total_size - 1;
+       printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend);
+       vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL));
+       vhpt_map();
+       ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
+               VHPT_ENABLED);
+       vhpt_flush();
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xen.lds.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xen.lds.S       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,251 @@
+#include <linux/config.h>
+
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+#define LOAD_OFFSET    (KERNEL_START - KERNEL_TR_PAGE_SIZE)
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(phys_start)
+jiffies = jiffies_64;
+PHDRS {
+  code   PT_LOAD;
+  percpu PT_LOAD;
+  data   PT_LOAD;
+}
+SECTIONS
+{
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.exit.text)
+       *(.exit.data)
+       *(.exitcall.exit)
+       *(.IA_64.unwind.exit.text)
+       *(.IA_64.unwind_info.exit.text)
+       }
+
+  v = PAGE_OFFSET;     /* this symbol is here to make debugging easier... */
+  phys_start = _start - LOAD_OFFSET;
+
+  code : { } :code
+  . = KERNEL_START;
+
+  _text = .;
+  _stext = .;
+
+  .text : AT(ADDR(.text) - LOAD_OFFSET)
+    {
+       *(.text.ivt)
+       *(.text)
+       SCHED_TEXT
+       LOCK_TEXT
+       *(.gnu.linkonce.t*)
+    }
+  .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
+       { *(.text2) }
+#ifdef CONFIG_SMP
+  .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
+       { *(.text.lock) }
+#endif
+  _etext = .;
+
+  /* Read-only data */
+
+  /* Exception table */
+  . = ALIGN(16);
+  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
+       {
+         __start___ex_table = .;
+         *(__ex_table)
+         __stop___ex_table = .;
+       }
+
+  .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+       {
+         __start___vtop_patchlist = .;
+         *(.data.patch.vtop)
+         __end___vtop_patchlist = .;
+       }
+
+  .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+       {
+         __start___mckinley_e9_bundles = .;
+         *(.data.patch.mckinley_e9)
+         __end___mckinley_e9_bundles = .;
+       }
+
+  /* Global data */
+  _data = .;
+
+#if defined(CONFIG_IA64_GENERIC)
+  /* Machine Vector */
+  . = ALIGN(16);
+  .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+       {
+         machvec_start = .;
+         *(.machvec)
+         machvec_end = .;
+       }
+#endif
+
+  /* Unwind info & table: */
+  . = ALIGN(8);
+  .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
+       { *(.IA_64.unwind_info*) }
+  .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
+       {
+         __start_unwind = .;
+         *(.IA_64.unwind*)
+         __end_unwind = .;
+       }
+
+  RODATA
+
+  .opd : AT(ADDR(.opd) - LOAD_OFFSET)
+       { *(.opd) }
+
+  /* Initialization code and data: */
+
+  . = ALIGN(PAGE_SIZE);
+  __init_begin = .;
+  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
+       {
+         _sinittext = .;
+         *(.init.text)
+         _einittext = .;
+       }
+
+  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
+       { *(.init.data) }
+
+  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
+       {
+         __initramfs_start = .;
+         *(.init.ramfs)
+         __initramfs_end = .;
+       }
+
+   . = ALIGN(16);
+  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
+        {
+         __setup_start = .;
+         *(.init.setup)
+         __setup_end = .;
+       }
+  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
+       {
+         __initcall_start = .;
+         *(.initcall1.init)
+         *(.initcall2.init)
+         *(.initcall3.init)
+         *(.initcall4.init)
+         *(.initcall5.init)
+         *(.initcall6.init)
+         *(.initcall7.init)
+         __initcall_end = .;
+       }
+   __con_initcall_start = .;
+  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
+       { *(.con_initcall.init) }
+  __con_initcall_end = .;
+  __security_initcall_start = .;
+  .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
+       { *(.security_initcall.init) }
+  __security_initcall_end = .;
+  . = ALIGN(PAGE_SIZE);
+  __init_end = .;
+
+  /* The initial task and kernel stack */
+  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
+       { *(.data.init_task) }
+
+  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
+        { *(__special_page_section)
+         __start_gate_section = .;
+         *(.data.gate)
+         __stop_gate_section = .;
+       }
+  . = ALIGN(PAGE_SIZE);                /* make sure the gate page doesn't expose kernel data */
+
+  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
+        { *(.data.cacheline_aligned) }
+
+  /* Per-cpu data: */
+  percpu : { } :percpu
+  . = ALIGN(PERCPU_PAGE_SIZE);
+  __phys_per_cpu_start = .;
+  .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
+       {
+               __per_cpu_start = .;
+               *(.data.percpu)
+               __per_cpu_end = .;
+       }
+  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
+
+  data : { } :data
+  .data : AT(ADDR(.data) - LOAD_OFFSET)
+       { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
+
+  . = ALIGN(16);       /* gp must be 16-byte aligned for exc. table */
+  .got : AT(ADDR(.got) - LOAD_OFFSET)
+       { *(.got.plt) *(.got) }
+  __gp = ADDR(.got) + 0x200000;
+  /* We want the small data sections together, so single-instruction offsets
+     can access them all, and initialized data all before uninitialized, so
+     we can shorten the on-disk segment size.  */
+  .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
+       { *(.sdata) *(.sdata1) *(.srdata) }
+  _edata  =  .;
+  _bss = .;
+  .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
+       { *(.sbss) *(.scommon) }
+  .bss : AT(ADDR(.bss) - LOAD_OFFSET)
+       { *(.bss) *(COMMON) }
+
+  _end = .;
+
+  code : { } :code
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  /* DWARF debug sections.
+     Symbols in the DWARF debugging sections are relative to the beginning
+     of the section so we begin them at 0.  */
+  /* DWARF 1 */
+  .debug          0 : { *(.debug) }
+  .line           0 : { *(.line) }
+  /* GNU DWARF 1 extensions */
+  .debug_srcinfo  0 : { *(.debug_srcinfo) }
+  .debug_sfnames  0 : { *(.debug_sfnames) }
+  /* DWARF 1.1 and DWARF 2 */
+  .debug_aranges  0 : { *(.debug_aranges) }
+  .debug_pubnames 0 : { *(.debug_pubnames) }
+  /* DWARF 2 */
+  .debug_info     0 : { *(.debug_info) }
+  .debug_abbrev   0 : { *(.debug_abbrev) }
+  .debug_line     0 : { *(.debug_line) }
+  .debug_frame    0 : { *(.debug_frame) }
+  .debug_str      0 : { *(.debug_str) }
+  .debug_loc      0 : { *(.debug_loc) }
+  .debug_macinfo  0 : { *(.debug_macinfo) }
+  /* SGI/MIPS DWARF 2 extensions */
+  .debug_weaknames 0 : { *(.debug_weaknames) }
+  .debug_funcnames 0 : { *(.debug_funcnames) }
+  .debug_typenames 0 : { *(.debug_typenames) }
+  .debug_varnames  0 : { *(.debug_varnames) }
+  /* These must appear regardless of  .  */
+  /* Discard them for now since Intel SoftSDV cannot handle them.
+  .comment 0 : { *(.comment) }
+  .note 0 : { *(.note) }
+  */
+  /DISCARD/ : { *(.comment) }
+  /DISCARD/ : { *(.note) }
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenasm.S
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenasm.S        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,501 @@
+/*
+ * Assembly support routines for Xen/ia64
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/vhpt.h>
+
+#if 0
+// FIXME: there's gotta be a better way...
+// ski and spaski are different... moved to xenmisc.c
+#define RunningOnHpSki(rx,ry,pn)                       \
+       addl rx = 2, r0;                                \
+       addl ry = 3, r0;                                \
+       ;;                                              \
+       mov rx = cpuid[rx];                             \
+       mov ry = cpuid[ry];                             \
+       ;;                                              \
+       cmp.eq pn,p0 = 0, rx;                           \
+       ;;                                              \
+       (pn) movl rx = 0x7000004 ;                      \
+       ;;                                              \
+       (pn) cmp.ge pn,p0 = ry, rx;                     \
+       ;;
+
+//int platform_is_hp_ski(void)
+GLOBAL_ENTRY(platform_is_hp_ski)
+       mov r8 = 0
+       RunningOnHpSki(r3,r9,p8)
+(p8)   mov r8 = 1
+       br.ret.sptk.many b0
+END(platform_is_hp_ski)
+#endif
+
+// Change rr7 to the passed value while ensuring
+// Xen is mapped into the new region.
+//   in0: new rr7 value
+//   in1: Xen virtual address of shared info (to be pinned)
+#define PSR_BITS_TO_CLEAR                                              \
+       (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT |         \
+        IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |        \
+        IA64_PSR_DFL | IA64_PSR_DFH)
+// FIXME? Note that this turns off the DB bit (debug)
+#define PSR_BITS_TO_SET        IA64_PSR_BN
+
+//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info);
+GLOBAL_ENTRY(ia64_new_rr7)
+       // not sure this unwind statement is correct...
+       .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
+       alloc loc1 = ar.pfs, 3, 8, 0, 0
+1:     {
+         mov r28  = in0                // copy procedure index
+         mov r8   = ip                 // save ip to compute branch
+         mov loc0 = rp                 // save rp
+       };;
+       .body
+       movl loc2=PERCPU_ADDR
+       ;;
+       tpa loc2=loc2                   // grab this BEFORE changing rr7
+       ;;
+#if VHPT_ENABLED
+       movl loc6=VHPT_ADDR
+       ;;
+       tpa loc6=loc6                   // grab this BEFORE changing rr7
+       ;;
+#endif
+       mov loc5=in1
+       ;;
+       tpa loc5=loc5                   // grab this BEFORE changing rr7
+       ;;
+       mov loc7=in2                    // arch_vcpu_info_t
+       ;;
+       tpa loc7=loc7                   // grab this BEFORE changing rr7
+       ;;
+       mov loc3 = psr                  // save psr
+       adds r8  = 1f-1b,r8             // calculate return address for call
+       ;;
+       tpa r8=r8                       // convert rp to physical
+       ;;
+       mov loc4=ar.rsc                 // save RSE configuration
+       ;;
+       mov ar.rsc=0                    // put RSE in enforced lazy, LE mode
+       movl r16=PSR_BITS_TO_CLEAR
+       movl r17=PSR_BITS_TO_SET
+       ;;
+       or loc3=loc3,r17                // add in psr the bits to set
+       ;;
+       andcm r16=loc3,r16              // removes bits to clear from psr
+       br.call.sptk.many rp=ia64_switch_mode_phys
+1:
+       // now in physical mode with psr.i/ic off so do rr7 switch
+       dep     r16=-1,r0,61,3
+       ;;
+       mov     rr[r16]=in0
+       srlz.d
+       ;;
+
+       // re-pin mappings for kernel text and data
+       mov r18=KERNEL_TR_PAGE_SHIFT<<2
+       movl r17=KERNEL_START
+       ;;
+       rsm psr.i | psr.ic
+       ;;
+       srlz.i
+       ;;
+       ptr.i   r17,r18
+       ptr.d   r17,r18
+       ;;
+       mov cr.itir=r18
+       mov cr.ifa=r17
+       mov r16=IA64_TR_KERNEL
+       //mov r3=ip
+       movl r18=PAGE_KERNEL
+       ;;
+       dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+       ;;
+       or r18=r2,r18
+       ;;
+       srlz.i
+       ;;
+       itr.i itr[r16]=r18
+       ;;
+       itr.d dtr[r16]=r18
+       ;;
+
+       // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
+
+       // unless overlaps with KERNEL_TR
+       dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
+       ;;
+       cmp.eq p7,p0=r17,r18
+(p7)   br.cond.sptk    .stack_overlaps
+       ;;
+       movl r25=PAGE_KERNEL
+       dep r21=0,r13,60,4              // physical address of "current"
+       ;;
+       or r23=r25,r21                  // construct PA | page properties
+       mov r25=IA64_GRANULE_SHIFT<<2
+       ;;
+       ptr.d   r13,r25
+       ;;
+       mov cr.itir=r25
+       mov cr.ifa=r13                  // VA of next task...
+       ;;
+       mov r25=IA64_TR_CURRENT_STACK
+       ;;
+       itr.d dtr[r25]=r23              // wire in new mapping...
+       ;;
+.stack_overlaps:
+
+       movl r22=PERCPU_ADDR
+       ;;
+       movl r25=PAGE_KERNEL
+       ;;
+       mov r21=loc2                    // saved percpu physical address
+       ;;
+       or r23=r25,r21                  // construct PA | page properties
+       mov r24=PERCPU_PAGE_SHIFT<<2
+       ;;
+       ptr.d   r22,r24
+       ;;
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       ;;
+       mov r25=IA64_TR_PERCPU_DATA
+       ;;
+       itr.d dtr[r25]=r23              // wire in new mapping...
+       ;;
+
+#if VHPT_ENABLED
+       movl r22=VHPT_ADDR
+       ;;
+       movl r25=PAGE_KERNEL
+       ;;
+       mov r21=loc6                    // saved vhpt physical address
+       ;;
+       or r23=r25,r21                  // construct PA | page properties
+       mov r24=VHPT_PAGE_SHIFT<<2
+       ;;
+       ptr.d   r22,r24
+       ;;
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       ;;
+       mov r25=IA64_TR_VHPT
+       ;;
+       itr.d dtr[r25]=r23              // wire in new mapping...
+       ;;
+#endif
+
+       movl r22=SHAREDINFO_ADDR
+       ;;
+       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+       ;;
+       mov r21=loc5                    // saved sharedinfo physical address
+       ;;
+       or r23=r25,r21                  // construct PA | page properties
+       mov r24=PAGE_SHIFT<<2
+       ;;
+       ptr.d   r22,r24
+       ;;
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       ;;
+       mov r25=IA64_TR_SHARED_INFO
+       ;;
+       itr.d dtr[r25]=r23              // wire in new mapping...
+       ;;
+       // Map for arch_vcpu_info_t
+       movl r22=SHARED_ARCHINFO_ADDR
+       ;;
+       movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+       ;;
+       mov r21=loc7                    // saved sharedinfo physical address
+       ;;
+       or r23=r25,r21                  // construct PA | page properties
+       mov r24=PAGE_SHIFT<<2
+       ;;
+       ptr.d   r22,r24
+       ;;
+       mov cr.itir=r24
+       mov cr.ifa=r22
+       ;;
+       mov r25=IA64_TR_ARCH_INFO
+       ;;
+       itr.d dtr[r25]=r23              // wire in new mapping...
+       ;;
+
+       // done, switch back to virtual and return
+       mov r16=loc3                    // r16= original psr
+       br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+       mov psr.l = loc3                // restore init PSR
+
+       mov ar.pfs = loc1
+       mov rp = loc0
+       ;;
+       mov ar.rsc=loc4                 // restore RSE configuration
+       srlz.d                          // seralize restoration of psr.l
+       br.ret.sptk.many rp
+END(ia64_new_rr7)
+
+#include "minstate.h"
+
+GLOBAL_ENTRY(ia64_prepare_handle_privop)
+       .prologue
+       /*
+        * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+        */
+       mov r16=r0
+       DO_SAVE_SWITCH_STACK
+       br.call.sptk.many rp=ia64_handle_privop         // stack frame setup in ivt
+.ret22:        .body
+       DO_LOAD_SWITCH_STACK
+       br.cond.sptk.many rp                            // goes to ia64_leave_kernel
+END(ia64_prepare_handle_privop)
+
+GLOBAL_ENTRY(ia64_prepare_handle_break)
+       .prologue
+       /*
+        * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+        */
+       mov r16=r0
+       DO_SAVE_SWITCH_STACK
+       br.call.sptk.many rp=ia64_handle_break  // stack frame setup in ivt
+.ret23:        .body
+       DO_LOAD_SWITCH_STACK
+       br.cond.sptk.many rp                    // goes to ia64_leave_kernel
+END(ia64_prepare_handle_break)
+
+GLOBAL_ENTRY(ia64_prepare_handle_reflection)
+       .prologue
+       /*
+        * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+        */
+       mov r16=r0
+       DO_SAVE_SWITCH_STACK
+       br.call.sptk.many rp=ia64_handle_reflection     // stack frame setup in ivt
+.ret24:        .body
+       DO_LOAD_SWITCH_STACK
+       br.cond.sptk.many rp                    // goes to ia64_leave_kernel
+END(ia64_prepare_handle_reflection)
+
+GLOBAL_ENTRY(__get_domain_bundle)
+       EX(.failure_in_get_bundle,ld8 r8=[r32],8)
+       ;;
+       EX(.failure_in_get_bundle,ld8 r9=[r32])
+       ;;
+       br.ret.sptk.many rp
+       ;;
+.failure_in_get_bundle:
+       mov r8=0
+       ;;
+       mov r9=0
+       ;;
+       br.ret.sptk.many rp
+       ;;
+END(__get_domain_bundle)
+
+GLOBAL_ENTRY(dorfirfi)
+        movl r16 = XSI_IIP
+        movl r17 = XSI_IPSR
+        movl r18 = XSI_IFS
+       ;;
+       ld8 r16 = [r16]
+       ld8 r17 = [r17]
+       ld8 r18 = [r18]
+       ;;
+        mov cr.iip=r16
+        mov cr.ipsr=r17
+        mov cr.ifs=r18
+       ;;
+        // fall through
+END(dorfirfi)
+
+GLOBAL_ENTRY(dorfi)
+        rfi
+       ;;
+END(dorfirfi)
+
+//
+// Long's Peak UART Offsets
+//
+#define COM_TOP 0xff5e0000
+#define COM_BOT 0xff5e2000
+
+// UART offsets        
+#define UART_TX                0       /* Out: Transmit buffer (DLAB=0) */
+#define UART_INT_ENB   1       /* interrupt enable (DLAB=0) */ 
+#define UART_INT_ID    2       /* Interrupt ID register */
+#define UART_LINE_CTL  3       /* Line control register */
+#define UART_MODEM_CTL 4       /* Modem Control Register */
+#define UART_LSR       5       /* In:  Line Status Register */
+#define UART_MSR       6       /* Modem status register */     
+#define UART_DLATCH_LOW UART_TX
+#define UART_DLATCH_HIGH UART_INT_ENB
+#define COM1   0x3f8
+#define COM2   0x2F8
+#define COM3   0x3E8
+
+/* interrupt enable bits (offset 1) */
+#define DATA_AVAIL_INT 1
+#define XMIT_HOLD_EMPTY_INT 2
+#define LINE_STAT_INT 4
+#define MODEM_STAT_INT 8
+
+/* line status bits (offset 5) */
+#define REC_DATA_READY 1
+#define OVERRUN 2
+#define PARITY_ERROR 4
+#define FRAMING_ERROR 8
+#define BREAK_INTERRUPT 0x10
+#define XMIT_HOLD_EMPTY 0x20
+#define XMIT_SHIFT_EMPTY 0x40
+
+// Write a single character
+// input: r32 = character to be written
+// output: none
+GLOBAL_ENTRY(longs_peak_putc)  
+       rsm psr.dt
+        movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
+       ;;
+       srlz.i
+       ;;
+
+.Chk_THRE_p:
+        ld1.acq r18=[r16]
+        ;;
+       
+       and r18 = XMIT_HOLD_EMPTY, r18
+       ;;
+       cmp4.eq p6,p0=0,r18
+       ;;
+       
+(p6)    br .Chk_THRE_p
+       ;;
+        movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
+       ;;
+       st1.rel [r16]=r32
+       ;;
+       ssm psr.dt
+       ;;
+       srlz.i
+       ;;
+       br.ret.sptk.many b0
+END(longs_peak_putc)   
+
+/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
+GLOBAL_ENTRY(pal_emulator_static)
+       mov r8=-1
+       mov r9=256
+       ;;
+       cmp.gtu p7,p8=r9,r32            /* r32 <= 255? */
+(p7)   br.cond.sptk.few static
+       ;;
+       mov r9=512
+       ;;
+       cmp.gtu p7,p8=r9,r32
+(p7)   br.cond.sptk.few stacked
+       ;;
+static:        cmp.eq p7,p8=6,r32              /* PAL_PTCE_INFO */
+(p8)   br.cond.sptk.few 1f
+       ;;
+       mov r8=0                        /* status = 0 */
+       movl r9=0x100000000             /* tc.base */
+       movl r10=0x0000000200000003     /* count[0], count[1] */
+       movl r11=0x1000000000002000     /* stride[0], stride[1] */
+       br.ret.sptk.few rp
+1:     cmp.eq p7,p8=14,r32             /* PAL_FREQ_RATIOS */
+(p8)   br.cond.sptk.few 1f
+       mov r8=0                        /* status = 0 */
+       movl r9 =0x900000002            /* proc_ratio (1/100) */
+       movl r10=0x100000100            /* bus_ratio<<32 (1/256) */
+       movl r11=0x900000002            /* itc_ratio<<32 (1/100) */
+       ;;
+1:     cmp.eq p7,p8=19,r32             /* PAL_RSE_INFO */
+(p8)   br.cond.sptk.few 1f
+       mov r8=0                        /* status = 0 */
+       mov r9=96                       /* num phys stacked */
+       mov r10=0                       /* hints */
+       mov r11=0
+       br.ret.sptk.few rp
+1:     cmp.eq p7,p8=1,r32              /* PAL_CACHE_FLUSH */
+(p8)   br.cond.sptk.few 1f
+#if 0
+       mov r9=ar.lc
+       movl r8=524288                  /* flush 512k million cache lines (16MB) */
+       ;;
+       mov ar.lc=r8
+       movl r8=0xe000000000000000
+       ;;
+.loop: fc r8
+       add r8=32,r8
+       br.cloop.sptk.few .loop
+       sync.i
+       ;;
+       srlz.i
+       ;;
+       mov ar.lc=r9
+       mov r8=r0
+       ;;
+1:     cmp.eq p7,p8=15,r32             /* PAL_PERF_MON_INFO */
+(p8)   br.cond.sptk.few 1f
+       mov r8=0                        /* status = 0 */
+       movl r9 =0x08122f04             /* generic=4 width=47 retired=8 cycles=18 */
+       mov r10=0                       /* reserved */
+       mov r11=0                       /* reserved */
+       mov r16=0xffff                  /* implemented PMC */
+       mov r17=0x3ffff                 /* implemented PMD */
+       add r18=8,r29                   /* second index */
+       ;;
+       st8 [r29]=r16,16                /* store implemented PMC */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+       st8 [r29]=r0,16                 /* clear remaining bits  */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+       st8 [r29]=r17,16                /* store implemented PMD */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       mov r16=0xf0                    /* cycles count capable PMC */
+       ;;
+       st8 [r29]=r0,16                 /* clear remaining bits  */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       mov r17=0xf0                    /* retired bundles capable PMC */
+       ;;
+       st8 [r29]=r16,16                /* store cycles capable */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+       st8 [r29]=r0,16                 /* clear remaining bits  */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+       st8 [r29]=r17,16                /* store retired bundle capable */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+       st8 [r29]=r0,16                 /* clear remaining bits  */
+       st8 [r18]=r0,16                 /* clear remaining bits  */
+       ;;
+1:     br.cond.sptk.few rp
+#else
+1:
+#endif
+stacked:
+       br.ret.sptk.few rp
+END(pal_emulator_static)
+
+GLOBAL_ENTRY(vhpt_insert)
+//     alloc loc0 = ar.pfs, 3, 1, 0, 0
+       mov r16=r32
+       mov r26=r33
+       mov r27=r34
+       ;;
+       VHPT_INSERT()
+//     VHPT_INSERT1()  ... add collision chains later
+//     mov ar.pfs = loc0
+       br.ret.sptk.few rp
+       ;;
+END(vhpt_insert)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenirq.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenirq.c        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,78 @@
+/*
+ * Xen irq routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+#include <asm/delay.h>
+
+
+void
+xen_debug_irq(ia64_vector vector, struct pt_regs *regs)
+{
+//FIXME: For debug only, can be removed
+       static char firstirq = 1;
+       static char firsttime[256];
+       static char firstpend[256];
+       if (firstirq) {
+               int i;
+               for (i=0;i<256;i++) firsttime[i] = 1;
+               for (i=0;i<256;i++) firstpend[i] = 1;
+               firstirq = 0;
+       }
+       if (firsttime[vector]) {
+               printf("**** (entry) First received int on vector=%d,itc=%lx\n",
+                       (unsigned long) vector, ia64_get_itc());
+               firsttime[vector] = 0;
+       }
+}
+
+
+int
+xen_do_IRQ(ia64_vector vector)
+{
+       if (vector != 0xef) {
+               extern void vcpu_pend_interrupt(void *, int);
+#if 0
+               if (firsttime[vector]) {
+                       printf("**** (iterate) First received int on vector=%d,itc=%lx\n",
+                       (unsigned long) vector, ia64_get_itc());
+                       firsttime[vector] = 0;
+               }
+               if (firstpend[vector]) {
+                       printf("**** First pended int on vector=%d,itc=%lx\n",
+                               (unsigned long) vector,ia64_get_itc());
+                       firstpend[vector] = 0;
+               }
+#endif
+               //FIXME: TEMPORARY HACK!!!!
+               vcpu_pend_interrupt(dom0->vcpu[0],vector);
+               vcpu_wake(dom0->vcpu[0]);
+               return(1);
+       }
+       return(0);
+}
+
+/* From linux/kernel/softirq.c */
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+# define invoke_softirq()      __do_softirq()
+#else
+# define invoke_softirq()      do_softirq()
+#endif
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+       //account_system_vtime(current);
+       //sub_preempt_count(IRQ_EXIT_OFFSET);
+       if (!in_interrupt() && local_softirq_pending())
+               invoke_softirq();
+       //preempt_enable_no_resched();
+}
+/* end from linux/kernel/softirq.c */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmem.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenmem.c        Thu Sep  1 18:46:28 2005
@@ -0,0 +1,86 @@
+/*
+ * Xen memory allocator routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ * Copyright (C) 2005 Intel Corp.
+ *
+ * Routines used by ia64 machines with contiguous (or virtually contiguous)
+ * memory.
+ */
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+#include <xen/mm.h>
+
+extern struct page *zero_page_memmap_ptr;
+struct pfn_info *frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct page *mem_map;
+#define MAX_DMA_ADDRESS ~0UL   // FIXME???
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+static unsigned long num_dma_physpages;
+#endif
+
+/*
+ * Set up the page tables.
+ */
+#ifdef CONFIG_VTI
+unsigned long *mpt_table;
+unsigned long mpt_table_size;
+#endif // CONFIG_VTI
+
+void
+paging_init (void)
+{
+       struct pfn_info *pg;
+
+#ifdef CONFIG_VTI
+       unsigned int mpt_order;
+       /* Create machine to physical mapping table
+        * NOTE: similar to frame table, later we may need virtually
+        * mapped mpt table if large hole exists. Also MAX_ORDER needs
+        * to be changed in common code, which only support 16M by far
+        */
+       mpt_table_size = max_page * sizeof(unsigned long);
+       mpt_order = get_order(mpt_table_size);
+       ASSERT(mpt_order <= MAX_ORDER);
+       if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
+               panic("Not enough memory to bootstrap Xen.\n");
+
+       printk("machine to physical table: 0x%lx\n", (u64)mpt_table);
+       memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size);
+#endif // CONFIG_VTI
+
+       /* Other mapping setup */
+
+       zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
+
+/* FIXME: postpone support to machines with big holes between physical memorys.
+ * Current hack allows only efi memdesc upto 4G place. (See efi.c)
+ */
+#ifndef CONFIG_VIRTUAL_MEM_MAP
+#define FT_ALIGN_SIZE  (16UL << 20)
+void __init init_frametable(void)
+{
+       unsigned long i, pfn;
+       frame_table_size = max_page * sizeof(struct pfn_info);
+       frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+
+       /* Request continuous trunk from boot allocator, since HV
+        * address is identity mapped */
+       pfn = alloc_boot_pages(
+            frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT);
+       if (pfn == 0)
+               panic("Not enough memory for frame table.\n");
+
+       frame_table = __va(pfn << PAGE_SHIFT);
+       memset(frame_table, 0, frame_table_size);
+       printk("size of frame_table: %lukB\n",
+               frame_table_size >> 10);
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmisc.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenmisc.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,391 @@
+/*
+ * Xen misc
+ * 
+ * Functions/decls that are/may be needed to link with Xen because
+ * of x86 dependencies
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ *     Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <linux/efi.h>
+#include <asm/processor.h>
+#include <xen/serial.h>
+#include <asm/io.h>
+#include <xen/softirq.h>
+
+efi_memory_desc_t ia64_efi_io_md;
+EXPORT_SYMBOL(ia64_efi_io_md);
+unsigned long wait_init_idle;
+int phys_proc_id[NR_CPUS];
+unsigned long loops_per_jiffy = (1<<12);       // from linux/init/main.c
+
+void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); 
}
+void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check 
abort handling)\n"); }
+void ia64_mca_cpu_init(void *x) { }
+void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
+void ia64_patch_vtop(unsigned long a, unsigned long b) { }
+void hpsim_setup(char **x)
+{
+#ifdef CONFIG_SMP
+       init_smp_config();
+#endif
+}
+
+// called from mem_init... don't think s/w I/O tlb is needed in Xen
+//void swiotlb_init(void) { }  ...looks like it IS needed
+
+long
+is_platform_hp_ski(void)
+{
+       int i;
+       long cpuid[6];
+
+       for (i = 0; i < 5; ++i)
+               cpuid[i] = ia64_get_cpuid(i);
+       if ((cpuid[0] & 0xff) != 'H') return 0;
+       if ((cpuid[3] & 0xff) != 0x4) return 0;
+       if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0;
+       if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0;
+       if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0;
+       return 1;
+}
+
+long
+platform_is_hp_ski(void)
+{
+       extern long running_on_sim;
+       return running_on_sim;
+}
+
+/* calls in xen/common code that are unused on ia64 */
+
+void sync_lazy_execstate_cpu(unsigned int cpu) {}
+
+#ifdef CONFIG_VTI
+int grant_table_create(struct domain *d) { return 0; }
+void grant_table_destroy(struct domain *d) { return; }
+#endif
+
+struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); }
+
+void raise_actimer_softirq(void)
+{
+       raise_softirq(AC_TIMER_SOFTIRQ);
+}
+
+#ifndef CONFIG_VTI
+unsigned long
+__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+{
+       if (d == dom0)
+               return(gpfn);
+       else {
+               unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
+               if (!pte) {
+printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n");
+while(1);
+                       return 0;
+               }
+               return ((pte & _PFN_MASK) >> PAGE_SHIFT);
+       }
+}
+
+u32
+__mfn_to_gpfn(struct domain *d, unsigned long frame)
+{
+       // FIXME: is this right?
+if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) {
+printk("__mfn_to_gpfn: bad frame. spinning...\n");
+while(1);
+}
+       return frame;
+}
+#endif
+
+#ifndef CONFIG_VTI
+unsigned long __hypercall_create_continuation(
+       unsigned int op, unsigned int nr_args, ...)
+{
+       printf("__hypercall_create_continuation: not implemented!!!\n");
+}
+#endif
+
+///////////////////////////////
+
+///////////////////////////////
+// from arch/x86/apic.c
+///////////////////////////////
+
+extern unsigned long domain0_ready;
+
+int reprogram_ac_timer(s_time_t timeout)
+{
+       struct vcpu *v = current;
+
+#ifdef CONFIG_VTI
+//     if(VMX_DOMAIN(v))
+               return 1;
+#endif // CONFIG_VTI
+       if (!domain0_ready) return 1;
+       local_cpu_data->itm_next = timeout;
+       if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout);
+       else vcpu_set_next_timer(current);
+       return 1;
+}
+
+///////////////////////////////
+// from arch/ia64/page_alloc.c
+///////////////////////////////
+DEFINE_PER_CPU(struct page_state, page_states) = {0};
+unsigned long totalram_pages;
+
+void __mod_page_state(unsigned long offset, unsigned long delta)
+{
+       unsigned long flags;
+       void* ptr;
+
+       local_irq_save(flags);
+       ptr = &__get_cpu_var(page_states);
+       *(unsigned long*)(ptr + offset) += delta;
+       local_irq_restore(flags);
+}
+
+///////////////////////////////
+// from arch/x86/flushtlb.c
+///////////////////////////////
+
+u32 tlbflush_clock;
+u32 tlbflush_time[NR_CPUS];
+
+///////////////////////////////
+// from arch/x86/memory.c
+///////////////////////////////
+
+void init_percpu_info(void)
+{
+       dummy();
+    //memset(percpu_info, 0, sizeof(percpu_info));
+}
+
+void free_page_type(struct pfn_info *page, unsigned int type)
+{
+       dummy();
+}
+
+///////////////////////////////
+//// misc memory stuff
+///////////////////////////////
+
+unsigned long __get_free_pages(unsigned int mask, unsigned int order)
+{
+       void *p = alloc_xenheap_pages(order);
+
+       memset(p,0,PAGE_SIZE<<order);
+       return (unsigned long)p;
+}
+
+void __free_pages(struct page *page, unsigned int order)
+{
+       if (order) BUG();
+       free_xenheap_page(page);
+}
+
+void *pgtable_quicklist_alloc(void)
+{
+       return alloc_xenheap_pages(0);
+}
+
+void pgtable_quicklist_free(void *pgtable_entry)
+{
+       free_xenheap_page(pgtable_entry);
+}
+
+///////////////////////////////
+// from arch/ia64/traps.c
+///////////////////////////////
+
+void show_registers(struct pt_regs *regs)
+{
+       printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
+}
+
+int is_kernel_text(unsigned long addr)
+{
+       extern char _stext[], _etext[];
+       if (addr >= (unsigned long) _stext &&
+           addr <= (unsigned long) _etext)
+           return 1;
+
+       return 0;
+}
+
+unsigned long kernel_text_end(void)
+{
+       extern char _etext[];
+       return (unsigned long) _etext;
+}
+
+///////////////////////////////
+// from common/keyhandler.c
+///////////////////////////////
+void dump_pageframe_info(struct domain *d)
+{
+       printk("dump_pageframe_info not implemented\n");
+}
+
+///////////////////////////////
+// called from arch/ia64/head.S
+///////////////////////////////
+
+void console_print(char *msg)
+{
+       printk("console_print called, how did start_kernel return???\n");
+}
+
+void kernel_thread_helper(void)
+{
+       printk("kernel_thread_helper not implemented\n");
+       dummy();
+}
+
+void sys_exit(void)
+{
+       printk("sys_exit not implemented\n");
+       dummy();
+}
+
+////////////////////////////////////
+// called from unaligned.c
+////////////////////////////////////
+
+void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ 
((noreturn)) */
+{
+       printk("die_if_kernel: called, not implemented\n");
+}
+
+long
+ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
+          unsigned long user_rbs_end, unsigned long addr, long *val)
+{
+       printk("ia64_peek: called, not implemented\n");
+}
+
+long
+ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
+          unsigned long user_rbs_end, unsigned long addr, long val)
+{
+       printk("ia64_poke: called, not implemented\n");
+}
+
+void
+ia64_sync_fph (struct task_struct *task)
+{
+       printk("ia64_sync_fph: called, not implemented\n");
+}
+
+void
+ia64_flush_fph (struct task_struct *task)
+{
+       printk("ia64_flush_fph: called, not implemented\n");
+}
+
+////////////////////////////////////
+// called from irq_ia64.c:init_IRQ()
+//   (because CONFIG_IA64_HP_SIM is specified)
+////////////////////////////////////
+void hpsim_irq_init(void) { }
+
+
+// accommodate linux extable.c
+//const struct exception_table_entry *
+void *search_module_extables(unsigned long addr) { return NULL; }
+void *__module_text_address(unsigned long addr) { return NULL; }
+void *module_text_address(unsigned long addr) { return NULL; }
+
+void cs10foo(void) {}
+void cs01foo(void) {}
+
+unsigned long context_switch_count = 0;
+
+void context_switch(struct vcpu *prev, struct vcpu *next)
+{
+//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
+//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n",
+//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff);
+//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo();
+//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
+//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
+#ifdef CONFIG_VTI
+       vtm_domain_out(prev);
+#endif
+       context_switch_count++;
+       switch_to(prev,next,prev);
+#ifdef CONFIG_VTI
+        vtm_domain_in(current);
+#endif
+
+// leave this debug for now: it acts as a heartbeat when more than
+// one domain is active
+{
+static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
+static int i = 100;
+int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
+if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
+if (!i--) { printk("+",id); i = 1000000; }
+}
+
+#ifdef CONFIG_VTI
+       if (VMX_DOMAIN(current))
+               vmx_load_all_rr(current);
+#else
+       if (!is_idle_task(current->domain)) {
+               load_region_regs(current);
+               if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+       }
+       if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+       /* nothing to do */
+}
+
+void continue_running(struct vcpu *same)
+{
+       /* nothing to do */
+}
+
+void panic_domain(struct pt_regs *regs, const char *fmt, ...)
+{
+       va_list args;
+       char buf[128];
+       struct vcpu *v = current;
+       static volatile int test = 1;   // so can continue easily in debug
+       extern spinlock_t console_lock;
+       unsigned long flags;
+    
+loop:
+       printf("$$$$$ PANIC in domain %d (k6=%p): ",
+               v->domain->domain_id, 
+               __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]);
+       va_start(args, fmt);
+       (void)vsnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+       printf(buf);
+       if (regs) show_registers(regs);
+       domain_pause_by_systemcontroller(current->domain);
+       v->domain->shutdown_code = SHUTDOWN_crash;
+       set_bit(_DOMF_shutdown, v->domain->domain_flags);
+       if (v->domain->domain_id == 0) {
+               int i = 1000000000L;
+               // if domain0 crashes, just periodically print out panic
+               // message to make post-mortem easier
+               while(i--);
+               goto loop;
+       }
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xensetup.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xensetup.c      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,389 @@
+/******************************************************************************
+ * xensetup.c
+ * Copyright (c) 2004-2005  Hewlett-Packard Co
+ *         Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+//#include <xen/spinlock.h>
+#include <xen/multiboot.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+//#include <xen/delay.h>
+#include <xen/compile.h>
+//#include <xen/console.h>
+#include <xen/serial.h>
+#include <xen/trace.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <xen/string.h>
+
+unsigned long xenheap_phys_end;
+
+char saved_command_line[COMMAND_LINE_SIZE];
+
+struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+
+cpumask_t cpu_present_map;
+
+#ifdef CLONE_DOMAIN0
+struct domain *clones[CLONE_DOMAIN0];
+#endif
+extern unsigned long domain0_ready;
+
+int find_max_pfn (unsigned long, unsigned long, void *);
+void start_of_day(void);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus); 
+
+/*
+ * opt_xenheap_megabytes: Size of Xen heap in megabytes, including:
+ *     xen image
+ *     bootmap bits
+ *     xen heap
+ * Note: To make the xenheap size configurable, the prerequisite is
+ * to configure elilo to allow relocation by default. Then since
+ * elilo chooses 256M as alignment when relocating, alignment issue
+ * on IPF can be addressed.
+ */
+unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
+unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
+extern long running_on_sim;
+unsigned long xen_pstart;
+
+static int
+xen_count_pages(u64 start, u64 end, void *arg)
+{
+    unsigned long *count = arg;
+
+    /* FIXME: do we need consider difference between DMA-usable memory and
+     * normal memory? Seems that HV has no requirement to operate DMA which
+     * is owned by Dom0? */
+    *count += (end - start) >> PAGE_SHIFT;
+    return 0;
+}
+
+/* Find first hole after trunk for xen image */
+static int
+xen_find_first_hole(u64 start, u64 end, void *arg)
+{
+    unsigned long *first_hole = arg;
+
+    if ((*first_hole) == 0) {
+       if ((start <= KERNEL_START) && (KERNEL_START < end))
+           *first_hole = __pa(end);
+    }
+
+    return 0;
+}
+
+static void __init do_initcalls(void)
+{
+    initcall_t *call;
+    for ( call = &__initcall_start; call < &__initcall_end; call++ )
+        (*call)();
+}
+
+/*
+ * IPF loader only supports one command line currently, for
+ * both xen and guest kernel. This function provides pre-parse
+ * to mixed command line, to split it into two parts.
+ *
+ * User should split the parameters by "--", with strings after the
+ * splitter for the guest kernel. Missing "--" means the whole line belongs
+ * to guest. Example:
+ *     "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty
+ * root=/dev/sda3 ro"
+ */
+static char null[4] = { 0 };
+
+void early_cmdline_parse(char **cmdline_p)
+{
+    char *guest_cmd;
+    char *split = "--";
+
+    if (*cmdline_p == NULL) {
+       *cmdline_p = &null[0];
+       saved_command_line[0] = '\0';
+       return;
+    }
+
+    guest_cmd = strstr(*cmdline_p, split);
+    /* If no splitter, whole line is for guest */
+    if (guest_cmd == NULL) {
+       guest_cmd = *cmdline_p;
+       *cmdline_p = &null[0];
+    } else {
+       *guest_cmd = '\0';      /* Split boot parameters for xen and guest */
+       guest_cmd += strlen(split);
+       while (*guest_cmd == ' ') guest_cmd++;
+    }
+
+    strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE);
+    return;
+}
+
+struct ns16550_defaults ns16550_com1 = {
+    .baud      = BAUD_AUTO,
+    .data_bits = 8,
+    .parity    = 'n',
+    .stop_bits = 1
+};
+
+struct ns16550_defaults ns16550_com2 = {
+    .baud      = BAUD_AUTO,
+    .data_bits = 8,
+    .parity    = 'n',
+    .stop_bits = 1
+};
+
+void start_kernel(void)
+{
+    unsigned char *cmdline;
+    void *heap_start;
+    int i;
+    unsigned long max_mem, nr_pages, firsthole_start;
+    unsigned long dom0_memory_start, dom0_memory_end;
+    unsigned long initial_images_start, initial_images_end;
+
+    running_on_sim = is_platform_hp_ski();
+    /* Kernel may be relocated by EFI loader */
+    xen_pstart = ia64_tpa(KERNEL_START);
+
+    /* Must do this early -- e.g., spinlocks rely on get_current(). */
+    //set_current(&idle0_vcpu);
+    ia64_r13 = (void *)&idle0_vcpu;
+    idle0_vcpu.domain = &idle0_domain;
+
+    early_setup_arch(&cmdline);
+
+    /* We initialise the serial devices very early so we can get debugging. */
+    if (running_on_sim) hpsim_serial_init();
+    else {
+       ns16550_init(0, &ns16550_com1);
+       /* Also init com2 for Tiger4. */
+       ns16550_com2.io_base = 0x2f8;
+       ns16550_com2.irq     = 3;
+       ns16550_init(1, &ns16550_com2);
+    }
+    serial_init_preirq();
+
+    init_console();
+    set_printk_prefix("(XEN) ");
+
+    /* xenheap should be in same TR-covered range with xen image */
+    xenheap_phys_end = xen_pstart + xenheap_size;
+    printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
+           xen_pstart, xenheap_phys_end);
+
+    /* Find next hole */
+    firsthole_start = 0;
+    efi_memmap_walk(xen_find_first_hole, &firsthole_start);
+
+    initial_images_start = xenheap_phys_end;
+    initial_images_end = initial_images_start + ia64_boot_param->initrd_size;
+
+    /* Later may find another memory trunk, even away from xen image... */
+    if (initial_images_end > firsthole_start) {
+       printk("Not enough memory to stash the DOM0 kernel image.\n");
+       printk("First hole:0x%lx, relocation end: 0x%lx\n",
+               firsthole_start, initial_images_end);
+       for ( ; ; );
+    }
+
+    /* This copy is time consuming, but elilo may load Dom0 image
+     * within xenheap range */
+    printk("ready to move Dom0 to 0x%lx...", initial_images_start);
+    memmove(__va(initial_images_start),
+          __va(ia64_boot_param->initrd_start),
+          ia64_boot_param->initrd_size);
+    ia64_boot_param->initrd_start = initial_images_start;
+    printk("Done\n");
+
+    /* first find highest page frame number */
+    max_page = 0;
+    efi_memmap_walk(find_max_pfn, &max_page);
+    printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
+
+    heap_start = memguard_init(ia64_imva(&_end));
+    printf("Before heap_start: 0x%lx\n", heap_start);
+    heap_start = __va(init_boot_allocator(__pa(heap_start)));
+    printf("After heap_start: 0x%lx\n", heap_start);
+
+    reserve_memory();
+
+    efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
+    efi_memmap_walk(xen_count_pages, &nr_pages);
+
+    printk("System RAM: %luMB (%lukB)\n",
+       nr_pages >> (20 - PAGE_SHIFT),
+       nr_pages << (PAGE_SHIFT - 10));
+
+    init_frametable();
+
+    ia64_fph_enable();
+    __ia64_init_fpu();
+
+    alloc_dom0();
+#ifdef DOMU_BUILD_STAGING
+    alloc_domU_staging();
+#endif
+
+    end_boot_allocator();
+
+    init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
+    printk("Xen heap: %luMB (%lukB)\n",
+       (xenheap_phys_end-__pa(heap_start)) >> 20,
+       (xenheap_phys_end-__pa(heap_start)) >> 10);
+
+    late_setup_arch(&cmdline);
+    setup_per_cpu_areas();
+    mem_init();
+
+printk("About to call scheduler_init()\n");
+    scheduler_init();
+    local_irq_disable();
+printk("About to call xen_time_init()\n");
+    xen_time_init();
+#ifdef CONFIG_VTI
+    init_xen_time(); /* initialise the time */
+#endif // CONFIG_VTI 
+printk("About to call ac_timer_init()\n");
+    ac_timer_init();
+// init_xen_time(); ???
+
+#ifdef CONFIG_SMP
+    if ( opt_nosmp )
+    {
+        max_cpus = 0;
+        smp_num_siblings = 1;
+        //boot_cpu_data.x86_num_cores = 1;
+    }
+
+    smp_prepare_cpus(max_cpus);
+
+    /* We aren't hotplug-capable yet. */
+    //BUG_ON(!cpus_empty(cpu_present_map));
+    for_each_cpu ( i )
+        cpu_set(i, cpu_present_map);
+
+    //BUG_ON(!local_irq_is_enabled());
+
+printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus);
+    for_each_present_cpu ( i )
+    {
+        if ( num_online_cpus() >= max_cpus )
+            break;
+        if ( !cpu_online(i) ) {
+printk("About to call __cpu_up(%d)\n",i);
+            __cpu_up(i);
+       }
+    }
+
+    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+    smp_cpus_done(max_cpus);
+#endif
+
+
+       // FIXME: Should the following be swapped and moved later?
+    schedulers_start();
+    do_initcalls();
+printk("About to call sort_main_extable()\n");
+    sort_main_extable();
+
+    /* surrender usage of kernel registers to domain, use percpu area instead 
*/
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE);
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = 
ia64_get_kr(IA64_KR_PER_CPU_DATA);
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = 
ia64_get_kr(IA64_KR_CURRENT_STACK);
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = 
ia64_get_kr(IA64_KR_FPU_OWNER);
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT);
+    __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE);
+
+    /* Create initial domain 0. */
+printk("About to call do_createdomain()\n");
+    dom0 = do_createdomain(0, 0);
+    init_task.domain = &idle0_domain;
+    init_task.processor = 0;
+//    init_task.mm = &init_mm;
+    init_task.domain->arch.mm = &init_mm;
+//    init_task.thread = INIT_THREAD;
+    //arch_do_createdomain(current);
+#ifdef CLONE_DOMAIN0
+    {
+    int i;
+    for (i = 0; i < CLONE_DOMAIN0; i++) {
+       clones[i] = do_createdomain(i+1, 0);
+        if ( clones[i] == NULL )
+            panic("Error creating domain0 clone %d\n",i);
+    }
+    }
+#endif
+    if ( dom0 == NULL )
+        panic("Error creating domain 0\n");
+
+    set_bit(_DOMF_privileged, &dom0->domain_flags);
+
+    /*
+     * We're going to setup domain0 using the module(s) that we stashed safely
+     * above our heap. The second module, if present, is an initrd ramdisk.
+     */
+printk("About to call construct_dom0()\n");
+    dom0_memory_start = __va(ia64_boot_param->initrd_start);
+    dom0_memory_end = ia64_boot_param->initrd_size;
+    if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
+                       0,
+                        0,
+                       0) != 0)
+        panic("Could not set up DOM0 guest OS\n");
+#ifdef CLONE_DOMAIN0
+    {
+    int i;
+    dom0_memory_start = __va(ia64_boot_param->initrd_start);
+    dom0_memory_end = ia64_boot_param->initrd_size;
+    for (i = 0; i < CLONE_DOMAIN0; i++) {
+printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1);
+        if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end,
+                        0, 
+                        0,
+                       0) != 0)
+            panic("Could not set up DOM0 clone %d\n",i);
+    }
+    }
+#endif
+
+    /* The stash space for the initial kernel image can now be freed up. */
+    init_domheap_pages(ia64_boot_param->initrd_start,
+                      ia64_boot_param->initrd_start + 
ia64_boot_param->initrd_size);
+    if (!running_on_sim)  // slow on ski and pages are pre-initialized to zero
+       scrub_heap_pages();
+
+printk("About to call init_trace_bufs()\n");
+    init_trace_bufs();
+
+    /* Give up the VGA console if DOM0 is configured to grab it. */
+#ifndef IA64
+    console_endboot(cmdline && strstr(cmdline, "tty0"));
+#endif
+
+#ifdef CLONE_DOMAIN0
+    {
+    int i;
+    for (i = 0; i < CLONE_DOMAIN0; i++)
+       domain_unpause_by_systemcontroller(clones[i]);
+    }
+#endif
+    domain_unpause_by_systemcontroller(dom0);
+    domain0_ready = 1;
+    local_irq_enable();
+printk("About to call startup_cpu_idle_loop()\n");
+    startup_cpu_idle_loop();
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xentime.c
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xentime.c       Thu Sep  1 18:46:28 2005
@@ -0,0 +1,382 @@
+/*
+ * xen/arch/ia64/time.c
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *     Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#ifdef XEN
+#include <asm/vcpu.h>
+#include <linux/jiffies.h>     // not included by xen/sched.h
+#endif
+#include <xen/softirq.h>
+
+#ifdef XEN
+seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+#endif
+
+#define TIME_KEEPER_ID  0
+extern unsigned long wall_jiffies;
+
+static s_time_t        stime_irq;       /* System time at last 'time update' */
+
+unsigned long domain0_ready = 0;
+
+#ifndef CONFIG_VTI
+static inline u64 get_time_delta(void)
+{
+       return ia64_get_itc();
+}
+#else // CONFIG_VTI
+static s_time_t        stime_irq = 0x0;       /* System time at last 'time 
update' */
+unsigned long itc_scale;
+unsigned long itc_at_irq;
+static unsigned long   wc_sec, wc_nsec; /* UTC time at last 'time update'.   */
+//static rwlock_t        time_lock = RW_LOCK_UNLOCKED;
+static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs 
*regs);
+
+static inline u64 get_time_delta(void)
+{
+    s64      delta_itc;
+    u64      delta, cur_itc;
+    
+    cur_itc = ia64_get_itc();
+
+    delta_itc = (s64)(cur_itc - itc_at_irq);
+    if ( unlikely(delta_itc < 0) ) delta_itc = 0;
+    delta = ((u64)delta_itc) * itc_scale;
+    delta = delta >> 32;
+
+    return delta;
+}
+
+u64 tick_to_ns(u64 tick)
+{
+    return (tick * itc_scale) >> 32;
+}
+#endif // CONFIG_VTI
+
+s_time_t get_s_time(void)
+{
+    s_time_t now;
+    unsigned long flags;
+
+    read_lock_irqsave(&xtime_lock, flags);
+
+    now = stime_irq + get_time_delta();
+
+    /* Ensure that the returned system time is monotonically increasing. */
+    {
+        static s_time_t prev_now = 0;
+        if ( unlikely(now < prev_now) )
+            now = prev_now;
+        prev_now = now;
+    }
+
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    return now; 
+}
+
+void update_dom_time(struct vcpu *v)
+{
+// FIXME: implement this?
+//     printf("update_dom_time: called, not implemented, skipping\n");
+       return;
+}
+
+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
+{
+#ifdef  CONFIG_VTI
+    u64 _nsecs;
+
+    write_lock_irq(&xtime_lock);
+
+    _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base);
+    while ( _nsecs >= 1000000000 ) 
+    {
+        _nsecs -= 1000000000;
+        secs++;
+    }
+
+    wc_sec  = secs;
+    wc_nsec = (unsigned long)_nsecs;
+
+    write_unlock_irq(&xtime_lock);
+
+    update_dom_time(current->domain);
+#else
+// FIXME: Should this be do_settimeofday (from linux)???
+       printf("do_settime: called, not implemented, stopping\n");
+       dummy();
+#endif
+}
+
+irqreturn_t
+xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+       unsigned long new_itm;
+
+#define HEARTBEAT_FREQ 16      // period in seconds
+#ifdef HEARTBEAT_FREQ
+       static long count = 0;
+       if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) {
+               printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n",
+                       regs->cr_iip,
+                       VCPU(current,interrupt_delivery_enabled),
+                       VCPU(current,pending_interruption));
+               count = 0;
+       }
+#endif
+#ifndef XEN
+       if (unlikely(cpu_is_offline(smp_processor_id()))) {
+               return IRQ_HANDLED;
+       }
+#endif
+#ifdef XEN
+       if (current->domain == dom0) {
+               // FIXME: there's gotta be a better way of doing this...
+               // We have to ensure that domain0 is launched before we
+               // call vcpu_timer_expired on it
+               //domain0_ready = 1; // moved to xensetup.c
+               VCPU(current,pending_interruption) = 1;
+       }
+       if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
+               vcpu_pend_timer(dom0->vcpu[0]);
+               //vcpu_set_next_timer(dom0->vcpu[0]);
+               vcpu_wake(dom0->vcpu[0]);
+       }
+       if (!is_idle_task(current->domain) && current->domain != dom0) {
+               if (vcpu_timer_expired(current)) {
+                       vcpu_pend_timer(current);
+                       // ensure another timer interrupt happens even if 
domain doesn't
+                       vcpu_set_next_timer(current);
+                       vcpu_wake(current);
+               }
+       }
+       raise_actimer_softirq();
+#endif
+
+#ifndef XEN
+       platform_timer_interrupt(irq, dev_id, regs);
+#endif
+
+       new_itm = local_cpu_data->itm_next;
+
+       if (!time_after(ia64_get_itc(), new_itm))
+#ifdef XEN
+               return;
+#else
+               printk(KERN_ERR "Oops: timer tick before it's due 
(itc=%lx,itm=%lx)\n",
+                      ia64_get_itc(), new_itm);
+#endif
+
+#ifdef XEN
+//     printf("GOT TO HERE!!!!!!!!!!!\n");
+       //while(1);
+#else
+       profile_tick(CPU_PROFILING, regs);
+#endif
+
+       while (1) {
+#ifndef XEN
+               update_process_times(user_mode(regs));
+#endif
+
+               new_itm += local_cpu_data->itm_delta;
+
+               if (smp_processor_id() == TIME_KEEPER_ID) {
+                       /*
+                        * Here we are in the timer irq handler. We have irqs 
locally
+                        * disabled, but we don't know if the timer_bh is 
running on
+                        * another CPU. We need to avoid to SMP race by 
acquiring the
+                        * xtime_lock.
+                        */
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+                       write_seqlock(&xtime_lock);
+#endif
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+                       do_timer(regs);
+#endif
+                       local_cpu_data->itm_next = new_itm;
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+                       write_sequnlock(&xtime_lock);
+#endif
+               } else
+                       local_cpu_data->itm_next = new_itm;
+
+               if (time_after(new_itm, ia64_get_itc()))
+                       break;
+       }
+
+       do {
+               /*
+                * If we're too close to the next clock tick for
+                * comfort, we increase the safety margin by
+                * intentionally dropping the next tick(s).  We do NOT
+                * update itm.next because that would force us to call
+                * do_timer() which in turn would let our clock run
+                * too fast (with the potentially devastating effect
+                * of losing monotony of time).
+                */
+               while (!time_after(new_itm, ia64_get_itc() + 
local_cpu_data->itm_delta/2))
+                       new_itm += local_cpu_data->itm_delta;
+//#ifdef XEN
+//             vcpu_set_next_timer(current);
+//#else
+//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
+               ia64_set_itm(new_itm);
+//#endif
+               /* double check, in case we got hit by a (slow) PMI: */
+       } while (time_after_eq(ia64_get_itc(), new_itm));
+       return IRQ_HANDLED;
+}
+
+static struct irqaction xen_timer_irqaction = {
+#ifdef CONFIG_VTI
+       .handler =      vmx_timer_interrupt,
+#else // CONFIG_VTI
+       .handler =      xen_timer_interrupt,
+#endif // CONFIG_VTI
+#ifndef XEN
+       .flags =        SA_INTERRUPT,
+#endif
+       .name =         "timer"
+};
+
+void __init
+xen_time_init (void)
+{
+       register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction);
+       ia64_init_itm();
+}
+
+
+#ifdef CONFIG_VTI
+
+/* Late init function (after all CPUs are booted). */
+int __init init_xen_time()
+{
+    struct timespec tm;
+
+    itc_scale  = 1000000000UL << 32 ;
+    itc_scale /= local_cpu_data->itc_freq;
+
+    /* System time ticks from zero. */
+    stime_irq = (s_time_t)0;
+    itc_at_irq = ia64_get_itc();
+
+    /* Wallclock time starts as the initial RTC time. */
+    efi_gettimeofday(&tm);
+    wc_sec  = tm.tv_sec;
+    wc_nsec = tm.tv_nsec;
+
+
+    printk("Time init:\n");
+    printk(".... System Time: %ldns\n", NOW());
+    printk(".... scale:       %16lX\n", itc_scale);
+    printk(".... Wall Clock:  %lds %ldus\n", wc_sec, wc_nsec/1000);
+
+    return 0;
+}
+
+static irqreturn_t
+vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+    unsigned long new_itm;
+    struct vcpu *v = current;
+
+
+    new_itm = local_cpu_data->itm_next;
+
+    if (!time_after(ia64_get_itc(), new_itm))
+        return;
+
+    while (1) {
+#ifdef CONFIG_SMP
+        /*
+         * For UP, this is done in do_timer().  Weird, but
+         * fixing that would require updates to all
+         * platforms.
+         */
+        update_process_times(user_mode(v, regs));
+#endif
+        new_itm += local_cpu_data->itm_delta;
+
+        if (smp_processor_id() == TIME_KEEPER_ID) {
+            /*
+             * Here we are in the timer irq handler. We have irqs locally
+             * disabled, but we don't know if the timer_bh is running on
+             * another CPU. We need to avoid to SMP race by acquiring the
+             * xtime_lock.
+             */
+            local_cpu_data->itm_next = new_itm;
+            
+            write_lock_irq(&xtime_lock);
+            /* Update jiffies counter. */
+            (*(unsigned long *)&jiffies_64)++;
+
+            /* Update wall time. */
+            wc_nsec += 1000000000/HZ;
+            if ( wc_nsec >= 1000000000 )
+            {
+                wc_nsec -= 1000000000;
+                wc_sec++;
+            }
+
+            /* Updates system time (nanoseconds since boot). */
+            stime_irq += MILLISECS(1000/HZ);
+            itc_at_irq = ia64_get_itc();
+
+            write_unlock_irq(&xtime_lock);
+            
+        } else
+            local_cpu_data->itm_next = new_itm;
+
+        if (time_after(new_itm, ia64_get_itc()))
+            break;
+    }
+
+    do {
+        /*
+         * If we're too close to the next clock tick for
+         * comfort, we increase the safety margin by
+         * intentionally dropping the next tick(s).  We do NOT
+         * update itm.next because that would force us to call
+         * do_timer() which in turn would let our clock run
+         * too fast (with the potentially devastating effect
+         * of losing monotony of time).
+         */
+        while (!time_after(new_itm, ia64_get_itc() + 
local_cpu_data->itm_delta/2))
+            new_itm += local_cpu_data->itm_delta;
+        ia64_set_itm(new_itm);
+        /* double check, in case we got hit by a (slow) PMI: */
+    } while (time_after_eq(ia64_get_itc(), new_itm));
+    raise_softirq(AC_TIMER_SOFTIRQ);
+    
+    return IRQ_HANDLED;
+}
+#endif // CONFIG_VTI
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 
xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- /dev/null   Thu Sep  1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h      Thu Sep  1 18:46:28 2005
@@ -0,0 +1,577 @@
+#ifndef _ASM_IA64_PGTABLE_H
+#define _ASM_IA64_PGTABLE_H
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the IA-64 page table tree.
+ *
+ * This hopefully works with any (fixed) IA-64 page-size, as defined
+ * in <asm/page.h>.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@xxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/types.h>
+#ifdef XEN
+#ifndef __ASSEMBLY__
+#include <xen/sched.h> /* needed for mm_struct (via asm/domain.h) */
+#endif
+#endif
+
+#define IA64_MAX_PHYS_BITS     50      /* max. number of physical address bits 
(architected) */
+
+/*
+ * First, define the various bits in a PTE.  Note that the PTE format
+ * matches the VHPT short format, the first doubleword of the VHPD long
+ * format, and the first doubleword of the TLB insertion format.
+ */
+#define _PAGE_P_BIT            0
+#define _PAGE_A_BIT            5
+#define _PAGE_D_BIT            6
+
+#define _PAGE_P                        (1 << _PAGE_P_BIT)      /* page present 
bit */
+#define _PAGE_MA_WB            (0x0 <<  2)     /* write back memory attribute 
*/
+#define _PAGE_MA_UC            (0x4 <<  2)     /* uncacheable memory attribute 
*/
+#define _PAGE_MA_UCE           (0x5 <<  2)     /* UC exported attribute */
+#define _PAGE_MA_WC            (0x6 <<  2)     /* write coalescing memory 
attribute */
+#define _PAGE_MA_NAT           (0x7 <<  2)     /* not-a-thing attribute */
+#define _PAGE_MA_MASK          (0x7 <<  2)
+#define _PAGE_PL_0             (0 <<  7)       /* privilege level 0 (kernel) */
+#define _PAGE_PL_1             (1 <<  7)       /* privilege level 1 (unused) */
+#define _PAGE_PL_2             (2 <<  7)       /* privilege level 2 (unused) */
+#define _PAGE_PL_3             (3 <<  7)       /* privilege level 3 (user) */
+#define _PAGE_PL_MASK          (3 <<  7)
+#define _PAGE_AR_R             (0 <<  9)       /* read only */
+#define _PAGE_AR_RX            (1 <<  9)       /* read & execute */
+#define _PAGE_AR_RW            (2 <<  9)       /* read & write */
+#define _PAGE_AR_RWX           (3 <<  9)       /* read, write & execute */
+#define _PAGE_AR_R_RW          (4 <<  9)       /* read / read & write */
+#define _PAGE_AR_RX_RWX                (5 <<  9)       /* read & exec / read, 
write & exec */
+#define _PAGE_AR_RWX_RW                (6 <<  9)       /* read, write & exec / 
read & write */
+#define _PAGE_AR_X_RX          (7 <<  9)       /* exec & promote / read & exec 
*/
+#define _PAGE_AR_MASK          (7 <<  9)
+#define _PAGE_AR_SHIFT         9
+#define _PAGE_A                        (1 << _PAGE_A_BIT)      /* page 
accessed bit */
+#define _PAGE_D                        (1 << _PAGE_D_BIT)      /* page dirty 
bit */
+#define _PAGE_PPN_MASK         (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & 
~0xfffUL)
+#define _PAGE_ED               (__IA64_UL(1) << 52)    /* exception deferral */
+#define _PAGE_PROTNONE         (__IA64_UL(1) << 63)
+
+/* Valid only for a PTE with the present bit cleared: */
+#define _PAGE_FILE             (1 << 1)                /* see swap & file pte 
remarks below */
+
+#define _PFN_MASK              _PAGE_PPN_MASK
+/* Mask of bits which may be changed by pte_modify(); the odd bits are there 
for _PAGE_PROTNONE */
+#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | 
_PAGE_AR_MASK | _PAGE_ED)
+
+#define _PAGE_SIZE_4K  12
+#define _PAGE_SIZE_8K  13
+#define _PAGE_SIZE_16K 14
+#define _PAGE_SIZE_64K 16
+#define _PAGE_SIZE_256K        18
+#define _PAGE_SIZE_1M  20
+#define _PAGE_SIZE_4M  22
+#define _PAGE_SIZE_16M 24
+#define _PAGE_SIZE_64M 26
+#define _PAGE_SIZE_256M        28
+#define _PAGE_SIZE_1G  30
+#define _PAGE_SIZE_4G  32
+
+#define __ACCESS_BITS          _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB
+#define __DIRTY_BITS_NO_ED     _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB
+#define __DIRTY_BITS           _PAGE_ED | __DIRTY_BITS_NO_ED
+
+/*
+ * Definitions for first level:
+ *
+ * PGDIR_SHIFT determines what a first-level page table entry can map.
+ */
+#define PGDIR_SHIFT            (PAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define PGDIR_SIZE             (__IA64_UL(1) << PGDIR_SHIFT)
+#define PGDIR_MASK             (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD           (1UL << (PAGE_SHIFT-3))
+#define USER_PTRS_PER_PGD      (5*PTRS_PER_PGD/8)      /* regions 0-4 are user 
regions */
+#define FIRST_USER_ADDRESS     0
+
+/*
+ * Definitions for second level:
+ *
+ * PMD_SHIFT determines the size of the area a second-level page table
+ * can map.
+ */
+#define PMD_SHIFT      (PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PTRS_PER_PMD   (1UL << (PAGE_SHIFT-3))
+
+/*
+ * Definitions for third level:
+ */
+#define PTRS_PER_PTE   (__IA64_UL(1) << (PAGE_SHIFT-3))
+
+/*
+ * All the normal masks have the "page accessed" bits on, as any time
+ * they are used, the page is accessed. They are cleared only by the
+ * page-out routines.
+ */
+#define PAGE_NONE      __pgprot(_PAGE_PROTNONE | _PAGE_A)
+#define PAGE_SHARED    __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
+#define PAGE_READONLY  __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY      __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define PAGE_GATE      __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
+#define PAGE_KERNEL    __pgprot(__DIRTY_BITS  | _PAGE_PL_0 | _PAGE_AR_RWX)
+#define PAGE_KERNELRX  __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+
+# ifndef __ASSEMBLY__
+
+#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/processor.h>
+
+/*
+ * Next come the mappings that determine how mmap() protection bits
+ * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented.  The
+ * _P version gets used for a private shared memory segment, the _S
+ * version gets used for a shared memory segment with MAP_SHARED on.
+ * In a private shared memory segment, we do a copy-on-write if a task
+ * attempts to write to the page.
+ */
+       /* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_READONLY   /* write to priv pg -> copy & make writable */
+#define __P011 PAGE_READONLY   /* ditto */
+#define __