# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 3ca4ca7a9cc234d33c3981852fc37c73fcd72218
# Parent d34925e4144bcdadb020ee2deef766a994bf7b04
Final changes for Linux 2.6.13 rebasing and some directory reorganization
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/Makefile Thu Sep 1 18:46:28 2005
@@ -1,9 +1,6 @@
include $(BASEDIR)/Rules.mk
-VPATH = linux linux-xen linux/lib
-#VPATH = linux-xen linux/lib
-
-# libs-y += arch/ia64/lib/lib.a
+VPATH = xen vmx linux linux-xen
OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
xenmisc.o acpi.o hypercall.o \
@@ -15,8 +12,6 @@
irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \
grant_table.o sn_console.o
-#OBJS += idiv64.o idiv32.o \
-
# TMP holder to contain *.0 moved out of CONFIG_VTI
OBJS += vmx_init.o
@@ -27,7 +22,7 @@
pal_emul.o vmx_irq_ia64.o
endif
-# files from xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
+# lib files from xen/arch/ia64/linux/ (linux/arch/ia64/lib)
OBJS += bitop.o clear_page.o flush.o copy_page_mck.o \
memset.o strlen.o memcpy_mck.o \
__divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
@@ -86,9 +81,9 @@
touch $@
# I'm sure a Makefile wizard would know a better way to do this
-xen.lds.s: xen.lds.S
+xen.lds.s: xen/xen.lds.S
$(CC) -E $(CPPFLAGS) -P -DXEN -D__ASSEMBLY__ \
- -o xen.lds.s xen.lds.S
+ -o xen.lds.s xen/xen.lds.S
# variants of divide/modulo
# see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib)
@@ -111,7 +106,7 @@
clean:
- rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s
+ rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s map.out
rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
rm -f linux/lib/*.o
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/README.origin
--- a/xen/arch/ia64/linux/README.origin Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/README.origin Thu Sep 1 18:46:28 2005
@@ -13,12 +13,13 @@
machvec.c -> linux/arch/ia64/kernel/machvec.c
patch.c -> linux/arch/ia64/kernel/patch.c
pcdp.h -> drivers/firmware/pcdp.h
-lib/bitop.c -> linux/arch/ia64/lib/bitop.c
-lib/clear_page.S -> linux/arch/ia64/lib/clear_page.S
-lib/copy_page_mck.S -> linux/arch/ia64/lib/copy_page_mck.S
-lib/flush.S -> linux/arch/ia64/lib/flush.S
-lib/idiv32.S -> linux/arch/ia64/lib/idiv32.S
-lib/idiv64.S -> linux/arch/ia64/lib/idiv64.S
-lib/memcpy_mck.S -> linux/arch/ia64/lib/memcpy_mck.S
-lib/memset.S -> linux/arch/ia64/lib/memset.S
-lib/strlen.S -> linux/arch/ia64/lib/strlen.S
+
+bitop.c -> linux/arch/ia64/lib/bitop.c
+clear_page.S -> linux/arch/ia64/lib/clear_page.S
+copy_page_mck.S -> linux/arch/ia64/lib/copy_page_mck.S
+flush.S -> linux/arch/ia64/lib/flush.S
+idiv32.S -> linux/arch/ia64/lib/idiv32.S
+idiv64.S -> linux/arch/ia64/lib/idiv64.S
+memcpy_mck.S -> linux/arch/ia64/lib/memcpy_mck.S
+memset.S -> linux/arch/ia64/lib/memset.S
+strlen.S -> linux/arch/ia64/lib/strlen.S
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/config.h Thu Sep 1 18:46:28 2005
@@ -203,6 +203,7 @@
#endif // CONFIG_VTI
#define __attribute_used__ __attribute__ ((unused))
+#define __nocast
// see include/asm-x86/atomic.h (different from standard linux)
#define _atomic_set(v,i) (((v).counter) = (i))
@@ -262,9 +263,6 @@
// these declarations got moved at some point, find a better place for them
extern int ht_per_core;
-// needed for include/xen/smp.h
-#define __smp_processor_id() 0
-
// xen/include/asm/config.h
/******************************************************************************
* config.h
@@ -297,6 +295,10 @@
#endif /* __ASSEMBLY__ */
#endif /* __XEN_IA64_CONFIG_H__ */
+// needed for include/xen/smp.h
+#define __smp_processor_id() 0
+
+
// FOLLOWING ADDED FOR XEN POST-NGIO and/or LINUX 2.6.7
// following derived from linux/include/linux/compiler-gcc3.h
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/bug.h
--- a/xen/include/asm-ia64/linux/asm-generic/bug.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/bug.h Thu Sep 1 18:46:28 2005
@@ -4,17 +4,11 @@
#include <linux/compiler.h>
#include <linux/config.h>
+#ifdef CONFIG_BUG
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
panic("BUG!"); \
-} while (0)
-#endif
-
-#ifndef HAVE_ARCH_PAGE_BUG
-#define PAGE_BUG(page) do { \
- printk("page BUG for page at %p\n", page); \
- BUG(); \
} while (0)
#endif
@@ -31,4 +25,18 @@
} while (0)
#endif
+#else /* !CONFIG_BUG */
+#ifndef HAVE_ARCH_BUG
+#define BUG()
#endif
+
+#ifndef HAVE_ARCH_BUG_ON
+#define BUG_ON(condition) do { if (condition) ; } while(0)
+#endif
+
+#ifndef HAVE_ARCH_WARN_ON
+#define WARN_ON(condition) do { if (condition) ; } while(0)
+#endif
+#endif
+
+#endif
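
A note on the CONFIG_BUG change above: when the option is off, BUG()/BUG_ON()/WARN_ON() collapse to stubs that still evaluate their condition, so side effects are preserved. A minimal standalone sketch of the same pattern, assuming a toy MY_CONFIG_BUG switch and an illustrative check_index() helper (neither is part of the patch):

#include <stdio.h>
#include <stdlib.h>

#define MY_CONFIG_BUG 1    /* flip to 0 to get the stubbed-out variant */

#if MY_CONFIG_BUG
#define BUG() do { \
    printf("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
    abort(); \
} while (0)
#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
#else
/* the condition is still evaluated, so side effects are not lost */
#define BUG() do { } while (0)
#define BUG_ON(condition) do { if (condition) ; } while (0)
#endif

static int check_index(int i, int len)
{
    BUG_ON(i < 0 || i >= len);   /* fires only when MY_CONFIG_BUG is set */
    return i;
}

int main(void)
{
    printf("index ok: %d\n", check_index(3, 8));
    return 0;
}
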
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/errno.h
--- a/xen/include/asm-ia64/linux/asm-generic/errno.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/errno.h Thu Sep 1 18:46:28 2005
@@ -102,4 +102,8 @@
#define EKEYREVOKED 128 /* Key has been revoked */
#define EKEYREJECTED 129 /* Key was rejected by service */
+/* for robust mutexes */
+#define EOWNERDEAD 130 /* Owner died */
+#define ENOTRECOVERABLE 131 /* State not recoverable */
+
#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/iomap.h
--- a/xen/include/asm-ia64/linux/asm-generic/iomap.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/iomap.h Thu Sep 1 18:46:28 2005
@@ -2,6 +2,7 @@
#define __GENERIC_IO_H
#include <linux/linkage.h>
+#include <asm/byteorder.h>
/*
* These are the "generic" interfaces for doing new-style
@@ -26,11 +27,15 @@
*/
extern unsigned int fastcall ioread8(void __iomem *);
extern unsigned int fastcall ioread16(void __iomem *);
+extern unsigned int fastcall ioread16be(void __iomem *);
extern unsigned int fastcall ioread32(void __iomem *);
+extern unsigned int fastcall ioread32be(void __iomem *);
extern void fastcall iowrite8(u8, void __iomem *);
extern void fastcall iowrite16(u16, void __iomem *);
+extern void fastcall iowrite16be(u16, void __iomem *);
extern void fastcall iowrite32(u32, void __iomem *);
+extern void fastcall iowrite32be(u32, void __iomem *);
/*
* "string" versions of the above. Note that they
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pci.h
--- a/xen/include/asm-ia64/linux/asm-generic/pci.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pci.h Thu Sep 1 18:46:28 2005
@@ -22,6 +22,14 @@
region->end = res->end;
}
+static inline void
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+ struct pci_bus_region *region)
+{
+ res->start = region->start;
+ res->end = region->end;
+}
+
#define pcibios_scan_all_fns(a, b) 0
#ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h
--- a/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h Thu Sep 1 18:46:28 2005
@@ -2,6 +2,8 @@
#define _PGTABLE_NOPUD_H
#ifndef __ASSEMBLY__
+
+#define __PAGETABLE_PUD_FOLDED
/*
* Having the pud type consist of a pgd gets the size right, and allows
@@ -52,5 +54,8 @@
#define pud_free(x) do { } while (0)
#define __pud_free_tlb(tlb, x) do { } while (0)
+#undef pud_addr_end
+#define pud_addr_end(addr, end) (end)
+
#endif /* __ASSEMBLY__ */
#endif /* _PGTABLE_NOPUD_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pgtable.h
--- a/xen/include/asm-ia64/linux/asm-generic/pgtable.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/pgtable.h Thu Sep 1 18:46:28 2005
@@ -16,7 +16,7 @@
#ifndef __HAVE_ARCH_SET_PTE_ATOMIC
#define ptep_establish(__vma, __address, __ptep, __entry) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#else /* __HAVE_ARCH_SET_PTE_ATOMIC */
@@ -37,26 +37,30 @@
*/
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
- set_pte(__ptep, __entry); \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
flush_tlb_page(__vma, __address); \
} while (0)
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_young(pte))
- return 0;
- set_pte(ptep, pte_mkold(pte));
- return 1;
-}
+#define ptep_test_and_clear_young(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ int r = 1; \
+ if (!pte_young(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), \
+ (__ptep), pte_mkold(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
- int __young = ptep_test_and_clear_young(__ptep); \
+ int __young; \
+ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \
if (__young) \
flush_tlb_page(__vma, __address); \
__young; \
@@ -64,20 +68,24 @@
#endif
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-static inline int ptep_test_and_clear_dirty(pte_t *ptep)
-{
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- return 0;
- set_pte(ptep, pte_mkclean(pte));
- return 1;
-}
+#define ptep_test_and_clear_dirty(__vma, __address, __ptep) \
+({ \
+ pte_t __pte = *__ptep; \
+ int r = 1; \
+ if (!pte_dirty(__pte)) \
+ r = 0; \
+ else \
+ set_pte_at((__vma)->vm_mm, (__address), (__ptep), \
+ pte_mkclean(__pte)); \
+ r; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
({ \
- int __dirty = ptep_test_and_clear_dirty(__ptep); \
+ int __dirty; \
+ __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \
if (__dirty) \
flush_tlb_page(__vma, __address); \
__dirty; \
@@ -85,36 +93,29 @@
#endif
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
- pte_t pte = *ptep;
- pte_clear(ptep);
- return pte;
-}
+#define ptep_get_and_clear(__mm, __address, __ptep) \
+({ \
+ pte_t __pte = *(__ptep); \
+ pte_clear((__mm), (__address), (__ptep)); \
+ __pte; \
+})
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define ptep_clear_flush(__vma, __address, __ptep) \
({ \
- pte_t __pte = ptep_get_and_clear(__ptep); \
+ pte_t __pte; \
+ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \
flush_tlb_page(__vma, __address); \
__pte; \
})
#endif
#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte = *ptep;
- set_pte(ptep, pte_wrprotect(old_pte));
-}
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_MKDIRTY
-static inline void ptep_mkdirty(pte_t *ptep)
-{
- pte_t old_pte = *ptep;
- set_pte(ptep, pte_mkdirty(old_pte));
+ set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif
@@ -124,6 +125,9 @@
#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define page_test_and_clear_dirty(page) (0)
+#define pte_maybe_dirty(pte) pte_dirty(pte)
+#else
+#define pte_maybe_dirty(pte) (1)
#endif
#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
@@ -134,4 +138,77 @@
#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
#endif
+#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+#define lazy_mmu_prot_update(pte) do { } while (0)
+#endif
+
+/*
+ * When walking page tables, get the address of the next boundary,
+ * or the end address of the range if that comes earlier. Although no
+ * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
+ */
+
+#define pgd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#ifndef pud_addr_end
+#define pud_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef pmd_addr_end
+#define pmd_addr_end(addr, end) \
+({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+#endif
+
+#ifndef __ASSEMBLY__
+/*
+ * When walking page tables, we usually want to skip any p?d_none entries;
+ * and any p?d_bad entries - reporting the error before resetting to none.
+ * Do the tests inline, but report and clear the bad entry in mm/memory.c.
+ */
+void pgd_clear_bad(pgd_t *);
+void pud_clear_bad(pud_t *);
+void pmd_clear_bad(pmd_t *);
+
+static inline int pgd_none_or_clear_bad(pgd_t *pgd)
+{
+ if (pgd_none(*pgd))
+ return 1;
+ if (unlikely(pgd_bad(*pgd))) {
+ pgd_clear_bad(pgd);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pud_none_or_clear_bad(pud_t *pud)
+{
+ if (pud_none(*pud))
+ return 1;
+ if (unlikely(pud_bad(*pud))) {
+ pud_clear_bad(pud);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int pmd_none_or_clear_bad(pmd_t *pmd)
+{
+ if (pmd_none(*pmd))
+ return 1;
+ if (unlikely(pmd_bad(*pmd))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+ return 0;
+}
+#endif /* !__ASSEMBLY__ */
+
#endif /* _ASM_GENERIC_PGTABLE_H */
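
The p?d_addr_end() macros introduced above clamp the next table boundary to the end of the range, and the (__boundary - 1 < (end) - 1) comparison keeps working even when the rounded-up boundary wraps to 0. A standalone sketch under the assumption of a made-up 1 MiB span; SPAN_SIZE and span_addr_end() are illustrative stand-ins for PGDIR_SIZE and pgd_addr_end():

#include <stdio.h>

#define SPAN_SIZE (1UL << 20)              /* stand-in for PGDIR_SIZE */
#define SPAN_MASK (~(SPAN_SIZE - 1))

/* same shape as pgd_addr_end(): next boundary, clamped to end, and
 * robust against the rounded-up boundary wrapping to 0 */
#define span_addr_end(addr, end)                                    \
({  unsigned long __boundary = ((addr) + SPAN_SIZE) & SPAN_MASK;    \
    (__boundary - 1 < (end) - 1) ? __boundary : (end);              \
})

int main(void)
{
    unsigned long addr = 0x00180000UL, end = 0x00410000UL;

    /* walk [addr, end) one span at a time, as a page-table walker would */
    do {
        unsigned long next = span_addr_end(addr, end);
        printf("range [%#lx, %#lx)\n", addr, next);
        addr = next;
    } while (addr != end);

    /* near the top of the address space the boundary wraps to 0, but the
     * off-by-one comparison still clamps the result to end */
    addr = (unsigned long)-0x900;
    end  = (unsigned long)-0x800;
    printf("wrap case clamps to %#lx (== end)\n", span_addr_end(addr, end));
    return 0;
}
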
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/sections.h
--- a/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep 1 18:46:28 2005
@@ -8,6 +8,9 @@
extern char __bss_start[], __bss_stop[];
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
+extern char _sextratext[] __attribute__((weak));
+extern char _eextratext[] __attribute__((weak));
extern char _end[];
+extern char __per_cpu_start[], __per_cpu_end[];
#endif /* _ASM_GENERIC_SECTIONS_H_ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/topology.h
--- a/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep 1 18:46:28 2005
@@ -41,8 +41,15 @@
#ifndef node_to_first_cpu
#define node_to_first_cpu(node) (0)
#endif
+#ifndef pcibus_to_node
+#define pcibus_to_node(node) (-1)
+#endif
+
#ifndef pcibus_to_cpumask
-#define pcibus_to_cpumask(bus) (cpu_online_map)
+#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \
+ CPU_MASK_ALL : \
+ node_to_cpumask(pcibus_to_node(bus)) \
+ )
#endif
#endif /* _ASM_GENERIC_TOPOLOGY_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h
--- a/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h Thu Sep 1 18:46:28 2005
@@ -73,7 +73,7 @@
}
#define SECURITY_INIT \
- .security_initcall.init : { \
+ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
*(.security_initcall.init) \
VMLINUX_SYMBOL(__security_initcall_end) = .; \
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/acpi.h
--- a/xen/include/asm-ia64/linux/asm/acpi.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/acpi.h Thu Sep 1 18:46:28 2005
@@ -98,6 +98,15 @@
int acpi_request_vector (u32 int_type);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+/*
+ * Record the cpei override flag and current logical cpu. This is
+ * useful for CPU removal.
+ */
+extern unsigned int can_cpei_retarget(void);
+extern unsigned int is_cpu_cpei_target(unsigned int cpu);
+extern void set_cpei_target_cpu(unsigned int cpu);
+extern unsigned int get_cpei_target_cpu(void);
+
#ifdef CONFIG_ACPI_NUMA
/* Proximity bitmap length; _PXM is at most 255 (8 bit)*/
#define MAX_PXM_DOMAINS (256)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bitops.h
--- a/xen/include/asm-ia64/linux/asm/bitops.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/bitops.h Thu Sep 1 18:46:28 2005
@@ -314,8 +314,8 @@
#ifdef __KERNEL__
/*
- * find_last_zero_bit - find the last zero bit in a 64 bit quantity
- * @x: The value to search
+ * Return bit number of last (most-significant) bit set. Undefined
+ * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3).
*/
static inline unsigned long
ia64_fls (unsigned long x)
@@ -327,10 +327,23 @@
return exp - 0xffff;
}
+/*
+ * Find the last (most significant) bit set. Returns 0 for x==0 and
+ * bits are numbered from 1..32 (e.g., fls(9) == 4).
+ */
static inline int
-fls (int x)
-{
- return ia64_fls((unsigned int) x);
+fls (int t)
+{
+ unsigned long x = t & 0xffffffffu;
+
+ if (!x)
+ return 0;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ return ia64_popcnt(x);
}
/*
@@ -353,9 +366,9 @@
return result;
}
-#define hweight32(x) hweight64 ((x) & 0xfffffffful)
-#define hweight16(x) hweight64 ((x) & 0xfffful)
-#define hweight8(x) hweight64 ((x) & 0xfful)
+#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful)
+#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful)
+#define hweight8(x) (unsigned int) hweight64((x) & 0xfful)
#endif /* __KERNEL__ */
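
The new fls() above smears the highest set bit into every lower position and then counts bits, giving fls(0) == 0 and fls(9) == 4 without a per-bit scan. A standalone sketch, with GCC's __builtin_popcountl() standing in for the ia64_popcnt() primitive used in the patch:

#include <stdio.h>

/* same shape as the new ia64 fls(): returns 0 for x == 0, otherwise the
 * 1-based index of the most-significant set bit */
static int my_fls(int t)
{
    unsigned long x = t & 0xffffffffu;

    if (!x)
        return 0;
    x |= x >> 1;    /* after the smears, bits 0..msb are all 1 */
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return __builtin_popcountl(x);   /* stand-in for ia64_popcnt() */
}

int main(void)
{
    printf("fls(0)  = %d\n", my_fls(0));    /* 0 */
    printf("fls(9)  = %d\n", my_fls(9));    /* 4 */
    printf("fls(-1) = %d\n", my_fls(-1));   /* 32: all low 32 bits set */
    return 0;
}
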
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/break.h
--- a/xen/include/asm-ia64/linux/asm/break.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/break.h Thu Sep 1 18:46:28 2005
@@ -12,6 +12,8 @@
* OS-specific debug break numbers:
*/
#define __IA64_BREAK_KDB 0x80100
+#define __IA64_BREAK_KPROBE 0x80200
+#define __IA64_BREAK_JPROBE 0x80300
/*
* OS-specific break numbers:
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bug.h
--- a/xen/include/asm-ia64/linux/asm/bug.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/bug.h Thu Sep 1 18:46:28 2005
@@ -1,6 +1,7 @@
#ifndef _ASM_IA64_BUG_H
#define _ASM_IA64_BUG_H
+#ifdef CONFIG_BUG
#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
# define ia64_abort() __builtin_trap()
#else
@@ -8,8 +9,10 @@
#endif
#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
-/* should this BUG should be made generic? */
+/* should this BUG be made generic? */
#define HAVE_ARCH_BUG
+#endif
+
#include <asm-generic/bug.h>
#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/cacheflush.h
--- a/xen/include/asm-ia64/linux/asm/cacheflush.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/cacheflush.h Thu Sep 1 18:46:28 2005
@@ -19,7 +19,7 @@
#define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0)
#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
#define flush_icache_page(vma,page) do { } while (0)
#define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/param.h
--- a/xen/include/asm-ia64/linux/asm/param.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/param.h Thu Sep 1 18:46:28 2005
@@ -27,7 +27,7 @@
*/
# define HZ 32
# else
-# define HZ 1024
+# define HZ CONFIG_HZ
# endif
# define USER_HZ HZ
# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/pci.h
--- a/xen/include/asm-ia64/linux/asm/pci.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/pci.h Thu Sep 1 18:46:28 2005
@@ -47,7 +47,7 @@
}
static inline void
-pcibios_penalize_isa_irq (int irq)
+pcibios_penalize_isa_irq (int irq, int active)
{
/* We don't do dynamic PCI IRQ allocation */
}
@@ -82,6 +82,25 @@
#define sg_dma_len(sg) ((sg)->dma_length)
#define sg_dma_address(sg) ((sg)->dma_address)
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+ enum pci_dma_burst_strategy *strat,
+ unsigned long *strategy_parameter)
+{
+ unsigned long cacheline_size;
+ u8 byte;
+
+ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+ if (byte == 0)
+ cacheline_size = 1024;
+ else
+ cacheline_size = (int) byte * 4;
+
+ *strat = PCI_DMA_BURST_MULTIPLE;
+ *strategy_parameter = cacheline_size;
+}
+#endif
+
#define HAVE_PCI_MMAP
extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine);
@@ -109,6 +128,7 @@
void *acpi_handle;
void *iommu;
int segment;
+ int node; /* nearest node with memory or -1 for global allocation */
unsigned int windows;
struct pci_window *window;
@@ -121,14 +141,9 @@
extern struct pci_ops pci_root_ops;
-static inline int pci_name_bus(char *name, struct pci_bus *bus)
+static inline int pci_proc_domain(struct pci_bus *bus)
{
- if (pci_domain_nr(bus) == 0) {
- sprintf(name, "%02x", bus->number);
- } else {
- sprintf(name, "%04x:%02x", pci_domain_nr(bus), bus->number);
- }
- return 0;
+ return (pci_domain_nr(bus) != 0);
}
static inline void pcibios_add_platform_entries(struct pci_dev *dev)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/percpu.h
--- a/xen/include/asm-ia64/linux/asm/percpu.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/percpu.h Thu Sep 1 18:46:28 2005
@@ -50,7 +50,7 @@
#else /* ! SMP */
-#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
+#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
#define __get_cpu_var(var) per_cpu__##var
#define per_cpu_init() (__phys_per_cpu_start)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/sections.h
--- a/xen/include/asm-ia64/linux/asm/sections.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/sections.h Thu Sep 1 18:46:28 2005
@@ -17,6 +17,7 @@
extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[];
extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[];
extern char __start_unwind[], __end_unwind[];
+extern char __start_ivt_text[], __end_ivt_text[];
#endif /* _ASM_IA64_SECTIONS_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/signal.h
--- a/xen/include/asm-ia64/linux/asm/signal.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/signal.h Thu Sep 1 18:46:28 2005
@@ -114,27 +114,11 @@
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
-/*
- * These values of sa_flags are used only by the kernel as part of the
- * irq handling routines.
- *
- * SA_INTERRUPT is also used by the irq handling routines.
- * SA_SHIRQ is for shared interrupt support on PCI and EISA.
- */
-#define SA_PROBE SA_ONESHOT
-#define SA_SAMPLE_RANDOM SA_RESTART
-#define SA_SHIRQ 0x04000000
#define SA_PERCPU_IRQ 0x02000000
#endif /* __KERNEL__ */
-#define SIG_BLOCK 0 /* for blocking signals */
-#define SIG_UNBLOCK 1 /* for unblocking signals */
-#define SIG_SETMASK 2 /* for setting the signal mask */
-
-#define SIG_DFL ((__sighandler_t)0) /* default signal handling */
-#define SIG_IGN ((__sighandler_t)1) /* ignore signal */
-#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */
+#include <asm-generic/signal.h>
# ifndef __ASSEMBLY__
@@ -142,9 +126,6 @@
/* Avoid too many header ordering problems. */
struct siginfo;
-
-/* Type of a signal handler. */
-typedef void __user (*__sighandler_t)(int);
typedef struct sigaltstack {
void __user *ss_sp;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/smp.h
--- a/xen/include/asm-ia64/linux/asm/smp.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/smp.h Thu Sep 1 18:46:28 2005
@@ -3,16 +3,14 @@
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@xxxxxxxxxxx>
- * Copyright (C) 2001-2003 Hewlett-Packard Co
+ * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P.
* David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Bjorn Helgaas <bjorn.helgaas@xxxxxx>
*/
#ifndef _ASM_IA64_SMP_H
#define _ASM_IA64_SMP_H
#include <linux/config.h>
-
-#ifdef CONFIG_SMP
-
#include <linux/init.h>
#include <linux/threads.h>
#include <linux/kernel.h>
@@ -24,12 +22,31 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
+static inline unsigned int
+ia64_get_lid (void)
+{
+ union {
+ struct {
+ unsigned long reserved : 16;
+ unsigned long eid : 8;
+ unsigned long id : 8;
+ unsigned long ignored : 32;
+ } f;
+ unsigned long bits;
+ } lid;
+
+ lid.bits = ia64_getreg(_IA64_REG_CR_LID);
+ return lid.f.id << 8 | lid.f.eid;
+}
+
+#ifdef CONFIG_SMP
+
#define XTP_OFFSET 0x1e0008
#define SMP_IRQ_REDIRECTION (1 << 0)
#define SMP_IPI_REDIRECTION (1 << 1)
-#define smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
extern struct smp_boot_data {
int cpu_count;
@@ -39,6 +56,10 @@
extern char no_int_routing __devinitdata;
extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_sibling_map[NR_CPUS];
+extern int smp_num_siblings;
+extern int smp_num_cpucores;
extern void __iomem *ipi_base_addr;
extern unsigned char smp_int_redirect;
@@ -90,22 +111,7 @@
writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
}
-static inline unsigned int
-hard_smp_processor_id (void)
-{
- union {
- struct {
- unsigned long reserved : 16;
- unsigned long eid : 8;
- unsigned long id : 8;
- unsigned long ignored : 32;
- } f;
- unsigned long bits;
- } lid;
-
- lid.bits = ia64_getreg(_IA64_REG_CR_LID);
- return lid.f.id << 8 | lid.f.eid;
-}
+#define hard_smp_processor_id() ia64_get_lid()
/* Upping and downing of CPUs */
extern int __cpu_disable (void);
@@ -122,10 +128,12 @@
extern void smp_send_reschedule (int cpu);
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
+extern void identify_siblings (struct cpuinfo_ia64 *);
#else
-#define cpu_logical_id(cpuid) 0
+#define cpu_logical_id(i) 0
+#define cpu_physical_id(i) ia64_get_lid()
#endif /* CONFIG_SMP */
#endif /* _ASM_IA64_SMP_H */
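
ia64_get_lid(), now shared by hard_smp_processor_id() and the !SMP cpu_physical_id(), decodes cr.lid through a union of bit-fields and packs the result as (id << 8) | eid. A standalone sketch of that decode against a made-up register value; decode_lid() and the 0x12/0x34 fields are illustrative, and the bit-field layout assumes GCC on a little-endian target, as on ia64:

#include <stdio.h>

/* mirrors the union used by ia64_get_lid(): low 16 bits reserved,
 * then 8-bit eid, 8-bit id, upper 32 bits ignored */
static unsigned int decode_lid(unsigned long bits)
{
    union {
        struct {
            unsigned long reserved : 16;
            unsigned long eid      : 8;
            unsigned long id       : 8;
            unsigned long ignored  : 32;
        } f;
        unsigned long bits;
    } lid;

    lid.bits = bits;
    return lid.f.id << 8 | lid.f.eid;
}

int main(void)
{
    /* illustrative value: id = 0x12, eid = 0x34, reserved = 0 */
    unsigned long fake_cr_lid = (0x12UL << 24) | (0x34UL << 16);

    printf("physical id = 0x%04x\n", decode_lid(fake_cr_lid)); /* 0x1234 */
    return 0;
}
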
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/thread_info.h
--- a/xen/include/asm-ia64/linux/asm/thread_info.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/thread_info.h Thu Sep 1 18:46:28 2005
@@ -25,7 +25,7 @@
__u32 flags; /* thread_info flags (see TIF_*) */
__u32 cpu; /* current CPU */
mm_segment_t addr_limit; /* user-level address space limit */
- __s32 preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
+ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
struct {
int signo;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/topology.h
--- a/xen/include/asm-ia64/linux/asm/topology.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/topology.h Thu Sep 1 18:46:28 2005
@@ -40,27 +40,61 @@
*/
#define node_to_first_cpu(node) (__ffs(node_to_cpumask(node)))
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
void build_cpu_to_node_map(void);
+
+#define SD_CPU_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .per_cpu_gain = 100, \
+ .cache_nice_tries = 2, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_NEWIDLE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_AFFINE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
.parent = NULL, \
.groups = NULL, \
- .min_interval = 80, \
- .max_interval = 320, \
- .busy_factor = 320, \
+ .min_interval = 8, \
+ .max_interval = 8*(min(num_online_cpus(), 32)), \
+ .busy_factor = 64, \
.imbalance_pct = 125, \
.cache_hot_time = (10*1000000), \
- .cache_nice_tries = 1, \
+ .cache_nice_tries = 2, \
+ .busy_idx = 3, \
+ .idle_idx = 2, \
+ .newidle_idx = 0, /* unused */ \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
.per_cpu_gain = 100, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
- | SD_BALANCE_NEWIDLE \
- | SD_WAKE_IDLE \
+ | SD_BALANCE_FORK \
| SD_WAKE_BALANCE, \
.last_balance = jiffies, \
- .balance_interval = 1, \
+ .balance_interval = 64, \
.nr_balance_failed = 0, \
}
@@ -69,17 +103,21 @@
.span = CPU_MASK_NONE, \
.parent = NULL, \
.groups = NULL, \
- .min_interval = 80, \
- .max_interval = 320, \
- .busy_factor = 320, \
- .imbalance_pct = 125, \
+ .min_interval = 64, \
+ .max_interval = 64*num_online_cpus(), \
+ .busy_factor = 128, \
+ .imbalance_pct = 133, \
.cache_hot_time = (10*1000000), \
.cache_nice_tries = 1, \
+ .busy_idx = 3, \
+ .idle_idx = 3, \
+ .newidle_idx = 0, /* unused */ \
+ .wake_idx = 0, /* unused */ \
+ .forkexec_idx = 0, /* unused */ \
.per_cpu_gain = 100, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC, \
+ .flags = SD_LOAD_BALANCE, \
.last_balance = jiffies, \
- .balance_interval = 100*(63+num_online_cpus())/64, \
+ .balance_interval = 64, \
.nr_balance_failed = 0, \
}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unaligned.h
--- a/xen/include/asm-ia64/linux/asm/unaligned.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/unaligned.h Thu Sep 1 18:46:28 2005
@@ -1,121 +1,6 @@
#ifndef _ASM_IA64_UNALIGNED_H
#define _ASM_IA64_UNALIGNED_H
-#include <linux/types.h>
-
-/*
- * The main single-value unaligned transfer routines.
- *
- * Based on <asm-alpha/unaligned.h>.
- *
- * Copyright (C) 1998, 1999, 2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-#define get_unaligned(ptr) \
- ((__typeof__(*(ptr)))ia64_get_unaligned((ptr), sizeof(*(ptr))))
-
-#define put_unaligned(x,ptr) \
- ia64_put_unaligned((unsigned long)(x), (ptr), sizeof(*(ptr)))
-
-struct __una_u64 { __u64 x __attribute__((packed)); };
-struct __una_u32 { __u32 x __attribute__((packed)); };
-struct __una_u16 { __u16 x __attribute__((packed)); };
-
-static inline unsigned long
-__uld8 (const unsigned long * addr)
-{
- const struct __una_u64 *ptr = (const struct __una_u64 *) addr;
- return ptr->x;
-}
-
-static inline unsigned long
-__uld4 (const unsigned int * addr)
-{
- const struct __una_u32 *ptr = (const struct __una_u32 *) addr;
- return ptr->x;
-}
-
-static inline unsigned long
-__uld2 (const unsigned short * addr)
-{
- const struct __una_u16 *ptr = (const struct __una_u16 *) addr;
- return ptr->x;
-}
-
-static inline void
-__ust8 (unsigned long val, unsigned long * addr)
-{
- struct __una_u64 *ptr = (struct __una_u64 *) addr;
- ptr->x = val;
-}
-
-static inline void
-__ust4 (unsigned long val, unsigned int * addr)
-{
- struct __una_u32 *ptr = (struct __una_u32 *) addr;
- ptr->x = val;
-}
-
-static inline void
-__ust2 (unsigned long val, unsigned short * addr)
-{
- struct __una_u16 *ptr = (struct __una_u16 *) addr;
- ptr->x = val;
-}
-
-
-/*
- * This function doesn't actually exist. The idea is that when someone uses
the macros
- * below with an unsupported size (datatype), the linker will alert us to the
problem via
- * an unresolved reference error.
- */
-extern unsigned long ia64_bad_unaligned_access_length (void);
-
-#define ia64_get_unaligned(_ptr,size) \
-({ \
- const void *__ia64_ptr = (_ptr); \
- unsigned long __ia64_val; \
- \
- switch (size) { \
- case 1: \
- __ia64_val = *(const unsigned char *) __ia64_ptr; \
- break; \
- case 2: \
- __ia64_val = __uld2((const unsigned short *)__ia64_ptr); \
- break; \
- case 4: \
- __ia64_val = __uld4((const unsigned int *)__ia64_ptr); \
- break; \
- case 8: \
- __ia64_val = __uld8((const unsigned long *)__ia64_ptr); \
- break; \
- default: \
- __ia64_val = ia64_bad_unaligned_access_length(); \
- } \
- __ia64_val; \
-})
-
-#define ia64_put_unaligned(_val,_ptr,size) \
-do { \
- const void *__ia64_ptr = (_ptr); \
- unsigned long __ia64_val = (_val); \
- \
- switch (size) { \
- case 1: \
- *(unsigned char *)__ia64_ptr = (__ia64_val); \
- break; \
- case 2: \
- __ust2(__ia64_val, (unsigned short *)__ia64_ptr); \
- break; \
- case 4: \
- __ust4(__ia64_val, (unsigned int *)__ia64_ptr); \
- break; \
- case 8: \
- __ust8(__ia64_val, (unsigned long *)__ia64_ptr); \
- break; \
- default: \
- ia64_bad_unaligned_access_length(); \
- } \
-} while (0)
+#include <asm-generic/unaligned.h>
#endif /* _ASM_IA64_UNALIGNED_H */
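
The helpers removed above (now provided by <asm-generic/unaligned.h>) do unaligned loads by viewing memory through a struct whose only member is __attribute__((packed)), so the compiler stops assuming natural alignment. A standalone sketch of the technique; my_get_unaligned32() is an illustrative name, not an API from the patch:

#include <stdio.h>
#include <string.h>

struct una_u32 { unsigned int x __attribute__((packed)); };

/* load a 32-bit value from a possibly misaligned address */
static unsigned int my_get_unaligned32(const void *p)
{
    const struct una_u32 *ptr = (const struct una_u32 *)p;
    return ptr->x;
}

int main(void)
{
    unsigned char buf[8] = { 0 };
    unsigned int v = 0xdeadbeef;

    memcpy(buf + 1, &v, sizeof(v));       /* deliberately misaligned */
    printf("loaded 0x%08x\n", my_get_unaligned32(buf + 1));
    return 0;
}
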
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unistd.h
--- a/xen/include/asm-ia64/linux/asm/unistd.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/unistd.h Thu Sep 1 18:46:28 2005
@@ -263,6 +263,12 @@
#define __NR_add_key 1271
#define __NR_request_key 1272
#define __NR_keyctl 1273
+#define __NR_ioprio_set 1274
+#define __NR_ioprio_get 1275
+#define __NR_set_zone_reclaim 1276
+#define __NR_inotify_init 1277
+#define __NR_inotify_add_watch 1278
+#define __NR_inotify_rm_watch 1279
#ifdef __KERNEL__
@@ -392,7 +398,7 @@
* proper prototype, but we can't use __typeof__ either, because not all cond_syscall()
* declarations have prototypes at the moment.
*/
-#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall")));
+#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall")))
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitmap.h
--- a/xen/include/asm-ia64/linux/bitmap.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/bitmap.h Thu Sep 1 18:46:28 2005
@@ -41,7 +41,9 @@
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf
- * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from buf
+ * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
+ * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
+ * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list
*/
/*
@@ -98,6 +100,10 @@
const unsigned long *src, int nbits);
extern int bitmap_parse(const char __user *ubuf, unsigned int ulen,
unsigned long *dst, int nbits);
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits);
+extern int bitmap_parselist(const char *buf, unsigned long *maskp,
+ int nmaskbits);
extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitops.h
--- a/xen/include/asm-ia64/linux/bitops.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/bitops.h Thu Sep 1 18:46:28 2005
@@ -134,4 +134,26 @@
return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
}
+/*
+ * rol32 - rotate a 32-bit value left
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> (32 - shift));
+}
+
+/*
+ * ror32 - rotate a 32-bit value right
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 ror32(__u32 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (32 - shift));
+}
+
#endif
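
rol32()/ror32() above rotate by OR-ing the two shifted halves; as in the version added here, a shift count of 0 or 32 would evaluate word >> 32, so callers are expected to keep the count in 1..31. A standalone check:

#include <stdio.h>

typedef unsigned int u32;

static u32 rol32(u32 word, unsigned int shift)
{
    return (word << shift) | (word >> (32 - shift));
}

static u32 ror32(u32 word, unsigned int shift)
{
    return (word >> shift) | (word << (32 - shift));
}

int main(void)
{
    u32 x = 0x80000001u;

    printf("rol32(0x%08x, 4) = 0x%08x\n", x, rol32(x, 4));  /* 0x00000018 */
    printf("ror32(0x%08x, 4) = 0x%08x\n", x, ror32(x, 4));  /* 0x18000000 */
    return 0;
}
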
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/dma-mapping.h
--- a/xen/include/asm-ia64/linux/dma-mapping.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/dma-mapping.h Thu Sep 1 18:46:28 2005
@@ -14,7 +14,12 @@
};
#define DMA_64BIT_MASK 0xffffffffffffffffULL
+#define DMA_40BIT_MASK 0x000000ffffffffffULL
+#define DMA_39BIT_MASK 0x0000007fffffffffULL
#define DMA_32BIT_MASK 0x00000000ffffffffULL
+#define DMA_31BIT_MASK 0x000000007fffffffULL
+#define DMA_30BIT_MASK 0x000000003fffffffULL
+#define DMA_29BIT_MASK 0x000000001fffffffULL
#include <asm/dma-mapping.h>
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/efi.h
--- a/xen/include/asm-ia64/linux/efi.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/efi.h Thu Sep 1 18:46:28 2005
@@ -301,7 +301,6 @@
extern int __init efi_uart_console_only (void);
extern void efi_initialize_iomem_resources(struct resource *code_resource,
struct resource *data_resource);
-extern efi_status_t phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc);
extern unsigned long __init efi_get_time(void);
extern int __init efi_set_rtc_mmss(unsigned long nowtime);
extern struct efi_memory_map memmap;
@@ -316,7 +315,7 @@
*/
static inline int efi_range_is_wc(unsigned long start, unsigned long len)
{
- int i;
+ unsigned long i;
for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) {
unsigned long paddr = __pa(start + i);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/err.h
--- a/xen/include/asm-ia64/linux/err.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/err.h Thu Sep 1 18:46:28 2005
@@ -13,6 +13,8 @@
* This should be a per-architecture thing, to allow different
* error and pointer decisions.
*/
+#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L)
+
static inline void *ERR_PTR(long error)
{
return (void *) error;
@@ -25,7 +27,7 @@
static inline long IS_ERR(const void *ptr)
{
- return unlikely((unsigned long)ptr > (unsigned long)-1000L);
+ return IS_ERR_VALUE((unsigned long)ptr);
}
#endif /* _LINUX_ERR_H */
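
The err.h change factors the pointer test into IS_ERR_VALUE(), preserving the convention that the top ~1000 values of the address space encode negative errno codes inside a pointer. A standalone sketch of the ERR_PTR()/PTR_ERR() round trip; open_thing() and the -2 error code are illustrative:

#include <stdio.h>

#define MY_IS_ERR_VALUE(x) ((x) > (unsigned long)-1000L)

static void *ERR_PTR(long error)      { return (void *)error; }
static long  PTR_ERR(const void *ptr) { return (long)ptr; }
static long  IS_ERR(const void *ptr)  { return MY_IS_ERR_VALUE((unsigned long)ptr); }

/* illustrative: returns a valid pointer or an encoded negative errno */
static void *open_thing(int fail)
{
    static int real_object = 42;
    return fail ? ERR_PTR(-2L) : (void *)&real_object;
}

int main(void)
{
    void *p = open_thing(1);

    if (IS_ERR(p))
        printf("error %ld encoded in the pointer\n", PTR_ERR(p)); /* -2 */
    else
        printf("got object %d\n", *(int *)p);
    return 0;
}
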
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/gfp.h
--- a/xen/include/asm-ia64/linux/gfp.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/gfp.h Thu Sep 1 18:46:28 2005
@@ -12,8 +12,8 @@
* GFP bitmasks..
*/
/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA 0x01
-#define __GFP_HIGHMEM 0x02
+#define __GFP_DMA 0x01u
+#define __GFP_HIGHMEM 0x02u
/*
* Action modifiers - doesn't change the zoning
@@ -26,26 +26,29 @@
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
*/
-#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
-#define __GFP_HIGH 0x20 /* Should access emergency pools? */
-#define __GFP_IO 0x40 /* Can start physical IO? */
-#define __GFP_FS 0x80 /* Can call down to low-level FS? */
-#define __GFP_COLD 0x100 /* Cache-cold page required */
-#define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */
-#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */
-#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */
-#define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */
-#define __GFP_NO_GROW 0x2000 /* Slab internal usage */
-#define __GFP_COMP 0x4000 /* Add compound page metadata */
-#define __GFP_ZERO 0x8000 /* Return zeroed page on success */
+#define __GFP_WAIT 0x10u /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20u /* Should access emergency pools? */
+#define __GFP_IO 0x40u /* Can start physical IO? */
+#define __GFP_FS 0x80u /* Can call down to low-level FS? */
+#define __GFP_COLD 0x100u /* Cache-cold page required */
+#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */
+#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */
+#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */
+#define __GFP_NORETRY 0x1000u /* Do not retry. Might fail */
+#define __GFP_NO_GROW 0x2000u /* Slab internal usage */
+#define __GFP_COMP 0x4000u /* Add compound page metadata */
+#define __GFP_ZERO 0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+#define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+ __GFP_NOMEMALLOC|__GFP_NORECLAIM)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
@@ -82,7 +85,7 @@
extern struct page *
FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
+static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
@@ -93,17 +96,17 @@
}
#ifdef CONFIG_NUMA
-extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
+extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
static inline struct page *
-alloc_pages(unsigned int gfp_mask, unsigned int order)
+alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
return NULL;
return alloc_pages_current(gfp_mask, order);
}
-extern struct page *alloc_page_vma(unsigned gfp_mask,
+extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
struct vm_area_struct *vma, unsigned long addr);
#else
#define alloc_pages(gfp_mask, order) \
@@ -112,8 +115,8 @@
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
-extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
#define __get_free_page(gfp_mask) \
__get_free_pages((gfp_mask),0)
@@ -130,5 +133,10 @@
#define free_page(addr) free_pages((addr),0)
void page_alloc_init(void);
+#ifdef CONFIG_NUMA
+void drain_remote_pages(void);
+#else
+static inline void drain_remote_pages(void) { };
+#endif
#endif /* __LINUX_GFP_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/mmzone.h
--- a/xen/include/asm-ia64/linux/mmzone.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/mmzone.h Thu Sep 1 18:46:28 2005
@@ -11,6 +11,7 @@
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
+#include <linux/init.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -61,6 +62,12 @@
unsigned long other_node; /* allocation from other node */
#endif
} ____cacheline_aligned_in_smp;
+
+#ifdef CONFIG_NUMA
+#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
+#else
+#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
+#endif
#define ZONE_DMA 0
#define ZONE_NORMAL 1
@@ -121,8 +128,11 @@
*/
unsigned long lowmem_reserve[MAX_NR_ZONES];
+#ifdef CONFIG_NUMA
+ struct per_cpu_pageset *pageset[NR_CPUS];
+#else
struct per_cpu_pageset pageset[NR_CPUS];
-
+#endif
/*
* free areas of different sizes
*/
@@ -144,6 +154,14 @@
int all_unreclaimable; /* All pages pinned */
/*
+ * Does the allocator try to reclaim pages from the zone as soon
+ * as it fails a watermark_ok() in __alloc_pages?
+ */
+ int reclaim_pages;
+ /* A count of how many reclaimers are scanning this zone */
+ atomic_t reclaim_in_progress;
+
+ /*
* prev_priority holds the scanning priority for this zone. It is
* defined as the scanning priority at which we achieved our reclaim
* target at the previous try_to_free_pages() or balance_pgdat()
@@ -251,7 +269,9 @@
struct zone node_zones[MAX_NR_ZONES];
struct zonelist node_zonelists[GFP_ZONETYPES];
int nr_zones;
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
struct page *node_mem_map;
+#endif
struct bootmem_data *bdata;
unsigned long node_start_pfn;
unsigned long node_present_pages; /* total number of physical pages */
@@ -266,6 +286,12 @@
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
+#else
+#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
+#endif
+#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
extern struct pglist_data *pgdat_list;
@@ -278,6 +304,16 @@
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, int gfp_high);
+#ifdef CONFIG_HAVE_MEMORY_PRESENT
+void memory_present(int nid, unsigned long start, unsigned long end);
+#else
+static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
+#endif
+
+#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+#endif
+
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/
@@ -370,9 +406,9 @@
#include <linux/topology.h>
/* Returns the number of the current Node. */
-#define numa_node_id() (cpu_to_node(_smp_processor_id()))
-
-#ifndef CONFIG_DISCONTIGMEM
+#define numa_node_id() (cpu_to_node(raw_smp_processor_id()))
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
extern struct pglist_data contig_page_data;
#define NODE_DATA(nid) (&contig_page_data)
@@ -380,35 +416,176 @@
#define MAX_NODES_SHIFT 1
#define pfn_to_nid(pfn) (0)
-#else /* CONFIG_DISCONTIGMEM */
+#else /* CONFIG_NEED_MULTIPLE_NODES */
#include <asm/mmzone.h>
+
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_SPARSEMEM
+#include <asm/sparsemem.h>
+#endif
#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
/*
* with 32 bit page->flags field, we reserve 8 bits for node/zone info.
* there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
*/
-#define MAX_NODES_SHIFT 6
+#define FLAGS_RESERVED 8
+
#elif BITS_PER_LONG == 64
/*
* with 64 bit flags field, there's plenty of room.
*/
-#define MAX_NODES_SHIFT 10
-#endif
-
-#endif /* !CONFIG_DISCONTIGMEM */
-
-#if NODES_SHIFT > MAX_NODES_SHIFT
-#error NODES_SHIFT > MAX_NODES_SHIFT
-#endif
-
-/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */
-#define MAX_ZONES_SHIFT 2
-
-#if ZONES_SHIFT > MAX_ZONES_SHIFT
-#error ZONES_SHIFT > MAX_ZONES_SHIFT
-#endif
+#define FLAGS_RESERVED 32
+
+#else
+
+#error BITS_PER_LONG not defined
+
+#endif
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#define early_pfn_to_nid(nid) (0UL)
+#endif
+
+#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+
+#ifdef CONFIG_SPARSEMEM
+
+/*
+ * SECTION_SHIFT #bits space required to store a section #
+ *
+ * PA_SECTION_SHIFT physical address to/from section number
+ * PFN_SECTION_SHIFT pfn to/from section number
+ */
+#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
+
+#define PA_SECTION_SHIFT (SECTION_SIZE_BITS)
+#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
+
+#define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT)
+
+#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)
+#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
+
+#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
+#error Allocator MAX_ORDER exceeds SECTION_SIZE
+#endif
+
+struct page;
+struct mem_section {
+ /*
+ * This is, logically, a pointer to an array of struct
+ * pages. However, it is stored with some other magic.
+ * (see sparse.c::sparse_init_one_section())
+ *
+ * Making it a UL at least makes someone do a cast
+ * before using it wrong.
+ */
+ unsigned long section_mem_map;
+};
+
+extern struct mem_section mem_section[NR_MEM_SECTIONS];
+
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+ return &mem_section[nr];
+}
+
+/*
+ * We use the lower bits of the mem_map pointer to store
+ * a little bit of information. There should be at least
+ * 3 bits here due to 32-bit alignment.
+ */
+#define SECTION_MARKED_PRESENT (1UL<<0)
+#define SECTION_HAS_MEM_MAP (1UL<<1)
+#define SECTION_MAP_LAST_BIT (1UL<<2)
+#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
+
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+ unsigned long map = section->section_mem_map;
+ map &= SECTION_MAP_MASK;
+ return (struct page *)map;
+}
+
+static inline int valid_section(struct mem_section *section)
+{
+ return (section->section_mem_map & SECTION_MARKED_PRESENT);
+}
+
+static inline int section_has_mem_map(struct mem_section *section)
+{
+ return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+}
+
+static inline int valid_section_nr(unsigned long nr)
+{
+ return valid_section(__nr_to_section(nr));
+}
+
+/*
+ * Given a kernel address, find the home node of the underlying memory.
+ */
+#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
+
+static inline struct mem_section *__pfn_to_section(unsigned long pfn)
+{
+ return __nr_to_section(pfn_to_section_nr(pfn));
+}
+
+#define pfn_to_page(pfn) \
+({ \
+ unsigned long __pfn = (pfn); \
+ __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \
+})
+#define page_to_pfn(page) \
+({ \
+ page - __section_mem_map_addr(__nr_to_section( \
+ page_to_section(page))); \
+})
+
+static inline int pfn_valid(unsigned long pfn)
+{
+ if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+ return 0;
+ return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+}
+
+/*
+ * These are _only_ used during initialisation, therefore they
+ * can use __initdata ... They could have names to indicate
+ * this restriction.
+ */
+#ifdef CONFIG_NUMA
+#define pfn_to_nid early_pfn_to_nid
+#endif
+
+#define pfn_to_pgdat(pfn) \
+({ \
+ NODE_DATA(pfn_to_nid(pfn)); \
+})
+
+#define early_pfn_valid(pfn) pfn_valid(pfn)
+void sparse_init(void);
+#else
+#define sparse_init() do {} while (0)
+#endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
+#else
+#define early_pfn_in_nid(pfn, nid) (1)
+#endif
+
+#ifndef early_pfn_valid
+#define early_pfn_valid(pfn) (1)
+#endif
+
+void memory_present(int nid, unsigned long start, unsigned long end);
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
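
The SPARSEMEM section structure above keeps the per-section mem_map pointer and two status flags in a single unsigned long, relying on the pointer's alignment to leave the low bits free. A standalone sketch of that low-bit tagging; the flag names mirror the patch, while the local struct page array is just a stand-in:

#include <stdio.h>

#define SECTION_MARKED_PRESENT (1UL << 0)
#define SECTION_HAS_MEM_MAP    (1UL << 1)
#define SECTION_MAP_LAST_BIT   (1UL << 2)
#define SECTION_MAP_MASK       (~(SECTION_MAP_LAST_BIT - 1))

struct page { unsigned long flags; };

struct mem_section { unsigned long section_mem_map; };

static struct page *section_mem_map_addr(struct mem_section *s)
{
    return (struct page *)(s->section_mem_map & SECTION_MAP_MASK);
}

static int valid_section(struct mem_section *s)
{
    return s->section_mem_map & SECTION_MARKED_PRESENT;
}

int main(void)
{
    static struct page fake_map[4];     /* stand-in for a section's mem_map */
    struct mem_section sec;

    /* pack pointer + flags into one word, as sparse_init_one_section() would */
    sec.section_mem_map = (unsigned long)fake_map
                        | SECTION_MARKED_PRESENT | SECTION_HAS_MEM_MAP;

    printf("valid: %d, map recovered: %s\n",
           valid_section(&sec),
           section_mem_map_addr(&sec) == fake_map ? "yes" : "no");
    return 0;
}
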
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/numa.h
--- a/xen/include/asm-ia64/linux/numa.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/numa.h Thu Sep 1 18:46:28 2005
@@ -3,7 +3,7 @@
#include <linux/config.h>
-#ifdef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_FLATMEM
#include <asm/numnodes.h>
#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/page-flags.h
--- a/xen/include/asm-ia64/linux/page-flags.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/page-flags.h Thu Sep 1 18:46:28 2005
@@ -61,21 +61,20 @@
#define PG_active 6
#define PG_slab 7 /* slab debug (Suparna wants this) */
-#define PG_highmem 8
-#define PG_checked 9 /* kill me in 2.5.<early>. */
-#define PG_arch_1 10
-#define PG_reserved 11
-
-#define PG_private 12 /* Has something at ->private */
-#define PG_writeback 13 /* Page is under writeback */
-#define PG_nosave 14 /* Used for system suspend/resume */
-#define PG_compound 15 /* Part of a compound page */
-
-#define PG_swapcache 16 /* Swap page: swp_entry_t in private */
-#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
-#define PG_reclaim 18 /* To be reclaimed asap */
-#define PG_nosave_free 19 /* Free, should not be written */
-
+#define PG_checked 8 /* kill me in 2.5.<early>. */
+#define PG_arch_1 9
+#define PG_reserved 10
+#define PG_private 11 /* Has something at ->private */
+
+#define PG_writeback 12 /* Page is under writeback */
+#define PG_nosave 13 /* Used for system suspend/resume */
+#define PG_compound 14 /* Part of a compound page */
+#define PG_swapcache 15 /* Swap page: swp_entry_t in private */
+
+#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
+#define PG_reclaim 17 /* To be reclaimed asap */
+#define PG_nosave_free 18 /* Free, should not be written */
+#define PG_uncached 19 /* Page has been mapped as uncached */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -131,12 +130,13 @@
unsigned long allocstall; /* direct reclaim calls */
unsigned long pgrotated; /* pages rotated to tail of the LRU */
+ unsigned long nr_bounce; /* pages for bounce buffers */
};
extern void get_page_state(struct page_state *ret);
extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned offset);
-extern void __mod_page_state(unsigned offset, unsigned long delta);
+extern unsigned long __read_page_state(unsigned long offset);
+extern void __mod_page_state(unsigned long offset, unsigned long delta);
#define read_page_state(member) \
__read_page_state(offsetof(struct page_state, member))
@@ -214,7 +214,7 @@
#define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags)
#ifdef CONFIG_HIGHMEM
-#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags)
+#define PageHighMem(page) is_highmem(page_zone(page))
#else
#define PageHighMem(page) 0 /* needed to optimize away at compile time */
#endif
@@ -301,10 +301,13 @@
#define PageSwapCache(page) 0
#endif
+#define PageUncached(page) test_bit(PG_uncached, &(page)->flags)
+#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
+#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
+
struct page; /* forward declaration */
int test_clear_page_dirty(struct page *page);
-int __clear_page_dirty(struct page *page);
int test_clear_page_writeback(struct page *page);
int test_set_page_writeback(struct page *page);
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/slab.h
--- a/xen/include/asm-ia64/linux/slab.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/slab.h Thu Sep 1 18:46:28 2005
@@ -1,3 +1,137 @@
-#include <xen/xmalloc.h>
-#include <linux/gfp.h>
-#include <asm/delay.h>
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@xxxxxxxxxxxxxxxxx)
+ */
+
+#ifndef _LINUX_SLAB_H
+#define _LINUX_SLAB_H
+
+#if defined(__KERNEL__)
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include <linux/config.h> /* kmalloc_sizes.h needs CONFIG_ options */
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
+#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
+
+/* flags for kmem_cache_alloc() */
+#define SLAB_NOFS GFP_NOFS
+#define SLAB_NOIO GFP_NOIO
+#define SLAB_ATOMIC GFP_ATOMIC
+#define SLAB_USER GFP_USER
+#define SLAB_KERNEL GFP_KERNEL
+#define SLAB_DMA GFP_DMA
+
+#define SLAB_LEVEL_MASK GFP_LEVEL_MASK
+
+#define SLAB_NO_GROW __GFP_NO_GROW /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator as been build
+ * SLAB_DEBUG_SUPPORT.
+ */
+#define SLAB_DEBUG_FREE 0x00000100UL /* Peform (expensive) checks on free */
+#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */
+#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
+#define SLAB_POISON 0x00000800UL /* Poison objects */
+#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
+#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
+#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
+#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
+#define SLAB_STORE_USER 0x00010000UL /* store the last owner for bug hunting */
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* track pages allocated to indicate
+ what is reclaimable later*/
+#define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */
+#define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
+#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
+
+/* prototypes */
+extern void __init kmem_cache_init(void);
+
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
+ void (*)(void *, kmem_cache_t *, unsigned long),
+ void (*)(void *, kmem_cache_t *, unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+extern unsigned int kmem_cache_size(kmem_cache_t *);
+extern const char *kmem_cache_name(kmem_cache_t *);
+extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags);
+
+/* Size description struct for general caches. */
+struct cache_sizes {
+ size_t cs_size;
+ kmem_cache_t *cs_cachep;
+ kmem_cache_t *cs_dmacachep;
+};
+extern struct cache_sizes malloc_sizes[];
+extern void *__kmalloc(size_t, unsigned int __nocast);
+
+static inline void *kmalloc(size_t size, unsigned int __nocast flags)
+{
+ if (__builtin_constant_p(size)) {
+ int i = 0;
+#define CACHE(x) \
+ if (size <= x) \
+ goto found; \
+ else \
+ i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+ {
+ extern void __you_cannot_kmalloc_that_much(void);
+ __you_cannot_kmalloc_that_much();
+ }
+found:
+ return kmem_cache_alloc((flags & GFP_DMA) ?
+ malloc_sizes[i].cs_dmacachep :
+ malloc_sizes[i].cs_cachep, flags);
+ }
+ return __kmalloc(size, flags);
+}
+
+extern void *kcalloc(size_t, size_t, unsigned int __nocast);
+extern void kfree(const void *);
+extern unsigned int ksize(const void *);
+
+#ifdef CONFIG_NUMA
+extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node);
+extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node);
+#else
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node)
+{
+ return kmem_cache_alloc(cachep, flags);
+}
+static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node)
+{
+ return kmalloc(size, flags);
+}
+#endif
+
+extern int FASTCALL(kmem_cache_reap(int));
+extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
+
+/* System wide caches */
+extern kmem_cache_t *vm_area_cachep;
+extern kmem_cache_t *names_cachep;
+extern kmem_cache_t *files_cachep;
+extern kmem_cache_t *filp_cachep;
+extern kmem_cache_t *fs_cachep;
+extern kmem_cache_t *signal_cachep;
+extern kmem_cache_t *sighand_cachep;
+extern kmem_cache_t *bio_cachep;
+
+extern atomic_t slab_reclaim_pages;
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_SLAB_H */
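
For illustration (not part of the changeset): the inline kmalloc() above resolves a
constant size to a general-cache index entirely at compile time by letting
kmalloc_sizes.h expand CACHE(x) once per cache size. A minimal user-space sketch of
that mechanism, with a made-up size list standing in for kmalloc_sizes.h:

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the CACHE(x) entries of kmalloc_sizes.h. */
    #define FOREACH_CACHE(X) X(32) X(64) X(128) X(256) X(512) X(1024)

    /* Mirrors the inline kmalloc(): walk the size list, counting entries,
     * and stop at the first cache large enough for the request. */
    static int size_to_index(size_t size)
    {
            int i = 0;
    #define CACHE(x) if (size <= x) goto found; else i++;
            FOREACH_CACHE(CACHE)
    #undef CACHE
            return -1;      /* too big; the real code forces a link error instead */
    found:
            return i;
    }

    int main(void)
    {
            printf("%d\n", size_to_index(100));   /* prints 2: the 128-byte cache */
            return 0;
    }
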
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/threads.h
--- a/xen/include/asm-ia64/linux/threads.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/threads.h Thu Sep 1 18:46:28 2005
@@ -7,7 +7,7 @@
* The default limit for the nr of threads is now in
* /proc/sys/kernel/threads-max.
*/
-
+
/*
* Maximum supported processors that can run under SMP. This value is
* set via configure setting. The maximum is equal to the size of the
@@ -25,11 +25,12 @@
/*
* This controls the default maximum pid allocated to a process
*/
-#define PID_MAX_DEFAULT 0x8000
+#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
/*
* A maximum of 4 million PIDs should be enough for a while:
*/
-#define PID_MAX_LIMIT (sizeof(long) > 4 ? 4*1024*1024 : PID_MAX_DEFAULT)
+#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
+ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/timex.h
--- a/xen/include/asm-ia64/linux/timex.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/timex.h Thu Sep 1 18:46:28 2005
@@ -240,9 +240,7 @@
extern long time_maxerror; /* maximum error */
extern long time_esterror; /* estimated error */
-extern long time_phase; /* phase offset (scaled us) */
extern long time_freq; /* frequency offset (scaled ppm) */
-extern long time_adj; /* tick adjust (scaled 1 / HZ) */
extern long time_reftime; /* time at last adjustment (s) */
extern long time_adjust; /* The amount of adjtime left */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/topology.h
--- a/xen/include/asm-ia64/linux/topology.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/topology.h Thu Sep 1 18:46:28 2005
@@ -31,8 +31,11 @@
#include <linux/bitops.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
+#include <asm/topology.h>
-#include <asm/topology.h>
+#ifndef node_has_online_mem
+#define node_has_online_mem(nid) (1)
+#endif
#ifndef nr_cpus_node
#define nr_cpus_node(node) \
@@ -86,6 +89,11 @@
.cache_hot_time = 0, \
.cache_nice_tries = 0, \
.per_cpu_gain = 25, \
+ .busy_idx = 0, \
+ .idle_idx = 0, \
+ .newidle_idx = 1, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
@@ -112,12 +120,15 @@
.cache_hot_time = (5*1000000/2), \
.cache_nice_tries = 1, \
.per_cpu_gain = 100, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
- | SD_WAKE_AFFINE \
- | SD_WAKE_IDLE \
- | SD_WAKE_BALANCE, \
+ | SD_WAKE_AFFINE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/wait.h
--- a/xen/include/asm-ia64/linux/wait.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/wait.h Thu Sep 1 18:46:28 2005
@@ -33,7 +33,7 @@
struct __wait_queue {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01
- struct task_struct * task;
+ void *private;
wait_queue_func_t func;
struct list_head task_list;
};
@@ -60,7 +60,7 @@
*/
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
- .task = tsk, \
+ .private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
@@ -79,14 +79,14 @@
static inline void init_waitqueue_head(wait_queue_head_t *q)
{
- q->lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&q->lock);
INIT_LIST_HEAD(&q->task_list);
}
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
q->flags = 0;
- q->task = p;
+ q->private = p;
q->func = default_wake_function;
}
@@ -94,7 +94,7 @@
wait_queue_func_t func)
{
q->flags = 0;
- q->task = NULL;
+ q->private = NULL;
q->func = func;
}
@@ -110,7 +110,7 @@
* aio specifies a wait queue entry with an async notification
* callback routine, not associated with any task.
*/
-#define is_sync_wait(wait) (!(wait) || ((wait)->task))
+#define is_sync_wait(wait) (!(wait) || ((wait)->private))
extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
@@ -169,6 +169,18 @@
finish_wait(&wq, &__wait); \
} while (0)
+/**
+ * wait_event - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
#define wait_event(wq, condition) \
do { \
if (condition) \
@@ -191,6 +203,22 @@
finish_wait(&wq, &__wait); \
} while (0)
+/**
+ * wait_event_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if the @timeout elapsed, and the remaining
+ * jiffies if the condition evaluated to true before the timeout elapsed.
+ */
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
@@ -217,6 +245,21 @@
finish_wait(&wq, &__wait); \
} while (0)
+/**
+ * wait_event_interruptible - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
@@ -245,6 +288,23 @@
finish_wait(&wq, &__wait); \
} while (0)
+/**
+ * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
+ * was interrupted by a signal, and the remaining jiffies otherwise
+ * if the condition evaluated to true before the timeout elapsed.
+ */
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
@@ -324,18 +384,16 @@
#define DEFINE_WAIT(name) \
wait_queue_t name = { \
- .task = current, \
+ .private = current, \
.func = autoremove_wake_function, \
- .task_list = { .next = &(name).task_list, \
- .prev = &(name).task_list, \
- }, \
+ .task_list = LIST_HEAD_INIT((name).task_list), \
}
#define DEFINE_WAIT_BIT(name, word, bit) \
struct wait_bit_queue name = { \
.key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
.wait = { \
- .task = current, \
+ .private = current, \
.func = wake_bit_function, \
.task_list = \
LIST_HEAD_INIT((name).wait.task_list), \
@@ -344,7 +402,7 @@
#define init_wait(wait) \
do { \
- (wait)->task = current; \
+ (wait)->private = current; \
(wait)->func = autoremove_wake_function; \
INIT_LIST_HEAD(&(wait)->task_list); \
} while (0)
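
For illustration (not part of the changeset): a kernel-style sketch of how the
wait_event_interruptible_timeout()/wake_up() pair documented above is normally
used. The names my_wq, my_flag, wait_for_flag and set_flag are made up, and
<linux/wait.h>, <linux/sched.h> and <linux/errno.h> are assumed to be included.

    static DECLARE_WAIT_QUEUE_HEAD(my_wq);
    static int my_flag;

    /* Consumer: sleep until my_flag is set, a signal arrives, or 1s passes. */
    static long wait_for_flag(void)
    {
            long ret = wait_event_interruptible_timeout(my_wq, my_flag != 0, HZ);
            if (ret == 0)
                    return -ETIMEDOUT;      /* timeout elapsed, condition still false */
            if (ret == -ERESTARTSYS)
                    return ret;             /* interrupted by a signal */
            return 0;                       /* condition became true, ret jiffies were left */
    }

    /* Producer: update the condition *before* waking, as the comments require. */
    static void set_flag(void)
    {
            my_flag = 1;
            wake_up(&my_wq);
    }
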
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/mm.h Thu Sep 1 18:46:28 2005
@@ -316,6 +316,7 @@
#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#endif
+#if 0 /* removed when rebasing to 2.6.13 */
/*
* The zone field is never updated after free_area_init_core()
* sets it, so none of the operations on it need to be atomic.
@@ -347,6 +348,7 @@
page->flags &= ~(~0UL << NODEZONE_SHIFT);
page->flags |= nodezone_num << NODEZONE_SHIFT;
}
+#endif
#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/time.h
--- a/xen/include/asm-ia64/time.h Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/time.h Thu Sep 1 18:46:28 2005
@@ -1,1 +1,1 @@
-#include <xen/linuxtime.h>
+#include <asm/linux/time.h>
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/bitop.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/bitop.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,88 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/intrinsics.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+
+/*
+ * Find next zero bit in a bitmap reasonably efficiently..
+ */
+
+int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
+{
+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
+ unsigned long result = offset & ~63UL;
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset &= 63UL;
+ if (offset) {
+ tmp = *(p++);
+ tmp |= ~0UL >> (64-offset);
+ if (size < 64)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= 64;
+ result += 64;
+ }
+ while (size & ~63UL) {
+ if (~(tmp = *(p++)))
+ goto found_middle;
+ result += 64;
+ size -= 64;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* any bits zero? */
+ return result + size; /* nope */
+found_middle:
+ return result + ffz(tmp);
+}
+EXPORT_SYMBOL(__find_next_zero_bit);
+
+/*
+ * Find next bit in a bitmap reasonably efficiently..
+ */
+int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
+{
+ unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
+ unsigned long result = offset & ~63UL;
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset &= 63UL;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= ~0UL << offset;
+ if (size < 64)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= 64;
+ result += 64;
+ }
+ while (size & ~63UL) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += 64;
+ size -= 64;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+ found_first:
+ tmp &= ~0UL >> (64-size);
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+ found_middle:
+ return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(__find_next_bit);
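
For reference (not part of the changeset): the same word-at-a-time scan as
__find_next_bit, written in portable user-space C, with the GCC builtin
__builtin_ctzll standing in for the ia64 __ffs intrinsic:

    #include <stdint.h>
    #include <stdio.h>

    /* Scan 64-bit words, masking off bits below 'off' in the first word,
     * and return the index of the first set bit, or 'size' if none. */
    static unsigned long find_next_bit64(const uint64_t *addr,
                                         unsigned long size, unsigned long off)
    {
            if (off >= size)
                    return size;
            unsigned long i = off / 64, bit = off % 64;
            for (; i * 64 < size; i++, bit = 0) {
                    uint64_t w = addr[i] & (~0ULL << bit);
                    if (w) {
                            unsigned long r = i * 64 + (unsigned long)__builtin_ctzll(w);
                            return r < size ? r : size;
                    }
            }
            return size;
    }

    int main(void)
    {
            uint64_t map[2] = { 0, 1ULL << 3 };             /* only bit 67 is set */
            printf("%lu\n", find_next_bit64(map, 128, 10)); /* prints 67 */
            return 0;
    }
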
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/clear_page.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/clear_page.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 1999-2002 Hewlett-Packard Co
+ * Stephane Eranian <eranian@xxxxxxxxxx>
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
+ *
+ * 1/06/01 davidm Tuned for Itanium.
+ * 2/12/02 kchen Tuned for both Itanium and McKinley
+ * 3/08/02 davidm Some more tweaking
+ */
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_ITANIUM
+# define L3_LINE_SIZE 64 // Itanium L3 line size
+# define PREFETCH_LINES 9 // magic number
+#else
+# define L3_LINE_SIZE 128 // McKinley L3 line size
+# define PREFETCH_LINES 12 // magic number
+#endif
+
+#define saved_lc r2
+#define dst_fetch r3
+#define dst1 r8
+#define dst2 r9
+#define dst3 r10
+#define dst4 r11
+
+#define dst_last r31
+
+GLOBAL_ENTRY(clear_page)
+ .prologue
+ .regstk 1,0,0,0
+ mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until
+ .save ar.lc, saved_lc
+ mov saved_lc = ar.lc
+
+ .body
+ mov ar.lc = (PREFETCH_LINES - 1)
+ mov dst_fetch = in0
+ adds dst1 = 16, in0
+ adds dst2 = 32, in0
+ ;;
+.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+ adds dst3 = 48, in0 // executing this multiple times is harmless
+ br.cloop.sptk.few .fetch
+ ;;
+ addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
+ mov ar.lc = r16 // one L3 line per iteration
+ adds dst4 = 64, in0
+ ;;
+#ifdef CONFIG_ITANIUM
+ // Optimized for Itanium
+1: stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+ cmp.lt p8,p0=dst_fetch, dst_last
+ ;;
+#else
+ // Optimized for McKinley
+1: stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+ stf.spill.nta [dst3] = f0, 64
+ stf.spill.nta [dst4] = f0, 128
+ cmp.lt p8,p0=dst_fetch, dst_last
+ ;;
+ stf.spill.nta [dst1] = f0, 64
+ stf.spill.nta [dst2] = f0, 64
+#endif
+ stf.spill.nta [dst3] = f0, 64
+(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
+ br.cloop.sptk.few 1b
+ ;;
+ mov ar.lc = saved_lc // restore lc
+ br.ret.sptk.many rp
+END(clear_page)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/copy_page_mck.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/copy_page_mck.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,185 @@
+/*
+ * McKinley-optimized version of copy_page().
+ *
+ * Copyright (C) 2002 Hewlett-Packard Co
+ * David Mosberger <davidm@xxxxxxxxxx>
+ *
+ * Inputs:
+ * in0: address of target page
+ * in1: address of source page
+ * Output:
+ * no return value
+ *
+ * General idea:
+ * - use regular loads and stores to prefetch data to avoid consuming M-slot just for
+ *   lfetches => good for in-cache performance
+ * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
+ *   cycle
+ *
+ * Principle of operation:
+ * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
+ * To avoid secondary misses in L2, we prefetch both source and destination with a line-size
+ * of 128 bytes. When both of these lines are in the L2 and the first half of the
+ * source line is in L1, we start copying the remaining words. The second half of the
+ * source line is prefetched in an earlier iteration, so that by the time we start
+ * accessing it, it's also present in the L1.
+ *
+ * We use a software-pipelined loop to control the overall operation. The pipeline
+ * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching
+ * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination
+ * cache-lines, the last K stages are used to copy the cache-line words not copied by
+ * the prefetches. The four relevant points in the pipeline are called A, B, C, D:
+ * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
+ * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
+ * into L1D and p[D] is TRUE if a cacheline needs to be copied.
+ *
+ * This all sounds very complicated, but thanks to the modulo-scheduled loop support,
+ * the resulting code is very regular and quite easy to follow (once you get the idea).
+ *
+ * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
+ * as the separate .prefetch_loop. Logically, this loop performs exactly like the
+ * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
+ * so that each loop iteration is faster (again, good for cached case).
+ *
+ * When reading the code, it helps to keep the following picture in mind:
+ *
+ *      word 0   word 1
+ *     +--------+--------+---
+ *     |  v[x]  |   t1   |  ^
+ *     |  t2    |   t3   |  |
+ *     |  t4    |   t5   |  |
+ *     |  t6    |   t7   |  |  128 bytes
+ *     |  n[y]  |   t9   |  |  (L2 cache line)
+ *     |  t10   |   t11  |  |
+ *     |  t12   |   t13  |  |
+ *     |  t14   |   t15  |  v
+ *     +--------+--------+---
+ *
+ * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C]
+ * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
+ * an order that avoids bank conflicts.
+ */
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
+
+#define src0 r2
+#define src1 r3
+#define dst0 r9
+#define dst1 r10
+#define src_pre_mem r11
+#define dst_pre_mem r14
+#define src_pre_l2 r15
+#define dst_pre_l2 r16
+#define t1 r17
+#define t2 r18
+#define t3 r19
+#define t4 r20
+#define t5 t1 // alias!
+#define t6 t2 // alias!
+#define t7 t3 // alias!
+#define t9 t5 // alias!
+#define t10 t4 // alias!
+#define t11 t7 // alias!
+#define t12 t6 // alias!
+#define t14 t10 // alias!
+#define t13 r21
+#define t15 r22
+
+#define saved_lc r23
+#define saved_pr r24
+
+#define A 0
+#define B (PREFETCH_DIST)
+#define C (B + PREFETCH_DIST)
+#define D (C + 3)
+#define N (D + 1)
+#define Nrot ((N + 7) & ~7)
+
+GLOBAL_ENTRY(copy_page)
+ .prologue
+ alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
+
+ .rotr v[2*PREFETCH_DIST], n[D-C+1]
+ .rotp p[N]
+
+ .save ar.lc, saved_lc
+ mov saved_lc = ar.lc
+ .save pr, saved_pr
+ mov saved_pr = pr
+ .body
+
+ mov src_pre_mem = in1
+ mov pr.rot = 0x10000
+ mov ar.ec = 1 // special unrolled loop
+
+ mov dst_pre_mem = in0
+ mov ar.lc = 2*PREFETCH_DIST - 1
+
+ add src_pre_l2 = 8*8, in1
+ add dst_pre_l2 = 8*8, in0
+ add src0 = 8, in1 // first t1 src
+ add src1 = 3*8, in1 // first t3 src
+ add dst0 = 8, in0 // first t1 dst
+ add dst1 = 3*8, in0 // first t3 dst
+ mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
+ nop.m 0
+ nop.i 0
+ ;;
+ // same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0
+(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2
+ br.ctop.sptk .prefetch_loop
+ ;;
+ cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero)
+ mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits!
+ mov ar.ec = N // # of stages in pipeline
+ ;;
+.line_copy:
+(p[D]) ld8 t2 = [src0], 3*8 // M0
+(p[D]) ld8 t4 = [src1], 3*8 // M1
+(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
+(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
+ ;;
+(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
+(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
+(p[D]) st8 [dst0] = t1, 8 // M2
+(p[D]) st8 [dst1] = t3, 8 // M3
+ ;;
+(p[D]) ld8 t5 = [src0], 8
+(p[D]) ld8 t7 = [src1], 3*8
+(p[D]) st8 [dst0] = t2, 3*8
+(p[D]) st8 [dst1] = t4, 3*8
+ ;;
+(p[D]) ld8 t6 = [src0], 3*8
+(p[D]) ld8 t10 = [src1], 8
+(p[D]) st8 [dst0] = t5, 8
+(p[D]) st8 [dst1] = t7, 3*8
+ ;;
+(p[D]) ld8 t9 = [src0], 3*8
+(p[D]) ld8 t11 = [src1], 3*8
+(p[D]) st8 [dst0] = t6, 3*8
+(p[D]) st8 [dst1] = t10, 8
+ ;;
+(p[D]) ld8 t12 = [src0], 8
+(p[D]) ld8 t14 = [src1], 8
+(p[D]) st8 [dst0] = t9, 3*8
+(p[D]) st8 [dst1] = t11, 3*8
+ ;;
+(p[D]) ld8 t13 = [src0], 4*8
+(p[D]) ld8 t15 = [src1], 4*8
+(p[D]) st8 [dst0] = t12, 8
+(p[D]) st8 [dst1] = t14, 8
+ ;;
+(p[D-1])ld8 t1 = [src0], 8
+(p[D-1])ld8 t3 = [src1], 8
+(p[D]) st8 [dst0] = t13, 4*8
+(p[D]) st8 [dst1] = t15, 4*8
+ br.ctop.sptk .line_copy
+ ;;
+ mov ar.lc = saved_lc
+ mov pr = saved_pr, -1
+ br.ret.sptk.many rp
+END(copy_page)
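
For illustration (not part of the changeset): a loose C model of the staging idea
described in the header comment -- keep PREFETCH_DIST lines in flight ahead of the
line currently being copied. It uses the GCC __builtin_prefetch hint and makes no
attempt to reproduce the modulo-scheduled pipeline or the bank-conflict ordering.

    #include <stddef.h>
    #include <string.h>

    #define LINE          128   /* L2 line size assumed by the code above */
    #define PREFETCH_DIST 8

    /* Request source/destination lines PREFETCH_DIST lines ahead of the
     * line currently being copied, then copy line by line. */
    static void copy_page_model(void *dst, const void *src, size_t page_size)
    {
            char *d = dst;
            const char *s = src;
            size_t lines = page_size / LINE;

            for (size_t i = 0; i < lines; i++) {
                    if (i + PREFETCH_DIST < lines) {
                            __builtin_prefetch(s + (i + PREFETCH_DIST) * LINE, 0);
                            __builtin_prefetch(d + (i + PREFETCH_DIST) * LINE, 1);
                    }
                    memcpy(d + i * LINE, s + i * LINE, LINE);  /* the t1..t15 moves */
            }
    }

    int main(void)
    {
            static char src[16384] = { 1, 2, 3 }, dst[16384];
            copy_page_model(dst, src, sizeof(src));
            return dst[2] == 3 ? 0 : 1;
    }
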
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/flush.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/flush.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,61 @@
+/*
+ * Cache flushing routines.
+ *
+ * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 05/28/05 Zoltan Menyhart Dynamic stride size
+ */
+
+#include <asm/asmmacro.h>
+
+
+ /*
+ * flush_icache_range(start,end)
+ *
+ * Make i-cache(s) coherent with d-caches.
+ *
+ * Must deal with range from start to end-1 but nothing else (need to
+ * be careful not to touch addresses that may be unmapped).
+ *
+ * Note: "in0" and "in1" are preserved for debugging purposes.
+ */
+GLOBAL_ENTRY(flush_icache_range)
+
+ .prologue
+ alloc r2=ar.pfs,2,0,0,0
+ movl r3=ia64_i_cache_stride_shift
+ mov r21=1
+ ;;
+ ld8 r20=[r3] // r20: stride shift
+ sub r22=in1,r0,1 // last byte address
+ ;;
+ shr.u r23=in0,r20 // start / (stride size)
+ shr.u r22=r22,r20 // (last byte address) / (stride size)
+ shl r21=r21,r20 // r21: stride size of the i-cache(s)
+ ;;
+ sub r8=r22,r23 // number of strides - 1
+ shl r24=r23,r20 // r24: addresses for "fc.i" =
+ // "start" rounded down to stride
boundary
+ .save ar.lc,r3
+ mov r3=ar.lc // save ar.lc
+ ;;
+
+ .body
+ mov ar.lc=r8
+ ;;
+ /*
+ * 32 byte aligned loop, even number of (actually 2) bundles
+ */
+.Loop: fc.i r24 // issuable on M0 only
+ add r24=r21,r24 // we flush "stride size" bytes per iteration
+ nop.i 0
+ br.cloop.sptk.few .Loop
+ ;;
+ sync.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.lc=r3 // restore ar.lc
+ br.ret.sptk.many rp
+END(flush_icache_range)
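
For illustration (not part of the changeset): the loop-count arithmetic with concrete
numbers, assuming a 64-byte i-cache stride (i.e. ia64_i_cache_stride_shift == 6); the
address range is made up.

    #include <stdio.h>

    int main(void)
    {
            unsigned long start = 0x1004, end = 0x10f2;     /* hypothetical range */
            unsigned long shift = 6;                        /* assumed 64-byte stride */

            unsigned long first = start >> shift;           /* shr.u r23=in0,r20 */
            unsigned long last  = (end - 1) >> shift;       /* sub/shr.u on r22 */
            unsigned long lc    = last - first;             /* sub r8=r22,r23 -> ar.lc */

            /* the loop executes ar.lc+1 fc.i instructions, one stride apart */
            printf("fc.i iterations: %lu\n", lc + 1);       /* prints 4 */
            return 0;
    }
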
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv32.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/idiv32.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2000 Hewlett-Packard Co
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 32-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture". This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP mod
+#else
+# define OP div
+#endif
+
+#ifdef UNSIGNED
+# define SGN u
+# define EXTEND zxt4
+# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define EXTEND sxt4
+# define INT_TO_FP(a,b) fcvt.xf a=b
+# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b) a##b
+#define PASTE(a,b) PASTE1(a,b)
+#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3))
+
+GLOBAL_ENTRY(NAME)
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias)
+ EXTEND in0 = in0 // in0 = a
+ EXTEND in1 = in1 // in1 = b
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+#ifdef MODULO
+ sub in1 = r0, in1 // in1 = -b
+#endif
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ INT_TO_FP(f8, f8)
+ INT_TO_FP(f9, f9)
+ ;;
+ setf.exp f7 = r2 // f7 = 2^-34
+ frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b)
+ ;;
+(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0
+(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1
+ ;;
+#ifdef MODULO
+ setf.sig f9 = in1 // f9 = -b
+#endif
+(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0
+(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34
+ ;;
+#ifdef MODULO
+ setf.sig f7 = in0
+#endif
+(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1
+ ;;
+ FP_TO_INT(f6, f6) // q = trunc(q2)
+ ;;
+#ifdef MODULO
+ xma.l f6 = f6, f9, f7 // r = q*(-b) + a
+ ;;
+#endif
+ getf.sig r8 = f6 // transfer result to result register
+ br.ret.sptk.many rp
+END(NAME)
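
For illustration (not part of the changeset): the frcpa/fma sequence rendered as
scalar C. The ~8-bit-accurate frcpa seed is replaced by a plain 1.0/b here, so the
refinement steps are shown for structure rather than necessity; idiv64.S below
applies the same reciprocal-refinement idea with one extra step.

    #include <stdio.h>

    /* Mirrors the predicated chain: q0 = a*y0, e0 = 1 - b*y0,
     * q1 = e0*q0 + q0, e1 = e0*e0 + 2^-34, q2 = e1*q1 + q1, q = trunc(q2). */
    static long div_by_reciprocal(long a, long b)
    {
            double y0 = 1.0 / (double)b;            /* stand-in for frcpa */
            double q0 = (double)a * y0;
            double e0 = 1.0 - (double)b * y0;       /* fnma */
            double q1 = e0 * q0 + q0;
            double e1 = e0 * e0 + 0x1p-34;          /* setf.exp f7 = 2^-34 */
            double q2 = e1 * q1 + q1;
            return (long)q2;                        /* fcvt.fx.trunc */
    }

    int main(void)
    {
            long q = div_by_reciprocal(1000003, 17);
            printf("%ld\n", q);                     /* prints 58823 */
            printf("%ld\n", 1000003 - 17 * q);      /* MODULO path: prints 12 */
            return 0;
    }
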
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv64.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/idiv64.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
+ *
+ * 64-bit integer division.
+ *
+ * This code is based on the application note entitled "Divide, Square Root
+ * and Remainder Algorithms for the IA-64 Architecture". This document
+ * is available as Intel document number 248725-002 or via the web at
+ * http://developer.intel.com/software/opensource/numerics/
+ *
+ * For more details on the theory behind these algorithms, see "IA-64
+ * and Elementary Functions" by Peter Markstein; HP Professional Books
+ * (http://www.hp.com/go/retailbooks/)
+ */
+
+#include <asm/asmmacro.h>
+
+#ifdef MODULO
+# define OP mod
+#else
+# define OP div
+#endif
+
+#ifdef UNSIGNED
+# define SGN u
+# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
+# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
+#else
+# define SGN
+# define INT_TO_FP(a,b) fcvt.xf a=b
+# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
+#endif
+
+#define PASTE1(a,b) a##b
+#define PASTE(a,b) PASTE1(a,b)
+#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3))
+
+GLOBAL_ENTRY(NAME)
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ INT_TO_FP(f8, f8)
+ INT_TO_FP(f9, f9)
+ ;;
+ frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b)
+ ;;
+(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0
+(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1
+ ;;
+(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0
+(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0
+ ;;
+#ifdef MODULO
+ sub in1 = r0, in1 // in1 = -b
+#endif
+(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1
+(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0
+ ;;
+(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1
+(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a
+ ;;
+#ifdef MODULO
+ setf.sig f8 = in0 // f8 = a
+ setf.sig f9 = in1 // f9 = -b
+#endif
+(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2
+ ;;
+ FP_TO_INT(f11, f11) // q = trunc(q3)
+ ;;
+#ifdef MODULO
+ xma.l f11 = f11, f9, f8 // r = q*(-b) + a
+ ;;
+#endif
+ getf.sig r8 = f11 // transfer result to result register
+ br.ret.sptk.many rp
+END(NAME)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memcpy_mck.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/memcpy_mck.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,661 @@
+/*
+ * Itanium 2-optimized version of memcpy and copy_user function
+ *
+ * Inputs:
+ * in0: destination address
+ * in1: source address
+ * in2: number of bytes to copy
+ * Output:
+ * 0 if success, or number of bytes NOT copied if error occurred.
+ *
+ * Copyright (C) 2002 Intel Corp.
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+#define EK(y...) EX(y)
+
+/* McKinley specific optimization */
+
+#define retval r8
+#define saved_pfs r31
+#define saved_lc r10
+#define saved_pr r11
+#define saved_in0 r14
+#define saved_in1 r15
+#define saved_in2 r16
+
+#define src0 r2
+#define src1 r3
+#define dst0 r17
+#define dst1 r18
+#define cnt r9
+
+/* r19-r30 are temp for each code section */
+#define PREFETCH_DIST 8
+#define src_pre_mem r19
+#define dst_pre_mem r20
+#define src_pre_l2 r21
+#define dst_pre_l2 r22
+#define t1 r23
+#define t2 r24
+#define t3 r25
+#define t4 r26
+#define t5 t1 // alias!
+#define t6 t2 // alias!
+#define t7 t3 // alias!
+#define n8 r27
+#define t9 t5 // alias!
+#define t10 t4 // alias!
+#define t11 t7 // alias!
+#define t12 t6 // alias!
+#define t14 t10 // alias!
+#define t13 r28
+#define t15 r29
+#define tmp r30
+
+/* defines for long_copy block */
+#define A 0
+#define B (PREFETCH_DIST)
+#define C (B + PREFETCH_DIST)
+#define D (C + 1)
+#define N (D + 1)
+#define Nrot ((N + 7) & ~7)
+
+/* alias */
+#define in0 r32
+#define in1 r33
+#define in2 r34
+
+GLOBAL_ENTRY(memcpy)
+ and r28=0x7,in0
+ and r29=0x7,in1
+ mov f6=f0
+ br.cond.sptk .common_code
+ ;;
+END(memcpy)
+GLOBAL_ENTRY(__copy_user)
+ .prologue
+// check dest alignment
+ and r28=0x7,in0
+ and r29=0x7,in1
+ mov f6=f1
+ mov saved_in0=in0 // save dest pointer
+ mov saved_in1=in1 // save src pointer
+ mov saved_in2=in2 // save len
+ ;;
+.common_code:
+ cmp.gt p15,p0=8,in2 // check for small size
+ cmp.ne p13,p0=0,r28 // check dest alignment
+ cmp.ne p14,p0=0,r29 // check src alignment
+ add src0=0,in1
+ sub r30=8,r28 // for .align_dest
+ mov retval=r0 // initialize return value
+ ;;
+ add dst0=0,in0
+ add dst1=1,in0 // dest odd index
+ cmp.le p6,p0 = 1,r30 // for .align_dest
+(p15) br.cond.dpnt .memcpy_short
+(p13) br.cond.dpnt .align_dest
+(p14) br.cond.dpnt .unaligned_src
+ ;;
+
+// both dest and src are aligned on 8-byte boundary
+.aligned_src:
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
+ .save pr, saved_pr
+ mov saved_pr=pr
+
+ shr.u cnt=in2,7 // this much cache line
+ ;;
+ cmp.lt p6,p0=2*PREFETCH_DIST,cnt
+ cmp.lt p7,p8=1,cnt
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ .body
+ add cnt=-1,cnt
+ add src_pre_mem=0,in1 // prefetch src pointer
+ add dst_pre_mem=0,in0 // prefetch dest pointer
+ ;;
+(p7) mov ar.lc=cnt // prefetch count
+(p8) mov ar.lc=r0
+(p6) br.cond.dpnt .long_copy
+ ;;
+
+.prefetch:
+ lfetch.fault [src_pre_mem], 128
+ lfetch.fault.excl [dst_pre_mem], 128
+ br.cloop.dptk.few .prefetch
+ ;;
+
+.medium_copy:
+ and tmp=31,in2 // copy length after iteration
+ shr.u r29=in2,5 // number of 32-byte iteration
+ add dst1=8,dst0 // 2nd dest pointer
+ ;;
+ add cnt=-1,r29 // ctop iteration adjustment
+ cmp.eq p10,p0=r29,r0 // do we really need to loop?
+ add src1=8,src0 // 2nd src pointer
+ cmp.le p6,p0=8,tmp
+ ;;
+ cmp.le p7,p0=16,tmp
+ mov ar.lc=cnt // loop setup
+ cmp.eq p16,p17 = r0,r0
+ mov ar.ec=2
+(p10) br.dpnt.few .aligned_src_tail
+ ;;
+ TEXT_ALIGN(32)
+1:
+EX(.ex_handler, (p16) ld8 r34=[src0],16)
+EK(.ex_handler, (p16) ld8 r38=[src1],16)
+EX(.ex_handler, (p17) st8 [dst0]=r33,16)
+EK(.ex_handler, (p17) st8 [dst1]=r37,16)
+ ;;
+EX(.ex_handler, (p16) ld8 r32=[src0],16)
+EK(.ex_handler, (p16) ld8 r36=[src1],16)
+EX(.ex_handler, (p16) st8 [dst0]=r34,16)
+EK(.ex_handler, (p16) st8 [dst1]=r38,16)
+ br.ctop.dptk.few 1b
+ ;;
+
+.aligned_src_tail:
+EX(.ex_handler, (p6) ld8 t1=[src0])
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8)
+ cmp.le p8,p0=24,tmp
+ and r21=-8,tmp
+ ;;
+EX(.ex_hndlr_s, (p8) ld8 t3=[src1])
+EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1
+ and in2=7,tmp // remaining length
+EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2
+ add src0=src0,r21 // setting up src pointer
+ add dst0=dst0,r21 // setting up dest pointer
+ ;;
+EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3
+ mov pr=saved_pr,-1
+ br.dptk.many .memcpy_short
+ ;;
+
+/* code taken from copy_page_mck */
+.long_copy:
+ .rotr v[2*PREFETCH_DIST]
+ .rotp p[N]
+
+ mov src_pre_mem = src0
+ mov pr.rot = 0x10000
+ mov ar.ec = 1 // special unrolled loop
+
+ mov dst_pre_mem = dst0
+
+ add src_pre_l2 = 8*8, src0
+ add dst_pre_l2 = 8*8, dst0
+ ;;
+ add src0 = 8, src_pre_mem // first t1 src
+ mov ar.lc = 2*PREFETCH_DIST - 1
+ shr.u cnt=in2,7 // number of lines
+ add src1 = 3*8, src_pre_mem // first t3 src
+ add dst0 = 8, dst_pre_mem // first t1 dst
+ add dst1 = 3*8, dst_pre_mem // first t3 dst
+ ;;
+ and tmp=127,in2 // remaining bytes after this block
+ add cnt = -(2*PREFETCH_DIST) - 1, cnt
+ // same as .line_copy loop, but with all predicated-off instructions removed:
+.prefetch_loop:
+EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0
+EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2
+ br.ctop.sptk .prefetch_loop
+ ;;
+ cmp.eq p16, p0 = r0, r0 // reset p16 to 1
+ mov ar.lc = cnt
+ mov ar.ec = N // # of stages in pipeline
+ ;;
+.line_copy:
+EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0
+EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1
+EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory
+EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2
+ ;;
+EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory
+EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2
+EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2
+EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3
+ ;;
+EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8)
+EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8)
+EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8)
+EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8)
+EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8)
+ ;;
+EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8)
+EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8)
+ ;;
+EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8)
+EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8)
+EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8)
+EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8)
+ br.ctop.sptk .line_copy
+ ;;
+
+ add dst0=-8,dst0
+ add src0=-8,src0
+ mov in2=tmp
+ .restore sp
+ br.sptk.many .medium_copy
+ ;;
+
+#define BLOCK_SIZE 128*32
+#define blocksize r23
+#define curlen r24
+
+// dest is on 8-byte boundary, src is not. We need to do
+// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle.
+.unaligned_src:
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,3,5,0,8
+ .save ar.lc, saved_lc
+ mov saved_lc=ar.lc
+ .save pr, saved_pr
+ mov saved_pr=pr
+ .body
+.4k_block:
+ mov saved_in0=dst0 // need to save all input arguments
+ mov saved_in2=in2
+ mov blocksize=BLOCK_SIZE
+ ;;
+ cmp.lt p6,p7=blocksize,in2
+ mov saved_in1=src0
+ ;;
+(p6) mov in2=blocksize
+ ;;
+ shr.u r21=in2,7 // this much cache line
+ shr.u r22=in2,4 // number of 16-byte iteration
+ and curlen=15,in2 // copy length after iteration
+ and r30=7,src0 // source alignment
+ ;;
+ cmp.lt p7,p8=1,r21
+ add cnt=-1,r21
+ ;;
+
+ add src_pre_mem=0,src0 // prefetch src pointer
+ add dst_pre_mem=0,dst0 // prefetch dest pointer
+ and src0=-8,src0 // 1st src pointer
+(p7) mov ar.lc = cnt
+(p8) mov ar.lc = r0
+ ;;
+ TEXT_ALIGN(32)
+1: lfetch.fault [src_pre_mem], 128
+ lfetch.fault.excl [dst_pre_mem], 128
+ br.cloop.dptk.few 1b
+ ;;
+
+ shladd dst1=r22,3,dst0 // 2nd dest pointer
+ shladd src1=r22,3,src0 // 2nd src pointer
+ cmp.eq p8,p9=r22,r0 // do we really need to loop?
+ cmp.le p6,p7=8,curlen; // have at least 8 byte remaining?
+ add cnt=-1,r22 // ctop iteration adjustment
+ ;;
+EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer
+EK(.ex_handler, (p9) ld8 r37=[src1],8)
+(p8) br.dpnt.few .noloop
+ ;;
+
+// The jump address is calculated based on src alignment. The COPYU
+// macro below needs to confine its size to a power of two, so an entry
+// can be calculated using shl instead of an expensive multiply. The
+// size is then hard coded by the following #define to match the
+// actual size. This makes it somewhat tedious when the COPYU macro gets
+// changed and this needs to be adjusted to match.
+#define LOOP_SIZE 6
+1:
+ mov r29=ip // jmp_table thread
+ mov ar.lc=cnt
+ ;;
+ add r29=.jump_table - 1b - (.jmp1-.jump_table), r29
+ shl r28=r30, LOOP_SIZE // jmp_table thread
+ mov ar.ec=2 // loop setup
+ ;;
+ add r29=r29,r28 // jmp_table thread
+ cmp.eq p16,p17=r0,r0
+ ;;
+ mov b6=r29 // jmp_table thread
+ ;;
+ br.cond.sptk.few b6
+
+// for 8-15 byte case
+// We will skip the loop, but need to replicate the side effect
+// that the loop produces.
+.noloop:
+EX(.ex_handler, (p6) ld8 r37=[src1],8)
+ add src0=8,src0
+(p6) shl r25=r30,3
+ ;;
+EX(.ex_handler, (p6) ld8 r27=[src1])
+(p6) shr.u r28=r37,r25
+(p6) sub r26=64,r25
+ ;;
+(p6) shl r27=r27,r26
+ ;;
+(p6) or r21=r28,r27
+
+.unaligned_src_tail:
+/* check if we have more than blocksize to copy, if so go back */
+ cmp.gt p8,p0=saved_in2,blocksize
+ ;;
+(p8) add dst0=saved_in0,blocksize
+(p8) add src0=saved_in1,blocksize
+(p8) sub in2=saved_in2,blocksize
+(p8) br.dpnt .4k_block
+ ;;
+
+/* we have up to 15 bytes to copy in the tail.
+ * part of work is already done in the jump table code
+ * we are at the following state.
+ * src side:
+ *
+ * xxxxxx xx <----- r21 has xxxxxxxx already
+ * -------- -------- --------
+ * 0 8 16
+ * ^
+ * |
+ * src1
+ *
+ * dst
+ * -------- -------- --------
+ * ^
+ * |
+ * dst1
+ */
+EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy
+(p6) add curlen=-8,curlen // update length
+ mov ar.pfs=saved_pfs
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,-1
+ mov in2=curlen // remaining length
+ mov dst0=dst1 // dest pointer
+ add src0=src1,r30 // forward by src alignment
+ ;;
+
+// 7 byte or smaller.
+.memcpy_short:
+ cmp.le p8,p9 = 1,in2
+ cmp.le p10,p11 = 2,in2
+ cmp.le p12,p13 = 3,in2
+ cmp.le p14,p15 = 4,in2
+ add src1=1,src0 // second src pointer
+ add dst1=1,dst0 // second dest pointer
+ ;;
+
+EX(.ex_handler_short, (p8) ld1 t1=[src0],2)
+EK(.ex_handler_short, (p10) ld1 t2=[src1],2)
+(p9) br.ret.dpnt rp // 0 byte copy
+ ;;
+
+EX(.ex_handler_short, (p8) st1 [dst0]=t1,2)
+EK(.ex_handler_short, (p10) st1 [dst1]=t2,2)
+(p11) br.ret.dpnt rp // 1 byte copy
+
+EX(.ex_handler_short, (p12) ld1 t3=[src0],2)
+EK(.ex_handler_short, (p14) ld1 t4=[src1],2)
+(p13) br.ret.dpnt rp // 2 byte copy
+ ;;
+
+ cmp.le p6,p7 = 5,in2
+ cmp.le p8,p9 = 6,in2
+ cmp.le p10,p11 = 7,in2
+
+EX(.ex_handler_short, (p12) st1 [dst0]=t3,2)
+EK(.ex_handler_short, (p14) st1 [dst1]=t4,2)
+(p15) br.ret.dpnt rp // 3 byte copy
+ ;;
+
+EX(.ex_handler_short, (p6) ld1 t5=[src0],2)
+EK(.ex_handler_short, (p8) ld1 t6=[src1],2)
+(p7) br.ret.dpnt rp // 4 byte copy
+ ;;
+
+EX(.ex_handler_short, (p6) st1 [dst0]=t5,2)
+EK(.ex_handler_short, (p8) st1 [dst1]=t6,2)
+(p9) br.ret.dptk rp // 5 byte copy
+
+EX(.ex_handler_short, (p10) ld1 t7=[src0],2)
+(p11) br.ret.dptk rp // 6 byte copy
+ ;;
+
+EX(.ex_handler_short, (p10) st1 [dst0]=t7,2)
+ br.ret.dptk rp // done all cases
+
+
+/* Align dest to nearest 8-byte boundary. We know we have at
+ * least 7 bytes to copy, enough to crawl to 8-byte boundary.
+ * Actual number of bytes to crawl depends on the dest alignment.
+ * 7 bytes or less is taken care of at .memcpy_short
+
+ * src0 - source even index
+ * src1 - source odd index
+ * dst0 - dest even index
+ * dst1 - dest odd index
+ * r30 - distance to 8-byte boundary
+ */
+
+.align_dest:
+ add src1=1,in1 // source odd index
+ cmp.le p7,p0 = 2,r30 // for .align_dest
+ cmp.le p8,p0 = 3,r30 // for .align_dest
+EX(.ex_handler_short, (p6) ld1 t1=[src0],2)
+ cmp.le p9,p0 = 4,r30 // for .align_dest
+ cmp.le p10,p0 = 5,r30
+ ;;
+EX(.ex_handler_short, (p7) ld1 t2=[src1],2)
+EK(.ex_handler_short, (p8) ld1 t3=[src0],2)
+ cmp.le p11,p0 = 6,r30
+EX(.ex_handler_short, (p6) st1 [dst0] = t1,2)
+ cmp.le p12,p0 = 7,r30
+ ;;
+EX(.ex_handler_short, (p9) ld1 t4=[src1],2)
+EK(.ex_handler_short, (p10) ld1 t5=[src0],2)
+EX(.ex_handler_short, (p7) st1 [dst1] = t2,2)
+EK(.ex_handler_short, (p8) st1 [dst0] = t3,2)
+ ;;
+EX(.ex_handler_short, (p11) ld1 t6=[src1],2)
+EK(.ex_handler_short, (p12) ld1 t7=[src0],2)
+ cmp.eq p6,p7=r28,r29
+EX(.ex_handler_short, (p9) st1 [dst1] = t4,2)
+EK(.ex_handler_short, (p10) st1 [dst0] = t5,2)
+ sub in2=in2,r30
+ ;;
+EX(.ex_handler_short, (p11) st1 [dst1] = t6,2)
+EK(.ex_handler_short, (p12) st1 [dst0] = t7)
+ add dst0=in0,r30 // setup arguments
+ add src0=in1,r30
+(p6) br.cond.dptk .aligned_src
+(p7) br.cond.dpnt .unaligned_src
+ ;;
+
+/* main loop body in jump table format */
+#define COPYU(shift)                                                           \
+1:                                                                             \
+EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */                               \
+EK(.ex_handler, (p16) ld8 r36=[src1],8);                                       \
+ (p17) shrp r35=r33,r34,shift;; /* 1 */                                        \
+EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */     \
+ nop.m 0;                                                                      \
+ (p16) shrp r38=r36,r37,shift;                                                 \
+EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */                               \
+EK(.ex_handler, (p17) st8 [dst1]=r39,8);                                       \
+ br.ctop.dptk.few 1b;;                                                         \
+ (p7) add src1=-8,src1; /* back out for <8 byte case */                        \
+ shrp r21=r22,r38,shift; /* speculative work */                                \
+ br.sptk.few .unaligned_src_tail /* branch out of jump table */                \
+ ;;
+ TEXT_ALIGN(32)
+.jump_table:
+ COPYU(8) // unaligned cases
+.jmp1:
+ COPYU(16)
+ COPYU(24)
+ COPYU(32)
+ COPYU(40)
+ COPYU(48)
+ COPYU(56)
+
+#undef A
+#undef B
+#undef C
+#undef D
+
+/*
+ * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
+ * instruction failed in the bundle. The exception algorithm is that we
+ * first figure out the faulting address, then detect if there is any
+ * progress made on the copy, if so, redo the copy from last known copied
+ * location up to the faulting address (exclusive). In the copy_from_user
+ * case, remaining bytes in the kernel buffer will be zeroed.
+ *
+ * Take copy_from_user as an example, in the code there are multiple loads
+ * in a bundle and those multiple loads could span over two pages, the
+ * faulting address is calculated as page_round_down(max(src0, src1)).
+ * This is based on knowledge that if we can access one byte in a page, we
+ * can access any byte in that page.
+ *
+ * predicate used in the exception handler:
+ * p6-p7: direction
+ * p10-p11: src faulting addr calculation
+ * p12-p13: dst faulting addr calculation
+ */
+
+#define A r19
+#define B r20
+#define C r21
+#define D r22
+#define F r28
+
+#define memset_arg0 r32
+#define memset_arg2 r33
+
+#define saved_retval loc0
+#define saved_rtlink loc1
+#define saved_pfs_stack loc2
+
+.ex_hndlr_s:
+ add src0=8,src0
+ br.sptk .ex_handler
+ ;;
+.ex_hndlr_d:
+ add dst0=8,dst0
+ br.sptk .ex_handler
+ ;;
+.ex_hndlr_lcpy_1:
+ mov src1=src_pre_mem
+ mov dst1=dst_pre_mem
+ cmp.gtu p10,p11=src_pre_mem,saved_in1
+ cmp.gtu p12,p13=dst_pre_mem,saved_in0
+ ;;
+(p10) add src0=8,saved_in1
+(p11) mov src0=saved_in1
+(p12) add dst0=8,saved_in0
+(p13) mov dst0=saved_in0
+ br.sptk .ex_handler
+.ex_handler_lcpy:
+ // in line_copy block, the preload addresses should always be ahead
+ // of the other two src/dst pointers. Furthermore, src1/dst1 should
+ // always be ahead of src0/dst0.
+ mov src1=src_pre_mem
+ mov dst1=dst_pre_mem
+.ex_handler:
+ mov pr=saved_pr,-1 // first restore pr, lc, and pfs
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ ;;
+.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs
+ cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction
+ cmp.ltu p10,p11=src0,src1
+ cmp.ltu p12,p13=dst0,dst1
+ fcmp.eq p8,p0=f6,f0 // is it memcpy?
+ mov tmp = dst0
+ ;;
+(p11) mov src1 = src0 // pick the larger of the two
+(p13) mov dst0 = dst1 // make dst0 the smaller one
+(p13) mov dst1 = tmp // and dst1 the larger one
+ ;;
+(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
+(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
+ ;;
+(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store
+(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load
+ mov retval=saved_in2
+(p8) ld1 tmp=[src1] // force an oops for memcpy call
+(p8) st1 [dst1]=r0 // force an oops for memcpy call
+(p14) br.ret.sptk.many rp
+
+/*
+ * The remaining byte to copy is calculated as:
+ *
+ * A = (faulting_addr - orig_src) -> len to faulting ld address
+ * or
+ * (faulting_addr - orig_dst) -> len to faulting st address
+ * B = (cur_dst - orig_dst) -> len copied so far
+ * C = A - B -> len need to be copied
+ * D = orig_len - A -> len need to be zeroed
+ */
+(p6) sub A = F, saved_in0
+(p7) sub A = F, saved_in1
+ clrrrb
+ ;;
+ alloc saved_pfs_stack=ar.pfs,3,3,3,0
+ sub B = dst0, saved_in0 // how many byte copied so far
+ ;;
+ sub C = A, B
+ sub D = saved_in2, A
+ ;;
+ cmp.gt p8,p0=C,r0 // more than 1 byte?
+ add memset_arg0=saved_in0, A
+(p6) mov memset_arg2=0 // copy_to_user should not call memset
+(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed
+ mov r8=0
+ mov saved_retval = D
+ mov saved_rtlink = b0
+
+ add out0=saved_in0, B
+ add out1=saved_in1, B
+ mov out2=C
+(p8) br.call.sptk.few b0=__copy_user // recursive call
+ ;;
+
+ add saved_retval=saved_retval,r8 // above might return non-zero value
+ cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte?
+ mov out0=memset_arg0 // *s
+ mov out1=r0 // c
+ mov out2=memset_arg2 // n
+(p8) br.call.sptk.few b0=memset
+ ;;
+
+ mov retval=saved_retval
+ mov ar.pfs=saved_pfs_stack
+ mov b0=saved_rtlink
+ br.ret.sptk.many rp
+
+/* end of McKinley specific optimization */
+END(__copy_user)
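
For illustration (not part of the changeset): the A/B/C/D bookkeeping from the
exception handler above, checked with concrete (hypothetical) numbers for a
copy_from_user() that faults part-way through.

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical copy_from_user that faulted part-way through */
            unsigned long orig_src = 0x1000, orig_dst = 0x9000, orig_len = 0x300;
            unsigned long fault    = 0x1200;   /* faulting src, rounded down to a page */
            unsigned long cur_dst  = 0x9180;   /* dst0 at the time of the fault */

            unsigned long A = fault - orig_src;   /* len up to the faulting address */
            unsigned long B = cur_dst - orig_dst; /* len already copied */
            unsigned long C = A - B;              /* len still safe to (re)copy */
            unsigned long D = orig_len - A;       /* len of kernel buffer to zero */

            printf("recopy %#lx bytes, memset %#lx bytes, return %#lx uncopied\n",
                   C, D, D);   /* prints 0x80, 0x100, 0x100 */
            return 0;
    }
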
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memset.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/memset.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,362 @@
+/* Optimized version of the standard memset() function.
+
+ Copyright (c) 2002 Hewlett-Packard Co/CERN
+ Sverre Jarp <Sverre.Jarp@xxxxxxx>
+
+ Return: dest
+
+ Inputs:
+ in0: dest
+ in1: value
+ in2: count
+
+ The algorithm is fairly straightforward: set byte by byte until we
+ get to a 16B-aligned address, then loop on 128 B chunks using an
+ early store as prefetching, then loop on 32B chunks, then clear remaining
+ words, finally clear remaining bytes.
+ Since a stf.spill f0 can store 16B in one go, we use this instruction
+ to get peak speed when value = 0. */
+
+#include <asm/asmmacro.h>
+#undef ret
+
+#define dest in0
+#define value in1
+#define cnt in2
+
+#define tmp r31
+#define save_lc r30
+#define ptr0 r29
+#define ptr1 r28
+#define ptr2 r27
+#define ptr3 r26
+#define ptr9 r24
+#define loopcnt r23
+#define linecnt r22
+#define bytecnt r21
+
+#define fvalue f6
+
+// This routine uses only scratch predicate registers (p6 - p15)
+#define p_scr p6 // default register for same-cycle branches
+#define p_nz p7
+#define p_zr p8
+#define p_unalgn p9
+#define p_y p11
+#define p_n p12
+#define p_yy p13
+#define p_nn p14
+
+#define MIN1 15
+#define MIN1P1HALF 8
+#define LINE_SIZE 128
+#define LSIZE_SH 7 // shift amount
+#define PREF_AHEAD 8
+
+GLOBAL_ENTRY(memset)
+{ .mmi
+ .prologue
+ alloc tmp = ar.pfs, 3, 0, 0, 0
+ lfetch.nt1 [dest] //
+ .save ar.lc, save_lc
+ mov.i save_lc = ar.lc
+ .body
+} { .mmi
+ mov ret0 = dest // return value
+ cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
+ cmp.eq p_scr, p0 = cnt, r0
+;; }
+{ .mmi
+ and ptr2 = -(MIN1+1), dest // aligned address
+ and tmp = MIN1, dest // prepare to check for correct alignment
+ tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
+} { .mib
+ mov ptr1 = dest
+ mux1 value = value, @brcst // create 8 identical bytes in word
+(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
+;; }
+{ .mib
+ cmp.ne p_unalgn, p0 = tmp, r0 //
+} { .mib
+ sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt
+ cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
+(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
+;; }
+{ .mmi
+(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
+(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
+;; }
+{ .mib
+(p_y) add cnt = -8, cnt //
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
+} { .mib
+(p_y) st8 [ptr2] = value,-4 //
+(p_n) add ptr2 = 4, ptr2 //
+;; }
+{ .mib
+(p_yy) add cnt = -4, cnt //
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
+} { .mib
+(p_yy) st4 [ptr2] = value,-2 //
+(p_nn) add ptr2 = 2, ptr2 //
+;; }
+{ .mmi
+ mov tmp = LINE_SIZE+1 // for compare
+(p_y) add cnt = -2, cnt //
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
+} { .mmi
+ setf.sig fvalue=value // transfer value to FLP side
+(p_y) st2 [ptr2] = value,-1 //
+(p_n) add ptr2 = 1, ptr2 //
+;; }
+
+{ .mmi
+(p_yy) st1 [ptr2] = value //
+ cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
+} { .mbb
+(p_yy) add cnt = -1, cnt //
+(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
+;; }
+
+{ .mib
+ nop.m 0
+ shr.u linecnt = cnt, LSIZE_SH
+(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
+;; }
+
+ TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later
+{ .mmi
+ and tmp = -(LINE_SIZE), cnt // compute end of range
+ mov ptr9 = ptr1 // used for prefetching
+ and cnt = (LINE_SIZE-1), cnt // remainder
+} { .mmi
+ mov loopcnt = PREF_AHEAD-1 // default prefetch loop
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr) add loopcnt = -1, linecnt //
+ add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores)
+ add ptr1 = tmp, ptr1 // first address beyond total range
+;; }
+{ .mmi
+ add tmp = -1, linecnt // next loop count
+ mov.i ar.lc = loopcnt //
+;; }
+.pref_l1a:
+{ .mib
+ stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart
+ nop.i 0
+ br.cloop.dptk.few .pref_l1a
+;; }
+{ .mmi
+ add ptr0 = 16, ptr2 // Two stores in parallel
+ mov.i ar.lc = tmp //
+;; }
+.l1ax:
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 8
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 24
+ stf8 [ptr0] = fvalue, 24
+ ;; }
+ { .mmi
+ stf8 [ptr2] = fvalue, 8
+ stf8 [ptr0] = fvalue, 32
+ cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ ;; }
+{ .mmb
+ stf8 [ptr2] = fvalue, 24
+(p_scr) stf8 [ptr9] = fvalue, 128
+ br.cloop.dptk.few .l1ax
+;; }
+{ .mbb
+ cmp.le p_scr, p0 = 8, cnt // just a few bytes left ?
+(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
+ br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
+;; }
+
+ TEXT_ALIGN(32)
+.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later
+{ .mmi
+ and tmp = -(LINE_SIZE), cnt // compute end of range
+ mov ptr9 = ptr1 // used for prefetching
+ and cnt = (LINE_SIZE-1), cnt // remainder
+} { .mmi
+ mov loopcnt = PREF_AHEAD-1 // default prefetch loop
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+;; }
+{ .mmi
+(p_scr) add loopcnt = -1, linecnt
+ add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
+ add ptr1 = tmp, ptr1 // first address beyond total range
+;; }
+{ .mmi
+ add tmp = -1, linecnt // next loop count
+ mov.i ar.lc = loopcnt
+;; }
+.pref_l1b:
+{ .mib
+ stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
+ nop.i 0
+ br.cloop.dptk.few .pref_l1b
+;; }
+{ .mmi
+ add ptr0 = 16, ptr2 // Two stores in parallel
+ mov.i ar.lc = tmp
+;; }
+.l1bx:
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 32
+ ;; }
+ { .mmi
+ stf.spill [ptr2] = f0, 32
+ stf.spill [ptr0] = f0, 64
+ cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ ;; }
+{ .mmb
+ stf.spill [ptr2] = f0, 32
+(p_scr) stf.spill [ptr9] = f0, 128
+ br.cloop.dptk.few .l1bx
+;; }
+{ .mib
+ cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment //
+;; }
+
+.fraction_of_line:
+{ .mib
+ add ptr2 = 16, ptr1
+ shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
+;; }
+{ .mib
+ cmp.eq p_scr, p0 = loopcnt, r0
+ add loopcnt = -1, loopcnt
+(p_scr) br.cond.dpnt.many .store_words
+;; }
+{ .mib
+ and cnt = 0x1f, cnt // compute the remaining cnt
+ mov.i ar.lc = loopcnt
+;; }
+ TEXT_ALIGN(32)
+.l2: // ------------------------------------ // L2A: store 32B in 2 cycles
+{ .mmb
+ stf8 [ptr1] = fvalue, 8
+ stf8 [ptr2] = fvalue, 8
+;; } { .mmb
+ stf8 [ptr1] = fvalue, 24
+ stf8 [ptr2] = fvalue, 24
+ br.cloop.dptk.many .l2
+;; }
+.store_words:
+{ .mib
+ cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
+;; }
+
+{ .mmi
+ stf8 [ptr1] = fvalue, 8 // store
+ cmp.le p_y, p_n = 16, cnt
+ add cnt = -8, cnt // subtract
+;; }
+{ .mmi
+(p_y) stf8 [ptr1] = fvalue, 8 // store
+(p_y) cmp.le.unc p_yy, p_nn = 16, cnt
+(p_y) add cnt = -8, cnt // subtract
+;; }
+{ .mmi // store
+(p_yy) stf8 [ptr1] = fvalue, 8
+(p_yy) add cnt = -8, cnt // subtract
+;; }
+
+.move_bytes_from_alignment:
+{ .mib
+ cmp.eq p_scr, p0 = cnt, r0
+ tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
+(p_scr) br.cond.dpnt.few .restore_and_exit
+;; }
+{ .mib
+(p_y) st4 [ptr1] = value,4
+ tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
+;; }
+{ .mib
+(p_yy) st2 [ptr1] = value,2
+ tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
+;; }
+
+{ .mib
+(p_y) st1 [ptr1] = value
+;; }
+.restore_and_exit:
+{ .mib
+ nop.m 0
+ mov.i ar.lc = save_lc
+ br.ret.sptk.many rp
+;; }
+
+.move_bytes_unaligned:
+{ .mmi
+ .pred.rel "mutex",p_y, p_n
+ .pred.rel "mutex",p_yy, p_nn
+(p_n) cmp.le p_yy, p_nn = 4, cnt
+(p_y) cmp.le p_yy, p_nn = 5, cnt
+(p_n) add ptr2 = 2, ptr1
+} { .mmi
+(p_y) add ptr2 = 3, ptr1
+(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left]
+(p_y) add cnt = -1, cnt
+;; }
+{ .mmi
+(p_yy) cmp.le.unc p_y, p0 = 8, cnt
+ add ptr3 = ptr1, cnt // prepare last store
+ mov.i ar.lc = save_lc
+} { .mmi
+(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (or less) left]
+(p_yy) add cnt = -4, cnt
+;; }
+{ .mmi
+(p_y) cmp.le.unc p_yy, p0 = 8, cnt
+ add ptr3 = -1, ptr3 // last store
+ tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
+} { .mmi
+(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left]
+(p_y) add cnt = -4, cnt
+;; }
+{ .mmi
+(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
+(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left]
+ tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
+} { .mmi
+(p_yy) add cnt = -4, cnt
+;; }
+{ .mmb
+(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
+(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
+ br.ret.sptk.many rp
+}
+END(memset)
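
For illustration (not part of the changeset): a compact C model of the staging
described in the header comment -- byte-fill to 16B alignment, 128B blocks, 32B
blocks, 8B words, then trailing bytes. It makes no attempt to mirror the assembly's
scheduling or the stf.spill zero-fill fast path.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    static void memset_model(void *dst, int value, size_t cnt)
    {
            unsigned char *p = dst;
            uint64_t pat = 0x0101010101010101ULL * (unsigned char)value; /* mux1 @brcst */

            while (cnt && ((uintptr_t)p & 15)) { *p++ = (unsigned char)value; cnt--; }
            for (; cnt >= 128; p += 128, cnt -= 128)            /* line-sized blocks */
                    for (int i = 0; i < 16; i++)
                            memcpy(p + 8 * i, &pat, 8);
            for (; cnt >= 32; p += 32, cnt -= 32)               /* 32B chunks */
                    for (int i = 0; i < 4; i++)
                            memcpy(p + 8 * i, &pat, 8);
            for (; cnt >= 8; p += 8, cnt -= 8)                  /* remaining words */
                    memcpy(p, &pat, 8);
            while (cnt--)                                       /* remaining bytes */
                    *p++ = (unsigned char)value;
    }

    int main(void)
    {
            unsigned char buf[300];
            memset_model(buf, 0xab, sizeof(buf));
            printf("%02x %02x\n", buf[0], buf[299]);            /* prints "ab ab" */
            return 0;
    }
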
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/strlen.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/linux/strlen.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,192 @@
+/*
+ *
+ * Optimized version of the standard strlen() function
+ *
+ *
+ * Inputs:
+ * in0 address of string
+ *
+ * Outputs:
+ * ret0 the number of characters in the string (0 if empty string)
+ * does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@xxxxxxxxxx>
+ *
+ * 09/24/99 S.Eranian add speculation recovery code
+ */
+
+#include <asm/asmmacro.h>
+
+//
+//
+// This is an enhanced version of the basic strlen. it includes a combination
+// of compute zero index (czx), parallel comparisons, speculative loads and
+// loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+// The goal is to look at the string in chunks of 8 bytes.
+// so we need to do a few extra checks at the beginning because the
+// string may not be 8-byte aligned. In this case we load the 8byte
+// quantity which includes the start of the string and mask the unused
+// bytes with 0xff to avoid confusing czx.
+// We use speculative loads and software pipelining to hide memory
+// latency and do read ahead safely. This way we defer any exception.
+//
+// Because we don't want the kernel to be relying on particular
+// settings of the DCR register, we provide recovery code in case
+// speculation fails. The recovery code is going to "redo" the work using
+// only normal loads. If we still get a fault then we generate a
+// kernel panic. Otherwise we return the strlen as usual.
+//
+// The fact that speculation may fail can be caused, for instance, by
+// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+// a NaT bit will be set if the translation is not present. The normal
+// load, on the other hand, will cause the translation to be inserted
+// if the mapping exists.
+//
+// It should be noted that we execute recovery code only when we need
+// to use the data that has been speculatively loaded: we don't execute
+// recovery code on pure read ahead data.
+//
+// Remarks:
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// register to 1. This is required to make sure that we get the parallel
+// compare correct.
+//
+// - we don't use the epilogue counter to exit the loop but we need to set
+// it to zero beforehand.
+//
+// - after the loop we must test for Nat values because neither the
+// czx nor cmp instruction raise a NaT consumption fault. We must be
+// careful not to look too far for a Nat for which we don't care.
+// For instance we don't need to look at a NaT in val2 if the zero byte
+// was in val1.
+//
+// - Clearly performance tuning is required.
+//
+//
+//
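
Editor's note: the assembly below leans on IA64-specific machinery (czx1.r, parallel compares, register rotation, ld8.s speculation). As a reading aid only, here is a minimal C sketch of the underlying word-at-a-time idea; it is not part of the patch, the names are illustrative, and like the assembly it deliberately over-reads within the final aligned 8-byte chunk.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Emulate czx1.r: index (0..7) of the rightmost zero byte in a 64-bit
     * word, or 8 if none.  "Rightmost" means lowest-addressed byte on a
     * little-endian machine, which is how Linux/ia64 runs. */
    static int zero_byte_index(uint64_t w)
    {
        int i;

        for (i = 0; i < 8; i++)
            if (((w >> (i * 8)) & 0xff) == 0)
                return i;
        return 8;
    }

    size_t sketch_strlen(const char *s)
    {
        const char *p = s;
        uintptr_t misalign = (uintptr_t)p & 7;
        uint64_t w;
        int idx;

        /* First chunk: align down and force the bytes that precede the
         * string start to 0xff so they cannot look like the terminator. */
        p -= misalign;
        memcpy(&w, p, 8);
        if (misalign)
            w |= ~0ULL >> ((8 - misalign) * 8);

        /* Walk the string one aligned 8-byte chunk at a time. */
        while ((idx = zero_byte_index(w)) == 8) {
            p += 8;
            memcpy(&w, p, 8);
        }
        return (size_t)(p + idx - s);
    }

The real routine additionally overlaps two czx results per iteration and issues the loads speculatively so read-ahead can run past the terminator, which is why it needs the NaT recovery path described above.
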
+#define saved_pfs r11
+#define tmp r10
+#define base r16
+#define orig r17
+#define saved_pr r18
+#define src r19
+#define mask r20
+#define val r21
+#define val1 r22
+#define val2 r23
+
+GLOBAL_ENTRY(strlen)
+ .prologue
+ .save ar.pfs, saved_pfs
+ alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
+
+ .rotr v[2], w[2] // declares our 4 aliases
+
+ extr.u tmp=in0,0,3 // tmp=least significant 3 bits
+ mov orig=in0 // keep trackof initial byte address
+ dep src=0,in0,0,3 // src=8byte-aligned in0 address
+ .save pr, saved_pr
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+
+ .body
+
+ ld8 v[1]=[src],8 // must not speculate: can fail here
+ shl tmp=tmp,3 // multiply by 8bits/byte
+ mov mask=-1 // our mask
+ ;;
+ ld8.s w[1]=[src],8 // speculatively load next
+ cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and
+ sub tmp=64,tmp // how many bits to shift our mask on the right
+ ;;
+ shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
+ mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
+ ;;
+ add base=-16,src // keep track of aligned base
+ or v[1]=v[1],mask // now we have a safe initial byte pattern
+ ;;
+1:
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
+ czx1.r val2=w[1] // search 0 byte from right following 8bytes
+ ;;
+ ld8.s w[0]=[src],8 // speculatively load next to next
+ cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+ cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8
+(p6) br.wtop.dptk 1b // loop until p6 == 0
+ ;;
+ //
+ // We must return try the recovery code iff
+ // val1_is_nat || (val1==8 && val2_is_nat)
+ //
+ // XXX Fixme
+ // - there must be a better way of doing the test
+ //
+ cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
+ tnat.nz p6,p7=val1 // test NaT on val1
+(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
+ ;;
+ //
+ // if we come here p7 is true, i.e., initialized for // cmp
+ //
+ cmp.eq.and p7,p0=8,val1// val1==8?
+ tnat.nz.and p7,p0=val2 // test NaT if val2
+(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
+ ;;
+(p8) mov val1=val2 // the other test got us out of the loop
+(p8) adds src=-16,src // correct position when 3 ahead
+(p9) adds src=-24,src // correct position when 4 ahead
+ ;;
+ sub ret0=src,orig // distance from base
+ sub tmp=8,val1 // which byte in word
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // adjust
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of normal execution
+
+ //
+ // Outlined recovery code when speculation failed
+ //
+ // This time we don't use speculation and rely on the normal exception
+ // mechanism. that's why the loop is not as good as the previous one
+ // because read ahead is not possible
+ //
+ // IMPORTANT:
+ // Please note that in the case of strlen() as opposed to strlen_user()
+ // we don't use the exception mechanism, as this function is not
+ // supposed to fail. If that happens it means we have a bug and the
+ // code will cause of kernel fault.
+ //
+ // XXX Fixme
+ // - today we restart from the beginning of the string instead
+ // of trying to continue where we left off.
+ //
+.recover:
+ ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ or val=val,mask // remask first bytes
+ cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
+ ;;
+ //
+ // ar.ec is still zero here
+ //
+2:
+(p6) ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ czx1.r val1=val // search 0 byte from right
+ ;;
+ cmp.eq p6,p0=8,val1 // val1==8 ?
+(p6) br.wtop.dptk 2b // loop until p6 == 0
+ ;; // (avoid WAW on p63)
+ sub ret0=base,orig // distance from base
+ sub tmp=8,val1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.many rp // end of successful recovery code
+END(strlen)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mm.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/mm.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,152 @@
+/******************************************************************************
+ * arch/ia64/mm.c
+ *
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ * Copyright (c) 2005, Intel Corporation.
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * A description of the x86 page table API:
+ *
+ * Domains trap to do_mmu_update with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ *
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ *
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the free pool when it still holds
+ * a reference to it.
+ *
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writable by the domain [of course, a
+ * frame may not be used in any of these three ways!].
+ * So, type_count is a count of the number of times a frame is being
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ *
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ *
+ * A further note on writable page mappings:
+ * -----------------------------------------
+ * For simplicity, the count of writable mappings for a page may not
+ * correspond to reality. The 'writable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ *
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the
+ * bit is clear then any mapped page is writable.
+ *
+ * We get round this by always setting the WP bit and disallowing
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
+
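
Editor's sketch (not part of the patch): roughly what a caller of the MMU-update interface described above might look like. mmu_update_t, MMU_NORMAL_PT_UPDATE, MMU_MACHPHYS_UPDATE and DOMID_SELF are taken from the public Xen headers; the hypercall wrapper and helper names are hypothetical.

    #include <public/xen.h>

    /* hypothetical wrapper around the mmu_update hypercall */
    extern int xen_mmu_update(mmu_update_t *reqs, u64 count,
                              u64 *done, domid_t dom);

    static int sketch_map_one_page(u64 va, u64 pte_val, u64 mfn, u64 gpfn)
    {
        mmu_update_t req[2];
        u64 done = 0;

        /* "*ptr = val": the command is encoded in the low two bits of ptr,
         * exactly as decoded by 'cmd = req.ptr & 3' in do_mmu_update()
         * below.  For this ia64/VTI variant ptr carries the guest virtual
         * address being mapped (see the note above do_mmu_update). */
        req[0].ptr = va | MMU_NORMAL_PT_UPDATE;
        req[0].val = pte_val;

        /* Update the machine-to-physical table entry for frame 'mfn'. */
        req[1].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        req[1].val = gpfn;

        return xen_mmu_update(req, 2, &done, DOMID_SELF);
    }
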
+#include <xen/config.h>
+#include <public/xen.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmmu.h>
+#include <asm/regionreg.h>
+#include <asm/vmx_mm_def.h>
+/*
+ uregs->ptr is virtual address
+ uregs->val is pte value
+ */
+#ifdef CONFIG_VTI
+int do_mmu_update(mmu_update_t *ureqs,u64 count,u64 *pdone,u64 foreigndom)
+{
+ int i,cmd;
+ u64 mfn, gpfn;
+ VCPU *vcpu;
+ mmu_update_t req;
+ ia64_rr rr;
+ thash_cb_t *hcb;
+ thash_data_t entry={0},*ovl;
+ vcpu = current;
+ search_section_t sections;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ for ( i = 0; i < count; i++ )
+ {
+ copy_from_user(&req, ureqs, sizeof(req));
+ cmd = req.ptr&3;
+ req.ptr &= ~3;
+ if(cmd ==MMU_NORMAL_PT_UPDATE){
+ entry.page_flags = req.val;
+ entry.locked = 1;
+ entry.tc = 1;
+ entry.cl = DSIDE_TLB;
+ rr = vmx_vcpu_rr(vcpu, req.ptr);
+ entry.ps = rr.ps;
+ entry.key = redistribute_rid(rr.rid);
+ entry.rid = rr.rid;
+ entry.vadr = PAGEALIGN(req.ptr,entry.ps);
+ sections.tr = 1;
+ sections.tc = 0;
+ ovl = thash_find_overlap(hcb, &entry, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return;
+ }
+ thash_purge_and_insert(hcb, &entry);
+ }else if(cmd == MMU_MACHPHYS_UPDATE){
+ mfn = req.ptr >>PAGE_SHIFT;
+ gpfn = req.val;
+ set_machinetophys(mfn,gpfn);
+ }else{
+ printf("Unkown command of mmu_update:ptr: %lx,val: %lx
\n",req.ptr,req.val);
+ while(1);
+ }
+ ureqs ++;
+ }
+ return 0;
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mmio.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/mmio.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,515 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/privop.h>
+#include <asm/types.h>
+#include <public/io/ioreq.h>
+#include <asm/mm.h>
+#include <asm/vmx.h>
+
+/*
+struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
+{
+ int i;
+ for (i=0; mio_base[i].iot != NOT_IO; i++ ) {
+ if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end )
+ return &mio_base[i];
+ }
+ return NULL;
+}
+*/
+
+#define PIB_LOW_HALF(ofst) !(ofst&(1<<20))
+#define PIB_OFST_INTA 0x1E0000
+#define PIB_OFST_XTP 0x1E0008
+
+static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
+{
+ switch (pib_off) {
+ case PIB_OFST_INTA:
+ panic("Undefined write on PIB INTA\n");
+ break;
+ case PIB_OFST_XTP:
+ if ( s == 1 && ma == 4 /* UC */) {
+ vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
+ }
+ else {
+ panic("Undefined write on PIB XTP\n");
+ }
+ break;
+ default:
+ if ( PIB_LOW_HALF(pib_off) ) { // lower half
+ if ( s != 8 || ma != 0x4 /* UC */ ) {
+ panic("Undefined IPI-LHF write with s %d, ma %d!\n", s, ma);
+ }
+ else {
+ write_ipi(vcpu, pib_off, *(uint64_t *)src);
+ // TODO for SM-VP
+ }
+ }
+ else { // upper half
+ printf("IPI-UHF write %lx\n",pib_off);
+ panic("Not support yet for SM-VP\n");
+ }
+ break;
+ }
+}
+
+static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma)
+{
+ switch (pib_off) {
+ case PIB_OFST_INTA:
+ // todo --- emit on processor system bus.
+ if ( s == 1 && ma == 4) { // 1 byte load
+ // TODO: INTA read from IOSAPIC
+ }
+ else {
+ panic("Undefined read on PIB INTA\n");
+ }
+ break;
+ case PIB_OFST_XTP:
+ if ( s == 1 && ma == 4) {
+ *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
+ }
+ else {
+ panic("Undefined read on PIB XTP\n");
+ }
+ break;
+ default:
+ if ( PIB_LOW_HALF(pib_off) ) { // lower half
+ if ( s != 8 || ma != 4 ) {
+ panic("Undefined IPI-LHF read!\n");
+ }
+ else {
+#ifdef IPI_DEBUG
+ printf("IPI-LHF read %lx\n",pib_off);
+#endif
+ *(uint64_t *)dest = 0; // TODO for SM-VP
+ }
+ }
+ else { // upper half
+ if ( s != 1 || ma != 4 ) {
+ panic("Undefined PIB-UHF read!\n");
+ }
+ else {
+#ifdef IPI_DEBUG
+ printf("IPI-UHF read %lx\n",pib_off);
+#endif
+ *(uint8_t *)dest = 0; // TODO for SM-VP
+ }
+ }
+ break;
+ }
+}
+
+static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+ struct vcpu *v = current;
+ vcpu_iodata_t *vio;
+ ioreq_t *p;
+ unsigned long addr;
+
+ vio = get_vio(v->domain, v->vcpu_id);
+ if (vio == 0) {
+ panic("bad shared page: %lx", (unsigned long)vio);
+ }
+ p = &vio->vp_ioreq;
+ p->addr = pa;
+ p->size = s;
+ p->count = 1;
+ p->dir = dir;
+ if(dir==IOREQ_WRITE) //write;
+ p->u.data = *val;
+ p->pdata_valid = 0;
+ p->port_mm = 1;
+ p->df = 0;
+
+ set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+ p->state = STATE_IOREQ_READY;
+ evtchn_send(iopacket_port(v->domain));
+ vmx_wait_io();
+ if(dir==IOREQ_READ){ //read
+ *val=p->u.data;
+ }
+ return;
+}
+#define TO_LEGACY_IO(pa) (((pa)>>12<<2)|((pa)&0x3))
+
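Editor's worked example (not part of the patch) of the TO_LEGACY_IO() encoding above: legacy_io_access() below applies it to the low 26 bits of the guest physical address (a 64MB legacy I/O window), recovering a port number whose low two bits come from the in-page offset and whose remaining bits come from the page index, so each group of four consecutive ports sits on its own 4KB page. The standalone program is illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t to_legacy_io(uint64_t pa)
    {
        return ((pa >> 12) << 2) | (pa & 0x3);   /* same as TO_LEGACY_IO(pa) */
    }

    int main(void)
    {
        /* PCI config address port 0xcf8: page index 0xcf8 >> 2 = 0x33e,
         * in-page offset 0 -> window offset 0x33e000. */
        printf("0x%llx -> port 0x%llx\n", 0x33e000ULL,
               (unsigned long long)to_legacy_io(0x33e000));
        /* offset 0x33e001 -> port 0xcf9, and so on within the same page. */
        printf("0x%llx -> port 0x%llx\n", 0x33e001ULL,
               (unsigned long long)to_legacy_io(0x33e001));
        return 0;
    }
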
+static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
+{
+ struct vcpu *v = current;
+ vcpu_iodata_t *vio;
+ ioreq_t *p;
+ unsigned long addr;
+
+ vio = get_vio(v->domain, v->vcpu_id);
+ if (vio == 0) {
+ panic("bad shared page: %lx");
+ }
+ p = &vio->vp_ioreq;
+ p->addr = TO_LEGACY_IO(pa&0x3ffffffUL);
+ p->size = s;
+ p->count = 1;
+ p->dir = dir;
+ if(dir==IOREQ_WRITE) //write;
+ p->u.data = *val;
+ p->pdata_valid = 0;
+ p->port_mm = 0;
+ p->df = 0;
+
+ set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+ p->state = STATE_IOREQ_READY;
+ evtchn_send(iopacket_port(v->domain));
+
+ vmx_wait_io();
+ if(dir==IOREQ_READ){ //read
+ *val=p->u.data;
+ }
+#ifdef DEBUG_PCI
+ if(dir==IOREQ_WRITE)
+ if(p->addr == 0xcf8UL)
+ printk("Write 0xcf8, with val [0x%lx]\n", p->u.data);
+ else
+ if(p->addr == 0xcfcUL)
+ printk("Read 0xcfc, with val [0x%lx]\n", p->u.data);
+#endif //DEBUG_PCI
+ return;
+}
+
+static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir)
+{
+ struct virutal_platform_def *v_plat;
+ //mmio_type_t iot;
+ unsigned long iot;
+ iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT);
+ v_plat = vmx_vcpu_get_plat(vcpu);
+
+ switch (iot) {
+ case GPFN_PIB:
+ if(!dir)
+ pib_write(vcpu, dest, src_pa - v_plat->pib_base, s, ma);
+ else
+ pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
+ break;
+ case GPFN_GFW:
+ break;
+ case GPFN_IOSAPIC:
+ case GPFN_FRAME_BUFFER:
+ case GPFN_LOW_MMIO:
+ low_mmio_access(vcpu, src_pa, dest, s, dir);
+ break;
+ case GPFN_LEGACY_IO:
+ legacy_io_access(vcpu, src_pa, dest, s, dir);
+ break;
+ default:
+ panic("Bad I/O access\n");
+ break;
+ }
+ return;
+}
+
+/*
+ * Read or write data in guest virtual address mode.
+ */
+/*
+void
+memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
+{
+ uint64_t pa;
+
+ if (!vtlb->nomap)
+ panic("Normal memory write shouldn't go to this point!");
+ pa = PPN_2_PA(vtlb->ppn);
+ pa += POFFSET((u64)dest, vtlb->ps);
+ mmio_write (vcpu, src, pa, s, vtlb->ma);
+}
+
+
+void
+memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
+{
+ uint64_t pa = (uint64_t)dest;
+ int ma;
+
+ if ( pa & (1UL <<63) ) {
+ // UC
+ ma = 4;
+ pa <<=1;
+ pa >>=1;
+ }
+ else {
+ // WBL
+ ma = 0; // using WB for WBL
+ }
+ mmio_write (vcpu, src, pa, s, ma);
+}
+
+void
+memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
+{
+ uint64_t pa;
+
+ if (!vtlb->nomap)
+ panic("Normal memory write shouldn't go to this point!");
+ pa = PPN_2_PA(vtlb->ppn);
+ pa += POFFSET((u64)src, vtlb->ps);
+
+ mmio_read(vcpu, pa, dest, s, vtlb->ma);
+}
+
+void
+memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
+{
+ uint64_t pa = (uint64_t)src;
+ int ma;
+
+ if ( pa & (1UL <<63) ) {
+ // UC
+ ma = 4;
+ pa <<=1;
+ pa >>=1;
+ }
+ else {
+ // WBL
+ ma = 0; // using WB for WBL
+ }
+ mmio_read(vcpu, pa, dest, s, ma);
+}
+*/
+
+
+/*
+ * Deliver IPI message. (Only U-VP is supported now)
+ * offset: address offset to IPI space.
+ * value: deliver value.
+ */
+static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector)
+{
+#ifdef IPI_DEBUG
+ printf ("deliver_ipi %lx %lx\n",dm,vector);
+#endif
+ switch ( dm ) {
+ case 0: // INT
+ vmx_vcpu_pend_interrupt (vcpu, vector);
+ break;
+ case 2: // PMI
+ // TODO -- inject guest PMI
+ panic ("Inject guest PMI!\n");
+ break;
+ case 4: // NMI
+ vmx_vcpu_pend_interrupt (vcpu, 2);
+ break;
+ case 5: // INIT
+ // TODO -- inject guest INIT
+ panic ("Inject guest INIT!\n");
+ break;
+ case 7: // ExtINT
+ vmx_vcpu_pend_interrupt (vcpu, 0);
+ break;
+ case 1:
+ case 3:
+ case 6:
+ default:
+ panic ("Deliver reserved IPI!\n");
+ break;
+ }
+}
+
+/*
+ * TODO: Use hash table for the lookup.
+ */
+static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid)
+{
+ int i;
+ VCPU *vcpu;
+ LID lid;
+ for (i=0; i<MAX_VIRT_CPUS; i++) {
+ vcpu = d->vcpu[i];
+ if (!vcpu)
+ continue;
+ lid.val = VPD_CR(vcpu, lid);
+ if ( lid.id == id && lid.eid == eid ) {
+ return vcpu;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * execute write IPI op.
+ */
+static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value)
+{
+ VCPU *target_cpu;
+
+ target_cpu = lid_2_vcpu(vcpu->domain,
+ ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid);
+ if ( target_cpu == NULL ) panic("Unknown IPI cpu\n");
+ if ( target_cpu == vcpu ) {
+ // IPI to self
+ deliver_ipi (vcpu, ((ipi_d_t)value).dm,
+ ((ipi_d_t)value).vector);
+ return 1;
+ }
+ else {
+ // TODO: send Host IPI to inject guest SMP IPI interruption
+ panic ("No SM-VP supported!\n");
+ return 0;
+ }
+}
+
+
+/*
+ dir 1: read 0:write
+ inst_type 0:integer 1:floating point
+ */
+extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip);
+#define SL_INTEGER 0 // store/load interger
+#define SL_FLOATING 1 // store/load floating
+
+void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma)
+{
+ REGS *regs;
+ IA64_BUNDLE bundle;
+ int slot, dir, inst_type;
+ size_t size;
+ u64 data, value,post_update, slot1a, slot1b, temp;
+ INST64 inst;
+ regs=vcpu_regs(vcpu);
+ bundle = __vmx_get_domain_bundle(regs->cr_iip);
+ slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+ if (!slot) inst.inst = bundle.slot0;
+ else if (slot == 1){
+ slot1a=bundle.slot1a;
+ slot1b=bundle.slot1b;
+ inst.inst =slot1a + (slot1b<<18);
+ }
+ else if (slot == 2) inst.inst = bundle.slot2;
+
+
+ // Integer Load/Store
+ if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){
+ inst_type = SL_INTEGER; //
+ size=(inst.M1.x6&0x3);
+ if((inst.M1.x6>>2)>0xb){ // write
+ dir=IOREQ_WRITE; //write
+ vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data);
+ }else if((inst.M1.x6>>2)<0xb){ // read
+ dir=IOREQ_READ;
+ vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value);
+ }
+ }
+ // Integer Load + Reg update
+ else if(inst.M2.major==4&&inst.M2.m==1&&inst.M2.x==0){
+ inst_type = SL_INTEGER;
+ dir = IOREQ_READ; //write
+ size = (inst.M2.x6&0x3);
+ vmx_vcpu_get_gr(vcpu,inst.M2.r1,&value);
+ vmx_vcpu_get_gr(vcpu,inst.M2.r3,&temp);
+ vmx_vcpu_get_gr(vcpu,inst.M2.r2,&post_update);
+ temp += post_update;
+ vmx_vcpu_set_gr(vcpu,inst.M2.r3,temp,0);
+ }
+ // Integer Load/Store + Imm update
+ else if(inst.M3.major==5){
+ inst_type = SL_INTEGER; //
+ size=(inst.M3.x6&0x3);
+ if((inst.M5.x6>>2)>0xb){ // write
+ dir=IOREQ_WRITE; //write
+ vmx_vcpu_get_gr(vcpu,inst.M5.r2,&data);
+ vmx_vcpu_get_gr(vcpu,inst.M5.r3,&temp);
+ post_update = (inst.M5.i<<7)+inst.M5.imm7;
+ if(inst.M5.s)
+ temp -= post_update;
+ else
+ temp += post_update;
+ vmx_vcpu_set_gr(vcpu,inst.M5.r3,temp,0);
+
+ }else if((inst.M3.x6>>2)<0xb){ // read
+ dir=IOREQ_READ;
+ vmx_vcpu_get_gr(vcpu,inst.M3.r1,&value);
+ vmx_vcpu_get_gr(vcpu,inst.M3.r3,&temp);
+ post_update = (inst.M3.i<<7)+inst.M3.imm7;
+ if(inst.M3.s)
+ temp -= post_update;
+ else
+ temp += post_update;
+ vmx_vcpu_set_gr(vcpu,inst.M3.r3,temp,0);
+
+ }
+ }
+ // Floating-point Load/Store
+// else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){
+// inst_type=SL_FLOATING; //fp
+// dir=IOREQ_READ;
+// size=3; //ldfd
+// }
+ else{
+ printf("This memory access instruction can't be emulated two: %lx\n
",inst.inst);
+ while(1);
+ }
+
+ size = 1 << size;
+ if(dir==IOREQ_WRITE){
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ }else{
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ if(size==0)
+ data = (value & 0xffffffffffffff00U) | (data & 0xffU);
+ else if(size==1)
+ data = (value & 0xffffffffffff0000U) | (data & 0xffffU);
+ else if(size==2)
+ data = (value & 0xffffffff00000000U) | (data & 0xffffffffU);
+
+ if(inst_type==SL_INTEGER){ //gp
+ vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0);
+ }else{
+ panic("Don't support ldfd now !");
+/* switch(inst.M6.f1){
+
+ case 6:
+ regs->f6=(struct ia64_fpreg)data;
+ case 7:
+ regs->f7=(struct ia64_fpreg)data;
+ case 8:
+ regs->f8=(struct ia64_fpreg)data;
+ case 9:
+ regs->f9=(struct ia64_fpreg)data;
+ case 10:
+ regs->f10=(struct ia64_fpreg)data;
+ case 11:
+ regs->f11=(struct ia64_fpreg)data;
+ default :
+ ia64_ldfs(inst.M6.f1,&data);
+ }
+*/
+ }
+ }
+ vmx_vcpu_increment_iip(vcpu);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/pal_emul.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/pal_emul.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,280 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@xxxxxxxxx>
+ * Copyright (c) 2005 Yu Ke <ke.yu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <asm/vmx_vcpu.h>
+
+static void
+get_pal_parameters (VCPU *vcpu, UINT64 *gr29,
+ UINT64 *gr30, UINT64 *gr31) {
+
+ vmx_vcpu_get_gr(vcpu,29,gr29);
+ vmx_vcpu_get_gr(vcpu,30,gr30);
+ vmx_vcpu_get_gr(vcpu,31,gr31);
+}
+
+static void
+set_pal_result (VCPU *vcpu,struct ia64_pal_retval result) {
+
+ vmx_vcpu_set_gr(vcpu,8, result.status,0);
+ vmx_vcpu_set_gr(vcpu,9, result.v0,0);
+ vmx_vcpu_set_gr(vcpu,10, result.v1,0);
+ vmx_vcpu_set_gr(vcpu,11, result.v2,0);
+}
+
+
+static struct ia64_pal_retval
+pal_cache_flush (VCPU *vcpu) {
+ UINT64 gr28,gr29, gr30, gr31;
+ struct ia64_pal_retval result;
+
+ get_pal_parameters (vcpu, &gr29, &gr30, &gr31);
+ vmx_vcpu_get_gr(vcpu,28,&gr28);
+
+ /* Always call Host Pal in int=1 */
+ gr30 = gr30 &(~(0x2UL));
+
+ /* call Host PAL cache flush */
+ result=ia64_pal_call_static(gr28 ,gr29, gr30,gr31,1); // Clear psr.ic when call PAL_CACHE_FLUSH
+
+ /* If host PAL call is interrupted, then loop to complete it */
+// while (result.status == 1) {
+// ia64_pal_call_static(gr28 ,gr29, gr30,
+// result.v1,1LL);
+// }
+ while (result.status != 0) {
+ panic("PAL_CACHE_FLUSH ERROR, status %d", result.status);
+ }
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_vm_tr_read (VCPU *vcpu ) {
+#warning pal_vm_tr_read: to be implemented
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+
+static struct ia64_pal_retval
+pal_prefetch_visibility (VCPU *vcpu) {
+ /* Due to current MM virtualization algorithm,
+ * We do not allow guest to change mapping attribute.
+ * Thus we will not support PAL_PREFETCH_VISIBILITY
+ */
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_platform_addr(VCPU *vcpu) {
+ struct ia64_pal_retval result;
+
+ result.status= 0; //success
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_halt (VCPU *vcpu) {
+#warning pal_halt: to be implemented
+ //bugbug: to be implement.
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+
+static struct ia64_pal_retval
+pal_halt_light (VCPU *vcpu) {
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_read (VCPU *vcpu) {
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_write (VCPU *vcpu) {
+ struct ia64_pal_retval result;
+
+ result.status= -1; //unimplemented
+
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_bus_get_features(VCPU *vcpu){
+
+}
+
+static struct ia64_pal_retval
+pal_cache_summary(VCPU *vcpu){
+
+}
+
+static struct ia64_pal_retval
+pal_cache_init(VCPU *vcpu){
+ struct ia64_pal_retval result;
+ result.status=0;
+ return result;
+}
+
+static struct ia64_pal_retval
+pal_cache_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_cache_prot_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_cache_shared_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_mem_attrib(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_debug_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_fixed_addr(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_freq_base(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_freq_ratios(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_halt_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_logical_to_physica(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_perf_mon_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_proc_get_features(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_ptce_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_register_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_rse_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_test_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_vm_summary(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_vm_info(VCPU *vcpu){
+}
+
+static struct ia64_pal_retval
+pal_vm_page_size(VCPU *vcpu){
+}
+
+void
+pal_emul( VCPU *vcpu) {
+ UINT64 gr28;
+ struct ia64_pal_retval result;
+
+
+ vmx_vcpu_get_gr(vcpu,28,&gr28); //bank1
+
+ switch (gr28) {
+ case PAL_CACHE_FLUSH:
+ result = pal_cache_flush (vcpu);
+ break;
+
+ case PAL_PREFETCH_VISIBILITY:
+ result = pal_prefetch_visibility (vcpu);
+ break;
+
+ case PAL_VM_TR_READ:
+ result = pal_vm_tr_read (vcpu);
+ break;
+
+ case PAL_HALT:
+ result = pal_halt (vcpu);
+ break;
+
+ case PAL_HALT_LIGHT:
+ result = pal_halt_light (vcpu);
+ break;
+
+ case PAL_CACHE_READ:
+ result = pal_cache_read (vcpu);
+ break;
+
+ case PAL_CACHE_WRITE:
+ result = pal_cache_write (vcpu);
+ break;
+
+ case PAL_PLATFORM_ADDR:
+ result = pal_platform_addr (vcpu);
+ break;
+
+ default:
+ panic("pal_emul(): guest call unsupported pal" );
+ }
+ set_pal_result (vcpu, result);
+}
+
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vlsapic.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vlsapic.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,620 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vlsapic.c: virtual lsapic model including ITC timer.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+#define SHARED_VLAPIC_INF
+#ifdef V_IOSAPIC_READY
+static inline vl_apic_info* get_psapic(VCPU *vcpu)
+{
+ shared_iopage_t *sp = get_sp(vcpu->domain);
+ return &(sp->vcpu_iodata[vcpu->vcpu_id].apic_intr);
+}
+#endif
+//u64 fire_itc;
+//u64 fire_itc2;
+//u64 fire_itm;
+//u64 fire_itm2;
+/*
+ * Update the checked last_itc.
+ */
+static void update_last_itc(vtime_t *vtm, uint64_t cur_itc)
+{
+ vtm->last_itc = cur_itc;
+}
+
+/*
+ * ITC value saw in guest (host+offset+drift).
+ */
+static uint64_t now_itc(vtime_t *vtm)
+{
+ uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc();
+
+ if ( vtm->vtm_local_drift ) {
+// guest_itc -= vtm->vtm_local_drift;
+ }
+ if ( (long)(guest_itc - vtm->last_itc) > 0 ) {
+ return guest_itc;
+
+ }
+ else {
+ /* guest ITC went backward after an LP switch */
+ return vtm->last_itc;
+ }
+}
+
+/*
+ * Interval time components reset.
+ */
+static void vtm_reset(VCPU *vcpu)
+{
+ uint64_t cur_itc;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ vtm->vtm_offset = 0;
+ vtm->vtm_local_drift = 0;
+ VPD_CR(vcpu, itm) = 0;
+ VPD_CR(vcpu, itv) = 0x10000;
+ cur_itc = ia64_get_itc();
+ vtm->last_itc = vtm->vtm_offset + cur_itc;
+}
+
+/* callback function when vtm_timer expires */
+static void vtm_timer_fn(void *data)
+{
+ vtime_t *vtm;
+ VCPU *vcpu = data;
+ u64 cur_itc,vitm;
+
+ UINT64 vec;
+
+ vec = VPD_CR(vcpu, itv) & 0xff;
+ vmx_vcpu_pend_interrupt(vcpu, vec);
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ cur_itc = now_itc(vtm);
+ vitm =VPD_CR(vcpu, itm);
+ //fire_itc2 = cur_itc;
+ //fire_itm2 = vitm;
+ update_last_itc(vtm,cur_itc); // pseudo read to update vITC
+}
+
+void vtm_init(VCPU *vcpu)
+{
+ vtime_t *vtm;
+ uint64_t itc_freq;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+
+ itc_freq = local_cpu_data->itc_freq;
+ vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
+ vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
+ init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
+ vtm_reset(vcpu);
+}
+
+/*
+ * Action when guest read ITC.
+ */
+uint64_t vtm_get_itc(VCPU *vcpu)
+{
+ uint64_t guest_itc, spsr;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ // FIXME: should use local_irq_disable & local_irq_enable ??
+ local_irq_save(spsr);
+ guest_itc = now_itc(vtm);
+// update_last_itc(vtm, guest_itc);
+
+ local_irq_restore(spsr);
+ return guest_itc;
+}
+
+void vtm_set_itc(VCPU *vcpu, uint64_t new_itc)
+{
+ uint64_t spsr;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ local_irq_save(spsr);
+ vtm->vtm_offset = new_itc - ia64_get_itc();
+ vtm->last_itc = new_itc;
+ vtm_interruption_update(vcpu, vtm);
+ local_irq_restore(spsr);
+}
+
+void vtm_set_itv(VCPU *vcpu)
+{
+ uint64_t spsr,itv;
+ vtime_t *vtm;
+
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ local_irq_save(spsr);
+ itv = VPD_CR(vcpu, itv);
+ if ( ITV_IRQ_MASK(itv) )
+ rem_ac_timer(&vtm->vtm_timer);
+ vtm_interruption_update(vcpu, vtm);
+ local_irq_restore(spsr);
+}
+
+
+/*
+ * Update interrupt or hook the vtm ac_timer for fire
+ * At this point vtm_timer should be removed if itv is masked.
+ */
+/* Interrupt must be disabled at this point */
+
+extern u64 tick_to_ns(u64 tick);
+#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */
+void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
+{
+ uint64_t cur_itc,vitm,vitv;
+ uint64_t expires;
+ long diff_now, diff_last;
+ uint64_t spsr;
+
+ vitv = VPD_CR(vcpu, itv);
+ if ( ITV_IRQ_MASK(vitv) ) {
+ return;
+ }
+
+ vitm =VPD_CR(vcpu, itm);
+ local_irq_save(spsr);
+ cur_itc =now_itc(vtm);
+ diff_last = vtm->last_itc - vitm;
+ diff_now = cur_itc - vitm;
+ update_last_itc (vtm,cur_itc);
+
+ if ( diff_last >= 0 ) {
+ // interrupt already fired.
+ rem_ac_timer(&vtm->vtm_timer);
+ }
+ else if ( diff_now >= 0 ) {
+ // ITV is fired.
+ vmx_vcpu_pend_interrupt(vcpu, vitv&0xff);
+ }
+ /* Both last_itc & cur_itc < itm, wait for fire condition */
+ else {
+ expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP;
+ set_ac_timer(&vtm->vtm_timer, expires);
+ }
+ local_irq_restore(spsr);
+}
+
+/*
+ * Action for vtm when the domain is scheduled out.
+ * Remove the ac_timer for vtm.
+ */
+void vtm_domain_out(VCPU *vcpu)
+{
+ if(!is_idle_task(vcpu->domain))
+ rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
+}
+
+/*
+ * Action for vtm when the domain is scheduled in.
+ * Fire vtm IRQ or add the ac_timer for vtm.
+ */
+void vtm_domain_in(VCPU *vcpu)
+{
+ vtime_t *vtm;
+
+ if(!is_idle_task(vcpu->domain)) {
+ vtm=&(vcpu->arch.arch_vmx.vtm);
+ vtm_interruption_update(vcpu, vtm);
+ }
+}
+
+/*
+ * Next for vLSapic
+ */
+
+#define NMI_VECTOR 2
+#define ExtINT_VECTOR 0
+#define NULL_VECTOR -1
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i])
+static void update_vhpi(VCPU *vcpu, int vec)
+{
+ u64 vhpi;
+ if ( vec == NULL_VECTOR ) {
+ vhpi = 0;
+ }
+ else if ( vec == NMI_VECTOR ) { // NMI
+ vhpi = 32;
+ } else if (vec == ExtINT_VECTOR) { //ExtINT
+ vhpi = 16;
+ }
+ else {
+ vhpi = vec / 16;
+ }
+
+ VMX_VPD(vcpu,vhpi) = vhpi;
+ // TODO: Add support for XENO
+ if ( VMX_VPD(vcpu,vac).a_int ) {
+ ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT,
+ (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
+ }
+}
+
+#ifdef V_IOSAPIC_READY
+void vlapic_update_shared_info(VCPU *vcpu)
+{
+ //int i;
+
+ vl_apic_info *ps;
+
+ if (vcpu->domain == dom0)
+ return;
+
+ ps = get_psapic(vcpu);
+ ps->vl_lapic_id = ((VPD_CR(vcpu, lid) >> 16) & 0xffff) << 16;
+ printf("vl_lapic_id = %x\n", ps->vl_lapic_id);
+ ps->vl_apr = 0;
+ // skip ps->vl_logical_dest && ps->vl_dest_format
+ // IPF support physical destination mode only
+ ps->vl_arb_id = 0;
+ /*
+ for ( i=0; i<4; i++ ) {
+ ps->tmr[i] = 0; // edge trigger
+ }
+ */
+}
+
+void vlapic_update_ext_irq(VCPU *vcpu)
+{
+ int vec;
+
+ vl_apic_info *ps = get_psapic(vcpu);
+ while ( (vec = highest_bits(ps->irr)) != NULL_VECTOR ) {
+ clear_bit (vec, ps->irr);
+ vmx_vcpu_pend_interrupt(vcpu, vec);
+ }
+}
+#endif
+
+void vlsapic_reset(VCPU *vcpu)
+{
+ int i;
+#ifdef V_IOSAPIC_READY
+ vl_apic_info *psapic; // shared lapic inf.
+#endif
+
+ VPD_CR(vcpu, lid) = ia64_getreg(_IA64_REG_CR_LID);
+ VPD_CR(vcpu, ivr) = 0;
+ VPD_CR(vcpu,tpr) = 0x10000;
+ VPD_CR(vcpu, eoi) = 0;
+ VPD_CR(vcpu, irr[0]) = 0;
+ VPD_CR(vcpu, irr[1]) = 0;
+ VPD_CR(vcpu, irr[2]) = 0;
+ VPD_CR(vcpu, irr[3]) = 0;
+ VPD_CR(vcpu, pmv) = 0x10000;
+ VPD_CR(vcpu, cmcv) = 0x10000;
+ VPD_CR(vcpu, lrr0) = 0x10000; // default reset value?
+ VPD_CR(vcpu, lrr1) = 0x10000; // default reset value?
+ update_vhpi(vcpu, NULL_VECTOR);
+ for ( i=0; i<4; i++) {
+ VLSAPIC_INSVC(vcpu,i) = 0;
+ }
+#ifdef V_IOSAPIC_READY
+ vlapic_update_shared_info(vcpu);
+ //vlapic_update_shared_irr(vcpu);
+#endif
+ DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) );
+}
+
+/*
+ * Find highest signaled bits in 4 words (long).
+ *
+ * return 0-255: highest bits.
+ * -1 : Not found.
+ */
+static __inline__ int highest_bits(uint64_t *dat)
+{
+ uint64_t bits, bitnum;
+ int i;
+
+ /* loop for all 256 bits */
+ for ( i=3; i >= 0 ; i -- ) {
+ bits = dat[i];
+ if ( bits ) {
+ bitnum = ia64_fls(bits);
+ return i*64+bitnum;
+ }
+ }
+ return NULL_VECTOR;
+}
+
+/*
+ * Return 0-255 for pending irq.
+ * NULL_VECTOR: when no pending.
+ */
+static int highest_pending_irq(VCPU *vcpu)
+{
+ if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+ if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+ return highest_bits(&VPD_CR(vcpu, irr[0]));
+}
+
+static int highest_inservice_irq(VCPU *vcpu)
+{
+ if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
+ if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
+ return highest_bits(&(VLSAPIC_INSVC(vcpu, 0)));
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static int is_higher_irq(int pending, int inservice)
+{
+ return ( (pending >> 4) > (inservice>>4) ||
+ ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) );
+}
+
+static int is_higher_class(int pending, int mic)
+{
+ return ( (pending >> 4) > mic );
+}
+
+static int is_invalid_irq(int vec)
+{
+ return (vec == 1 || ((vec <= 14 && vec >= 3)));
+}
+
+#define IRQ_NO_MASKED 0
+#define IRQ_MASKED_BY_VTPR 1
+#define IRQ_MASKED_BY_INSVC 2 // masked by inservice IRQ
+
+/* See Table 5-8 in SDM vol2 for the definition */
+static int
+_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+ tpr_t vtpr;
+ uint64_t mmi;
+
+ vtpr.val = VPD_CR(vcpu, tpr);
+
+ if ( h_inservice == NMI_VECTOR ) {
+ return IRQ_MASKED_BY_INSVC;
+ }
+ if ( h_pending == NMI_VECTOR ) {
+ // Non Maskable Interrupt
+ return IRQ_NO_MASKED;
+ }
+ if ( h_inservice == ExtINT_VECTOR ) {
+ return IRQ_MASKED_BY_INSVC;
+ }
+ mmi = vtpr.mmi;
+ if ( h_pending == ExtINT_VECTOR ) {
+ if ( mmi ) {
+ // mask all external IRQ
+ return IRQ_MASKED_BY_VTPR;
+ }
+ else {
+ return IRQ_NO_MASKED;
+ }
+ }
+
+ if ( is_higher_irq(h_pending, h_inservice) ) {
+ if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) {
+ return IRQ_NO_MASKED;
+ }
+ else {
+ return IRQ_MASKED_BY_VTPR;
+ }
+ }
+ else {
+ return IRQ_MASKED_BY_INSVC;
+ }
+}
+
+static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
+{
+ int mask;
+
+ mask = _xirq_masked(vcpu, h_pending, h_inservice);
+ return mask;
+}
+
+
+/*
+ * May come from virtualization fault or
+ * nested host interrupt.
+ */
+void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+{
+ uint64_t spsr;
+
+ if (vector & ~0xff) {
+ DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n");
+ return;
+ }
+ local_irq_save(spsr);
+ VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63);
+ //vlapic_update_shared_irr(vcpu);
+ local_irq_restore(spsr);
+ vcpu->arch.irq_new_pending = 1;
+}
+
+/*
+ * Add batch of pending interrupt.
+ * The interrupt source is contained in pend_irr[0-3] with
+ * each bits stand for one interrupt.
+ */
+void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr)
+{
+ uint64_t spsr;
+ int i;
+
+ local_irq_save(spsr);
+ for (i=0 ; i<4; i++ ) {
+ VPD_CR(vcpu,irr[i]) |= pend_irr[i];
+ }
+ //vlapic_update_shared_irr(vcpu);
+ local_irq_restore(spsr);
+ vcpu->arch.irq_new_pending = 1;
+}
+
+/*
+ * If the new pending interrupt is enabled and not masked, we directly inject
+ * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when
+ * the interrupt becomes unmasked, it gets injected.
+ * RETURN:
+ * TRUE: Interrupt is injected.
+ * FALSE: Not injected but may be in VHPI when vac.a_int=1
+ *
+ * Optimization: We defer setting the VHPI until the EOI time, if a higher
+ * priority interrupt is in-service. The idea is to reduce the
+ * number of unnecessary calls to inject_vhpi.
+ */
+int vmx_check_pending_irq(VCPU *vcpu)
+{
+ uint64_t spsr, mask;
+ int h_pending, h_inservice;
+ int injected=0;
+ uint64_t isr;
+ IA64_PSR vpsr;
+
+ local_irq_save(spsr);
+ h_pending = highest_pending_irq(vcpu);
+ if ( h_pending == NULL_VECTOR ) goto chk_irq_exit;
+ h_inservice = highest_inservice_irq(vcpu);
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ mask = irq_masked(vcpu, h_pending, h_inservice);
+ if ( vpsr.i && IRQ_NO_MASKED == mask ) {
+ isr = vpsr.val & IA64_PSR_RI;
+ if ( !vpsr.ic )
+ panic("Interrupt when IC=0\n");
+ vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+ injected = 1;
+ }
+ else if ( mask == IRQ_MASKED_BY_INSVC ) {
+ // can't inject VHPI
+// DPRINTK("IRQ masked by higher inservice\n");
+ }
+ else {
+ // masked by vpsr.i or vtpr.
+ update_vhpi(vcpu,h_pending);
+ }
+
+chk_irq_exit:
+ local_irq_restore(spsr);
+ return injected;
+}
+
+/*
+ * Only coming from virtualization fault.
+ */
+void guest_write_eoi(VCPU *vcpu)
+{
+ int vec;
+ uint64_t spsr;
+
+ vec = highest_inservice_irq(vcpu);
+ if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
+ local_irq_save(spsr);
+ VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
+ local_irq_restore(spsr);
+ VPD_CR(vcpu, eoi)=0; // overwrite the data
+ vmx_check_pending_irq(vcpu);
+}
+
+uint64_t guest_read_vivr(VCPU *vcpu)
+{
+ int vec, next, h_inservice;
+ uint64_t spsr;
+
+ local_irq_save(spsr);
+ vec = highest_pending_irq(vcpu);
+ h_inservice = highest_inservice_irq(vcpu);
+ if ( vec == NULL_VECTOR ||
+ irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) {
+ local_irq_restore(spsr);
+ return IA64_SPURIOUS_INT_VECTOR;
+ }
+
+ VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
+ VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
+ update_vhpi(vcpu, NULL_VECTOR); // clear VHPI till EOI or IRR write
+ //vlapic_update_shared_irr(vcpu);
+ local_irq_restore(spsr);
+ return (uint64_t)vec;
+}
+
+static void generate_exirq(VCPU *vcpu)
+{
+ IA64_PSR vpsr;
+ uint64_t isr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ update_vhpi(vcpu, NULL_VECTOR);
+ isr = vpsr.val & IA64_PSR_RI;
+ if ( !vpsr.ic )
+ panic("Interrupt when IC=0\n");
+ vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
+}
+
+vhpi_detection(VCPU *vcpu)
+{
+ uint64_t threshold,vhpi;
+ tpr_t vtpr;
+ IA64_PSR vpsr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ vtpr.val = VPD_CR(vcpu, tpr);
+
+ threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+ vhpi = VMX_VPD(vcpu,vhpi);
+ if ( vhpi > threshold ) {
+ // interrupt actived
+ generate_exirq (vcpu);
+ }
+}
+
+vmx_vexirq(VCPU *vcpu)
+{
+ static uint64_t vexirq_count=0;
+
+ vexirq_count ++;
+ printk("Virtual ex-irq %ld\n", vexirq_count);
+ generate_exirq (vcpu);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmmu.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmmu.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,846 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.c: virtual memory management unit components.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/tlb.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vcpu.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+/*
+ * Architecture ppn is in 4KB unit while XEN
+ * page may be different(1<<PAGE_SHIFT).
+ */
+static inline u64 arch_ppn_to_xen_ppn(u64 appn)
+{
+ return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT;
+}
+
+static inline u64 xen_ppn_to_arch_ppn(u64 xppn)
+{
+ return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT;
+}
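
Editor's worked example (not part of the patch), assuming a 16KB Xen page (PAGE_SHIFT = 14) against the 4KB architectural unit (ARCH_PAGE_SHIFT = 12): arch_ppn_to_xen_ppn(0x1003) = (0x1003 << 12) >> 14 = 0x400 and xen_ppn_to_arch_ppn(0x400) = (0x400 << 14) >> 12 = 0x1000, so four architectural ppns collapse onto one Xen ppn and the reverse conversion loses the low two bits; get_mfn() below re-attaches the low (PAGE_SHIFT - 12) bits of the guest pfn so the value it returns is again in 4KB units.
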
+
+
+/*
+ * Get the machine page frame number in 16KB unit
+ * Input:
+ * d:
+ */
+u64 get_mfn(domid_t domid, u64 gpfn, u64 pages)
+{
+ struct domain *d;
+ u64 i, xen_gppn, xen_mppn, mpfn;
+
+ if ( domid == DOMID_SELF ) {
+ d = current->domain;
+ }
+ else {
+ d = find_domain_by_id(domid);
+ }
+ xen_gppn = arch_ppn_to_xen_ppn(gpfn);
+ xen_mppn = __gpfn_to_mfn(d, xen_gppn);
+/*
+ for (i=0; i<pages; i++) {
+ if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) {
+ return INVALID_MFN;
+ }
+ }
+*/
+ mpfn= xen_ppn_to_arch_ppn(xen_mppn);
+ mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn);
+ return mpfn;
+
+}
+
+/*
+ * The VRN bits of va stand for which rr to get.
+ */
+ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va)
+{
+ ia64_rr vrr;
+ vmx_vcpu_get_rr(vcpu, va, &vrr.rrval);
+ return vrr;
+}
+
+
+void recycle_message(thash_cb_t *hcb, u64 para)
+{
+ printk("hcb=%p recycled with %lx\n",hcb,para);
+}
+
+
+/*
+ * Purge all guest TCs in logical processor.
+ * Instead of purging all LP TCs, we should only purge
+ * TCs that belong to this guest.
+ */
+void
+purge_machine_tc_by_domid(domid_t domid)
+{
+#ifndef PURGE_GUEST_TC_ONLY
+ // purge all TCs
+ struct ia64_pal_retval result;
+ u64 addr;
+ u32 count1,count2;
+ u32 stride1,stride2;
+ u32 i,j;
+ u64 psr;
+
+
+ result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0);
+ if ( result.status != 0 ) {
+ panic ("PAL_PTCE_INFO failed\n");
+ }
+ addr = result.v0;
+ count1 = HIGH_32BITS(result.v1);
+ count2 = LOW_32BITS (result.v1);
+ stride1 = HIGH_32BITS(result.v2);
+ stride2 = LOW_32BITS (result.v2);
+
+ local_irq_save(psr);
+ for (i=0; i<count1; i++) {
+ for (j=0; j<count2; j++) {
+ ia64_ptce(addr);
+ addr += stride2;
+ }
+ addr += stride1;
+ }
+ local_irq_restore(psr);
+#else
+ // purge all TCs belong to this guest.
+#endif
+}
+
+static thash_cb_t *init_domain_vhpt(struct vcpu *d)
+{
+ struct pfn_info *page;
+ void *vbase,*vcur;
+ vhpt_special *vs;
+ thash_cb_t *vhpt;
+ PTA pta_value;
+
+ page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+ if ( page == NULL ) {
+ panic("No enough contiguous memory for init_domain_mm\n");
+ }
+ vbase = page_to_virt(page);
+ printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase);
+ memset(vbase, 0, VCPU_TLB_SIZE);
+ vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+ vhpt = --((thash_cb_t*)vcur);
+ vhpt->ht = THASH_VHPT;
+ vhpt->vcpu = d;
+ vhpt->hash_func = machine_thash;
+ vs = --((vhpt_special *)vcur);
+
+ /* Setup guest pta */
+ pta_value.val = 0;
+ pta_value.ve = 1;
+ pta_value.vf = 1;
+ pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */
+ pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT;
+ d->arch.arch_vmx.mpta = pta_value.val;
+
+ vhpt->vs = vs;
+ vhpt->vs->get_mfn = get_mfn;
+ vhpt->vs->tag_func = machine_ttag;
+ vhpt->hash = vbase;
+ vhpt->hash_sz = VCPU_TLB_SIZE/2;
+ vhpt->cch_buf = (u64)vbase + vhpt->hash_sz;
+ vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf;
+ vhpt->recycle_notifier = recycle_message;
+ thash_init(vhpt,VCPU_TLB_SHIFT-1);
+ return vhpt;
+}
+
+
+thash_cb_t *init_domain_tlb(struct vcpu *d)
+{
+ struct pfn_info *page;
+ void *vbase,*vcur;
+ tlb_special_t *ts;
+ thash_cb_t *tlb;
+
+ page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+ if ( page == NULL ) {
+ panic("No enough contiguous memory for init_domain_mm\n");
+ }
+ vbase = page_to_virt(page);
+ printk("Allocate domain tlb at 0x%lx\n", (u64)vbase);
+ memset(vbase, 0, VCPU_TLB_SIZE);
+ vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
+ tlb = --((thash_cb_t*)vcur);
+ tlb->ht = THASH_TLB;
+ tlb->vcpu = d;
+ ts = --((tlb_special_t *)vcur);
+ tlb->ts = ts;
+ tlb->ts->vhpt = init_domain_vhpt(d);
+ tlb->hash_func = machine_thash;
+ tlb->hash = vbase;
+ tlb->hash_sz = VCPU_TLB_SIZE/2;
+ tlb->cch_buf = (u64)vbase + tlb->hash_sz;
+ tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf;
+ tlb->recycle_notifier = recycle_message;
+ thash_init(tlb,VCPU_TLB_SHIFT-1);
+ return tlb;
+}
+
+/* Allocate physical to machine mapping table for domN
+ * FIXME: Later this interface may be removed, if that table is provided
+ * by control panel. Dom0 has gpfn identical to mfn, which doesn't need
+ * this interface at all.
+ */
+void
+alloc_pmt(struct domain *d)
+{
+ struct pfn_info *page;
+
+ /* Only called once */
+ ASSERT(d->arch.pmt);
+
+ page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0);
+ ASSERT(page);
+
+ d->arch.pmt = page_to_virt(page);
+ memset(d->arch.pmt, 0x55, d->max_pages * 8);
+}
+
+/*
+ * Insert guest TLB to machine TLB.
+ * data: In TLB format
+ */
+void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb)
+{
+ u64 saved_itir, saved_ifa, saved_rr;
+ u64 pages;
+ thash_data_t mtlb;
+ ia64_rr vrr;
+ unsigned int cl = tlb->cl;
+
+ mtlb.ifa = tlb->vadr;
+ mtlb.itir = tlb->itir & ~ITIR_RV_MASK;
+ vrr = vmmu_get_rr(d,mtlb.ifa);
+ //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value);
+ pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+ mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+ mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages);
+ if (mtlb.ppn == INVALID_MFN)
+ panic("Machine tlb insert with invalid mfn number.\n");
+
+ __asm __volatile("rsm psr.ic|psr.i;; srlz.i" );
+
+ saved_itir = ia64_getreg(_IA64_REG_CR_ITIR);
+ saved_ifa = ia64_getreg(_IA64_REG_CR_IFA);
+ saved_rr = ia64_get_rr(mtlb.ifa);
+
+ ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir);
+ ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa);
+ /* Only access memory stack which is mapped by TR,
+ * after rr is switched.
+ */
+ ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval));
+ ia64_srlz_d();
+ if ( cl == ISIDE_TLB ) {
+ ia64_itci(mtlb.page_flags);
+ ia64_srlz_i();
+ }
+ else {
+ ia64_itcd(mtlb.page_flags);
+ ia64_srlz_d();
+ }
+ ia64_set_rr(mtlb.ifa,saved_rr);
+ ia64_srlz_d();
+ ia64_setreg(_IA64_REG_CR_IFA, saved_ifa);
+ ia64_setreg(_IA64_REG_CR_ITIR, saved_itir);
+ __asm __volatile("ssm psr.ic|psr.i;; srlz.i" );
+}
+
+u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps)
+{
+ u64 saved_pta, saved_rr0;
+ u64 hash_addr, tag;
+ unsigned long psr;
+ struct vcpu *v = current;
+ ia64_rr vrr;
+
+
+ saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+ saved_rr0 = ia64_get_rr(0);
+ vrr.rrval = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ // TODO: Set to enforce lazy mode
+ local_irq_save(psr);
+ ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
+ ia64_srlz_d();
+
+ hash_addr = ia64_thash(va);
+ ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+ ia64_set_rr(0, saved_rr0);
+ ia64_srlz_d();
+ local_irq_restore(psr);
+ return hash_addr;
+}
+
+u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps)
+{
+ u64 saved_pta, saved_rr0;
+ u64 hash_addr, tag;
+ u64 psr;
+ struct vcpu *v = current;
+ ia64_rr vrr;
+
+ // TODO: Set to enforce lazy mode
+ saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
+ saved_rr0 = ia64_get_rr(0);
+ vrr.rrval = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ local_irq_save(psr);
+ ia64_setreg(_IA64_REG_CR_PTA, pta.val);
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
+ ia64_srlz_d();
+
+ tag = ia64_ttag(va);
+ ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
+
+ ia64_set_rr(0, saved_rr0);
+ ia64_srlz_d();
+ local_irq_restore(psr);
+ return tag;
+}
+
+/*
+ * Purge machine tlb.
+ * INPUT
+ * rr: guest rr.
+ * va: only bits 0:60 is valid
+ * size: bits format (1<<size) for the address range to purge.
+ *
+ */
+void machine_tlb_purge(u64 rid, u64 va, u64 ps)
+{
+ u64 saved_rr0;
+ u64 psr;
+ ia64_rr vrr;
+
+ va = (va << 3) >> 3; // set VRN to 0.
+ saved_rr0 = ia64_get_rr(0);
+ vrr.rrval = saved_rr0;
+ vrr.rid = rid;
+ vrr.ps = ps;
+ local_irq_save(psr);
+ ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) );
+ ia64_srlz_d();
+ ia64_ptcl(va, ps << 2);
+ ia64_set_rr( 0, saved_rr0 );
+ ia64_srlz_d();
+ local_irq_restore(psr);
+}
+
+
+int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
+{
+ ia64_rr vrr;
+ PTA vpta;
+ IA64_PSR vpsr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ vrr = vmx_vcpu_rr(vcpu, vadr);
+ vmx_vcpu_get_pta(vcpu,&vpta.val);
+
+ if ( vrr.ve & vpta.ve ) {
+ switch ( ref ) {
+ case DATA_REF:
+ case NA_REF:
+ return vpsr.dt;
+ case INST_REF:
+ return vpsr.dt && vpsr.it && vpsr.ic;
+ case RSE_REF:
+ return vpsr.dt && vpsr.rt;
+
+ }
+ }
+ return 0;
+}
+
+
+int unimplemented_gva(VCPU *vcpu,u64 vadr)
+{
+ int bit=vcpu->domain->arch.imp_va_msb;
+ u64 ladr =(vadr<<3)>>(3+bit);
+ if(!ladr||ladr==(1U<<(61-bit))-1){
+ return 0;
+ }else{
+ return 1;
+ }
+}
+
+
+/*
+ * Prefetch guest bundle code.
+ * INPUT:
+ * code: buffer pointer to hold the read data.
+ * num: number of dwords (8 bytes each) to read.
+ */
+int
+fetch_code(VCPU *vcpu, u64 gip, u64 *code)
+{
+ u64 gpip; // guest physical IP
+ u64 mpa;
+ thash_data_t *tlb;
+ ia64_rr vrr;
+ u64 mfn;
+
+ if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode
+ gpip = gip;
+ }
+ else {
+ vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval);
+ tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu),
+ vrr.rid, gip, ISIDE_TLB );
+ if ( tlb == NULL ) panic("No entry found in ITLB\n");
+ gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) );
+ }
+ mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
+ if ( mfn == INVALID_MFN ) return 0;
+
+ mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT);
+ *code = *(u64*)__va(mpa);
+ return 1;
+}
+
+IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ ia64_rr vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.tc = 1;
+ data.cl=ISIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+
+ sections.tr = 1;
+ sections.tc = 0;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ while (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return;
+ }
+ thash_purge_and_insert(hcb, &data);
+ return IA64_NO_FAULT;
+}
+
+
+
+
+IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ ia64_rr vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.tc = 1;
+ data.cl=DSIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.tr = 1;
+ sections.tc = 0;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return;
+ }
+ thash_purge_and_insert(hcb, &data);
+ return IA64_NO_FAULT;
+}
+
+/*
+ * Return TRUE/FALSE for success of lock operation
+ */
+int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock)
+{
+
+ thash_cb_t *hcb;
+ ia64_rr vrr;
+ u64 preferred_size;
+
+ vmx_vcpu_get_rr(vcpu, va, &vrr);
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ va = PAGEALIGN(va,vrr.ps);
+ preferred_size = PSIZE(vrr.ps);
+ return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock);
+}
+
+IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ ia64_rr vrr;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.tc = 0;
+ data.cl=ISIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.tr = 1;
+ sections.tc = 0;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+ if (ovl) {
+ // generate MCA.
+ panic("Tlb conflict!!");
+ return;
+ }
+ sections.tr = 0;
+ sections.tc = 1;
+ thash_purge_entries(hcb, &data, sections);
+ thash_tr_insert(hcb, &data, ifa, idx);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
+{
+
+ thash_data_t data, *ovl;
+ thash_cb_t *hcb;
+ search_section_t sections;
+ ia64_rr vrr;
+
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
+ data.itir=itir;
+ data.vadr=PAGEALIGN(ifa,data.ps);
+ data.tc = 0;
+ data.cl=DSIDE_TLB;
+ vmx_vcpu_get_rr(vcpu, ifa, &vrr);
+ data.rid = vrr.rid;
+ sections.tr = 1;
+ sections.tc = 0;
+
+ ovl = thash_find_overlap(hcb, &data, sections);
+    if (ovl) {
+        // generate MCA.
+        panic("Tlb conflict!!");
+        return IA64_FAULT;    // not reached: panic() does not return
+ }
+ sections.tr = 0;
+ sections.tc = 1;
+ thash_purge_entries(hcb, &data, sections);
+ thash_tr_insert(hcb, &data, ifa, idx);
+ return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ search_section_t sections;
+
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ sections.tr = 1;
+ sections.tc = 1;
+ thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ search_section_t sections;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ sections.tr = 1;
+ sections.tc = 1;
+ thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+ thash_cb_t *hcb;
+ ia64_rr vrr;
+ search_section_t sections;
+ thash_data_t data, *ovl;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ vrr=vmx_vcpu_rr(vcpu,vadr);
+ sections.tr = 0;
+ sections.tc = 1;
+ vadr = PAGEALIGN(vadr, ps);
+
+ thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB);
+ thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
+{
+ thash_cb_t *hcb;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ thash_purge_all(hcb);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps)
+{
+ vmx_vcpu_ptc_l(vcpu, vadr, ps);
+ return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps)
+{
+ vmx_vcpu_ptc_l(vcpu, vadr, ps);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+ PTA vpta;
+ ia64_rr vrr;
+ u64 vhpt_offset,tmp;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ vrr=vmx_vcpu_rr(vcpu, vadr);
+ if(vpta.vf){
+ panic("THASH,Don't support long format VHPT");
+ *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0);
+ }else{
+ vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1);
+ *pval = (vadr&VRN_MASK)|
+ (vpta.val<<3>>(vpta.size+3)<<(vpta.size))|
+ vhpt_offset;
+ }
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+ ia64_rr vrr;
+ PTA vpta;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ vrr=vmx_vcpu_rr(vcpu, vadr);
+ if(vpta.vf){
+ panic("THASH,Don't support long format VHPT");
+ *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0);
+ }else{
+ *pval = 1;
+ }
+ return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+ thash_data_t *data;
+ thash_cb_t *hcb;
+ ia64_rr vrr;
+ ISR visr,pt_isr;
+ REGS *regs;
+ u64 vhpt_adr;
+ IA64_PSR vpsr;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ vrr=vmx_vcpu_rr(vcpu,vadr);
+ regs=vcpu_regs(vcpu);
+ pt_isr.val=regs->cr_isr;
+ visr.val=0;
+ visr.ei=pt_isr.ei;
+ visr.ir=pt_isr.ir;
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if(vpsr.ic==0){
+ visr.ni=1;
+ }
+ visr.na=1;
+ data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB);
+ if(data){
+ if(data->p==0){
+ visr.na=1;
+ vmx_vcpu_set_isr(vcpu,visr.val);
+ page_not_present(vcpu, vadr);
+ return IA64_FAULT;
+ }else if(data->ma == VA_MATTR_NATPAGE){
+ visr.na = 1;
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dnat_page_consumption(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1));
+ return IA64_NO_FAULT;
+ }
+ }else{
+ if(!vhpt_enabled(vcpu, vadr, NA_REF)){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ alt_dtlb(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dtlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ else{
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, visr.val);
+ dvhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }
+ }
+}
+
+IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+{
+ thash_data_t *data;
+ thash_cb_t *hcb;
+ ia64_rr rr;
+ PTA vpta;
+ vmx_vcpu_get_pta(vcpu, &vpta.val);
+ if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){
+ *key=1;
+ return IA64_NO_FAULT;
+ }
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr=vmx_vcpu_rr(vcpu,vadr);
+ data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB);
+ if(!data||!data->p){
+ *key=1;
+ }else{
+ *key=data->key;
+ }
+ return IA64_NO_FAULT;
+}
+
+/*
+ * [FIXME] Is there an effective way to move this routine into
+ * vmx_uaccess.h?  struct exec_domain is an incomplete type there...
+ *
+ * This is the interface to look up the virtual TLB and return the
+ * corresponding machine address in the 2nd parameter.  The 3rd
+ * parameter returns how many bytes are mapped by the matched vTLB
+ * entry, so the caller can copy more than once if needed.
+ *
+ * If the lookup fails, -EFAULT is returned; otherwise 0 is returned.
+ * All upper domain-access utilities rely on this routine to determine
+ * the real machine address.
+ *
+ * Yes, put_user and get_user are somewhat slowed down by it, but this
+ * step is necessary for any vmx domain virtual address, since that is
+ * a different address space from the HV's.  A short-circuit may be
+ * added later for special cases.
+ */
+long
+__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len)
+{
+ unsigned long mpfn, gpfn, m, n = *len;
+ thash_cb_t *vtlb;
+ unsigned long end; /* end of the area mapped by current entry */
+ thash_data_t *entry;
+ struct vcpu *v = current;
+ ia64_rr vrr;
+
+ vtlb = vmx_vcpu_get_vtlb(v);
+ vrr = vmx_vcpu_rr(v, va);
+ entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB);
+ if (entry == NULL)
+ return -EFAULT;
+
+ gpfn =(entry->ppn>>(PAGE_SHIFT-12));
+ gpfn =PAGEALIGN(gpfn,(entry->ps-PAGE_SHIFT));
+ gpfn = gpfn | POFFSET(va>>PAGE_SHIFT,(entry->ps-PAGE_SHIFT));
+
+ mpfn = __gpfn_to_mfn(v->domain, gpfn);
+ m = (mpfn<<PAGE_SHIFT) | (va & (PAGE_SIZE - 1));
+ /* machine address may be not continuous */
+ end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE;
+ /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/
+ /* Current entry can't map all requested area */
+ if ((m + n) > end)
+ n = end - m;
+
+ *ma = m;
+ *len = n;
+ return 0;
+}
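
A hedged sketch of the calling pattern the comment above describes: because *len is clipped to the end of the area mapped by the matching vTLB entry, a domain-access helper has to loop and re-translate after each chunk. The helper name and the memcpy-based copy are illustrative assumptions, not code from this patch.

/* Illustrative only: copy 'size' bytes from a guest virtual address
 * into a hypervisor buffer, one mapped chunk at a time. */
static long copy_from_domain_va(void *to, unsigned long va, unsigned long size)
{
    unsigned long ma, len;

    while (size > 0) {
        len = size;
        if (__domain_va_to_ma(va, &ma, &len))
            return -EFAULT;              /* no vTLB entry for this va */
        memcpy(to, __va(ma), len);       /* len never crosses the chunk end */
        to    = (char *)to + len;
        va   += len;
        size -= len;
    }
    return 0;
}
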
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_entry.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_entry.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,611 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_entry.S:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
+ */
+
+#ifndef VCPU_TLB_SHIFT
+#define VCPU_TLB_SHIFT 22
+#endif
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#include "vmx_minstate.h"
+
+/*
+ * prev_task <- vmx_ia64_switch_to(struct task_struct *next)
+ * With Ingo's new scheduler, interrupts are disabled when this routine gets
+ * called. The code starting at .map relies on this. The rest of the code
+ * doesn't care about the interrupt masking status.
+ *
+ * Since we allocate domain stack in xenheap, there's no need to map new
+ * domain's stack since all xenheap is mapped by TR. Another different task
+ * for vmx_ia64_switch_to is to switch to bank0 and change current pointer.
+ */
+GLOBAL_ENTRY(vmx_ia64_switch_to)
+ .prologue
+ alloc r16=ar.pfs,1,0,0,0
+ DO_SAVE_SWITCH_STACK
+ .body
+
+ bsw.0 // Switch to bank0, because bank0 r21 is current pointer
+ ;;
+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+ movl r25=init_task
+ adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ ;;
+ /*
+ * TR always mapped this task's page, we can skip doing it again.
+ */
+ ld8 sp=[r26] // load kernel stack pointer of new task
+ mov r21=in0 // update "current" application register
+    mov r8=r13          // return pointer to previously running task
+ mov r13=in0 // set "current" pointer
+ ;;
+ bsw.1
+ ;;
+ DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+    sync.i              // ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+ br.ret.sptk.many rp // boogie on out in new context
+END(vmx_ia64_switch_to)
+
+GLOBAL_ENTRY(ia64_leave_nested)
+ rsm psr.i
+ ;;
+ adds r21=PT(PR)+16,r12
+ ;;
+
+ lfetch [r21],PT(CR_IPSR)-PT(PR)
+ adds r2=PT(B6)+16,r12
+ adds r3=PT(R16)+16,r12
+ ;;
+ lfetch [r21]
+ ld8 r28=[r2],8 // load b6
+ adds r29=PT(R24)+16,r12
+
+ ld8.fill r16=[r3]
+ adds r3=PT(AR_CSD)-PT(R16),r3
+ adds r30=PT(AR_CCV)+16,r12
+ ;;
+ ld8.fill r24=[r29]
+ ld8 r15=[r30] // load ar.ccv
+ ;;
+ ld8 r29=[r2],16 // load b7
+ ld8 r30=[r3],16 // load ar.csd
+ ;;
+ ld8 r31=[r2],16 // load ar.ssd
+ ld8.fill r8=[r3],16
+ ;;
+ ld8.fill r9=[r2],16
+ ld8.fill r10=[r3],PT(R17)-PT(R10)
+ ;;
+ ld8.fill r11=[r2],PT(R18)-PT(R11)
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ mov ar.csd=r30
+ mov ar.ssd=r31
+ ;;
+    rsm psr.i | psr.ic  // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ ld8.fill r22=[r2],24
+ ld8.fill r23=[r3],24
+ mov b6=r28
+ ;;
+ ld8.fill r25=[r2],16
+ ld8.fill r26=[r3],16
+ mov b7=r29
+ ;;
+ ld8.fill r27=[r2],16
+ ld8.fill r28=[r3],16
+ ;;
+ ld8.fill r29=[r2],16
+ ld8.fill r30=[r3],24
+ ;;
+ ld8.fill r31=[r2],PT(F9)-PT(R31)
+ adds r3=PT(F10)-PT(F6),r3
+ ;;
+ ldf.fill f9=[r2],PT(F6)-PT(F9)
+ ldf.fill f10=[r3],PT(F8)-PT(F10)
+ ;;
+ ldf.fill f6=[r2],PT(F7)-PT(F6)
+ ;;
+ ldf.fill f7=[r2],PT(F11)-PT(F7)
+ ldf.fill f8=[r3],32
+ ;;
+ srlz.i // ensure interruption collection is off
+ mov ar.ccv=r15
+ ;;
+    bsw.0               // switch back to bank 0 (no stop bit required beforehand...)
+ ;;
+ ldf.fill f11=[r2]
+// mov r18=r13
+// mov r21=r13
+ adds r16=PT(CR_IPSR)+16,r12
+ adds r17=PT(CR_IIP)+16,r12
+ ;;
+ ld8 r29=[r16],16 // load cr.ipsr
+ ld8 r28=[r17],16 // load cr.iip
+ ;;
+ ld8 r30=[r16],16 // load cr.ifs
+ ld8 r25=[r17],16 // load ar.unat
+ ;;
+ ld8 r26=[r16],16 // load ar.pfs
+ ld8 r27=[r17],16 // load ar.rsc
+    cmp.eq p9,p0=r0,r0  // set p9 to indicate that we should restore cr.ifs
+ ;;
+ ld8 r24=[r16],16 // load ar.rnat (may be garbage)
+ ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+ ;;
+ ld8 r31=[r16],16 // load predicates
+ ld8 r22=[r17],16 // load b0
+ ;;
+ ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 // load r1
+ ;;
+ ld8.fill r12=[r16],16
+ ld8.fill r13=[r17],16
+ ;;
+ ld8 r20=[r16],16 // ar.fpsr
+ ld8.fill r15=[r17],16
+ ;;
+ ld8.fill r14=[r16],16
+ ld8.fill r2=[r17]
+ ;;
+ ld8.fill r3=[r16]
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
+ ;;
+ mov b0=r22
+ mov ar.pfs=r26
+ mov cr.ifs=r30
+ mov cr.ipsr=r29
+ mov ar.fpsr=r20
+ mov cr.iip=r28
+ ;;
+ mov ar.rsc=r27
+ mov ar.unat=r25
+ mov pr=r31,-1
+ rfi
+END(ia64_leave_nested)
+
+
+
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+ PT_REGS_UNWIND_INFO(0)
+ /*
+     * work.need_resched etc. mustn't get changed by this CPU before it
+     * returns to user- or fsys-mode, hence we disable interrupts early on:
+ */
+ rsm psr.i
+ ;;
+ alloc loc0=ar.pfs,0,1,1,0
+ adds out0=16,r12
+ ;;
+ br.call.sptk.many b0=leave_hypervisor_tail
+ mov ar.pfs=loc0
+ adds r8=IA64_VPD_BASE_OFFSET,r13
+ ;;
+ ld8 r8=[r8]
+ ;;
+ adds r9=VPD(VPSR),r8
+ ;;
+ ld8 r9=[r9]
+ ;;
+ tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT
+ ;;
+(pBN0) add r7=VPD(VBNAT),r8;
+(pBN1) add r7=VPD(VNAT),r8;
+ ;;
+ ld8 r7=[r7]
+ ;;
+ mov ar.unat=r7
+(pBN0) add r4=VPD(VBGR),r8;
+(pBN1) add r4=VPD(VGR),r8;
+(pBN0) add r5=VPD(VBGR)+0x8,r8;
+(pBN1) add r5=VPD(VGR)+0x8,r8;
+ ;;
+ ld8.fill r16=[r4],16
+ ld8.fill r17=[r5],16
+ ;;
+ ld8.fill r18=[r4],16
+ ld8.fill r19=[r5],16
+ ;;
+ ld8.fill r20=[r4],16
+ ld8.fill r21=[r5],16
+ ;;
+ ld8.fill r22=[r4],16
+ ld8.fill r23=[r5],16
+ ;;
+ ld8.fill r24=[r4],16
+ ld8.fill r25=[r5],16
+ ;;
+ ld8.fill r26=[r4],16
+ ld8.fill r27=[r5],16
+ ;;
+ ld8.fill r28=[r4],16
+ ld8.fill r29=[r5],16
+ ;;
+ ld8.fill r30=[r4],16
+ ld8.fill r31=[r5],16
+ ;;
+ bsw.0
+ ;;
+ mov r18=r8 //vpd
+ mov r19=r9 //vpsr
+ adds r20=PT(PR)+16,r12
+ ;;
+ lfetch [r20],PT(CR_IPSR)-PT(PR)
+ adds r16=PT(B6)+16,r12
+ adds r17=PT(B7)+16,r12
+ ;;
+ lfetch [r20]
+ mov r21=r13 // get current
+ ;;
+ ld8 r30=[r16],16 // load b6
+ ld8 r31=[r17],16 // load b7
+ add r20=PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r29=[r20] //load ar_unat
+ mov b6=r30
+ mov b7=r31
+ ld8 r30=[r16],16 //load ar_csd
+ ld8 r31=[r17],16 //load ar_ssd
+ ;;
+ mov ar.unat=r29
+ mov ar.csd=r30
+ mov ar.ssd=r31
+ ;;
+ ld8.fill r8=[r16],16 //load r8
+ ld8.fill r9=[r17],16 //load r9
+ ;;
+ ld8.fill r10=[r16],PT(R1)-PT(R10) //load r10
+ ld8.fill r11=[r17],PT(R12)-PT(R11) //load r11
+ ;;
+ ld8.fill r1=[r16],16 //load r1
+ ld8.fill r12=[r17],16 //load r12
+ ;;
+ ld8.fill r13=[r16],16 //load r13
+ ld8 r30=[r17],16 //load ar_fpsr
+ ;;
+ ld8.fill r15=[r16],16 //load r15
+ ld8.fill r14=[r17],16 //load r14
+ mov ar.fpsr=r30
+ ;;
+ ld8.fill r2=[r16],16 //load r2
+ ld8.fill r3=[r17],16 //load r3
+ ;;
+/*
+(pEml) ld8.fill r4=[r16],16 //load r4
+(pEml) ld8.fill r5=[r17],16 //load r5
+ ;;
+(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6
+(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7
+ ;;
+(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16
+(pNonEml) adds r17=PT(F7)-PT(R5),r17
+ ;;
+*/
+ ld8.fill r4=[r16],16 //load r4
+ ld8.fill r5=[r17],16 //load r5
+ ;;
+ ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6
+ ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7
+ ;;
+
+ ld8 r30=[r16],PT(F6)-PT(AR_CCV)
+    rsm psr.i | psr.ic  // initiate turning off of interrupt and interruption collection
+ ;;
+ srlz.i // ensure interruption collection is off
+ ;;
+ invala // invalidate ALAT
+ ;;
+ ldf.fill f6=[r16],32
+ ldf.fill f7=[r17],32
+ ;;
+ ldf.fill f8=[r16],32
+ ldf.fill f9=[r17],32
+ ;;
+ ldf.fill f10=[r16]
+ ldf.fill f11=[r17]
+ ;;
+ mov ar.ccv=r30
+ adds r16=PT(CR_IPSR)-PT(F10),r16
+ adds r17=PT(CR_IIP)-PT(F11),r17
+ ;;
+ ld8 r31=[r16],16 // load cr.ipsr
+ ld8 r30=[r17],16 // load cr.iip
+ ;;
+ ld8 r29=[r16],16 // load cr.ifs
+ ld8 r28=[r17],16 // load ar.unat
+ ;;
+ ld8 r27=[r16],16 // load ar.pfs
+ ld8 r26=[r17],16 // load ar.rsc
+ ;;
+ ld8 r25=[r16],16 // load ar.rnat (may be garbage)
+ ld8 r24=[r17],16// load ar.bspstore (may be garbage)
+ ;;
+ ld8 r23=[r16],16 // load predicates
+ ld8 r22=[r17],PT(RFI_PFS)-PT(B0) // load b0
+ ;;
+ ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
+ ;;
+//rbs_switch
+ // loadrs has already been shifted
+ alloc r16=ar.pfs,0,0,0,0 // drop current register frame
+ ;;
+ mov ar.rsc=r20
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=r24
+ ;;
+ ld8 r24=[r17] //load rfi_pfs
+ mov ar.unat=r28
+ mov ar.rnat=r25
+ mov ar.rsc=r26
+ ;;
+ mov cr.ipsr=r31
+ mov cr.iip=r30
+ mov cr.ifs=r29
+ cmp.ne p6,p0=r24,r0
+(p6)br.sptk vmx_dorfirfi
+ ;;
+vmx_dorfirfi_back:
+ mov ar.pfs=r27
+
+//vsa_sync_write_start
+ movl r20=__vsa_base
+ ;;
+ ld8 r20=[r20] // read entry point
+ mov r25=r18
+ ;;
+ add r16=PAL_VPS_SYNC_WRITE,r20
+ movl r24=switch_rr7 // calculate return address
+ ;;
+ mov b0=r16
+ br.cond.sptk b0 // call the service
+ ;;
+// switch rr7 and rr5
+switch_rr7:
+ adds r24=SWITCH_MRR5_OFFSET, r21
+ adds r26=SWITCH_MRR6_OFFSET, r21
+ adds r16=SWITCH_MRR7_OFFSET ,r21
+ movl r25=(5<<61)
+ movl r27=(6<<61)
+ movl r17=(7<<61)
+ ;;
+ ld8 r24=[r24]
+ ld8 r26=[r26]
+ ld8 r16=[r16]
+ ;;
+ mov rr[r25]=r24
+ mov rr[r27]=r26
+ mov rr[r17]=r16
+ ;;
+ srlz.i
+ ;;
+ add r24=SWITCH_MPTA_OFFSET, r21
+ ;;
+ ld8 r24=[r24]
+ ;;
+ mov cr.pta=r24
+ ;;
+ srlz.i
+ ;;
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ * must be at bank 0
+ * parameter:
+ * r18:vpd
+ * r19:vpsr
+ * r20:__vsa_base
+ * r22:b0
+ * r23:predicate
+ */
+ mov r24=r22
+ mov r25=r18
+ tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
+ ;;
+ (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+ (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+ ;;
+ mov pr=r23,-2
+ mov b0=r29
+ ;;
+ br.cond.sptk b0 // call pal service
+END(ia64_leave_hypervisor)
+
+//r24 rfi_pfs
+//r17 address of rfi_pfs
+GLOBAL_ENTRY(vmx_dorfirfi)
+ mov r16=ar.ec
+ movl r20 = vmx_dorfirfi_back
+ ;;
+// clean rfi_pfs
+ st8 [r17]=r0
+ mov b0=r20
+// pfs.pec=ar.ec
+ dep r24 = r16, r24, 52, 6
+ ;;
+ mov ar.pfs=r24
+ ;;
+ br.ret.sptk b0
+ ;;
+END(vmx_dorfirfi)
+
+
+#define VMX_PURGE_RR7 0
+#define VMX_INSERT_RR7 1
+/*
+ * in0: old rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ */
+GLOBAL_ENTRY(vmx_purge_double_mapping)
+ alloc loc1 = ar.pfs,5,9,0,0
+ mov loc0 = rp
+ movl r8 = 1f
+ ;;
+ movl loc4 = KERNEL_TR_PAGE_SHIFT
+ movl loc5 = VCPU_TLB_SHIFT
+ mov loc6 = psr
+ movl loc7 = XEN_RR7_SWITCH_STUB
+ mov loc8 = (1<<VMX_PURGE_RR7)
+ ;;
+ srlz.i
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ ;;
+ mov ar.rsc = 0
+ mov b6 = loc7
+ mov rp = r8
+ ;;
+ br.sptk b6
+1:
+ mov ar.rsc = 3
+ mov rp = loc0
+ ;;
+ mov psr.l = loc6
+ ;;
+ srlz.i
+ ;;
+ br.ret.sptk rp
+END(vmx_purge_double_mapping)
+
+/*
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ */
+GLOBAL_ENTRY(vmx_insert_double_mapping)
+ alloc loc1 = ar.pfs,5,9,0,0
+ mov loc0 = rp
+ movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image
+ ;;
+ movl loc3 = IA64_TR_VHPT_IN_DOM // TR number for vhpt table
+ movl r8 = 1f
+ movl loc4 = KERNEL_TR_PAGE_SHIFT
+ ;;
+ movl loc5 = VCPU_TLB_SHIFT
+ mov loc6 = psr
+ movl loc7 = XEN_RR7_SWITCH_STUB
+ ;;
+ srlz.i
+ ;;
+ rsm psr.i | psr.ic
+ mov loc8 = (1<<VMX_INSERT_RR7)
+ ;;
+ srlz.i
+ ;;
+ mov ar.rsc = 0
+ mov b6 = loc7
+ mov rp = r8
+ ;;
+ br.sptk b6
+1:
+ mov ar.rsc = 3
+ mov rp = loc0
+ ;;
+ mov psr.l = loc6
+ ;;
+ srlz.i
+ ;;
+ br.ret.sptk rp
+END(vmx_insert_double_mapping)
+
+ .align PAGE_SIZE
+/*
+ * Stub to add double mapping for new domain, which shouldn't
+ * access any memory when active. Before reaching this point,
+ * both psr.i/ic is cleared and rse is set in lazy mode.
+ *
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ * loc2: TR number for xen image
+ * loc3: TR number for vhpt table
+ * loc4: page size for xen image
+ * loc5: page size of vhpt table
+ * loc7: free to use
+ * loc8: purge or insert
+ * r8: will contain old rid value
+ */
+GLOBAL_ENTRY(vmx_switch_rr7)
+ movl loc7 = (7<<61)
+ dep.z loc4 = loc4, 2, 6
+ dep.z loc5 = loc5, 2, 6
+ ;;
+ tbit.nz p6,p7=loc8, VMX_INSERT_RR7
+ mov r8 = rr[loc7]
+ ;;
+ mov rr[loc7] = in0
+(p6)mov cr.ifa = in1
+(p6)mov cr.itir = loc4
+ ;;
+ srlz.i
+ ;;
+(p6)itr.i itr[loc2] = in3
+(p7)ptr.i in1, loc4
+ ;;
+(p6)itr.d dtr[loc2] = in3
+(p7)ptr.d in1, loc4
+ ;;
+ srlz.i
+ ;;
+(p6)mov cr.ifa = in2
+(p6)mov cr.itir = loc5
+ ;;
+(p6)itr.d dtr[loc3] = in4
+(p7)ptr.d in2, loc5
+ ;;
+ srlz.i
+ ;;
+ mov rr[loc7] = r8
+ ;;
+ srlz.i
+ br.sptk rp
+END(vmx_switch_rr7)
+ .align PAGE_SIZE
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_hypercall.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_hypercall.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,235 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_hypercall.c: handling hypercalls from a domain
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <public/xen.h>
+#include <public/event_channel.h>
+#include <asm/vmmu.h>
+#include <asm/tlb.h>
+#include <asm/regionreg.h>
+#include <asm/page.h>
+#include <xen/mm.h>
+#include <xen/multicall.h>
+
+
+void hyper_not_support(void)
+{
+ VCPU *vcpu=current;
+ vmx_vcpu_set_gr(vcpu, 8, -1, 0);
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_mmu_update(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,r33,r34,r35,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ vmx_vcpu_get_gr(vcpu,17,&r33);
+ vmx_vcpu_get_gr(vcpu,18,&r34);
+ vmx_vcpu_get_gr(vcpu,19,&r35);
+ ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+unsigned long __hypercall_create_continuation(
+ unsigned int op, unsigned int nr_args, ...)
+{
+ struct mc_state *mcs = &mc_state[smp_processor_id()];
+ VCPU *vcpu = current;
+ struct cpu_user_regs *regs = vcpu_regs(vcpu);
+ unsigned int i;
+ va_list args;
+
+ va_start(args, nr_args);
+ if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) {
+ panic("PREEMPT happen in multicall\n"); // Not support yet
+ } else {
+ vmx_vcpu_set_gr(vcpu, 15, op, 0);
+ for ( i = 0; i < nr_args; i++) {
+ switch (i) {
+ case 0: vmx_vcpu_set_gr(vcpu, 16, va_arg(args, unsigned long), 0);
+ break;
+ case 1: vmx_vcpu_set_gr(vcpu, 17, va_arg(args, unsigned long), 0);
+ break;
+ case 2: vmx_vcpu_set_gr(vcpu, 18, va_arg(args, unsigned long), 0);
+ break;
+ case 3: vmx_vcpu_set_gr(vcpu, 19, va_arg(args, unsigned long), 0);
+ break;
+ case 4: vmx_vcpu_set_gr(vcpu, 20, va_arg(args, unsigned long), 0);
+ break;
+ default: panic("Too many args for hypercall continuation\n");
+ break;
+ }
+ }
+ }
+ vcpu->arch.hypercall_continuation = 1;
+ va_end(args);
+ return op;
+}
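
A hedged sketch of how a preemptible hypercall body might use __hypercall_create_continuation() together with the hypercall_continuation mark that hyper_dom_mem_op() below tests: the op and arguments are written back into the guest's r15-r20, the mark is set, and because the dispatcher then skips the IIP increment the guest re-issues the same hypercall. The op number, argument names, and the hypercall_preempt_check() call are assumptions for illustration, not definitions from this patch.

/* Illustrative only: restart a long-running operation from inside its
 * do_*() body when preemption is needed.  '__HYPERVISOR_example_op' and
 * the arguments are placeholders. */
static long example_op_body(unsigned long arg0, unsigned long arg1)
{
    if (hypercall_preempt_check())
        /* Re-queue the same call: the current vcpu's r15/r16/r17 are
         * rewritten and arch.hypercall_continuation is set to 1. */
        return __hypercall_create_continuation(__HYPERVISOR_example_op, 2,
                                               arg0, arg1);
    /* ... otherwise carry on with the real work ... */
    return 0;
}
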
+
+void hyper_dom_mem_op(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,r33,r34,r35,r36;
+ u64 ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ vmx_vcpu_get_gr(vcpu,17,&r33);
+ vmx_vcpu_get_gr(vcpu,18,&r34);
+ vmx_vcpu_get_gr(vcpu,19,&r35);
+ vmx_vcpu_get_gr(vcpu,20,&r36);
+ ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36);
+ printf("do_dom_mem return value: %lx\n", ret);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+ /* Hard to define a special return value to indicate hypercall restart.
+ * So just add a new mark, which is SMP safe
+ */
+ if (vcpu->arch.hypercall_continuation == 1)
+ vcpu->arch.hypercall_continuation = 0;
+ else
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+
+void hyper_sched_op(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ ret=do_sched_op(r32);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_dom0_op(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ ret=do_dom0_op((dom0_op_t *)r32);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_event_channel_op(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ ret=do_event_channel_op((evtchn_op_t *)r32);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_xen_version(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ ret=do_xen_version((int )r32);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_lock_page(VCPU *vcpu, u64 va, u64 lock)
+{
+ int i;
+ ia64_rr rr;
+ thash_cb_t *hcb;
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ rr = vmx_vcpu_rr(vcpu, va);
+ return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock);
+}
+
+/*
+ * Lock guest page in vTLB, so that it's not relinquished by recycle
+ * session when HV is servicing that hypercall.
+ */
+void hyper_lock_page(void)
+{
+//TODO:
+ VCPU *vcpu=current;
+ u64 va,lock, ret;
+ vmx_vcpu_get_gr(vcpu,16,&va);
+ vmx_vcpu_get_gr(vcpu,17,&lock);
+ ret=do_lock_page(vcpu, va, lock);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_set_shared_page(VCPU *vcpu, u64 gpa)
+{
+ u64 shared_info, o_info;
+ struct domain *d = vcpu->domain;
+ struct vcpu *v;
+ if(vcpu->domain!=dom0)
+ return -EPERM;
+ shared_info = __gpa_to_mpa(vcpu->domain, gpa);
+ o_info = (u64)vcpu->domain->shared_info;
+ d->shared_info= (shared_info_t *)__va(shared_info);
+
+ /* Copy existing shared info into new page */
+ if (o_info) {
+ memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE);
+ for_each_vcpu(d, v) {
+ v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
+ }
+        /* If the original page belongs to the xen heap, then relinquish it
+         * back to the xen heap. Otherwise, leave it to the domain itself to decide.
+ */
+ if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
+ free_xenheap_page(o_info);
+ } else
+ memset(d->shared_info, 0, PAGE_SIZE);
+ return 0;
+}
+
+void hyper_set_shared_page(void)
+{
+ VCPU *vcpu=current;
+ u64 gpa,ret;
+ vmx_vcpu_get_gr(vcpu,16,&gpa);
+
+ ret=do_set_shared_page(vcpu, gpa);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+ vmx_vcpu_increment_iip(vcpu);
+}
+
+/*
+void hyper_grant_table_op(void)
+{
+ VCPU *vcpu=current;
+ u64 r32,r33,r34,ret;
+ vmx_vcpu_get_gr(vcpu,16,&r32);
+ vmx_vcpu_get_gr(vcpu,17,&r33);
+ vmx_vcpu_get_gr(vcpu,18,&r34);
+
+ ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34);
+ vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+}
+*/
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_init.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_init.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,375 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_init.c: initialization work for vt specific domain
+ * Copyright (c) 2005, Intel Corporation.
+ * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx>
+ * Fred Yang <fred.yang@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ * 05/08/16 Kun tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
+ * Disable doubling mapping
+ *
+ * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
+ * Simplified design in first step:
+ * - One virtual environment
+ * - Domain is bound to one LP
+ * Later to support guest SMP:
+ * - Need interface to handle VP scheduled to different LP
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/pal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/vmx_vcpu.h>
+#include <xen/lib.h>
+#include <asm/vmmu.h>
+#include <public/arch-ia64.h>
+#include <public/io/ioreq.h>
+#include <asm/vmx_phy_mode.h>
+#include <asm/processor.h>
+#include <asm/vmx.h>
+#include <xen/mm.h>
+
+/* Global flag to identify whether Intel vmx feature is on */
+u32 vmx_enabled = 0;
+static u32 vm_order;
+static u64 buffer_size;
+static u64 vp_env_info;
+static u64 vm_buffer = 0; /* Buffer required to bring up VMX feature */
+u64 __vsa_base = 0; /* Run-time service base of VMX */
+
+/* Check whether vt feature is enabled or not. */
+void
+identify_vmx_feature(void)
+{
+ pal_status_t ret;
+ u64 avail = 1, status = 1, control = 1;
+
+ vmx_enabled = 0;
+ /* Check VT-i feature */
+ ret = ia64_pal_proc_get_features(&avail, &status, &control);
+ if (ret != PAL_STATUS_SUCCESS) {
+ printk("Get proc features failed.\n");
+ goto no_vti;
+ }
+
+ /* FIXME: do we need to check status field, to see whether
+     * PSR.vm is actually enabled? If yes, another call to
+     * ia64_pal_proc_set_features may be required then.
+ */
+ printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n",
+ avail, status, control, avail & PAL_PROC_VM_BIT);
+ if (!(avail & PAL_PROC_VM_BIT)) {
+ printk("No VT feature supported.\n");
+ goto no_vti;
+ }
+
+ ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
+ if (ret != PAL_STATUS_SUCCESS) {
+ printk("Get vp environment info failed.\n");
+ goto no_vti;
+ }
+
+    /* Does xen have the ability to decode itself? */
+ if (!(vp_env_info & VP_OPCODE))
+ printk("WARNING: no opcode provided from hardware(%lx)!!!\n",
vp_env_info);
+ vm_order = get_order(buffer_size);
+ printk("vm buffer size: %d, order: %d\n", buffer_size, vm_order);
+
+ vmx_enabled = 1;
+no_vti:
+ return;
+}
+
+/*
+ * Init virtual environment on current LP.
+ * __vsa_base indicates whether this is the first LP to be initialized
+ * for the current domain.
+ */
+void
+vmx_init_env(void)
+{
+ u64 status, tmp_base;
+
+ if (!vm_buffer) {
+ vm_buffer = alloc_xenheap_pages(vm_order);
+ ASSERT(vm_buffer);
+ printk("vm_buffer: 0x%lx\n", vm_buffer);
+ }
+
+    status=ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+ __pa(vm_buffer),
+ vm_buffer,
+ &tmp_base);
+
+ if (status != PAL_STATUS_SUCCESS) {
+ printk("ia64_pal_vp_init_env failed.\n");
+        return;    /* vmx_init_env() returns void */
+ }
+
+ if (!__vsa_base)
+ __vsa_base = tmp_base;
+ else
+ ASSERT(tmp_base != __vsa_base);
+
+#ifdef XEN_DBL_MAPPING
+ /* Init stub for rr7 switch */
+ vmx_init_double_mapping_stub();
+#endif
+}
+
+void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c)
+{
+ struct domain *d = v->domain;
+ shared_iopage_t *sp;
+
+ ASSERT(d != dom0); /* only for non-privileged vti domain */
+ d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
+ sp = get_sp(d);
+ memset((char *)sp,0,PAGE_SIZE);
+ /* FIXME: temp due to old CP */
+ sp->sp_global.eport = 2;
+#ifdef V_IOSAPIC_READY
+ sp->vcpu_number = 1;
+#endif
+ /* TEMP */
+ d->arch.vmx_platform.pib_base = 0xfee00000UL;
+
+ /* One more step to enable interrupt assist */
+ set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
+ /* Only open one port for I/O and interrupt emulation */
+ if (v == d->vcpu[0]) {
+ memset(&d->shared_info->evtchn_mask[0], 0xff,
+ sizeof(d->shared_info->evtchn_mask));
+ clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
+ }
+
+    /* FIXME: only a physically contiguous PMT table is supported so far */
+ d->arch.pmt = __va(c->pt_base);
+ d->arch.max_pfn = c->pt_max_pfn;
+
+ vmx_final_setup_domain(d);
+}
+
+typedef union {
+ u64 value;
+ struct {
+ u64 number : 8;
+ u64 revision : 8;
+ u64 model : 8;
+ u64 family : 8;
+ u64 archrev : 8;
+ u64 rv : 24;
+ };
+} cpuid3_t;
+
+/* Allocate vpd from xenheap */
+static vpd_t *alloc_vpd(void)
+{
+ int i;
+ cpuid3_t cpuid3;
+ vpd_t *vpd;
+
+ vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
+ if (!vpd) {
+ printk("VPD allocation failed.\n");
+ return NULL;
+ }
+
+ printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t));
+ memset(vpd, 0, VPD_SIZE);
+ /* CPUID init */
+ for (i = 0; i < 5; i++)
+ vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+ /* Limit the CPUID number to 5 */
+ cpuid3.value = vpd->vcpuid[3];
+ cpuid3.number = 4; /* 5 - 1 */
+ vpd->vcpuid[3] = cpuid3.value;
+
+ vpd->vdc.d_vmsw = 1;
+ return vpd;
+}
+
+
+#ifdef CONFIG_VTI
+/*
+ * Create a VP on an initialized VMX environment.
+ */
+static void
+vmx_create_vp(struct vcpu *v)
+{
+ u64 ret;
+ vpd_t *vpd = v->arch.arch_vmx.vpd;
+ u64 ivt_base;
+ extern char vmx_ia64_ivt;
+    /* ia64_ivt is a function pointer, so this translation is needed */
+ ivt_base = (u64) &vmx_ia64_ivt;
+ printk("ivt_base: 0x%lx\n", ivt_base);
+ ret = ia64_pal_vp_create(vpd, ivt_base, 0);
+ if (ret != PAL_STATUS_SUCCESS)
+ panic("ia64_pal_vp_create failed. \n");
+}
+
+#ifdef XEN_DBL_MAPPING
+void vmx_init_double_mapping_stub(void)
+{
+ u64 base, psr;
+ extern void vmx_switch_rr7(void);
+
+ base = (u64) &vmx_switch_rr7;
+ base = *((u64*)base);
+
+ psr = ia64_clear_ic();
+ ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB,
+ pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)),
+ RR7_SWITCH_SHIFT);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n",
(u64)(__pa(base)));
+}
+#endif
+
+/* Other non-context related tasks can be done in context switch */
+void
+vmx_save_state(struct vcpu *v)
+{
+ u64 status, psr;
+ u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
+
+ /* FIXME: about setting of pal_proc_vector... time consuming */
+ status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0);
+ if (status != PAL_STATUS_SUCCESS)
+ panic("Save vp status failed\n");
+
+#ifdef XEN_DBL_MAPPING
+    /* FIXME: Do we really need to purge the double mapping for the old vcpu?
+     * Since the rid is completely different between prev and next,
+     * they don't overlap and thus no MCA is possible... */
+ dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+ vmx_purge_double_mapping(dom_rr7, KERNEL_START,
+ (u64)v->arch.vtlb->ts->vhpt->hash);
+#endif
+
+    /* Need to save KRs on domain switch, though HV itself doesn't
+     * use them.
+ */
+ v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
+ v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
+ v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
+ v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
+ v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
+ v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
+ v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
+ v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
+}
+
+/* Even if the guest is in physical mode, we still need such a double mapping */
+void
+vmx_load_state(struct vcpu *v)
+{
+ u64 status, psr;
+ u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
+ u64 pte_xen, pte_vhpt;
+ int i;
+
+ status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0);
+ if (status != PAL_STATUS_SUCCESS)
+ panic("Restore vp status failed\n");
+
+#ifdef XEN_DBL_MAPPING
+ dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
+ pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+    pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL));
+ vmx_insert_double_mapping(dom_rr7, KERNEL_START,
+ (u64)v->arch.vtlb->ts->vhpt->hash,
+ pte_xen, pte_vhpt);
+#endif
+
+ ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
+ ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
+ ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
+ ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
+ ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
+ ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
+ ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
+ ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
+ /* Guest vTLB is not required to be switched explicitly, since
+ * anchored in vcpu */
+}
+
+#ifdef XEN_DBL_MAPPING
+/* Purge old double mapping and insert new one, due to rr7 change */
+void
+vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7)
+{
+ u64 pte_xen, pte_vhpt, vhpt_base;
+
+ vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash;
+ vmx_purge_double_mapping(oldrr7, KERNEL_START,
+ vhpt_base);
+
+ pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
+    pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL));
+ vmx_insert_double_mapping(newrr7, KERNEL_START,
+ vhpt_base,
+ pte_xen, pte_vhpt);
+}
+#endif // XEN_DBL_MAPPING
+#endif // CONFIG_VTI
+
+/*
+ * Initialize VMX environment for the guest. Only the 1st vp/vcpu
+ * is registered here.
+ */
+void
+vmx_final_setup_domain(struct domain *d)
+{
+ struct vcpu *v = d->vcpu[0];
+ vpd_t *vpd;
+
+ /* Allocate resources for vcpu 0 */
+ //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));
+
+ vpd = alloc_vpd();
+ ASSERT(vpd);
+
+ v->arch.arch_vmx.vpd = vpd;
+ vpd->virt_env_vaddr = vm_buffer;
+
+#ifdef CONFIG_VTI
+ /* v->arch.schedule_tail = arch_vmx_do_launch; */
+ vmx_create_vp(v);
+
+ /* Set this ed to be vmx */
+ set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags);
+
+ /* Physical mode emulation initialization, including
+     * emulation ID allocation and related memory requests
+ */
+ physical_mode_init(v);
+
+ vlsapic_reset(v);
+ vtm_init(v);
+#endif
+
+ /* Other vmx specific initialization work */
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_interrupt.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,388 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_interrupt.c: handle inject interruption.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx_pal_vsa.h>
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+void
+collect_interruption(VCPU *vcpu)
+{
+ u64 ipsr;
+ u64 vdcr;
+ u64 vifs;
+ IA64_PSR vpsr;
+ REGS * regs = vcpu_regs(vcpu);
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+ if(vpsr.ic){
+ extern void vmx_dorfirfi(void);
+ if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
+ panic("COLLECT interruption for vmx_dorfirfi\n");
+
+ /* Sync mpsr id/da/dd/ss/ed bits to vipsr
+         * since after the guest does rfi, we still want these bits on in
+ * mpsr
+ */
+
+ ipsr = regs->cr_ipsr;
+ vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+ | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED));
+ vmx_vcpu_set_ipsr(vcpu, vpsr.val);
+
+ /* Currently, for trap, we do not advance IIP to next
+ * instruction. That's because we assume caller already
+ * set up IIP correctly
+ */
+
+ vmx_vcpu_set_iip(vcpu , regs->cr_iip);
+
+ /* set vifs.v to zero */
+ vifs = VPD_CR(vcpu,ifs);
+ vifs &= ~IA64_IFS_V;
+ vmx_vcpu_set_ifs(vcpu, vifs);
+
+ vmx_vcpu_set_iipa(vcpu, regs->cr_iipa);
+ }
+
+ vdcr = VPD_CR(vcpu,dcr);
+
+ /* Set guest psr
+ * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+ * be: set to the value of dcr.be
+ * pp: set to the value of dcr.pp
+ */
+ vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+ vpsr.val |= ( vdcr & IA64_DCR_BE);
+
+ /* VDCR pp bit position is different from VPSR pp bit */
+ if ( vdcr & IA64_DCR_PP ) {
+ vpsr.val |= IA64_PSR_PP;
+ } else {
+ vpsr.val &= ~IA64_PSR_PP;;
+ }
+
+ vmx_vcpu_set_psr(vcpu, vpsr.val);
+
+}
+int
+inject_guest_interruption(VCPU *vcpu, u64 vec)
+{
+ u64 viva;
+ REGS *regs;
+ regs=vcpu_regs(vcpu);
+
+ collect_interruption(vcpu);
+
+ vmx_vcpu_get_iva(vcpu,&viva);
+ regs->cr_iip = viva + vec;
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ * set_ifa: if true, set vIFA
+ * set_itir: if true, set vITIR
+ * set_iha: if true, set vIHA
+ */
+void
+set_ifa_itir_iha (VCPU *vcpu, u64 vadr,
+ int set_ifa, int set_itir, int set_iha)
+{
+ IA64_PSR vpsr;
+ u64 value;
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ /* Vol2, Table 8-1 */
+ if ( vpsr.ic ) {
+ if ( set_ifa){
+ vmx_vcpu_set_ifa(vcpu, vadr);
+ }
+ if ( set_itir) {
+ value = vmx_vcpu_get_itir_on_fault(vcpu, vadr);
+ vmx_vcpu_set_itir(vcpu, value);
+ }
+
+ if ( set_iha) {
+ vmx_vcpu_thash(vcpu, vadr, &value);
+ vmx_vcpu_set_iha(vcpu, value);
+ }
+ }
+
+
+}
+
+/*
+ * Data TLB Fault
+ * @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dtlb_fault (VCPU *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA */
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ * @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+itlb_fault (VCPU *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA */
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ * @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+nested_dtlb (VCPU *vcpu)
+{
+ inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ * @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_dtlb (VCPU *vcpu, u64 vadr)
+{
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Alternate Instruction TLB Fault
+ * @ Alternate Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_itlb (VCPU *vcpu, u64 vadr)
+{
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ * VHPT Translation Vector
+ */
+static void
+_vhpt_fault(VCPU *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA*/
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR);
+
+
+}
+
+/*
+ * VHPT Instruction Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+ivhpt_fault (VCPU *vcpu, u64 vadr)
+{
+ _vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dvhpt_fault (VCPU *vcpu, u64 vadr)
+{
+ _vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ * General Exception vector
+ */
+void
+_general_exception (VCPU *vcpu)
+{
+ inject_guest_interruption(vcpu,IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_op (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_dep (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rsv_reg_field (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void
+privilege_op (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Unimplemented Data Address Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+unimpl_daddr (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_reg (VCPU *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/* Deal with
+ * Nat consumption vector
+ * Parameter:
+ * vaddr: Optional, if t == REGISTER
+ */
+static void
+_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t)
+{
+ /* If vPSR.ic && t == DATA/INST, IFA */
+ if ( t == DATA || t == INSTRUCTION ) {
+ /* IFA */
+ set_ifa_itir_iha (vcpu, vadr, 1, 0, 0);
+ }
+
+ inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * IR Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+static void
+ir_nat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+ _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+inat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+ _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rnat_consumption (VCPU *vcpu)
+{
+ _nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dnat_page_consumption (VCPU *vcpu, uint64_t vadr)
+{
+ _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ * Page not present vector
+ */
+void
+page_not_present(VCPU *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR */
+ set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_irq_ia64.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_irq_ia64.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,127 @@
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/jiffies.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/slab.h>
+#include <linux/ptrace.h>
+#include <linux/random.h> /* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/threads.h>
+#include <linux/bitops.h>
+
+#include <asm/delay.h>
+#include <asm/intrinsics.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_PERFMON
+# include <asm/perfmon.h>
+#endif
+
+#define IRQ_DEBUG 0
+
+#ifdef CONFIG_VTI
+#define vmx_irq_enter() \
+ add_preempt_count(HARDIRQ_OFFSET);
+
+/* Now softirq will be checked when leaving hypervisor, or else
+ * scheduler irq will be executed too early.
+ */
+#define vmx_irq_exit(void) \
+ sub_preempt_count(HARDIRQ_OFFSET);
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
+{
+ unsigned long saved_tpr;
+ int wake_dom0 = 0;
+
+
+#if IRQ_DEBUG
+ {
+ unsigned long bsp, sp;
+
+ /*
+ * Note: if the interrupt happened while executing in
+ * the context switch routine (ia64_switch_to), we may
+ * get a spurious stack overflow here. This is
+ * because the register and the memory stack are not
+ * switched atomically.
+ */
+ bsp = ia64_getreg(_IA64_REG_AR_BSP);
+ sp = ia64_getreg(_IA64_REG_AR_SP);
+
+ if ((sp - bsp) < 1024) {
+ static unsigned char count;
+ static long last_time;
+
+ if (jiffies - last_time > 5*HZ)
+ count = 0;
+ if (++count < 5) {
+ last_time = jiffies;
+ printk("ia64_handle_irq: DANGER: less than "
+ "1KB of free stack space!!\n"
+ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+ }
+ }
+ }
+#endif /* IRQ_DEBUG */
+
+ /*
+ * Always set TPR to limit maximum interrupt nesting depth to
+ * 16 (without this, it would be ~240, which could easily lead
+ * to kernel stack overflows).
+ */
+ vmx_irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ if (!IS_RESCHEDULE(vector)) {
+ ia64_setreg(_IA64_REG_CR_TPR, vector);
+ ia64_srlz_d();
+
+ if (vector != IA64_TIMER_VECTOR) {
+ /* FIXME: Leave IRQ re-route later */
+ vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
+ wake_dom0 = 1;
+ }
+ else { // FIXME: Handle Timer only now
+ __do_IRQ(local_vector_to_irq(vector), regs);
+ }
+
+ /*
+ * Disable interrupts and send EOI:
+ */
+ local_irq_disable();
+ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
+ }
+ else {
+ printf("Oops: RESCHEDULE IPI absorbed by HV\n");
+ }
+ ia64_eoi();
+ vector = ia64_get_ivr();
+ }
+ /*
+     * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
+     * handler needs to be able to wait for further keyboard interrupts, which can't
+     * come through until ia64_eoi() has been done.
+ */
+ vmx_irq_exit();
+ if ( wake_dom0 && current != dom0 )
+ vcpu_wake(dom0->vcpu[0]);
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_ivt.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_ivt.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1085 @@
+/*
+ * arch/ia64/kernel/vmx_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@xxxxxxxxxx>
+ * David Mosberger <davidm@xxxxxxxxxx>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
+ * Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT.
+ *
+ * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ * Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
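+
+/*
+ * The 32KB figure above follows directly from the bundle counts: an IA-64
+ * bundle is 16 bytes, so the 20 large entries occupy 64 bundles (0x400 bytes,
+ * matching the .org spacing used below) each and the 48 small entries occupy
+ * 16 bundles (0x100 bytes) each.  A quick check of the arithmetic,
+ * illustrative only:
+ *
+ *     20 entries * 64 bundles * 16 bytes = 20480 bytes
+ *   + 48 entries * 16 bundles * 16 bytes = 12288 bytes
+ *                                        = 32768 bytes = 32KB
+ */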
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/vhpt.h>
+
+
+#if 0
+ /*
+ * This lets you track the last eight faults that occurred on the CPU.  Make sure ar.k2 isn't
+ * needed for something else before enabling this...
+ */
+# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;; mov ar.k2=r16
+#else
+# define VMX_DBG_FAULT(i)
+#endif
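
When the #if 0 above is flipped on, VMX_DBG_FAULT(i) keeps a rolling trail of the last eight vector numbers in ar.k2, one per byte, most recent in the low byte. A hedged C-side sketch of how that trail could be decoded from a register dump; the helper name is hypothetical and nothing in this patch defines it.

/* Illustrative only: print the ar.k2 fault trail, newest first. */
static void dump_fault_trail(unsigned long k2)
{
    int i;
    for (i = 0; i < 8; i++)
        printk("fault[-%d] = %lu\n", i, (k2 >> (8 * i)) & 0xff);
}
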
+
+#include "vmx_minstate.h"
+
+
+
+#define VMX_FAULT(n) \
+vmx_fault_##n:; \
+ br.sptk vmx_fault_##n; \
+ ;; \
+
+
+#define VMX_REFLECT(n) \
+    mov r31=pr;                                                     \
+    mov r19=n;          /* prepare to save predicates */            \
+ mov r29=cr.ipsr; \
+ ;; \
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p7) br.sptk.many vmx_dispatch_reflection; \
+ VMX_FAULT(n); \
+
+
+GLOBAL_ENTRY(vmx_panic)
+ br.sptk.many vmx_panic
+ ;;
+END(vmx_panic)
+
+
+
+
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global vmx_ia64_ivt
+vmx_ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vmx_vhpt_miss)
+ VMX_FAULT(0)
+END(vmx_vhpt_miss)
+
+ .org vmx_ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(vmx_itlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6) br.sptk vmx_fault_1
+ mov r16 = cr.ifa
+ ;;
+ thash r17 = r16
+ ttag r20 = r16
+ ;;
+vmx_itlb_loop:
+ cmp.eq p6,p0 = r0, r17
+(p6) br vmx_itlb_out
+ ;;
+ adds r22 = VLE_TITAG_OFFSET, r17
+ adds r23 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r24 = [r22]
+ ld8 r25 = [r23]
+ ;;
+ lfetch [r25]
+ cmp.eq p6,p7 = r20, r24
+ ;;
+(p7) mov r17 = r25;
+(p7) br.sptk vmx_itlb_loop
+ ;;
+ adds r23 = VLE_PGFLAGS_OFFSET, r17
+ adds r24 = VLE_ITIR_OFFSET, r17
+ ;;
+ ld8 r26 = [r23]
+ ld8 r25 = [r24]
+ ;;
+ mov cr.itir = r25
+ ;;
+ itc.i r26
+ ;;
+ srlz.i
+ ;;
+ mov r23=r31
+ mov r22=b0
+ adds r16=IA64_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VPD(VPSR),r18
+ movl r20=__vsa_base
+ ;;
+ ld8 r19=[r19]
+ ld8 r20=[r20]
+ ;;
+ br.sptk ia64_vmm_entry
+ ;;
+vmx_itlb_out:
+ mov r19 = 1
+ br.sptk vmx_dispatch_tlb_miss
+ VMX_FAULT(1);
+END(vmx_itlb_miss)
+
+ .org vmx_ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(vmx_dtlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk vmx_fault_2
+ mov r16 = cr.ifa
+ ;;
+ thash r17 = r16
+ ttag r20 = r16
+ ;;
+vmx_dtlb_loop:
+ cmp.eq p6,p0 = r0, r17
+(p6)br vmx_dtlb_out
+ ;;
+ adds r22 = VLE_TITAG_OFFSET, r17
+ adds r23 = VLE_CCHAIN_OFFSET, r17
+ ;;
+ ld8 r24 = [r22]
+ ld8 r25 = [r23]
+ ;;
+ lfetch [r25]
+ cmp.eq p6,p7 = r20, r24
+ ;;
+(p7)mov r17 = r25;
+(p7)br.sptk vmx_dtlb_loop
+ ;;
+ adds r23 = VLE_PGFLAGS_OFFSET, r17
+ adds r24 = VLE_ITIR_OFFSET, r17
+ ;;
+ ld8 r26 = [r23]
+ ld8 r25 = [r24]
+ ;;
+ mov cr.itir = r25
+ ;;
+ itc.d r26
+ ;;
+ srlz.d;
+ ;;
+ mov r23=r31
+ mov r22=b0
+ adds r16=IA64_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VPD(VPSR),r18
+ movl r20=__vsa_base
+ ;;
+ ld8 r19=[r19]
+ ld8 r20=[r20]
+ ;;
+ br.sptk ia64_vmm_entry
+ ;;
+vmx_dtlb_out:
+ mov r19 = 2
+ br.sptk vmx_dispatch_tlb_miss
+ VMX_FAULT(2);
+END(vmx_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(vmx_alt_itlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_3
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r24=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ shr.u r18=r16,55 // move address bit 59 to bit 4
+ ;;
+ and r18=0x10,r18 // bit 4=address-bit(61)
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+    or r19=r19,r18      // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+ VMX_FAULT(3);
+END(vmx_alt_itlb_miss)
+
+
+ .org vmx_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(vmx_alt_dtlb_miss)
+ mov r31=pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p7)br.sptk vmx_fault_4
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r24=cr.ipsr
+ ;;
+ and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
+ tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
+ shr.u r18=r16,55 // move address bit 59 to bit 4
+    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ and r18=0x10,r18 // bit 4=address-bit(61)
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+ dep r24=-1,r24,IA64_PSR_ED_BIT,1
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+    or r19=r19,r18      // set bit 4 (uncached) if the access was to region 6
+(p6) mov cr.ipsr=r24
+ ;;
+(p7) itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+ VMX_FAULT(4);
+END(vmx_alt_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(vmx_nested_dtlb_miss)
+ VMX_FAULT(5)
+END(vmx_nested_dtlb_miss)
+
+ .org vmx_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(vmx_ikey_miss)
+ VMX_REFLECT(6)
+END(vmx_ikey_miss)
+
+ .org vmx_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(vmx_dkey_miss)
+ VMX_REFLECT(7)
+END(vmx_dkey_miss)
+
+ .org vmx_ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(vmx_dirty_bit)
+ VMX_REFLECT(8)
+END(vmx_dirty_bit)
+
+ .org vmx_ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(vmx_iaccess_bit)
+ VMX_REFLECT(9)
+END(vmx_iaccess_bit)
+
+ .org vmx_ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(vmx_daccess_bit)
+ VMX_REFLECT(10)
+END(vmx_daccess_bit)
+
+ .org vmx_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(vmx_break_fault)
+ mov r31=pr
+ mov r19=11
+ mov r30=cr.iim
+ movl r29=0x1100
+ ;;
+ cmp.eq p6,p7=r30,r0
+ (p6) br.sptk vmx_fault_11
+ ;;
+ cmp.eq p6,p7=r29,r30
+ (p6) br.dptk.few vmx_hypercall_dispatch
+ (p7) br.sptk.many vmx_dispatch_break_fault
+ ;;
+ VMX_FAULT(11);
+END(vmx_break_fault)
+
+ .org vmx_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(vmx_interrupt)
+ mov r31=pr // prepare to save predicates
+ mov r19=12
+ mov r29=cr.ipsr
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+ tbit.z p0,p15=r29,IA64_PSR_I_BIT
+ ;;
+(p7) br.sptk vmx_dispatch_interrupt
+ ;;
+ mov r27=ar.rsc /* M */
+ mov r20=r1 /* A */
+ mov r25=ar.unat /* M */
+ mov r26=ar.pfs /* I */
+ mov r28=cr.iip /* M */
+ cover /* B (or nothing) */
+ ;;
+ mov r1=sp
+ ;;
+ invala /* M */
+ mov r30=cr.ifs
+ ;;
+ addl r1=-IA64_PT_REGS_SIZE,r1
+ ;;
+ adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
+ adds r16=PT(CR_IPSR),r1
+ ;;
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+ st8 [r16]=r29 /* save cr.ipsr */
+ ;;
+ lfetch.fault.excl.nt1 [r17]
+ mov r29=b0
+ ;;
+ adds r16=PT(R8),r1 /* initialize first base pointer */
+ adds r17=PT(R9),r1 /* initialize second base pointer */
+ mov r18=r0 /* make sure r18 isn't NaT */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+ ;;
+ st8 [r16]=r28,16 /* save cr.iip */
+ st8 [r17]=r30,16 /* save cr.ifs */
+ mov r8=ar.fpsr /* M */
+ mov r9=ar.csd
+ mov r10=ar.ssd
+ movl r11=FPSR_DEFAULT /* L-unit */
+ ;;
+ st8 [r16]=r25,16 /* save ar.unat */
+ st8 [r17]=r26,16 /* save ar.pfs */
+ shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
+ ;;
+ st8 [r16]=r27,16 /* save ar.rsc */
+ adds r17=16,r17 /* skip over ar_rnat field */
+ ;; /* avoid RAW on r16 & r17 */
+ st8 [r17]=r31,16 /* save predicates */
+ adds r16=16,r16 /* skip over ar_bspstore field */
+ ;;
+ st8 [r16]=r29,16 /* save b0 */
+ st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+ adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+ mov r13=r21 /* establish `current' */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+ dep r14=-1,r0,60,4
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+ adds r2=IA64_PT_REGS_R16_OFFSET,r1
+ ;;
+ mov r8=ar.ccv
+ movl r1=__gp /* establish kernel global pointer */
+ ;;
+ bsw.1
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ (p15) ssm psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+ mov r18=b6
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+ mov r19=b7
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+ ;;
+ mov ar.fpsr=r11 /* M-unit */
+ st8 [r2]=r8,8 /* ar.ccv */
+ adds r24=PT(B6)-PT(F7),r3
+ ;;
+ stf.spill [r2]=f6,32
+ stf.spill [r3]=f7,32
+ ;;
+ stf.spill [r2]=f8,32
+ stf.spill [r3]=f9,32
+ ;;
+ stf.spill [r2]=f10
+ stf.spill [r3]=f11
+ adds r25=PT(B7)-PT(F11),r3
+ ;;
+ st8 [r24]=r18,16 /* b6 */
+ st8 [r25]=r19,16 /* b7 */
+ ;;
+ st8 [r24]=r9 /* ar.csd */
+ st8 [r25]=r10 /* ar.ssd */
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_nested
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_irq
+ ;;
+END(vmx_interrupt)
+
+ .org vmx_ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(vmx_virtual_exirq)
+ VMX_DBG_FAULT(13)
+ mov r31=pr
+ mov r19=13
+ br.sptk vmx_dispatch_vexirq
+END(vmx_virtual_exirq)
+
+ .org vmx_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(14)
+ VMX_FAULT(14)
+
+
+ .org vmx_ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(15)
+ VMX_FAULT(15)
+
+
+ .org vmx_ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(16)
+ VMX_FAULT(16)
+
+ .org vmx_ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(17)
+ VMX_FAULT(17)
+
+ .org vmx_ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(18)
+ VMX_FAULT(18)
+
+ .org vmx_ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ VMX_DBG_FAULT(19)
+ VMX_FAULT(19)
+
+ .org vmx_ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(vmx_page_not_present)
+ VMX_REFLECT(20)
+END(vmx_page_not_present)
+
+ .org vmx_ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(vmx_key_permission)
+ VMX_REFLECT(21)
+END(vmx_key_permission)
+
+ .org vmx_ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(vmx_iaccess_rights)
+ VMX_REFLECT(22)
+END(vmx_iaccess_rights)
+
+ .org vmx_ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(vmx_daccess_rights)
+ VMX_REFLECT(23)
+END(vmx_daccess_rights)
+
+ .org vmx_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(vmx_general_exception)
+ VMX_FAULT(24)
+// VMX_REFLECT(24)
+END(vmx_general_exception)
+
+ .org vmx_ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(vmx_disabled_fp_reg)
+ VMX_REFLECT(25)
+END(vmx_disabled_fp_reg)
+
+ .org vmx_ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(vmx_nat_consumption)
+ VMX_REFLECT(26)
+END(vmx_nat_consumption)
+
+ .org vmx_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(vmx_speculation_vector)
+ VMX_REFLECT(27)
+END(vmx_speculation_vector)
+
+ .org vmx_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(28)
+ VMX_FAULT(28)
+
+ .org vmx_ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(vmx_debug_vector)
+ VMX_DBG_FAULT(29)
+ VMX_FAULT(29)
+END(vmx_debug_vector)
+
+ .org vmx_ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(vmx_unaligned_access)
+ VMX_REFLECT(30)
+END(vmx_unaligned_access)
+
+ .org vmx_ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(vmx_unsupported_data_reference)
+ VMX_REFLECT(31)
+END(vmx_unsupported_data_reference)
+
+ .org vmx_ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(vmx_floating_point_fault)
+ VMX_REFLECT(32)
+END(vmx_floating_point_fault)
+
+ .org vmx_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(vmx_floating_point_trap)
+ VMX_REFLECT(33)
+END(vmx_floating_point_trap)
+
+ .org vmx_ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(vmx_lower_privilege_trap)
+ VMX_REFLECT(34)
+END(vmx_lower_privilege_trap)
+
+ .org vmx_ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(vmx_taken_branch_trap)
+ VMX_REFLECT(35)
+END(vmx_taken_branch_trap)
+
+ .org vmx_ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(vmx_single_step_trap)
+ VMX_REFLECT(36)
+END(vmx_single_step_trap)
+
+ .org vmx_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(vmx_virtualization_fault)
+ VMX_DBG_FAULT(37)
+ mov r31=pr
+ mov r19=37
+ br.sptk vmx_dispatch_virtualization_fault
+END(vmx_virtualization_fault)
+
+ .org vmx_ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(38)
+ VMX_FAULT(38)
+
+ .org vmx_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(39)
+ VMX_FAULT(39)
+
+ .org vmx_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(40)
+ VMX_FAULT(40)
+
+ .org vmx_ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(41)
+ VMX_FAULT(41)
+
+ .org vmx_ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(42)
+ VMX_FAULT(42)
+
+ .org vmx_ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(43)
+ VMX_FAULT(43)
+
+ .org vmx_ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(44)
+ VMX_FAULT(44)
+
+ .org vmx_ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(vmx_ia32_exception)
+ VMX_DBG_FAULT(45)
+ VMX_FAULT(45)
+END(vmx_ia32_exception)
+
+ .org vmx_ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(vmx_ia32_intercept)
+ VMX_DBG_FAULT(46)
+ VMX_FAULT(46)
+END(vmx_ia32_intercept)
+
+ .org vmx_ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ENTRY(vmx_ia32_interrupt)
+ VMX_DBG_FAULT(47)
+ VMX_FAULT(47)
+END(vmx_ia32_interrupt)
+
+ .org vmx_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(48)
+ VMX_FAULT(48)
+
+ .org vmx_ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(49)
+ VMX_FAULT(49)
+
+ .org vmx_ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(50)
+ VMX_FAULT(50)
+
+ .org vmx_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(51)
+ VMX_FAULT(51)
+
+ .org vmx_ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(52)
+ VMX_FAULT(52)
+
+ .org vmx_ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(53)
+ VMX_FAULT(53)
+
+ .org vmx_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(54)
+ VMX_FAULT(54)
+
+ .org vmx_ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(55)
+ VMX_FAULT(55)
+
+ .org vmx_ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(56)
+ VMX_FAULT(56)
+
+ .org vmx_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(57)
+ VMX_FAULT(57)
+
+ .org vmx_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(58)
+ VMX_FAULT(58)
+
+ .org vmx_ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(59)
+ VMX_FAULT(59)
+
+ .org vmx_ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(60)
+ VMX_FAULT(60)
+
+ .org vmx_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(61)
+ VMX_FAULT(61)
+
+ .org vmx_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(62)
+ VMX_FAULT(62)
+
+ .org vmx_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(63)
+ VMX_FAULT(63)
+
+ .org vmx_ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(64)
+ VMX_FAULT(64)
+
+ .org vmx_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(65)
+ VMX_FAULT(65)
+
+ .org vmx_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(66)
+ VMX_FAULT(66)
+
+ .org vmx_ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ VMX_DBG_FAULT(67)
+ VMX_FAULT(67)
+
+ .org vmx_ia64_ivt+0x8000
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+
+ENTRY(vmx_dispatch_reflection)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ VMX_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,4,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out2=cr.iim
+ mov out3=r15
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15) ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_reflect_interruption
+END(vmx_dispatch_reflection)
+
+ENTRY(vmx_dispatch_virtualization_fault)
+ VMX_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!)
+ mov out0=r13 //vcpu
+ mov out1=r4 //cause
+ mov out2=r5 //opcode
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15) ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_emulate
+END(vmx_dispatch_virtualization_fault)
+
+
+ENTRY(vmx_dispatch_vexirq)
+ VMX_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,1,0
+ mov out0=r13
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15) ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_vexirq
+END(vmx_dispatch_vexirq)
+
+ENTRY(vmx_dispatch_tlb_miss)
+ VMX_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=r13
+ mov out1=r15
+ mov out2=cr.ifa
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15) ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_hpw_miss
+END(vmx_dispatch_tlb_miss)
+
+
+ENTRY(vmx_dispatch_break_fault)
+ VMX_SAVE_MIN_WITH_COVER_R19
+ ;;
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15)ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_break
+ ;;
+END(vmx_dispatch_break_fault)
+
+
+ENTRY(vmx_hypercall_dispatch)
+ VMX_SAVE_MIN_WITH_COVER
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ (p15) ssm psr.i // restore psr.i
+ adds r3=16,r2 // set up second base pointer
+ ;;
+ VMX_SAVE_REST
+ ;;
+ movl r14=ia64_leave_hypervisor
+ movl r2=hyper_call_table
+ ;;
+ mov rp=r14
+ shladd r2=r15,3,r2
+ ;;
+ ld8 r2=[r2]
+ ;;
+ mov b6=r2
+ ;;
+ br.call.sptk.many b6=b6
+ ;;
+END(vmx_hypercall_dispatch)
+
+
+
+ENTRY(vmx_dispatch_interrupt)
+ VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ mov out0=cr.ivr // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
+
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ (p15) ssm psr.i
+ adds r3=16,r2 // set up second base pointer for SAVE_REST
+ ;;
+ VMX_SAVE_REST
+ movl r14=ia64_leave_hypervisor
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=vmx_ia64_handle_irq
+END(vmx_dispatch_interrupt)
+
+
+
+ .rodata
+ .align 8
+ .globl hyper_call_table
+hyper_call_table:
+ data8 hyper_not_support //hyper_set_trap_table /* 0 */
+ data8 hyper_mmu_update
+ data8 hyper_not_support //hyper_set_gdt
+ data8 hyper_not_support //hyper_stack_switch
+ data8 hyper_not_support //hyper_set_callbacks
+ data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */
+ data8 hyper_sched_op
+ data8 hyper_dom0_op
+ data8 hyper_not_support //hyper_set_debugreg
+ data8 hyper_not_support //hyper_get_debugreg
+ data8 hyper_not_support //hyper_update_descriptor /* 10 */
+ data8 hyper_not_support //hyper_set_fast_trap
+ data8 hyper_dom_mem_op
+ data8 hyper_not_support //hyper_multicall
+ data8 hyper_not_support //hyper_update_va_mapping
+ data8 hyper_not_support //hyper_set_timer_op /* 15 */
+ data8 hyper_event_channel_op
+ data8 hyper_xen_version
+ data8 hyper_not_support //hyper_console_io
+ data8 hyper_not_support //hyper_physdev_op
+ data8 hyper_not_support //hyper_grant_table_op /* 20 */
+ data8 hyper_not_support //hyper_vm_assist
+ data8 hyper_not_support //hyper_update_va_mapping_otherdomain
+ data8 hyper_not_support //hyper_switch_vm86
+ data8 hyper_not_support //hyper_boot_vcpu
+ data8 hyper_not_support //hyper_ni_hypercall /* 25 */
+ data8 hyper_not_support //hyper_mmuext_op
+ data8 hyper_lock_page
+ data8 hyper_set_shared_page
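
vmx_hypercall_dispatch above indexes this table with the hypercall number in r15: shladd scales the number by 8 (one data8 slot), ld8 fetches the handler address, and the call goes through b6. A minimal C sketch of the same table-driven dispatch, using a hypothetical handler signature and table name (a bounds check is added here for safety; the assembly does not range-check):

#include <stdint.h>

/* Hypothetical handler type; the real entries are the hyper_* routines
 * listed in hyper_call_table above. */
typedef long (*hypercall_fn_t)(void);

#define EX_NR_HYPERCALLS 29
extern hypercall_fn_t example_hyper_call_table[EX_NR_HYPERCALLS];

/* C equivalent of "shladd r2=r15,3,r2; ld8 r2=[r2]; mov b6=r2;
 * br.call b6": scale the hypercall number by the size of a data8 slot,
 * load the function pointer, and call through it. */
static long dispatch_hypercall(unsigned long nr)
{
    if (nr >= EX_NR_HYPERCALLS)
        return -1;                          /* out of range: reject */
    return example_hyper_call_table[nr]();  /* array indexing does the *8 */
}
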
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_minstate.h
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_minstate.h Thu Sep 1 18:46:28 2005
@@ -0,0 +1,333 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_minstate.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/mmu_context.h>
+#include <asm/offsets.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/vmx_vpd.h>
+#include <asm/cache.h>
+#include "entry.h"
+
+#define VMX_MINSTATE_START_SAVE_MIN \
+ mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+ ;; \
+ mov.m r28=ar.rnat; \
+ addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
+ ;; \
+ lfetch.fault.excl.nt1 [r22]; \
+ addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+ mov r23=ar.bspstore; /* save ar.bspstore */ \
+ ;; \
+ mov ar.bspstore=r22; /* switch to kernel RBS */ \
+ ;; \
+ mov r18=ar.bsp; \
+ mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+
+
+#define VMX_MINSTATE_END_SAVE_MIN \
+ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
+ ;;
+
+
+#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
+ /* begin to call pal vps sync_read and cleanup psr.pl */ \
+ add r25=IA64_VPD_BASE_OFFSET, r21; \
+ movl r20=__vsa_base; \
+ ;; \
+ ld8 r25=[r25]; /* read vpd base */ \
+ ld8 r20=[r20]; /* read entry point */ \
+ ;; \
+ mov r6=r25; \
+ add r20=PAL_VPS_SYNC_READ,r20; \
+ ;; \
+{ .mii; \
+ add r22=VPD(VPSR),r25; \
+ mov r24=ip; \
+ mov b0=r20; \
+ ;; \
+}; \
+{ .mmb; \
+ add r24 = 0x20, r24; \
+ mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \
+ br.cond.sptk b0; /* call the service */ \
+ ;; \
+}; \
+ ld8 r7=[r22]; \
+ /* deposit ipsr bit cpl into vpd.vpsr, since epc will change */ \
+ extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \
+ ;; \
+ dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \
+ ;; \
+ extr.u r30=r16, IA64_PSR_BE_BIT, 5; \
+ ;; \
+ dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \
+ ;; \
+ extr.u r30=r16, IA64_PSR_RI_BIT, 2; \
+ ;; \
+ dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \
+ ;; \
+ st8 [r22]=r7; \
+ ;;
+
+
+
+#define IA64_CURRENT_REG IA64_KR(CURRENT) /* r21 is reserved for current pointer */
+//#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=IA64_CURRENT_REG
+#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=r21
+
+/*
+ * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * r2 = points to &pt_regs.r16
+ * r8 = contents of ar.ccv
+ * r9 = contents of ar.csd
+ * r10 = contents of ar.ssd
+ * r11 = FPSR_DEFAULT
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+/* switch rr7 */ \
+ movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
+ movl r17=(7<<61); \
+ movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
+ movl r22=(6<<61); \
+ movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \
+ movl r23=(5<<61); \
+ ;; \
+ mov rr[r17]=r16; \
+ mov rr[r22]=r20; \
+ mov rr[r23]=r18; \
+ ;; \
+ srlz.i; \
+ ;; \
+ VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
+ mov r27=ar.rsc; /* M */ \
+ mov r20=r1; /* A */ \
+ mov r26=ar.unat; /* M */ \
+ mov r29=cr.ipsr; /* M */ \
+ mov r18=cr.isr; \
+ COVER; /* B;; (or nothing) */ \
+ ;; \
+ tbit.z p6,p0=r29,IA64_PSR_VM_BIT; \
+ tbit.nz.or p6,p0 = r18,39; \
+ ;; \
+(p6) br.sptk.few vmx_panic; \
+ tbit.z p0,p15=r29,IA64_PSR_I_BIT; \
+ mov r1=r16; \
+/* mov r21=r16; */ \
+ /* switch from user to kernel RBS: */ \
+ ;; \
+ invala; /* M */ \
+ SAVE_IFS; \
+ ;; \
+ VMX_MINSTATE_START_SAVE_MIN \
+ adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
+ adds r16=PT(CR_IPSR),r1; \
+ ;; \
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
+ st8 [r16]=r29; /* save cr.ipsr */ \
+ ;; \
+ lfetch.fault.excl.nt1 [r17]; \
+ tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
+ mov r29=b0 \
+ ;; \
+ adds r16=PT(R8),r1; /* initialize first base pointer */ \
+ adds r17=PT(R9),r1; /* initialize second base pointer */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r8,16; \
+.mem.offset 8,0; st8.spill [r17]=r9,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r10,24; \
+.mem.offset 8,0; st8.spill [r17]=r11,24; \
+ ;; \
+ mov r8=ar.pfs; /* I */ \
+ mov r9=cr.iip; /* M */ \
+ mov r10=ar.fpsr; /* M */ \
+ ;; \
+ st8 [r16]=r9,16; /* save cr.iip */ \
+ st8 [r17]=r30,16; /* save cr.ifs */ \
+ sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
+ ;; \
+ st8 [r16]=r26,16; /* save ar.unat */ \
+ st8 [r17]=r8,16; /* save ar.pfs */ \
+ shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
+ ;; \
+ st8 [r16]=r27,16; /* save ar.rsc */ \
+ st8 [r17]=r28,16; /* save ar.rnat */ \
+ ;; /* avoid RAW on r16 & r17 */ \
+ st8 [r16]=r23,16; /* save ar.bspstore */ \
+ st8 [r17]=r31,16; /* save predicates */ \
+ ;; \
+ st8 [r16]=r29,16; /* save b0 */ \
+ st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
+.mem.offset 8,0; st8.spill [r17]=r12,16; \
+ adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r13,16; \
+.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \
+ mov r13=r21; /* establish `current' */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r15,16; \
+.mem.offset 8,0; st8.spill [r17]=r14,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16]=r2,16; \
+.mem.offset 8,0; st8.spill [r17]=r3,16; \
+ adds r2=PT(F6),r1; \
+ ;; \
+ .mem.offset 0,0; st8.spill [r16]=r4,16; \
+ .mem.offset 8,0; st8.spill [r17]=r5,16; \
+ ;; \
+ .mem.offset 0,0; st8.spill [r16]=r6,16; \
+ .mem.offset 8,0; st8.spill [r17]=r7,16; \
+ mov r20=ar.ccv; \
+ ;; \
+ mov r18=cr.iipa; \
+ mov r4=cr.isr; \
+ mov r22=ar.unat; \
+ ;; \
+ st8 [r16]=r18,16; \
+ st8 [r17]=r4; \
+ ;; \
+ adds r16=PT(EML_UNAT),r1; \
+ adds r17=PT(AR_CCV),r1; \
+ ;; \
+ st8 [r16]=r22,8; \
+ st8 [r17]=r20; \
+ mov r4=r24; \
+ mov r5=r25; \
+ ;; \
+ st8 [r16]=r0; \
+ EXTRA; \
+ mov r9=ar.csd; \
+ mov r10=ar.ssd; \
+ movl r11=FPSR_DEFAULT; /* L-unit */ \
+ movl r1=__gp; /* establish kernel global pointer */ \
+ ;; \
+ PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
+ VMX_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * r2: points to &pt_regs.f6
+ * r3: points to &pt_regs.f7
+ * r4, r5: scratch
+ * r6: points to vpd
+ * r7: vpsr
+ * r9: contents of ar.csd
+ * r10: contents of ar.ssd
+ * r11: FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define VMX_SAVE_REST \
+ tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \
+ ;; \
+(pBN0) add r4=VPD(VBGR),r6; \
+(pBN0) add r5=VPD(VBGR)+0x8,r6; \
+(pBN0) add r7=VPD(VBNAT),r6; \
+ ;; \
+(pBN1) add r5=VPD(VGR)+0x8,r6; \
+(pBN1) add r4=VPD(VGR),r6; \
+(pBN1) add r7=VPD(VNAT),r6; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r16,16; \
+.mem.offset 8,0; st8.spill [r5]=r17,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r18,16; \
+.mem.offset 8,0; st8.spill [r5]=r19,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r20,16; \
+.mem.offset 8,0; st8.spill [r5]=r21,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r22,16; \
+.mem.offset 8,0; st8.spill [r5]=r23,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r24,16; \
+.mem.offset 8,0; st8.spill [r5]=r25,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r26,16; \
+.mem.offset 8,0; st8.spill [r5]=r27,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r28,16; \
+.mem.offset 8,0; st8.spill [r5]=r29,16; \
+ mov r26=b6; \
+ ;; \
+.mem.offset 0,0; st8.spill [r4]=r30,16; \
+.mem.offset 8,0; st8.spill [r5]=r31,16; \
+ mov r27=b7; \
+ ;; \
+ mov r30=ar.unat; \
+ ;; \
+ st8 [r7]=r30; \
+ mov ar.fpsr=r11; /* M-unit */ \
+ ;; \
+ stf.spill [r2]=f6,32; \
+ stf.spill [r3]=f7,32; \
+ ;; \
+ stf.spill [r2]=f8,32; \
+ stf.spill [r3]=f9,32; \
+ ;; \
+ stf.spill [r2]=f10; \
+ stf.spill [r3]=f11; \
+ ;; \
+ adds r2=PT(B6)-PT(F10),r2; \
+ adds r3=PT(B7)-PT(F11),r3; \
+ ;; \
+ st8 [r2]=r26,16; /* b6 */ \
+ st8 [r3]=r27,16; /* b7 */ \
+ ;; \
+ st8 [r2]=r9; /* ar.csd */ \
+ st8 [r3]=r10; /* ar.ssd */ \
+ ;;
+
+#define VMX_SAVE_MIN_WITH_COVER VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,)
+#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
+#define VMX_SAVE_MIN VMX_DO_SAVE_MIN( , mov r30=r0, )
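
PAL_VSA_SYNC_READ_CLEANUP_PSR_PL above copies the cpl field, the 5-bit group starting at the be bit, and the ri field from the machine cr.ipsr into vpd.vpsr using extr.u/dep pairs. A minimal C sketch of that extract-and-deposit idiom; the bit positions below are placeholders standing in for the real IA64_PSR_* macros:

#include <stdint.h>

/* Generic extract/deposit helpers matching the ia64 extr.u / dep idiom. */
static inline uint64_t extr_u(uint64_t val, unsigned pos, unsigned len)
{
    return (val >> pos) & ((1UL << len) - 1);
}

static inline uint64_t dep(uint64_t field, uint64_t target,
                           unsigned pos, unsigned len)
{
    uint64_t mask = ((1UL << len) - 1) << pos;

    return (target & ~mask) | ((field << pos) & mask);
}

/* Placeholder bit positions; the real ones are the IA64_PSR_* macros. */
#define EX_PSR_BE_BIT    1
#define EX_PSR_CPL0_BIT 32
#define EX_PSR_RI_BIT   41

/* Copy cpl, the 5-bit group starting at be, and ri from the machine
 * ipsr into vpd.vpsr, as the macro does after the PAL sync_read call. */
static uint64_t sync_vpsr_from_ipsr(uint64_t vpsr, uint64_t ipsr)
{
    vpsr = dep(extr_u(ipsr, EX_PSR_CPL0_BIT, 2), vpsr, EX_PSR_CPL0_BIT, 2);
    vpsr = dep(extr_u(ipsr, EX_PSR_BE_BIT, 5),   vpsr, EX_PSR_BE_BIT, 5);
    vpsr = dep(extr_u(ipsr, EX_PSR_RI_BIT, 2),   vpsr, EX_PSR_RI_BIT, 2);
    return vpsr;
}
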
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_phy_mode.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,433 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_phy_mode.c: emulating domain physical mode.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Arun Sharma (arun.sharma@xxxxxxxxx)
+ * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
+ * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
+ */
+
+
+#include <asm/processor.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_phy_mode.h>
+#include <xen/sched.h>
+#include <asm/pgtable.h>
+
+
+int valid_mm_mode[8] = {
+ GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */
+ INV_MODE,
+ INV_MODE,
+ GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */
+ INV_MODE,
+ GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */
+ INV_MODE,
+ GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/
+};
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - The RSE is placed in lazy mode when emulating guest partial mode
+ * - If the gva happens to fall in rr0 or rr4, the only allowed case is
+ * identity mapping (gva=gpa); otherwise panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+ /* 2004/09/12(Kevin): Allow switch to self */
+ /*
+ * (it,dt,rt): (0,0,0) -> (1,1,1)
+ * This kind of transition usually occurs in the very early
+ * stage of Linux boot up procedure. Another case is in efi
+ * and pal calls. (see "arch/ia64/kernel/head.S")
+ *
+ * (it,dt,rt): (0,0,0) -> (0,1,1)
+ * This kind of transition is found when OSYa exits efi boot
+ * service. Since gva = gpa in this case (same region),
+ * data access can be satisfied even though the itlb entry for
+ * physical emulation is hit.
+ */
+ SW_SELF,0, 0, SW_NOP, 0, 0, 0, SW_P2V,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /*
+ * (it,dt,rt): (0,1,1) -> (1,1,1)
+ * This kind of transition is found in OSYa.
+ *
+ * (it,dt,rt): (0,1,1) -> (0,0,0)
+ * This kind of transition is found in OSYa
+ */
+ SW_NOP, 0, 0, SW_SELF,0, 0, 0, SW_P2V,
+ /* (1,0,0)->(1,1,1) */
+ 0, 0, 0, 0, 0, 0, 0, SW_P2V,
+ /*
+ * (it,dt,rt): (1,0,1) -> (1,1,1)
+ * This kind of transition usually occurs when Linux returns
+ * from the low level TLB miss handlers.
+ * (see "arch/ia64/kernel/ivt.S")
+ */
+ 0, 0, 0, 0, 0, SW_SELF,0, SW_P2V,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ /*
+ * (it,dt,rt): (1,1,1) -> (1,0,1)
+ * This kind of transition usually occurs in Linux low level
+ * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+ *
+ * (it,dt,rt): (1,1,1) -> (0,0,0)
+ * This kind of transition usually occurs in pal and efi calls,
+ * which requires running in physical mode.
+ * (see "arch/ia64/kernel/head.S")
+ * (1,1,1)->(1,0,0)
+ */
+
+ SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF,
+};
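
Both tables above are indexed by a 3-bit value packed from psr.it/dt/rt, and a mode transition costs a single two-dimensional lookup (see mm_switch_action() further down in this file). A minimal C sketch of that indexing, assuming the packing order implied by the "(it, dt, rt)" comments; the real MODE_IND() macro is defined in the vmx_phy_mode header:

/* The packing order below is an assumption taken from the "(it, dt, rt)"
 * comments on valid_mm_mode; the real MODE_IND() macro is authoritative. */
struct psr_mode_bits {
    unsigned it:1, dt:1, rt:1;
};

static inline int mode_index(struct psr_mode_bits psr)
{
    return (psr.it << 2) | (psr.dt << 1) | psr.rt;
}

/* A mode transition costs a single table lookup, exactly as
 * mm_switch_action() does further down in this file. */
static int example_switch_action(struct psr_mode_bits old_psr,
                                 struct psr_mode_bits new_psr)
{
    extern int mm_switch_table[8][8];

    return mm_switch_table[mode_index(old_psr)][mode_index(new_psr)];
}
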
+
+void
+physical_mode_init(VCPU *vcpu)
+{
+ UINT64 psr;
+ struct domain * d = vcpu->domain;
+
+ vcpu->arch.old_rsc = 0;
+ vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
+#if 0
+void
+physical_itlb_miss_domn(VCPU *vcpu, u64 vadr)
+{
+ u64 psr;
+ IA64_PSR vpsr;
+ u64 mppn,gppn,mpp1,gpp1;
+ struct domain *d;
+ static u64 test=0;
+ d=vcpu->domain;
+ if(test)
+ panic("domn physical itlb miss happen\n");
+ else
+ test=1;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ gppn=(vadr<<1)>>13;
+ mppn = get_mfn(DOMID_SELF,gppn,1);
+ mppn=(mppn<<12)|(vpsr.cpl<<7);
+ gpp1=0;
+ mpp1 = get_mfn(DOMID_SELF,gpp1,1);
+ mpp1=(mpp1<<12)|(vpsr.cpl<<7);
+// if(vadr>>63)
+// mppn |= PHY_PAGE_UC;
+// else
+// mppn |= PHY_PAGE_WB;
+ mpp1 |= PHY_PAGE_WB;
+ psr=ia64_clear_ic();
+ ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+ ia64_srlz_i();
+ ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
+ ia64_stop();
+ ia64_srlz_i();
+ ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
+ ia64_srlz_i();
+ ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
+ ia64_stop();
+ ia64_srlz_i();
+ ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+ ia64_srlz_i();
+ ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
+ ia64_stop();
+ ia64_srlz_i();
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ return;
+}
+#endif
+
+void
+physical_itlb_miss(VCPU *vcpu, u64 vadr)
+{
+ physical_itlb_miss_dom0(vcpu, vadr);
+}
+
+
+void
+physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr)
+{
+ u64 psr;
+ IA64_PSR vpsr;
+ u64 mppn,gppn;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ gppn=(vadr<<1)>>13;
+ mppn = get_mfn(DOMID_SELF,gppn,1);
+ mppn=(mppn<<12)|(vpsr.cpl<<7);
+// if(vadr>>63)
+// mppn |= PHY_PAGE_UC;
+// else
+ mppn |= PHY_PAGE_WB;
+
+ psr=ia64_clear_ic();
+ ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ return;
+}
+
+
+void
+physical_dtlb_miss(VCPU *vcpu, u64 vadr)
+{
+ u64 psr;
+ IA64_PSR vpsr;
+ u64 mppn,gppn;
+// if(vcpu->domain!=dom0)
+// panic("dom n physical dtlb miss happen\n");
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ gppn=(vadr<<1)>>13;
+ mppn = get_mfn(DOMID_SELF,gppn,1);
+ mppn=(mppn<<12)|(vpsr.cpl<<7);
+ if(vadr>>63)
+ mppn |= PHY_PAGE_UC;
+ else
+ mppn |= PHY_PAGE_WB;
+
+ psr=ia64_clear_ic();
+ ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ return;
+}
+
+void
+vmx_init_all_rr(VCPU *vcpu)
+{
+ VMX(vcpu,vrr[VRN0]) = 0x38;
+ VMX(vcpu,vrr[VRN1]) = 0x38;
+ VMX(vcpu,vrr[VRN2]) = 0x38;
+ VMX(vcpu,vrr[VRN3]) = 0x38;
+ VMX(vcpu,vrr[VRN4]) = 0x38;
+ VMX(vcpu,vrr[VRN5]) = 0x38;
+ VMX(vcpu,vrr[VRN6]) = 0x60;
+ VMX(vcpu,vrr[VRN7]) = 0x60;
+
+ VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38);
+ VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60);
+ VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60);
+}
+
+void
+vmx_load_all_rr(VCPU *vcpu)
+{
+ unsigned long psr;
+ ia64_rr phy_rr;
+
+ psr = ia64_clear_ic();
+
+ phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
+ phy_rr.ve = 1;
+
+ /* WARNING: not allow co-exist of both virtual mode and physical
+ * mode in same region
+ */
+ if (is_physical_mode(vcpu)) {
+ if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+ panic("Unexpected domain switch in phy emul\n");
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+ ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval);
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+ ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval);
+ } else {
+ ia64_set_rr((VRN0 << VRN_SHIFT),
+ vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0])));
+ ia64_set_rr((VRN4 << VRN_SHIFT),
+ vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4])));
+ }
+
+#if 1
+ /* rr567 will be postponed to last point when resuming back to guest */
+ ia64_set_rr((VRN1 << VRN_SHIFT),
+ vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1])));
+ ia64_set_rr((VRN2 << VRN_SHIFT),
+ vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2])));
+ ia64_set_rr((VRN3 << VRN_SHIFT),
+ vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3])));
+#endif
+ ia64_srlz_d();
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+}
+
+void
+switch_to_physical_rid(VCPU *vcpu)
+{
+ UINT64 psr;
+ ia64_rr phy_rr;
+
+ phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
+ phy_rr.ve = 1;
+
+ /* Save original virtual mode rr[0] and rr[4] */
+ psr=ia64_clear_ic();
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+ ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval);
+ ia64_srlz_d();
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+ ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval);
+ ia64_srlz_d();
+
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ return;
+}
+
+
+void
+switch_to_virtual_rid(VCPU *vcpu)
+{
+ UINT64 psr;
+ ia64_rr mrr;
+
+ psr=ia64_clear_ic();
+
+ mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT);
+ ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
+ ia64_srlz_d();
+ mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT);
+ ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
+ ia64_srlz_d();
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ return;
+}
+
+static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr)
+{
+ return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void
+switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
+{
+ int act;
+ REGS * regs=vcpu_regs(vcpu);
+ act = mm_switch_action(old_psr, new_psr);
+ switch (act) {
+ case SW_V2P:
+ vcpu->arch.old_rsc = regs->ar_rsc;
+ switch_to_physical_rid(vcpu);
+ /*
+ * Set the RSE to enforced lazy mode, to prevent active RSE save/restore
+ * while the guest is in physical mode.
+ */
+ regs->ar_rsc &= ~(IA64_RSC_MODE);
+ vcpu->arch.mode_flags |= GUEST_IN_PHY;
+ break;
+ case SW_P2V:
+ switch_to_virtual_rid(vcpu);
+ /*
+ * recover old mode which is saved when entering
+ * guest physical mode
+ */
+ regs->ar_rsc = vcpu->arch.old_rsc;
+ vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+ break;
+ case SW_SELF:
+ printf("Switch to self-0x%lx!!! MM mode doesn't change...\n",
+ old_psr.val);
+ break;
+ case SW_NOP:
+ printf("No action required for mode transition: (0x%lx -> 0x%lx)\n",
+ old_psr.val, new_psr.val);
+ break;
+ default:
+ /* Sanity check */
+ printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
+ panic("Unexpected virtual <--> physical mode transition");
+ break;
+ }
+ return;
+}
+
+
+
+/*
+ * In physical mode, inserting tc/tr entries for regions 0 and 4 uses
+ * RID[0] and RID[4], which are reserved for physical mode emulation.
+ * However, those inserted tc/tr entries need the rid for virtual
+ * mode, so the original virtual rid must be restored before the
+ * insert.
+ *
+ * Operations which require such a switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All of the above need the actual virtual rid for the destination entry.
+ */
+
+void
+check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
+{
+
+ if ( (old_psr.dt != new_psr.dt ) ||
+ (old_psr.it != new_psr.it ) ||
+ (old_psr.rt != new_psr.rt )
+ ) {
+ switch_mm_mode (vcpu, old_psr, new_psr);
+ }
+
+ return;
+}
+
+
+/*
+ * In physical mode, inserting tc/tr entries for regions 0 and 4 uses
+ * RID[0] and RID[4], which are reserved for physical mode emulation.
+ * However, those inserted tc/tr entries need the rid for virtual
+ * mode, so the original virtual rid must be restored before the
+ * insert.
+ *
+ * Operations which require such a switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All of the above need the actual virtual rid for the destination entry.
+ */
+
+void
+prepare_if_physical_mode(VCPU *vcpu)
+{
+ if (is_physical_mode(vcpu)) {
+ vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+ switch_to_virtual_rid(vcpu);
+ }
+ return;
+}
+
+/* Recover always follows prepare */
+void
+recover_if_physical_mode(VCPU *vcpu)
+{
+ if (is_physical_mode(vcpu)) {
+ vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+ switch_to_physical_rid(vcpu);
+ }
+ return;
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_process.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_process.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,375 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_process.c: handling VMX architecture-related VM exits
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h> /* FOR struct ia64_sal_retval */
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/regionreg.h>
+#include <asm/privop.h>
+#include <asm/ia64_int.h>
+#include <asm/hpsim_ssc.h>
+#include <asm/dom_fw.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/kregs.h>
+#include <asm/vmx.h>
+#include <asm/vmx_mm_def.h>
+#include <xen/mm.h>
+/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
+
+
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+extern void rnat_consumption (VCPU *vcpu);
+#define DOMN_PAL_REQUEST 0x110000
+IA64FAULT
+vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
+{
+ static int first_time = 1;
+ struct domain *d = (struct domain *) current->domain;
+ struct vcpu *v = (struct vcpu *) current;
+ extern unsigned long running_on_sim;
+ unsigned long i, sal_param[8];
+
+#if 0
+ if (first_time) {
+ if (platform_is_hp_ski()) running_on_sim = 1;
+ else running_on_sim = 0;
+ first_time = 0;
+ }
+ if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
+ if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
+ else do_ssc(vcpu_get_gr(current,36), regs);
+ }
+#endif
+ if (iim == d->arch.breakimm) {
+ struct ia64_sal_retval x;
+ switch (regs->r2) {
+ case FW_HYPERCALL_PAL_CALL:
+ //printf("*** PAL hypercall: index=%d\n",regs->r28);
+ //FIXME: This should call a C routine
+ x = pal_emulator_static(VMX_VPD(v, vgr[12]));
+ regs->r8 = x.status; regs->r9 = x.v0;
+ regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+ if (regs->r8)
+ printk("Failed vpal emulation, with
index:0x%lx\n",
+ VMX_VPD(v, vgr[12]));
+#endif
+ break;
+ case FW_HYPERCALL_SAL_CALL:
+ for (i = 0; i < 8; i++)
+ vmx_vcpu_get_gr(v, 32+i, &sal_param[i]);
+ x = sal_emulator(sal_param[0], sal_param[1],
+ sal_param[2], sal_param[3],
+ sal_param[4], sal_param[5],
+ sal_param[6], sal_param[7]);
+ regs->r8 = x.status; regs->r9 = x.v0;
+ regs->r10 = x.v1; regs->r11 = x.v2;
+#if 0
+ if (regs->r8)
+ printk("Failed vsal emulation, with
index:0x%lx\n",
+ sal_param[0]);
+#endif
+ break;
+ case FW_HYPERCALL_EFI_RESET_SYSTEM:
+ printf("efi.reset_system called ");
+ if (current->domain == dom0) {
+ printf("(by dom0)\n ");
+ (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+ }
+ printf("(not supported for non-0 domain)\n");
+ regs->r8 = EFI_UNSUPPORTED;
+ break;
+ case FW_HYPERCALL_EFI_GET_TIME:
+ {
+ unsigned long *tv, *tc;
+ vmx_vcpu_get_gr(v, 32, &tv);
+ vmx_vcpu_get_gr(v, 33, &tc);
+ printf("efi_get_time(%p,%p) called...",tv,tc);
+ tv = __va(translate_domain_mpaddr(tv));
+ if (tc) tc = __va(translate_domain_mpaddr(tc));
+ regs->r8 = (*efi.get_time)(tv,tc);
+ printf("and returns %lx\n",regs->r8);
+ }
+ break;
+ case FW_HYPERCALL_EFI_SET_TIME:
+ case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+ case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+ // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+ // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
+ // POINTER ARGUMENTS WILL BE VIRTUAL!!
+ case FW_HYPERCALL_EFI_GET_VARIABLE:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+ case FW_HYPERCALL_EFI_SET_VARIABLE:
+ case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+ // FIXME: need fixes in efi.h from 2.6.9
+ regs->r8 = EFI_UNSUPPORTED;
+ break;
+ }
+#if 0
+ if (regs->r8)
+ printk("Failed vgfw emulation, with index:0x%lx\n",
+ regs->r2);
+#endif
+ vmx_vcpu_increment_iip(current);
+ }else if(iim == DOMN_PAL_REQUEST){
+ pal_emul(current);
+ vmx_vcpu_increment_iip(current);
+ } else
+ vmx_reflect_interruption(ifa,isr,iim,11);
+}
+
+static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800,
+ 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000,
+ 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600,
+ 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000,
+ 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00,
+ 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400,
+ 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00,
+ 0x7f00,
+};
+
+
+
+void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim,
+ UINT64 vector)
+{
+ VCPU *vcpu = current;
+ REGS *regs=vcpu_regs(vcpu);
+ UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu);
+ if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){
+ panic("Guest nested fault!");
+ }
+ VPD_CR(vcpu,isr)=isr;
+ VPD_CR(vcpu,iipa) = regs->cr_iip;
+ vector=vec2off[vector];
+ if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+ VPD_CR(vcpu,iim) = iim;
+ else {
+ set_ifa_itir_iha(vcpu,ifa,1,1,1);
+ }
+ inject_guest_interruption(vcpu, vector);
+}
+
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+void leave_hypervisor_tail(struct pt_regs *regs)
+{
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
+ // FIXME: Will this work properly if doing an RFI???
+ if (!is_idle_task(d) ) { // always comes from guest
+ extern void vmx_dorfirfi(void);
+ struct pt_regs *user_regs = vcpu_regs(current);
+
+ if (local_softirq_pending())
+ do_softirq();
+ local_irq_disable();
+
+ if (user_regs != regs)
+ printk("WARNING: checking pending interrupt in nested
interrupt!!!\n");
+
+ /* VMX Domain N has other interrupt source, saying DM */
+ if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags))
+ vmx_intr_assist(v);
+
+ /* FIXME: Check event pending indicator, and set
+ * pending bit if necessary to inject back to guest.
+ * Should be careful about window between this check
+ * and above assist, since IOPACKET_PORT shouldn't be
+ * injected into vmx domain.
+ *
+ * Now hardcode the vector as 0x10 temporarily
+ */
+ if (event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) {
+ VPD_CR(v, irr[0]) |= 1UL << 0x10;
+ v->arch.irq_new_pending = 1;
+ }
+
+ if ( v->arch.irq_new_pending ) {
+ v->arch.irq_new_pending = 0;
+ vmx_check_pending_irq(v);
+ }
+ }
+}
+
+extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
+
+/* We came here because the H/W VHPT walker failed to find an entry */
+void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr)
+{
+ IA64_PSR vpsr;
+ CACHE_LINE_TYPE type;
+ u64 vhpt_adr;
+ ISR misr;
+ ia64_rr vrr;
+ REGS *regs;
+ thash_cb_t *vtlb, *vhpt;
+ thash_data_t *data, me;
+ vtlb=vmx_vcpu_get_vtlb(vcpu);
+#ifdef VTLB_DEBUG
+ check_vtlb_sanity(vtlb);
+ dump_vtlb(vtlb);
+#endif
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ regs = vcpu_regs(vcpu);
+ misr.val=regs->cr_isr;
+/* TODO
+ if(vcpu->domain->id && vec == 2 &&
+ vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){
+ emulate_ins(&v);
+ return;
+ }
+*/
+
+ if((vec==1)&&(!vpsr.it)){
+ physical_itlb_miss(vcpu, vadr);
+ return;
+ }
+ if((vec==2)&&(!vpsr.dt)){
+ if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){
+ emulate_io_inst(vcpu,((vadr<<1)>>1),4); // UC
+ }else{
+ physical_dtlb_miss(vcpu, vadr);
+ }
+ return;
+ }
+ vrr = vmx_vcpu_rr(vcpu,vadr);
+ if(vec == 1) type = ISIDE_TLB;
+ else if(vec == 2) type = DSIDE_TLB;
+ else panic("wrong vec\n");
+
+// prepare_if_physical_mode(vcpu);
+
+ if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){
+ if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, data->ppn>>(PAGE_SHIFT-12))){
+ vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps);
+ emulate_io_inst(vcpu, vadr, data->ma);
+ return IA64_FAULT;
+ }
+ if ( data->ps != vrr.ps ) {
+ machine_tlb_insert(vcpu, data);
+ }
+ else {
+ thash_insert(vtlb->ts->vhpt,data,vadr);
+ }
+ }else if(type == DSIDE_TLB){
+ if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ alt_dtlb(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ } else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ dtlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }else{
+ if(vpsr.ic){
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ dvhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(misr.sp){
+ //TODO lds emulation
+ panic("Don't support speculation load");
+ }else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+ }
+ }
+ }else if(type == ISIDE_TLB){
+ if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ alt_itlb(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
+ vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
+ data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
+ if(data){
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ itlb_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }else{
+ if(!vpsr.ic){
+ misr.ni=1;
+ }
+ vmx_vcpu_set_isr(vcpu, misr.val);
+ ivhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ }
+ }
+ }
+}
+
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_support.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_support.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,164 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_support.c: vmx specific support interface.
+ * Copyright (c) 2005, Intel Corporation.
+ * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <public/io/ioreq.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+
+/*
+ * I/O emulation should be atomic from the domain's point of view. However,
+ * while the emulation code is waiting for I/O completion via do_block,
+ * other events such as a DM interrupt, VBD, etc. may arrive and unblock
+ * the current execution flow. So we have to be prepared to re-block if
+ * we were unblocked by a non-I/O-completion event.
+ */
+void vmx_wait_io(void)
+{
+ struct vcpu *v = current;
+ struct domain *d = v->domain;
+ extern void do_block();
+ int port = iopacket_port(d);
+
+ do {
+ if (!test_bit(port,
+ &d->shared_info->evtchn_pending[0]))
+ do_block();
+
+ /* Unblocked because some event arrived. Clear the pending indication
+ * immediately if we decide to go for I/O assist.
+ */
+ if (test_and_clear_bit(port,
+ &d->shared_info->evtchn_pending[0])) {
+ clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+ clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+ vmx_io_assist(v);
+ }
+
+
+ if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+ /*
+             * The latest event is not an I/O completion, so clear the
+             * corresponding selector and pending indication to allow the
+             * real event to come through.
+ */
+ clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+
+            /* Actually a window is left here before the selector is cleared.
+             * However, this window only delays the indication of an incoming
+             * event; nothing is lost. The next loop iteration will check the
+             * I/O channel to close this window.
+ */
+ clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+ }
+ else
+ break;
+ } while (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
+}
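+
+/* Usage sketch (illustrative only; mmio_post_to_dm() below is a hypothetical
+ * helper, not part of this file): a synchronous MMIO emulation path is
+ * expected to look roughly like
+ *
+ *     set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+ *     mmio_post_to_dm(req);   // hand the request to the device model
+ *     vmx_wait_io();          // block/re-block until vmx_io_assist()
+ *                             // clears ARCH_VMX_IO_WAIT
+ */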
+
+/*
+ * The only place to call vmx_io_assist is mmio/legacy_io emulation.
+ * Since I/O emulation is synchronous, it shouldn't be called anywhere
+ * else. This is unlike x86, since IA-64 implements a per-vp stack
+ * without continuation.
+ */
+void vmx_io_assist(struct vcpu *v)
+{
+ vcpu_iodata_t *vio;
+ ioreq_t *p;
+
+ /*
+     * This shared page carries the I/O request between the emulation
+     * code and the device model.
+ */
+ vio = get_vio(v->domain, v->vcpu_id);
+ if (!vio)
+ panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+
+ p = &vio->vp_ioreq;
+
+ if (p->state == STATE_IORESP_HOOK)
+ panic("Not supported: No hook available for DM request\n");
+
+ if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+ if (p->state != STATE_IORESP_READY) {
+            /* We can't do_block here, for the same reason as the other places
+             * that use vmx_wait_io. Simply returning is safe since vmx_wait_io
+             * will try to block again.
+ */
+ return;
+ } else
+ p->state = STATE_INVALID;
+
+ clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+ } else
+        return; /* Spurious event? */
+}
+
+/*
+ * A VMX domainN has two types of interrupt source: the lsapic model within
+ * the HV, and the device model within domain 0 (the service OS). There is
+ * another pending array in the shared page, manipulated directly by the
+ * device model. To conform to the VT-i spec, we have to sync the pending
+ * bits in the shared page into the VPD. This has to be done before checking
+ * for pending interrupts when resuming to the guest. For domain 0, all
+ * interrupt sources come from the HV, so it doesn't require this assist.
+ */
+void vmx_intr_assist(struct vcpu *v)
+{
+ vcpu_iodata_t *vio;
+ struct domain *d = v->domain;
+ extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu,
+ unsigned long *pend_irr);
+ int port = iopacket_port(d);
+
+    /* I/O emulation is atomic, so the execution flow cannot leave
+     * vmx_wait_io while the guest is still waiting for a response.
+ */
+ if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
+ panic("!!!Bad resume to guest before I/O emulation is done.\n");
+
+ /* Clear indicator specific to interrupt delivered from DM */
+ if (test_and_clear_bit(port,
+ &d->shared_info->evtchn_pending[0])) {
+ if (!d->shared_info->evtchn_pending[port >> 5])
+ clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
+
+ if (!v->vcpu_info->evtchn_pending_sel)
+ clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+ }
+
+    /* Even without a pending event, we still need to sync the pending bits
+     * between the DM and the vlsapic. The reason is that interrupt delivery
+     * shares the same event channel as I/O emulation, so the corresponding
+     * indicator may already have been cleared in vmx_wait_io().
+ */
+ vio = get_vio(v->domain, v->vcpu_id);
+ if (!vio)
+ panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
+
+#ifdef V_IOSAPIC_READY
+ vlapic_update_ext_irq(v);
+#else
+    panic("IOSAPIC model is missing in qemu\n");
+#endif
+ return;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_utility.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_utility.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,659 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_utility.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/types.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/vmx_mm_def.h>
+
+
+/*
+ * Return:
+ * 0: not a reserved indirect register
+ * 1: a reserved indirect register
+ */
+int
+is_reserved_indirect_register (
+ int type,
+ int index )
+{
+ switch (type) {
+ case IA64_CPUID:
+ if ( index >= 5 ) {
+ return 1;
+ }
+
+ case IA64_DBR:
+ case IA64_IBR:
+ //bugbugbug:check with pal about the max ibr/dbr!!!!
+ break;
+
+ case IA64_PMC:
+ //bugbugbug:check with pal about the max ibr/dbr!!!!
+ break;
+
+ case IA64_PMD:
+ //bugbugbug:check with pal about the max ibr/dbr!!!!
+ break;
+
+ case IA64_PKR:
+ //bugbugbug:check with pal about the max pkr!!!!
+ break;
+
+ case IA64_RR:
+ //bugbugbug:check with pal about the max rr!!!!
+ break;
+
+ default:
+ panic ("Unsupported instruction!");
+ }
+
+ return 0;
+
+}
+
+/*
+ * Return:
+ *  the input value with all ignored fields cleared to 0
+ */
+u64
+indirect_reg_igfld_MASK (
+ int type,
+ int index,
+ u64 value
+ )
+{
+ u64 nvalue;
+
+ nvalue = value;
+ switch ( type ) {
+ case IA64_CPUID:
+ if ( index == 2 ) {
+ nvalue = 0;
+ }
+ break;
+
+ case IA64_DBR:
+ case IA64_IBR:
+ /* Refer to SDM Vol2 Table 7-1,7-2 */
+ if ( index % 2 != 0) {
+ /* Ignore field: {61:60} */
+ nvalue = value & (~MASK (60, 2));
+ }
+ break;
+ case IA64_PMC:
+ if ( index == 0 ) {
+ /* Ignore field: 3:1 */
+ nvalue = value & (~MASK (1, 3));
+ }
+ break;
+ case IA64_PMD:
+ if ( index >= 4 ) {
+ /* Ignore field: 7:7 */
+ /* bugbug: this code is correct for generic
+ * PMD. However, for implementation specific
+ * PMD, it's WRONG. need more info to judge
+ * what's implementation specific PMD.
+ */
+ nvalue = value & (~MASK (7, 1));
+ }
+ break;
+ case IA64_PKR:
+ case IA64_RR:
+ break;
+ default:
+ panic ("Unsupported instruction!");
+ }
+
+ return nvalue;
+}
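+
+/* Note on the masking convention used throughout this file: MASK(start, len)
+ * (from vmx_mm_def.h) is assumed here to yield a field of `len' set bits
+ * starting at bit `start', e.g.
+ *
+ *     MASK(60, 2) -> bits {61:60}
+ *     MASK(0, 15) -> bits {14:0}
+ *
+ * so "value & ~MASK(start, len)" clears the named field.
+ */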
+
+/*
+ * Return:
+ *  the input value with all ignored fields cleared to 0
+ */
+u64
+cr_igfld_mask (int index, u64 value)
+{
+ u64 nvalue;
+
+ nvalue = value;
+
+ switch ( index ) {
+ case IA64_REG_CR_IVA:
+        /* Ignore field: 14:0 */
+ nvalue = value & (~MASK (0, 15));
+ break;
+
+ case IA64_REG_CR_IHA:
+        /* Ignore field: 1:0 */
+ nvalue = value & (~MASK (0, 2));
+ break;
+
+ case IA64_REG_CR_LID:
+        /* Ignore field: 63:32 */
+ nvalue = value & (~MASK (32, 32));
+ break;
+
+ case IA64_REG_CR_TPR:
+        /* Ignore field: 63:17,3:0 */
+ nvalue = value & (~MASK (17, 47));
+ nvalue = nvalue & (~MASK (0, 4));
+ break;
+
+ case IA64_REG_CR_EOI:
+        /* Ignore field: 63:0 */
+ nvalue = 0;
+ break;
+
+ case IA64_REG_CR_ITV:
+ case IA64_REG_CR_PMV:
+ case IA64_REG_CR_CMCV:
+ case IA64_REG_CR_LRR0:
+ case IA64_REG_CR_LRR1:
+        /* Ignore field: 63:17,12:12 */
+ nvalue = value & (~MASK (17, 47));
+ nvalue = nvalue & (~MASK (12, 1));
+ break;
+ }
+
+ return nvalue;
+}
+
+
+/*
+ * Return:
+ * 1: PSR reserved fields are not zero
+ * 0: PSR reserved fields are all zero
+ */
+int
+check_psr_rsv_fields (u64 value)
+{
+ /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46
+     * These reserved fields must all be zero;
+     * otherwise we will panic.
+ */
+
+ if ( value & MASK (0, 1) ||
+ value & MASK (6, 7) ||
+ value & MASK (16, 1) ||
+ value & MASK (28, 4) ||
+ value & MASK (46, 18)
+ ) {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+
+/*
+ * Return:
+ * 1: CR reserved fields are not zero
+ * 0: CR reserved fields are all zero
+ */
+int
+check_cr_rsv_fields (int index, u64 value)
+{
+ switch (index) {
+ case IA64_REG_CR_DCR:
+ if ( (value & MASK ( 3, 5 )) ||
+ (value & MASK (15, 49))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_ITM:
+ case IA64_REG_CR_IVA:
+ case IA64_REG_CR_IIP:
+ case IA64_REG_CR_IFA:
+ case IA64_REG_CR_IIPA:
+ case IA64_REG_CR_IIM:
+ case IA64_REG_CR_IHA:
+ case IA64_REG_CR_EOI:
+ return 0;
+
+ case IA64_REG_CR_PTA:
+ if ( (value & MASK ( 1, 1 )) ||
+ (value & MASK (9, 6))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_IPSR:
+ return check_psr_rsv_fields (value);
+
+
+ case IA64_REG_CR_ISR:
+ if ( (value & MASK ( 24, 8 )) ||
+ (value & MASK (44, 20))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_ITIR:
+ if ( (value & MASK ( 0, 2 )) ||
+ (value & MASK (32, 32))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_IFS:
+ if ( (value & MASK ( 38, 25 ))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_LID:
+ if ( (value & MASK ( 0, 16 ))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_IVR:
+ if ( (value & MASK ( 8, 56 ))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_TPR:
+ if ( (value & MASK ( 8, 8 ))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_IRR0:
+ if ( (value & MASK ( 1, 1 )) ||
+ (value & MASK (3, 13))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_ITV:
+ case IA64_REG_CR_PMV:
+ case IA64_REG_CR_CMCV:
+ if ( (value & MASK ( 8, 4 )) ||
+ (value & MASK (13, 3))) {
+ return 1;
+ }
+ return 0;
+
+ case IA64_REG_CR_LRR0:
+ case IA64_REG_CR_LRR1:
+ if ( (value & MASK ( 11, 1 )) ||
+ (value & MASK (14, 1))) {
+ return 1;
+ }
+ return 0;
+ }
+
+
+ panic ("Unsupported CR");
+}
+
+
+
+/*
+ * Return:
+ * 0: Indirect Reg reserved fields are not zero
+ * 1: Indirect Reg reserved fields are all zero
+ */
+int
+check_indirect_reg_rsv_fields ( int type, int index, u64 value )
+{
+
+ switch ( type ) {
+ case IA64_CPUID:
+ if ( index == 3 ) {
+ if ( value & MASK (40, 24 )) {
+ return 0;
+ }
+ } else if ( index == 4 ) {
+ if ( value & MASK (2, 62 )) {
+ return 0;
+ }
+ }
+ break;
+
+ case IA64_DBR:
+ case IA64_IBR:
+ case IA64_PMC:
+ case IA64_PMD:
+ break;
+
+ case IA64_PKR:
+ if ( value & MASK (4, 4) ||
+ value & MASK (32, 32 )) {
+ return 0;
+ }
+ break;
+
+ case IA64_RR:
+ if ( value & MASK (1, 1) ||
+ value & MASK (32, 32 )) {
+ return 0;
+ }
+ break;
+
+ default:
+ panic ("Unsupported instruction!");
+ }
+
+ return 1;
+}
+
+
+
+
+/* Return
+ * Same format as isr_t
+ * Only ei/ni bits are valid, all other bits are zero
+ */
+u64
+set_isr_ei_ni (VCPU *vcpu)
+{
+
+ IA64_PSR vpsr,ipsr;
+ ISR visr;
+ REGS *regs;
+
+ regs=vcpu_regs(vcpu);
+
+ visr.val = 0;
+
+ vpsr.val = vmx_vcpu_get_psr (vcpu);
+
+    if ( !vpsr.ic ) {
+ /* Set ISR.ni */
+ visr.ni = 1;
+ }
+ ipsr.val = regs->cr_ipsr;
+
+ visr.ei = ipsr.ri;
+ return visr.val;
+}
+
+
+/* Set up ISR.na/code{3:0}/r/w for no-access instructions
+ * Refer to SDM Vol Table 5-1
+ * Parameter:
+ *   op: the no-access instruction being emulated (IA64_INST_TPA or
+ *       IA64_INST_TAK)
+ * Return:
+ * Same format as ISR. All fields are zero, except na/code{3:0}/r/w
+ */
+u64
+set_isr_for_na_inst(VCPU *vcpu, int op)
+{
+ ISR visr;
+ visr.val = 0;
+ switch (op) {
+ case IA64_INST_TPA:
+ visr.na = 1;
+ visr.code = 0;
+ break;
+ case IA64_INST_TAK:
+ visr.na = 1;
+ visr.code = 3;
+ break;
+ }
+ return visr.val;
+}
+
+
+
+/*
+ * Set up ISR for register NaT consumption fault
+ * Parameters:
+ * read: if 1, indicates this is a read access;
+ * write: if 1, indicates this is a write access;
+ */
+void
+set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write)
+{
+ ISR visr;
+ u64 value;
+    /* Need to set up ISR: code, ei, ni, na, r/w */
+ visr.val = 0;
+
+ /* ISR.code{7:4} =1,
+ * Set up ISR.code{3:0}, ISR.na
+ */
+ visr.code = (1 << 4);
+ if (inst) {
+
+ value = set_isr_for_na_inst (vcpu,inst);
+ visr.val = visr.val | value;
+ }
+
+ /* Set up ISR.r/w */
+ visr.r = read;
+ visr.w = write;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr (vcpu,visr.val);
+}
+
+
+
+/*
+ * Set up ISR for break fault
+ */
+void set_break_isr (VCPU *vcpu)
+{
+ ISR visr;
+ u64 value;
+
+    /* Need to set up ISR: ei, ni */
+
+ visr.val = 0;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr(vcpu, visr.val);
+}
+
+
+
+
+
+
+/*
+ * Set up ISR for Privileged Operation fault
+ */
+void set_privileged_operation_isr (VCPU *vcpu,int inst)
+{
+ ISR visr;
+ u64 value;
+
+    /* Need to set up ISR: code, ei, ni, na */
+
+ visr.val = 0;
+
+ /* Set up na, code{3:0} for no-access instruction */
+ value = set_isr_for_na_inst (vcpu, inst);
+ visr.val = visr.val | value;
+
+
+ /* ISR.code{7:4} =1 */
+ visr.code = (1 << 4) | visr.code;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+/*
+ * Set up ISR for Privileged Register fault
+ */
+void set_privileged_reg_isr (VCPU *vcpu, int inst)
+{
+ ISR visr;
+ u64 value;
+
+    /* Need to set up ISR: code, ei, ni */
+
+ visr.val = 0;
+
+ /* ISR.code{7:4} =2 */
+ visr.code = 2 << 4;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+
+/*
+ * Set up ISR for Reserved Register/Field fault
+ */
+void set_rsv_reg_field_isr (VCPU *vcpu)
+{
+ ISR visr;
+ u64 value;
+
+    /* Need to set up ISR: code, ei, ni */
+
+ visr.val = 0;
+
+    /* ISR.code{7:4} = 3 */
+ visr.code = (3 << 4) | visr.code;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+/*
+ * Set up ISR for Illegal Operation fault
+ */
+void set_illegal_op_isr (VCPU *vcpu)
+{
+ ISR visr;
+ u64 value;
+
+    /* Need to set up ISR: ei, ni */
+
+ visr.val = 0;
+
+ /* Set up ei/ni */
+ value = set_isr_ei_ni (vcpu);
+ visr.val = visr.val | value;
+
+ vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access)
+{
+ ISR isr;
+
+ isr.val = 0;
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag;
+ isr.na = non_access;
+ isr.r = 1;
+ isr.w = 0;
+ vmx_vcpu_set_isr(vcpu, isr.val);
+ return;
+}
+
+void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access)
+{
+ u64 value;
+ ISR isr;
+
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_PRIV_OP_FAULT;
+ isr.na = non_access;
+ vmx_vcpu_set_isr(vcpu, isr.val);
+
+ return;
+}
+
+
+IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index)
+{
+ u64 sof;
+ REGS *regs;
+ regs=vcpu_regs(vcpu);
+ sof = regs->cr_ifs & 0x7f;
+ if(reg_index >= sof + 32)
+ return IA64_FAULT;
+    return IA64_NO_FAULT;
+}
+
+
+int is_reserved_rr_register(VCPU* vcpu, int reg_index)
+{
+ return (reg_index >= 8);
+}
+
+#define ITIR_RSV_MASK (0x3UL | (((1UL<<32)-1) << 32))
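+/* Reading the constant above: ITIR_RSV_MASK covers itir{1:0} and itir{63:32},
+ * i.e. the ITIR fields treated as reserved here; is_reserved_itir_field()
+ * rejects any value with a bit set in either range.
+ */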
+int is_reserved_itir_field(VCPU* vcpu, u64 itir)
+{
+ if ( itir & ITIR_RSV_MASK ) {
+ return 1;
+ }
+ return 0;
+}
+
+int is_reserved_rr_field(VCPU* vcpu, u64 reg_value)
+{
+ ia64_rr rr;
+ rr.rrval = reg_value;
+
+ if(rr.reserved0 != 0 || rr.reserved1 != 0){
+ return 1;
+ }
+ if(rr.ps < 12 || rr.ps > 28){
+ // page too big or small.
+ return 1;
+ }
+ if(rr.ps > 15 && rr.ps % 2 != 0){
+ // unsupported page size.
+ return 1;
+ }
+ return 0;
+}
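+
+/* rr.ps is the log2 page size: 12..28 corresponds to 4KB..256MB, and for
+ * sizes above 32KB (ps > 15) only even values of ps are accepted here.
+ */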
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vcpu.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,446 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Fred yang (fred.yang@xxxxxxxxx)
+ * Arun Sharma (arun.sharma@xxxxxxxxx)
+ * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <xen/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+
+//u64 fire_itc;
+//u64 fire_itc2;
+//u64 fire_itm;
+//u64 fire_itm2;
+/*
+ * Copyright (c) 2005 Intel Corporation.
+ * Anthony Xu (anthony.xu@xxxxxxxxx)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+//unsigned long last_guest_rsm = 0x0;
+struct guest_psr_bundle{
+ unsigned long ip;
+ unsigned long psr;
+};
+
+struct guest_psr_bundle guest_psr_buf[100];
+unsigned long guest_psr_index = 0;
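+/* guest_psr_buf/guest_psr_index appear to form a small debug trace ring:
+ * each guest PSR write below records the guest IP and the resulting PSR,
+ * wrapping after 100 entries.
+ */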
+
+void
+vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value)
+{
+
+ UINT64 mask;
+ REGS *regs;
+ IA64_PSR old_psr, new_psr;
+ old_psr.val=vmx_vcpu_get_psr(vcpu);
+
+ regs=vcpu_regs(vcpu);
+    /* We only support a guest with:
+     *  vpsr.pk = 0
+     *  vpsr.is = 0
+     * Otherwise we panic.
+ */
+ if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
+        panic ("Setting unsupported guest psr!");
+ }
+
+ /*
+     * For the IA64_PSR bits id/da/dd/ss/ed/ia:
+     * since these bits become 0 after successful execution of each
+     * instruction, we track them only in the machine PSR and clear
+     * them in the virtual PSR here.
+ */
+ VMX_VPD(vcpu,vpsr) = value &
+ (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+ IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+ ));
+
+ if ( !old_psr.i && (value & IA64_PSR_I) ) {
+ // vpsr.i 0->1
+ vcpu->arch.irq_new_condition = 1;
+ }
+ new_psr.val=vmx_vcpu_get_psr(vcpu);
+ {
+ struct pt_regs *regs = vcpu_regs(vcpu);
+ guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
+ guest_psr_buf[guest_psr_index].psr = new_psr.val;
+ if (++guest_psr_index >= 100)
+ guest_psr_index = 0;
+ }
+#if 0
+ if (old_psr.i != new_psr.i) {
+ if (old_psr.i)
+ last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
+ else
+ last_guest_rsm = 0;
+ }
+#endif
+
+ /*
+ * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+ * , except for the following bits:
+ * ic/i/dt/si/rt/mc/it/bn/vm
+ */
+ mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+ IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+ IA64_PSR_VM;
+
+ regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
+
+ check_mm_mode_switch(vcpu, old_psr, new_psr);
+    return;
+}
+
+/* Adjust the slot both in pt_regs and in the VPD, based on vpsr.ri,
+ * which should have been synced with ipsr on entry.
+ *
+ * Also clear some bits after successful emulation.
+ */
+IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu)
+{
+ // TODO: trap_bounce?? Eddie
+ REGS *regs = vcpu_regs(vcpu);
+ IA64_PSR vpsr;
+    IA64_PSR *ipsr = (IA64_PSR *)&regs->cr_ipsr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if (vpsr.ri == 2) {
+ vpsr.ri = 0;
+ regs->cr_iip += 16;
+ } else {
+ vpsr.ri++;
+ }
+
+ ipsr->ri = vpsr.ri;
+ vpsr.val &=
+ (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+ IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+ ));
+
+ VMX_VPD(vcpu, vpsr) = vpsr.val;
+
+ ipsr->val &=
+ (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+ IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+ ));
+
+ return (IA64_NO_FAULT);
+}
+
+
+IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
+{
+ REGS *regs = vcpu_regs(vcpu);
+ IA64_PSR vpsr;
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+ if(!vpsr.ic)
+ VPD_CR(vcpu,ifs) = regs->cr_ifs;
+ regs->cr_ifs = IA64_IFS_V;
+ return (IA64_NO_FAULT);
+}
+
+
+thash_cb_t *
+vmx_vcpu_get_vtlb(VCPU *vcpu)
+{
+ return vcpu->arch.vtlb;
+}
+
+
+struct virutal_platform_def *
+vmx_vcpu_get_plat(VCPU *vcpu)
+{
+ return &(vcpu->domain->arch.vmx_platform);
+}
+
+
+ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr)
+{
+ return (ia64_rr)VMX(vcpu,vrr[vadr>>61]);
+}
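+
+/* For reference: vadr >> 61 extracts the virtual region number (address
+ * bits {63:61}), so vrr[] is indexed per region, mirroring the eight
+ * architectural region registers.
+ */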
+
+
+IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ ia64_rr oldrr,newrr;
+ thash_cb_t *hcb;
+ oldrr=vmx_vcpu_rr(vcpu,reg);
+ newrr.rrval=val;
+#if 1
+ if(oldrr.ps!=newrr.ps){
+ hcb = vmx_vcpu_get_vtlb(vcpu);
+ thash_purge_all(hcb);
+ }
+#endif
+ VMX(vcpu,vrr[reg>>61]) = val;
+ switch((u64)(reg>>61)) {
+ case VRN5:
+ VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val);
+ break;
+ case VRN6:
+ VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val);
+ break;
+ case VRN7:
+ VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
+ /* Change double mapping for this domain */
+#ifdef XEN_DBL_MAPPING
+ vmx_change_double_mapping(vcpu,
+ vmx_vrrtomrr(vcpu,oldrr.rrval),
+ vmx_vrrtomrr(vcpu,newrr.rrval));
+#endif
+ break;
+ default:
+ ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
+ break;
+ }
+
+ return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ UINT64 val = (UINT64)ia64_get_pkr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ ia64_set_pkr(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+#if 0
+int tlb_debug=0;
+check_entry(u64 va, u64 ps, char *str)
+{
+ va &= ~ (PSIZE(ps)-1);
+ if ( va == 0x2000000002908000UL ||
+ va == 0x600000000000C000UL ) {
+ stop();
+ }
+ if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
+}
+#endif
+
+
+u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa)
+{
+ ia64_rr rr,rr1;
+ rr=vmx_vcpu_rr(vcpu,ifa);
+ rr1.rrval=0;
+ rr1.ps=rr.ps;
+ rr1.rid=rr.rid;
+ return (rr1.rrval);
+}
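+
+/* Note: only rr.ps and rr.rid of the faulting address's region register are
+ * propagated into the ITIR value synthesized above; all other fields are
+ * left zero.
+ */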
+
+
+
+
+IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
+{
+ // TODO: Only allowed for current vcpu
+ UINT64 ifs, psr;
+ REGS *regs = vcpu_regs(vcpu);
+ psr = VPD_CR(vcpu,ipsr);
+ vmx_vcpu_set_psr(vcpu,psr);
+ ifs=VPD_CR(vcpu,ifs);
+ if((ifs>>63)&&(ifs<<1)){
+ ifs=(regs->cr_ifs)&0x7f;
+ regs->rfi_pfs = (ifs<<7)|ifs;
+ regs->cr_ifs = VPD_CR(vcpu,ifs);
+ }
+ regs->cr_iip = VPD_CR(vcpu,iip);
+ return (IA64_NO_FAULT);
+}
+
+
+UINT64
+vmx_vcpu_get_psr(VCPU *vcpu)
+{
+ return VMX_VPD(vcpu,vpsr);
+}
+
+
+IA64FAULT
+vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val)
+{
+ IA64_PSR vpsr;
+
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.bn ) {
+ *val=VMX_VPD(vcpu,vgr[reg-16]);
+ // Check NAT bit
+ if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) {
+ // TODO
+ //panic ("NAT consumption fault\n");
+ return IA64_FAULT;
+ }
+
+ }
+ else {
+ *val=VMX_VPD(vcpu,vbgr[reg-16]);
+ if ( VMX_VPD(vcpu,vbnat) & (1UL<<reg) ) {
+ //panic ("NAT consumption fault\n");
+ return IA64_FAULT;
+ }
+
+ }
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT
+vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat)
+{
+ IA64_PSR vpsr;
+ vpsr.val = vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.bn ) {
+ VMX_VPD(vcpu,vgr[reg-16]) = val;
+ if(nat){
+ VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) );
+ }else{
+ VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) );
+ }
+ }
+ else {
+ VMX_VPD(vcpu,vbgr[reg-16]) = val;
+ if(nat){
+ VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg) );
+ }else{
+ VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg) );
+ }
+ }
+ return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT
+vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val)
+{
+ REGS *regs=vcpu_regs(vcpu);
+ int nat;
+ //TODO, Eddie
+ if (!regs) return 0;
+ if (reg >= 16 && reg < 32) {
+ return vmx_vcpu_get_bgr(vcpu,reg,val);
+ }
+ getreg(reg,val,&nat,regs); // FIXME: handle NATs later
+ if(nat){
+ return IA64_FAULT;
+ }
+ return IA64_NO_FAULT;
+}
+
+// returns:
+// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+// IA64_NO_FAULT otherwise
+
+IA64FAULT
+vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat)
+{
+ REGS *regs = vcpu_regs(vcpu);
+ long sof = (regs->cr_ifs) & 0x7f;
+ //TODO Eddie
+
+ if (!regs) return IA64_ILLOP_FAULT;
+ if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+ if ( reg >= 16 && reg < 32 ) {
+ return vmx_vcpu_set_bgr(vcpu,reg, value, nat);
+ }
+ setreg(reg,value,nat,regs);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+ UINT64 vpsr;
+ vpsr = vmx_vcpu_get_psr(vcpu);
+ vpsr &= (~imm24);
+ vmx_vcpu_set_psr(vcpu, vpsr);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+ UINT64 vpsr;
+ vpsr = vmx_vcpu_get_psr(vcpu);
+ vpsr |= imm24;
+ vmx_vcpu_set_psr(vcpu, vpsr);
+ return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+ vmx_vcpu_set_psr(vcpu, val);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT
+vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
+{
+ VPD_CR(vcpu,tpr)=val;
+ vcpu->arch.irq_new_condition = 1;
+ return IA64_NO_FAULT;
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_virt.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_virt.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1511 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_virt.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Fred yang (fred.yang@xxxxxxxxx)
+ * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+
+
+#include <asm/privop.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/processor.h>
+#include <asm/delay.h> // Debug only
+#include <asm/vmmu.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/smp.h>
+
+#include <asm/virt_event.h>
+extern UINT64 privop_trace;
+
+void
+ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause)
+{
+ *cause=0;
+ switch (slot_type) {
+ case M:
+ if (inst.generic.major==0){
+ if(inst.M28.x3==0){
+ if(inst.M44.x4==6){
+ *cause=EVENT_SSM;
+ }else if(inst.M44.x4==7){
+ *cause=EVENT_RSM;
+ }else if(inst.M30.x4==8&&inst.M30.x2==2){
+ *cause=EVENT_MOV_TO_AR_IMM;
+ }
+ }
+ }
+ else if(inst.generic.major==1){
+ if(inst.M28.x3==0){
+ if(inst.M32.x6==0x2c){
+ *cause=EVENT_MOV_TO_CR;
+ }else if(inst.M33.x6==0x24){
+ *cause=EVENT_MOV_FROM_CR;
+ }else if(inst.M35.x6==0x2d){
+ *cause=EVENT_MOV_TO_PSR;
+ }else if(inst.M36.x6==0x25){
+ *cause=EVENT_MOV_FROM_PSR;
+ }else if(inst.M29.x6==0x2A){
+ *cause=EVENT_MOV_TO_AR;
+ }else if(inst.M31.x6==0x22){
+ *cause=EVENT_MOV_FROM_AR;
+ }else if(inst.M45.x6==0x09){
+ *cause=EVENT_PTC_L;
+ }else if(inst.M45.x6==0x0A){
+ *cause=EVENT_PTC_G;
+ }else if(inst.M45.x6==0x0B){
+ *cause=EVENT_PTC_GA;
+ }else if(inst.M45.x6==0x0C){
+ *cause=EVENT_PTR_D;
+ }else if(inst.M45.x6==0x0D){
+ *cause=EVENT_PTR_I;
+ }else if(inst.M46.x6==0x1A){
+ *cause=EVENT_THASH;
+ }else if(inst.M46.x6==0x1B){
+ *cause=EVENT_TTAG;
+ }else if(inst.M46.x6==0x1E){
+ *cause=EVENT_TPA;
+ }else if(inst.M46.x6==0x1F){
+ *cause=EVENT_TAK;
+ }else if(inst.M47.x6==0x34){
+ *cause=EVENT_PTC_E;
+ }else if(inst.M41.x6==0x2E){
+ *cause=EVENT_ITC_D;
+ }else if(inst.M41.x6==0x2F){
+ *cause=EVENT_ITC_I;
+ }else if(inst.M42.x6==0x00){
+ *cause=EVENT_MOV_TO_RR;
+ }else if(inst.M42.x6==0x01){
+ *cause=EVENT_MOV_TO_DBR;
+ }else if(inst.M42.x6==0x02){
+ *cause=EVENT_MOV_TO_IBR;
+ }else if(inst.M42.x6==0x03){
+ *cause=EVENT_MOV_TO_PKR;
+ }else if(inst.M42.x6==0x04){
+ *cause=EVENT_MOV_TO_PMC;
+ }else if(inst.M42.x6==0x05){
+ *cause=EVENT_MOV_TO_PMD;
+ }else if(inst.M42.x6==0x0E){
+ *cause=EVENT_ITR_D;
+ }else if(inst.M42.x6==0x0F){
+ *cause=EVENT_ITR_I;
+ }else if(inst.M43.x6==0x10){
+ *cause=EVENT_MOV_FROM_RR;
+ }else if(inst.M43.x6==0x11){
+ *cause=EVENT_MOV_FROM_DBR;
+ }else if(inst.M43.x6==0x12){
+ *cause=EVENT_MOV_FROM_IBR;
+ }else if(inst.M43.x6==0x13){
+ *cause=EVENT_MOV_FROM_PKR;
+ }else if(inst.M43.x6==0x14){
+ *cause=EVENT_MOV_FROM_PMC;
+/*
+ }else if(inst.M43.x6==0x15){
+ *cause=EVENT_MOV_FROM_PMD;
+*/
+ }else if(inst.M43.x6==0x17){
+ *cause=EVENT_MOV_FROM_CPUID;
+ }
+ }
+ }
+ break;
+ case B:
+ if(inst.generic.major==0){
+ if(inst.B8.x6==0x02){
+ *cause=EVENT_COVER;
+ }else if(inst.B8.x6==0x08){
+ *cause=EVENT_RFI;
+ }else if(inst.B8.x6==0x0c){
+ *cause=EVENT_BSW_0;
+ }else if(inst.B8.x6==0x0d){
+ *cause=EVENT_BSW_1;
+ }
+ }
+ }
+}
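+
+/* If no pattern above matches, *cause stays 0; the BYPASS_VMAL_OPCODE path
+ * in vmx_emulate() treats that as "instruction cannot be virtualized".
+ */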
+
+IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst)
+{
+ UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+ return vmx_vcpu_reset_psr_sm(vcpu,imm24);
+}
+
+IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst)
+{
+ UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+ return vmx_vcpu_set_psr_sm(vcpu,imm24);
+}
+
+unsigned long last_guest_psr = 0x0;
+IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 tgt = inst.M33.r1;
+ UINT64 val;
+ IA64FAULT fault;
+
+/*
+ if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
+ return vmx_vcpu_set_gr(vcpu, tgt, val);
+ else return fault;
+ */
+ val = vmx_vcpu_get_psr(vcpu);
+ val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+ last_guest_psr = val;
+ return vmx_vcpu_set_gr(vcpu, tgt, val, 0);
+}
+
+/**
+ * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
+ */
+IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+ if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
+ panic(" get_psr nat bit fault\n");
+
+ val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32));
+#if 0
+ if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32))))
+ while(1);
+ else
+ last_mov_from_psr = 0;
+#endif
+ return vmx_vcpu_set_psr_l(vcpu,val);
+}
+
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst)
+{
+ IA64_PSR vpsr;
+ REGS *regs;
+#ifdef CHECK_FAULT
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ regs=vcpu_regs(vcpu);
+ vpsr.val=regs->cr_ipsr;
+ if ( vpsr.is == 1 ) {
+ panic ("We do not support IA32 instruction yet");
+ }
+
+ return vmx_vcpu_rfi(vcpu);
+}
+
+IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst)
+{
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ return vmx_vcpu_bsw0(vcpu);
+}
+
+IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst)
+{
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ return vmx_vcpu_bsw1(vcpu);
+}
+
+IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst)
+{
+ return vmx_vcpu_cover(vcpu);
+}
+
+IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst)
+{
+ u64 r2,r3;
+ ISR isr;
+ IA64_PSR vpsr;
+
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if (unimplemented_gva(vcpu,r3) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7));
+}
+
+IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst)
+{
+ u64 r3;
+ ISR isr;
+ IA64_PSR vpsr;
+
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+ return vmx_vcpu_ptc_e(vcpu,r3);
+}
+
+IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst)
+{
+ return vmx_emul_ptc_l(vcpu, inst);
+}
+
+IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst)
+{
+ return vmx_emul_ptc_l(vcpu, inst);
+}
+
+IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3)
+{
+ ISR isr;
+ IA64FAULT ret1, ret2;
+
+#ifdef VMAL_NO_FAULT_CHECK
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3);
+ ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2);
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) {
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+ }
+    if (unimplemented_gva(vcpu,*pr3) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst)
+{
+ u64 r2,r3;
+ if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT )
+ return IA64_FAULT;
+ return vmx_vcpu_ptr_d(vcpu,r3,bits(r2,2,7));
+}
+
+IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst)
+{
+ u64 r2,r3;
+ if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT )
+ return IA64_FAULT;
+ return vmx_vcpu_ptr_i(vcpu,r3,bits(r2,2,7));
+}
+
+
+IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+ IA64_PSR vpsr;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(unimplemented_gva(vcpu, r3)){
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_thash(vcpu, r3, &r1);
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+
+IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+ IA64_PSR vpsr;
+ #ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(unimplemented_gva(vcpu, r3)){
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
+ return IA64_NO_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_ttag(vcpu, r3, &r1);
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+
+IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if(vpsr.cpl!=0){
+ visr.val=0;
+ vcpu_set_isr(vcpu, visr.val);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,1);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if (unimplemented_gva(vcpu,r3) ) {
+ // inject unimplemented_data_address_fault
+ visr.val = set_isr_ei_ni(vcpu);
+ visr.code = IA64_RESERVED_REG_FAULT;
+        vcpu_set_isr(vcpu, visr.val);
+ // FAULT_UNIMPLEMENTED_DATA_ADDRESS.
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+
+ if(vmx_vcpu_tpa(vcpu, r3, &r1)){
+ return IA64_FAULT;
+ }
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst)
+{
+ u64 r1,r3;
+ ISR visr;
+ IA64_PSR vpsr;
+ int fault=IA64_NO_FAULT;
+#ifdef CHECK_FAULT
+ visr.val=0;
+ if(check_target_register(vcpu, inst.M46.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if(vpsr.cpl!=0){
+ vcpu_set_isr(vcpu, visr.val);
+ return IA64_FAULT;
+ }
+#endif
+ if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,1);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif
+ }
+ if(vmx_vcpu_tak(vcpu, r3, &r1)){
+ return IA64_FAULT;
+ }
+ vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+
+/************************************
+ * Insert translation register/cache
+************************************/
+
+IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+ ISR isr;
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.ic ) {
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if(is_reserved_rr_register(vcpu, slot)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ if (vmx_vcpu_get_itir(vcpu,&itir)){
+ return(IA64_FAULT);
+ }
+ if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+ return(IA64_FAULT);
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if (is_reserved_itir_field(vcpu, itir)) {
+ // TODO
+ return IA64_FAULT;
+ }
+ if (unimplemented_gva(vcpu,ifa) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot));
+}
+
+IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+ ISR isr;
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.ic ) {
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
+#ifdef VMAL_NO_FAULT_CHECK
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif // VMAL_NO_FAULT_CHECK
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if(is_reserved_rr_register(vcpu, slot)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ if (vmx_vcpu_get_itir(vcpu,&itir)){
+ return(IA64_FAULT);
+ }
+ if (vmx_vcpu_get_ifa(vcpu,&ifa)){
+ return(IA64_FAULT);
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+ if (is_reserved_itir_field(vcpu, itir)) {
+ // TODO
+ return IA64_FAULT;
+ }
+ if (unimplemented_gva(vcpu,ifa) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot));
+}
+
+IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte)
+{
+ UINT64 fault;
+ ISR isr;
+ IA64_PSR vpsr;
+ IA64FAULT ret1;
+
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.ic ) {
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+
+#ifdef VMAL_NO_FAULT_CHECK
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte);
+#ifdef VMAL_NO_FAULT_CHECK
+ if( ret1 != IA64_NO_FAULT ){
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+
+ if (vmx_vcpu_get_itir(vcpu,itir)){
+ return(IA64_FAULT);
+ }
+ if (vmx_vcpu_get_ifa(vcpu,ifa)){
+ return(IA64_FAULT);
+ }
+#ifdef VMAL_NO_FAULT_CHECK
+    if (unimplemented_gva(vcpu,*ifa) ) {
+ isr.val = set_isr_ei_ni(vcpu);
+ isr.code = IA64_RESERVED_REG_FAULT;
+ vcpu_set_isr(vcpu, isr.val);
+ unimpl_daddr(vcpu);
+ return IA64_FAULT;
+ }
+#endif // VMAL_NO_FAULT_CHECK
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 itir, ifa, pte;
+
+ if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
+ return IA64_FAULT;
+ }
+
+ return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa));
+}
+
+IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 itir, ifa, pte;
+
+ if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
+ return IA64_FAULT;
+ }
+
+ return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa));
+
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+*************************************/
+
+IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
+{
+ // I27 and M30 are identical for these fields
+ if(inst.M30.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ UINT64 imm;
+ if(inst.M30.s){
+ imm = -inst.M30.imm;
+ }else{
+ imm = inst.M30.imm;
+ }
+ return (vmx_vcpu_set_itc(vcpu, imm));
+}
+
+IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
+{
+ // I26 and M29 are identical for these fields
+ u64 r2;
+ if(inst.M29.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+ if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ return (vmx_vcpu_set_itc(vcpu, r2));
+}
+
+
+IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst)
+{
+    // I28 and M31 are identical for these fields
+ if(inst.M31.ar3!=44){
+ panic("Can't support ar register other than itc");
+ }
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu,inst.M31.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if (vpsr.si && vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ u64 r1;
+ vmx_vcpu_get_itc(vcpu,&r1);
+ vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0);
+ return IA64_NO_FAULT;
+}
+
+
+/********************************
+ * Moves to privileged registers
+********************************/
+
+IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pkr(vcpu,r3,r2));
+}
+
+IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_rr(vcpu,r3,r2));
+}
+
+IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_dbr(vcpu,r3,r2));
+}
+
+IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_ibr(vcpu,r3,r2));
+}
+
+IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pmc(vcpu,r3,r2));
+}
+
+IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r2;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+ return (vmx_vcpu_set_pmd(vcpu,r3,r2));
+}
+
+
+/**********************************
+ * Moves from privileged registers
+ **********************************/
+
+IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_rr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_pkr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_dbr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_ibr(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if (vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_pmc(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst)
+{
+ u64 r3,r1;
+#ifdef CHECK_FAULT
+ if(check_target_register(vcpu, inst.M43.r1)){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if(is_reserved_indirect_register(vcpu,r3)){
+ set_rsv_reg_field_isr(vcpu);
+ rsv_reg_field(vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ vmx_vcpu_get_cpuid(vcpu,r3,&r1);
+ return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
+}
+
+IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst)
+{
+ u64 r2,cr3;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+ if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){
+#ifdef CHECK_FAULT
+ set_isr_reg_nat_consumption(vcpu,0,0);
+ rnat_comsumption(vcpu);
+ return IA64_FAULT;
+#endif //CHECK_FAULT
+ }
+#ifdef CHECK_FAULT
+ if ( check_cr_rsv_fields (inst.M32.cr3, r2)) {
+ /* Inject Reserved Register/Field fault
+ * into guest */
+ set_rsv_reg_field_isr (vcpu,0);
+ rsv_reg_field (vcpu);
+ return IA64_FAULT;
+ }
+#endif //CHECK_FAULT
+ extern u64 cr_igfld_mask(int index, u64 value);
+ r2 = cr_igfld_mask(inst.M32.cr3,r2);
+ VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2;
+ switch (inst.M32.cr3) {
+ case 0: return vmx_vcpu_set_dcr(vcpu,r2);
+ case 1: return vmx_vcpu_set_itm(vcpu,r2);
+ case 2: return vmx_vcpu_set_iva(vcpu,r2);
+ case 8: return vmx_vcpu_set_pta(vcpu,r2);
+ case 16:return vmx_vcpu_set_ipsr(vcpu,r2);
+ case 17:return vmx_vcpu_set_isr(vcpu,r2);
+ case 19:return vmx_vcpu_set_iip(vcpu,r2);
+ case 20:return vmx_vcpu_set_ifa(vcpu,r2);
+ case 21:return vmx_vcpu_set_itir(vcpu,r2);
+ case 22:return vmx_vcpu_set_iipa(vcpu,r2);
+ case 23:return vmx_vcpu_set_ifs(vcpu,r2);
+ case 24:return vmx_vcpu_set_iim(vcpu,r2);
+ case 25:return vmx_vcpu_set_iha(vcpu,r2);
+ case 64:printk("SET LID to 0x%lx\n", r2);
+ return vmx_vcpu_set_lid(vcpu,r2);
+ case 65:return IA64_NO_FAULT;
+ case 66:return vmx_vcpu_set_tpr(vcpu,r2);
+ case 67:return vmx_vcpu_set_eoi(vcpu,r2);
+ case 68:return IA64_NO_FAULT;
+ case 69:return IA64_NO_FAULT;
+ case 70:return IA64_NO_FAULT;
+ case 71:return IA64_NO_FAULT;
+ case 72:return vmx_vcpu_set_itv(vcpu,r2);
+ case 73:return vmx_vcpu_set_pmv(vcpu,r2);
+ case 74:return vmx_vcpu_set_cmcv(vcpu,r2);
+ case 80:return vmx_vcpu_set_lrr0(vcpu,r2);
+ case 81:return vmx_vcpu_set_lrr1(vcpu,r2);
+ default: return IA64_NO_FAULT;
+ }
+}
+
+
+#define cr_get(cr) \
+ ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\
+ vmx_vcpu_set_gr(vcpu, tgt, val,0):fault;
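+
+/* For example, cr_get(dcr) expands (roughly) to:
+ *
+ *     ((fault = vmx_vcpu_get_dcr(vcpu, &val)) == IA64_NO_FAULT) ?
+ *         vmx_vcpu_set_gr(vcpu, tgt, val, 0) : fault;
+ *
+ * i.e. a CR read paired with a write-back into the target GR.
+ */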
+
+
+IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 tgt = inst.M33.r1;
+ UINT64 val;
+ IA64FAULT fault;
+#ifdef CHECK_FAULT
+ IA64_PSR vpsr;
+ vpsr.val=vmx_vcpu_get_psr(vcpu);
+ if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3||
+ (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3)))){
+ set_illegal_op_isr(vcpu);
+ illegal_op(vcpu);
+ return IA64_FAULT;
+ }
+ if ( vpsr.cpl != 0) {
+ /* Inject Privileged Operation fault into guest */
+ set_privileged_operation_isr (vcpu, 0);
+ privilege_op (vcpu);
+ return IA64_FAULT;
+ }
+#endif // CHECK_FAULT
+
+// from_cr_cnt[inst.M33.cr3]++;
+ switch (inst.M33.cr3) {
+ case 0: return cr_get(dcr);
+ case 1: return cr_get(itm);
+ case 2: return cr_get(iva);
+ case 8: return cr_get(pta);
+ case 16:return cr_get(ipsr);
+ case 17:return cr_get(isr);
+ case 19:return cr_get(iip);
+ case 20:return cr_get(ifa);
+ case 21:return cr_get(itir);
+ case 22:return cr_get(iipa);
+ case 23:return cr_get(ifs);
+ case 24:return cr_get(iim);
+ case 25:return cr_get(iha);
+// case 64:val = ia64_getreg(_IA64_REG_CR_LID);
+// return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+ case 64:return cr_get(lid);
+ case 65:
+ vmx_vcpu_get_ivr(vcpu,&val);
+ return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+ case 66:return cr_get(tpr);
+ case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0);
+ case 68:return cr_get(irr0);
+ case 69:return cr_get(irr1);
+ case 70:return cr_get(irr2);
+ case 71:return cr_get(irr3);
+ case 72:return cr_get(itv);
+ case 73:return cr_get(pmv);
+ case 74:return cr_get(cmcv);
+ case 80:return cr_get(lrr0);
+ case 81:return cr_get(lrr1);
+ default:
+ panic("Read reserved cr register");
+ }
+}
+
+
+static void post_emulation_action(VCPU *vcpu)
+{
+ if ( vcpu->arch.irq_new_condition ) {
+ vcpu->arch.irq_new_condition = 0;
+ vhpi_detection(vcpu);
+ }
+}
+
+//#define BYPASS_VMAL_OPCODE
+extern IA64_SLOT_TYPE slot_types[0x20][3];
+IA64_BUNDLE __vmx_get_domain_bundle(u64 iip)
+{
+ IA64_BUNDLE bundle;
+
+ fetch_code( current,iip, &bundle.i64[0]);
+ fetch_code( current,iip+8, &bundle.i64[1]);
+ return bundle;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @param cause the reason that caused the virtualization fault
+ * @param opcode the instruction opcode that caused the virtualization fault
+ */
+
+void
+vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode)
+{
+ IA64_BUNDLE bundle;
+ int slot;
+ IA64_SLOT_TYPE slot_type;
+ IA64FAULT status;
+ INST64 inst;
+ REGS * regs;
+ UINT64 iip;
+ regs = vcpu_regs(vcpu);
+ iip = regs->cr_iip;
+ IA64_PSR vpsr;
+/*
+ if (privop_trace) {
+ static long i = 400;
+ //if (i > 0) printf("privop @%p\n",iip);
+ if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
+ iip,ia64_get_itc(),ia64_get_itm());
+ i--;
+ }
+*/
+#ifdef VTLB_DEBUG
+ check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
+ dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
+#endif
+#if 0
+if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
+ printf ("VMAL decode error: cause - %lx; op - %lx\n",
+ cause, opcode );
+ return;
+}
+#endif
+#ifdef BYPASS_VMAL_OPCODE
+ // make a local copy of the bundle containing the privop
+ bundle = __vmx_get_domain_bundle(iip);
+ slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+ if (!slot) inst.inst = bundle.slot0;
+ else if (slot == 1)
+ inst.inst = bundle.slot1a + (bundle.slot1b<<18);
+ else if (slot == 2) inst.inst = bundle.slot2;
+ else printf("priv_handle_op: illegal slot: %d\n", slot);
+ slot_type = slot_types[bundle.template][slot];
+ ia64_priv_decoder(slot_type, inst, &cause);
+ if(cause==0){
+ printf("This instruction at 0x%lx slot %d can't be virtualized", iip,
slot);
+ panic("123456\n");
+ }
+#else
+ inst.inst=opcode;
+#endif /* BYPASS_VMAL_OPCODE */
+
+ /*
+ * Switch to actual virtual rid in rr0 and rr4,
+ * which is required by some tlb related instructions.
+ */
+ prepare_if_physical_mode(vcpu);
+
+ switch(cause) {
+ case EVENT_RSM:
+ status=vmx_emul_rsm(vcpu, inst);
+ break;
+ case EVENT_SSM:
+ status=vmx_emul_ssm(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PSR:
+ status=vmx_emul_mov_to_psr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PSR:
+ status=vmx_emul_mov_from_psr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_CR:
+ status=vmx_emul_mov_from_cr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_CR:
+ status=vmx_emul_mov_to_cr(vcpu, inst);
+ break;
+ case EVENT_BSW_0:
+ status=vmx_emul_bsw0(vcpu, inst);
+ break;
+ case EVENT_BSW_1:
+ status=vmx_emul_bsw1(vcpu, inst);
+ break;
+ case EVENT_COVER:
+ status=vmx_emul_cover(vcpu, inst);
+ break;
+ case EVENT_RFI:
+ status=vmx_emul_rfi(vcpu, inst);
+ break;
+ case EVENT_ITR_D:
+ status=vmx_emul_itr_d(vcpu, inst);
+ break;
+ case EVENT_ITR_I:
+ status=vmx_emul_itr_i(vcpu, inst);
+ break;
+ case EVENT_PTR_D:
+ status=vmx_emul_ptr_d(vcpu, inst);
+ break;
+ case EVENT_PTR_I:
+ status=vmx_emul_ptr_i(vcpu, inst);
+ break;
+ case EVENT_ITC_D:
+ status=vmx_emul_itc_d(vcpu, inst);
+ break;
+ case EVENT_ITC_I:
+ status=vmx_emul_itc_i(vcpu, inst);
+ break;
+ case EVENT_PTC_L:
+ status=vmx_emul_ptc_l(vcpu, inst);
+ break;
+ case EVENT_PTC_G:
+ status=vmx_emul_ptc_g(vcpu, inst);
+ break;
+ case EVENT_PTC_GA:
+ status=vmx_emul_ptc_ga(vcpu, inst);
+ break;
+ case EVENT_PTC_E:
+ status=vmx_emul_ptc_e(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_RR:
+ status=vmx_emul_mov_to_rr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_RR:
+ status=vmx_emul_mov_from_rr(vcpu, inst);
+ break;
+ case EVENT_THASH:
+ status=vmx_emul_thash(vcpu, inst);
+ break;
+ case EVENT_TTAG:
+ status=vmx_emul_ttag(vcpu, inst);
+ break;
+ case EVENT_TPA:
+ status=vmx_emul_tpa(vcpu, inst);
+ break;
+ case EVENT_TAK:
+ status=vmx_emul_tak(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_AR_IMM:
+ status=vmx_emul_mov_to_ar_imm(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_AR:
+ status=vmx_emul_mov_to_ar_reg(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_AR:
+ status=vmx_emul_mov_from_ar_reg(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_DBR:
+ status=vmx_emul_mov_to_dbr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_IBR:
+ status=vmx_emul_mov_to_ibr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PMC:
+ status=vmx_emul_mov_to_pmc(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PMD:
+ status=vmx_emul_mov_to_pmd(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PKR:
+ status=vmx_emul_mov_to_pkr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_DBR:
+ status=vmx_emul_mov_from_dbr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_IBR:
+ status=vmx_emul_mov_from_ibr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PMC:
+ status=vmx_emul_mov_from_pmc(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PKR:
+ status=vmx_emul_mov_from_pkr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_CPUID:
+ status=vmx_emul_mov_from_cpuid(vcpu, inst);
+ break;
+ case EVENT_VMSW:
+ printf ("Unimplemented instruction %d\n", cause);
+ status=IA64_FAULT;
+ break;
+ default:
+ printf("unknown cause %d, iip: %lx, ipsr: %lx\n",
cause,regs->cr_iip,regs->cr_ipsr);
+ while(1);
+ /* For an unknown cause, let the hardware re-execute the instruction */
+ status=IA64_RETRY;
+ break;
+// panic("unknown cause in virtualization intercept");
+ };
+
+#if 0
+ if (status == IA64_FAULT)
+ panic("Emulation failed with cause %d:\n", cause);
+#endif
+
+ if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) {
+ vmx_vcpu_increment_iip(vcpu);
+ }
+
+ recover_if_physical_mode(vcpu);
+ post_emulation_action (vcpu);
+//TODO set_irq_check(v);
+ return;
+
+}
+
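After a successful emulation (other than rfi) the guest IP must advance past the virtualized instruction. vmx_vcpu_increment_iip() is defined elsewhere in this patch series; a minimal sketch of the usual IA-64 slot arithmetic it has to perform is shown below, using the same regs->cr_ipsr / regs->cr_iip fields seen above. Treat it as an illustration under those assumptions, not the actual implementation:

    static void increment_iip_sketch(REGS *regs)
    {
        struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;

        if (ipsr->ri < 2) {
            ipsr->ri++;              /* next slot in the same 16-byte bundle */
        } else {
            ipsr->ri = 0;            /* wrap to slot 0 ...                   */
            regs->cr_iip += 16;      /* ... of the next bundle               */
        }
    }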
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vsa.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vmx_vsa.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,84 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vsa.S: Call PAL virtualization services.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Arun Sharma <arun.sharma@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
+ */
+
+#include <asm/asmmacro.h>
+
+
+ .text
+
+/*
+ * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2,
+ * UINT64 arg3, UINT64 arg4, UINT64 arg5,
+ * UINT64 arg6, UINT64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ * rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+ .regstk 4,4,0,0
+
+rpsave = loc0
+pfssave = loc1
+psrsave = loc2
+entry = loc3
+hostret = r24
+
+ alloc pfssave=ar.pfs,4,4,0,0
+ mov rpsave=rp
+ movl entry=@gprel(__vsa_base)
+1: mov hostret=ip
+ mov r25=in1 // copy arguments
+ mov r26=in2
+ mov r27=in3
+ mov psrsave=psr
+ ;;
+ add entry=entry,gp
+ tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
+ tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
+ ;;
+ ld8 entry=[entry] // read entry point
+ ;;
+ add hostret=2f-1b,hostret // calculate return address
+ add entry=entry,in0
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.d
+ mov b6=entry
+ br.cond.sptk b6 // call the service
+2:
+ // Architectural sequence for enabling interrupts if necessary
+(p7) ssm psr.ic
+ ;;
+(p7) srlz.d
+ ;;
+(p6) ssm psr.i
+ ;;
+ mov rp=rpsave
+ mov ar.pfs=pfssave
+ mov r8=r31
+ ;;
+ srlz.d
+ br.ret.sptk rp
+
+END(ia64_call_vsa)
+
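From C, this stub is reached through the prototype in the comment above. As the assembly shows, in0 is added to the entry point read from __vsa_base, so the first argument selects which virtualization service runs; the next three arguments are passed through in r25-r27. A hypothetical call site (the names vps_entry and arg1..arg4 are illustrative placeholders, not taken from this patch) might look like:

    UINT64 ret = ia64_call_vsa(vps_entry, arg1, arg2, arg3, arg4, 0, 0, 0);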
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vtlb.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/vmx/vtlb.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1094 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
+ * XiaoYan Feng (Fleming Feng) (Fleming.feng@xxxxxxxxx)
+ */
+
+#include <linux/sched.h>
+#include <asm/tlb.h>
+#include <asm/mm.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/gcc_intrin.h>
+#include <linux/interrupt.h>
+#include <asm/vmx_vcpu.h>
+#define MAX_CCH_LENGTH 40
+
+
+static void cch_mem_init(thash_cb_t *hcb)
+{
+ thash_cch_mem_t *p, *q;
+
+ hcb->cch_freelist = p = hcb->cch_buf;
+
+ for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz;
+ p++, q++ ) {
+ p->next = q;
+ }
+ p->next = NULL;
+}
+
+static thash_data_t *cch_alloc(thash_cb_t *hcb)
+{
+ thash_cch_mem_t *p;
+
+ if ( (p = hcb->cch_freelist) != NULL ) {
+ hcb->cch_freelist = p->next;
+ }
+ return &(p->data);
+}
+
+static void cch_free(thash_cb_t *hcb, thash_data_t *cch)
+{
+ thash_cch_mem_t *p = (thash_cch_mem_t*)cch;
+
+ p->next = hcb->cch_freelist;
+ hcb->cch_freelist = p;
+}
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl)
+{
+ u64 size1,sa1,ea1;
+
+ if ( tlb->rid != rid || tlb->cl != cl )
+ return 0;
+ size1 = PSIZE(tlb->ps);
+ sa1 = tlb->vadr & ~(size1-1); // mask the low address bits
+ ea1 = sa1 + size1;
+
+ if ( va >= sa1 && (va < ea1 || ea1 == 0) )
+ return 1;
+ else
+ return 0;
+}
+
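A small worked example of the range check above, assuming PSIZE(ps) is 1UL << ps (the usual definition):

    /* For a 16KB translation (tlb->ps == 14) at tlb->vadr == 0x200c000:     */
    /*   size1 = 1UL << 14           = 0x4000                               */
    /*   sa1   = 0x200c000 & ~0x3fff = 0x200c000                            */
    /*   ea1   = 0x200c000 + 0x4000  = 0x2010000                            */
    /* so any va in [0x200c000, 0x2010000) with a matching rid/cl hits.     */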
+/*
+ * Only for TLB format.
+ */
+static int
+__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva)
+{
+ uint64_t size1,size2,sa1,ea1,ea2;
+
+ if ( entry->invalid || entry->rid != rid || entry->cl != cl ) {
+ return 0;
+ }
+ size1=PSIZE(entry->ps);
+ sa1 = entry->vadr & ~(size1-1); // mask the low address bits
+ ea1 = sa1 + size1;
+ if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) )
+ return 0;
+ else
+ return 1;
+
+}
+
+static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr)
+{
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,tr);
+ }
+ tr->invalid = 1;
+}
+
+static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx)
+{
+ *tr = *data;
+ tr->tr_idx = idx;
+}
+
+
+static void __init_tr(thash_cb_t *hcb)
+{
+ int i;
+ thash_data_t *tr;
+
+ for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) {
+ tr[i].invalid = 1;
+ }
+ for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) {
+ tr[i].invalid = 1;
+ }
+}
+
+/*
+ * Replace TR entry.
+ */
+static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx)
+{
+ thash_data_t *tr;
+
+ if ( insert->cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,idx);
+ }
+ else {
+ tr = &DTR(hcb,idx);
+ }
+ if ( !INVALID_TLB(tr) ) {
+ __rem_tr(hcb, tr);
+ }
+ __set_tr (tr, insert, idx);
+}
+
+/*
+ * remove TR entry.
+ */
+static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx)
+{
+ thash_data_t *tr;
+
+ if ( cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,idx);
+ }
+ else {
+ tr = &DTR(hcb,idx);
+ }
+ if ( !INVALID_TLB(tr) ) {
+ __rem_tr(hcb, tr);
+ }
+}
+
+/*
+ * Delete a thash entry from a collision chain.
+ * prev: the previous entry.
+ * rem: the entry to remove.
+ */
+static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem)
+{
+ //prev->next = rem->next;
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,rem);
+ }
+ cch_free (hcb, rem);
+}
+
+/*
+ * Delete the thash entry at the head of a collision chain.
+ */
+static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash)
+{
+ thash_data_t *next=hash->next;
+
+ if ( hcb->remove_notifier ) {
+ (hcb->remove_notifier)(hcb,hash);
+ }
+ if ( next != NULL ) {
+ *hash = *next;
+ cch_free (hcb, next);
+ }
+ else {
+ INVALIDATE_HASH(hcb, hash);
+ }
+}
+
+thash_data_t *__vtr_lookup(thash_cb_t *hcb,
+ u64 rid, u64 va,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *tr;
+ int num,i;
+
+ if ( cl == ISIDE_TLB ) {
+ tr = &ITR(hcb,0);
+ num = NITRS;
+ }
+ else {
+ tr = &DTR(hcb,0);
+ num = NDTRS;
+ }
+ for ( i=0; i<num; i++ ) {
+ if ( !INVALID_ENTRY(hcb,&tr[i]) &&
+ __is_translated(&tr[i], rid, va, cl) )
+ return &tr[i];
+ }
+ return NULL;
+}
+
+
+/*
+ * Find an overlapping VHPT entry within the current collision chain,
+ * based on the internal priv info.
+ */
+static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb)
+{
+ thash_data_t *cch;
+ thash_internal_t *priv = &hcb->priv;
+
+
+ for (cch=priv->cur_cch; cch; cch = cch->next) {
+ if ( priv->tag == cch->etag ) {
+ return cch;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Find an overlapping TLB/VHPT entry within the current collision chain,
+ * based on the internal priv info.
+ */
+static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb)
+{
+ thash_data_t *cch;
+ thash_internal_t *priv = &hcb->priv;
+
+ /* Find overlap TLB entry */
+ for (cch=priv->cur_cch; cch; cch = cch->next) {
+ if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr ) &&
+ __is_tlb_overlap(hcb, cch, priv->rid, priv->cl,
+ priv->_curva, priv->_eva) ) {
+ return cch;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Get the machine format of a VHPT entry.
+ * PARAS:
+ * 1: tlb: the TLB-format hash entry being converted to VHPT format.
+ * 2: va: the guest virtual address that must be covered by
+ * the translated machine VHPT entry.
+ * 3: vhpt: the machine-format VHPT entry converted from tlb.
+ * NOTES:
+ * 1: If the machine address range is discontiguous,
+ * "tlb" may need to be covered by several machine VHPT entries; va
+ * is used to choose one of them.
+ * 2: Foreign map is supported in this API.
+ * RETURN:
+ * 1 on success, 0 on failure.
+ *
+ */
+int __tlb_to_vhpt(thash_cb_t *hcb,
+ thash_data_t *tlb, u64 va,
+ thash_data_t *vhpt)
+{
+ u64 pages,mfn;
+ ia64_rr vrr;
+
+ ASSERT ( hcb->ht == THASH_VHPT );
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
+ mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages);
+ if ( mfn == INVALID_MFN ) return 0;
+
+ // TODO with machine discontinuous address space issue.
+ vhpt->etag = (hcb->vs->tag_func)( hcb->pta,
+ tlb->vadr, tlb->rid, tlb->ps);
+ //vhpt->ti = 0;
+ vhpt->itir = tlb->itir & ~ITIR_RV_MASK;
+ vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
+ vhpt->ppn = mfn;
+ vhpt->next = 0;
+ return 1;
+}
+
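To make the mfn lookup above concrete, assuming PSIZE(ps) == 1UL << ps and a 16KB Xen page size (PAGE_SHIFT == 14, which is what this tree is normally built with):

    /* tlb->ps == 16 (64KB guest page), PAGE_SHIFT == 14 (16KB machine page): */
    /*   pages = PSIZE(16) >> 14 = 0x10000 >> 14 = 4                          */
    /* so get_mfn() is asked for 4 contiguous machine frames at tlb->ppn.     */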
+
+/*
+ * Insert an entry into the hash table.
+ * NOTES:
+ * 1: TLB entry may be TR, TC or Foreign Map. For TR entry,
+ * itr[]/dtr[] need to be updated too.
+ * 2: Inserting to collision chain may trigger recycling if
+ * the buffer for collision chain is empty.
+ * 3: The new entry is written into the hash table slot itself,
+ * i.e. it becomes the head of the collision chain.
+ * 4: The buffer holding the entry is allocated internally
+ * from cch_buf or just in the hash table.
+ * 5: Return the entry in hash table or collision chain.
+ * 6: The input parameter, entry, should be in TLB format,
+ * i.e. it has va, rid, ps...
+ * 7: This API is invoked by emulating ITC/ITR and tlb_miss.
+ *
+ */
+
+void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx)
+{
+ if ( hcb->ht != THASH_TLB || entry->tc ) {
+ panic("wrong parameter\n");
+ }
+ entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+ entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+ rep_tr(hcb, entry, idx);
+ return ;
+}
+
+thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry)
+{
+ thash_data_t *cch;
+
+ cch = cch_alloc(hcb);
+ if(cch == NULL){
+ // recycle
+ if ( hcb->recycle_notifier ) {
+ hcb->recycle_notifier(hcb,(u64)entry);
+ }
+ thash_purge_all(hcb);
+ cch = cch_alloc(hcb);
+ }
+ return cch;
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ * 1: When inserting a VHPT entry into the thash, "va" must be an
+ * address covered by the inserted machine VHPT entry.
+ * 2: The format of entry is always in TLB.
+ * 3: The caller needs to make sure the new entry will not overlap
+ * with any existing entry.
+ */
+void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table, *cch;
+ int flag;
+ ia64_rr vrr;
+ u64 gppn;
+ u64 ppns, ppne;
+
+ hash_table = (hcb->hash_func)(hcb->pta,
+ va, entry->rid, entry->ps);
+ if( INVALID_ENTRY(hcb, hash_table) ) {
+ *hash_table = *entry;
+ hash_table->next = 0;
+ }
+ else {
+ // TODO: Add collision chain length limitation.
+ cch = __alloc_chain(hcb,entry);
+
+ *cch = *hash_table;
+ *hash_table = *entry;
+ hash_table->next = cch;
+ }
+ if(hcb->vcpu->domain->domain_id==0){
+ thash_insert(hcb->ts->vhpt, entry, va);
+ return;
+ }
+ flag = 1;
+ gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT;
+ ppns = PAGEALIGN((entry->ppn<<12),entry->ps);
+ ppne = ppns + PSIZE(entry->ps);
+ if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000)))
+ flag = 0;
+ if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag))
+ thash_insert(hcb->ts->vhpt, entry, va);
+ return ;
+}
+
+static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table, *cch;
+ ia64_rr vrr;
+
+ hash_table = (hcb->hash_func)(hcb->pta,
+ va, entry->rid, entry->ps);
+ if( INVALID_ENTRY(hcb, hash_table) ) {
+ if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+ panic("Can't convert to machine VHPT entry\n");
+ }
+ hash_table->next = 0;
+ }
+ else {
+ // TODO: Add collision chain length limitation.
+ cch = __alloc_chain(hcb,entry);
+
+ *cch = *hash_table;
+ if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
+ panic("Can't convert to machine VHPT entry\n");
+ }
+ hash_table->next = cch;
+ if(hash_table->tag==hash_table->next->tag)
+ while(1);
+ }
+ return /*hash_table*/;
+}
+
+void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
+{
+ thash_data_t *hash_table;
+ ia64_rr vrr;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr);
+ if ( entry->ps != vrr.ps && entry->tc ) {
+ panic("Not support for multiple page size now\n");
+ }
+ entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
+ entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
+ (hcb->ins_hash)(hcb, entry, va);
+
+}
+
+static void rem_thash(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_data_t *hash_table, *p, *q;
+ thash_internal_t *priv = &hcb->priv;
+ int idx;
+
+ hash_table = priv->hash_base;
+ if ( hash_table == entry ) {
+// if ( PURGABLE_ENTRY(hcb, entry) ) {
+ __rem_hash_head (hcb, entry);
+// }
+ return ;
+ }
+ // remove from collision chain
+ p = hash_table;
+ for ( q=p->next; q; q = p->next ) {
+ if ( q == entry ){
+// if ( PURGABLE_ENTRY(hcb,q ) ) {
+ p->next = q->next;
+ __rem_chain(hcb, entry);
+// }
+ return ;
+ }
+ p = q;
+ }
+ panic("Entry not existed or bad sequence\n");
+}
+
+static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_data_t *hash_table, *p, *q;
+ thash_internal_t *priv = &hcb->priv;
+ int idx;
+
+ if ( !entry->tc ) {
+ return rem_tr(hcb, entry->cl, entry->tr_idx);
+ }
+ rem_thash(hcb, entry);
+}
+
+int cch_depth=0;
+/*
+ * Purge the collision chain starting from cch.
+ * NOTE:
+ * For un-purgable entries (foreign map), this function returns
+ * the head of the remaining collision chain.
+ */
+static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch)
+{
+ thash_data_t *next;
+
+ if ( ++cch_depth > MAX_CCH_LENGTH ) {
+ printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n");
+ while(1);
+ }
+ if ( cch -> next ) {
+ next = thash_rem_cch(hcb, cch->next);
+ }
+ else {
+ next = NULL;
+ }
+ if ( PURGABLE_ENTRY(hcb, cch) ) {
+ __rem_chain(hcb, cch);
+ return next;
+ }
+ else {
+ cch->next = next;
+ return cch;
+ }
+}
+
+/*
+ * Purge one hash line (including the entry in the hash table).
+ * Can only be called by thash_purge_all.
+ * Input:
+ * hash: The head of collision chain (hash table)
+ *
+ */
+static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash)
+{
+ if ( INVALID_ENTRY(hcb, hash) ) return;
+
+ if ( hash->next ) {
+ cch_depth = 0;
+ hash->next = thash_rem_cch(hcb, hash->next);
+ }
+ // Then hash table itself.
+ if ( PURGABLE_ENTRY(hcb, hash) ) {
+ __rem_hash_head(hcb, hash);
+ }
+}
+
+
+/*
+ * Find an overlap entry in hash table and its collision chain.
+ * Refer to SDM2 4.1.1.4 for overlap definition.
+ * PARAS:
+ * 1: in: TLB format entry; rid and ps must match vrr[].
+ * va & ps identify the address space for overlap lookup
+ * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX)
+ * 3: cl means I side or D side.
+ * RETURNS:
+ * NULL indicates that no further overlapping entry was found.
+ * NOTES:
+ *
+ */
+thash_data_t *thash_find_overlap(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t s_sect)
+{
+ return (hcb->find_overlap)(hcb, in->vadr,
+ PSIZE(in->ps), in->rid, in->cl, s_sect);
+}
+
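The intended calling pattern for the overlap search is an iterator loop: find_overlap() primes hcb->priv and returns the first hit, next_overlap() continues the same walk. thash_purge_entries_ex() further down uses exactly this shape:

    ovl = thash_find_overlap(hcb, in, s_sect);   /* first overlapping entry */
    while (ovl != NULL) {
        /* ... inspect or remove ovl ... */
        ovl = (hcb->next_overlap)(hcb);          /* continue the same walk  */
    }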
+static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb,
+ u64 va, u64 size, int rid, char cl, search_section_t s_sect)
+{
+ thash_data_t *hash_table;
+ thash_internal_t *priv = &hcb->priv;
+ u64 tag;
+ ia64_rr vrr;
+
+ priv->_curva = va & ~(size-1);
+ priv->_eva = priv->_curva + size;
+ priv->rid = rid;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ priv->ps = vrr.ps;
+ hash_table = (hcb->hash_func)(hcb->pta,
+ priv->_curva, rid, priv->ps);
+
+ priv->s_sect = s_sect;
+ priv->cl = cl;
+ priv->_tr_idx = 0;
+ priv->hash_base = hash_table;
+ priv->cur_cch = hash_table;
+ return (hcb->next_overlap)(hcb);
+}
+
+static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb,
+ u64 va, u64 size, int rid, char cl, search_section_t s_sect)
+{
+ thash_data_t *hash_table;
+ thash_internal_t *priv = &hcb->priv;
+ u64 tag;
+ ia64_rr vrr;
+
+ priv->_curva = va & ~(size-1);
+ priv->_eva = priv->_curva + size;
+ priv->rid = rid;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ priv->ps = vrr.ps;
+ hash_table = (hcb->hash_func)( hcb->pta,
+ priv->_curva, rid, priv->ps);
+ tag = (hcb->vs->tag_func)( hcb->pta,
+ priv->_curva, rid, priv->ps);
+
+ priv->tag = tag;
+ priv->hash_base = hash_table;
+ priv->cur_cch = hash_table;
+ return (hcb->next_overlap)(hcb);
+}
+
+
+static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *tr;
+ thash_internal_t *priv = &hcb->priv;
+ int num;
+
+ if ( priv->cl == ISIDE_TLB ) {
+ num = NITRS;
+ tr = &ITR(hcb,0);
+ }
+ else {
+ num = NDTRS;
+ tr = &DTR(hcb,0);
+ }
+ for (; priv->_tr_idx < num; priv->_tr_idx ++ ) {
+ if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx],
+ priv->rid, priv->cl,
+ priv->_curva, priv->_eva) ) {
+ return &tr[priv->_tr_idx++];
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Similar to vtlb_find_overlap, but returns the next overlapping entry.
+ * NOTES:
+ * Intermediate position information is stored in hcb->priv.
+ */
+static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *ovl;
+ thash_internal_t *priv = &hcb->priv;
+ u64 addr,rr_psize;
+ ia64_rr vrr;
+
+ if ( priv->s_sect.tr ) {
+ ovl = vtr_find_next_overlap (hcb);
+ if ( ovl ) return ovl;
+ priv->s_sect.tr = 0;
+ }
+ if ( priv->s_sect.v == 0 ) return NULL;
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+ rr_psize = PSIZE(vrr.ps);
+
+ while ( priv->_curva < priv->_eva ) {
+ if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+ ovl = _vtlb_next_overlap_in_chain(hcb);
+ if ( ovl ) {
+ priv->cur_cch = ovl->next;
+ return ovl;
+ }
+ }
+ priv->_curva += rr_psize;
+ priv->hash_base = (hcb->hash_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->cur_cch = priv->hash_base;
+ }
+ return NULL;
+}
+
+static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb)
+{
+ thash_data_t *ovl;
+ thash_internal_t *priv = &hcb->priv;
+ u64 addr,rr_psize;
+ ia64_rr vrr;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
+ rr_psize = PSIZE(vrr.ps);
+
+ while ( priv->_curva < priv->_eva ) {
+ if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
+ ovl = _vhpt_next_overlap_in_chain(hcb);
+ if ( ovl ) {
+ priv->cur_cch = ovl->next;
+ return ovl;
+ }
+ }
+ priv->_curva += rr_psize;
+ priv->hash_base = (hcb->hash_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->tag = (hcb->vs->tag_func)( hcb->pta,
+ priv->_curva, priv->rid, priv->ps);
+ priv->cur_cch = priv->hash_base;
+ }
+ return NULL;
+}
+
+
+/*
+ * Find and purge overlap entries in hash table and its collision chain.
+ * PARAS:
+ * 1: in: TLB format entry; rid and ps must match vrr[].
+ * rid, va & ps identify the address space for purge
+ * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX)
+ * 3: cl means I side or D side.
+ * NOTES:
+ *
+ */
+void thash_purge_entries(thash_cb_t *hcb,
+ thash_data_t *in, search_section_t p_sect)
+{
+ return thash_purge_entries_ex(hcb, in->rid, in->vadr,
+ in->ps, p_sect, in->cl);
+}
+
+void thash_purge_entries_ex(thash_cb_t *hcb,
+ u64 rid, u64 va, u64 ps,
+ search_section_t p_sect,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *ovl;
+
+ ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect);
+ while ( ovl != NULL ) {
+ (hcb->rem_hash)(hcb, ovl);
+ ovl = (hcb->next_overlap)(hcb);
+ };
+}
+
+/*
+ * Purge overlapping TCs and then insert the new entry, to emulate itc ops.
+ * Notes: only TC entries can be purged and inserted this way.
+ */
+void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in)
+{
+ thash_data_t *ovl;
+ search_section_t sections;
+
+#ifdef XEN_DEBUGGER
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,in->vadr);
+ if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) {
+ panic ("Oops, wrong call for purge_and_insert\n");
+ return;
+ }
+#endif
+ in->vadr = PAGEALIGN(in->vadr,in->ps);
+ in->ppn = PAGEALIGN(in->ppn, in->ps-12);
+ sections.tr = 0;
+ sections.tc = 1;
+ ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps),
+ in->rid, in->cl, sections);
+ if(ovl)
+ (hcb->rem_hash)(hcb, ovl);
+#ifdef XEN_DEBUGGER
+ ovl = (hcb->next_overlap)(hcb);
+ if ( ovl ) {
+ panic ("Oops, 2+ overlaps for purge_and_insert\n");
+ return;
+ }
+#endif
+ (hcb->ins_hash)(hcb, in, in->vadr);
+}
+
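A hedged sketch of how an itc-style emulation elsewhere in this patch might feed this routine; every name not visible in this file (itir_ps(), DSIDE_TLB, the pte/itir/ifa/vrr locals) is an assumption for illustration only:

    thash_data_t entry = {0};

    entry.page_flags = pte;            /* guest PTE image (assumed local)    */
    entry.itir       = itir;           /* carries the page size              */
    entry.ps         = itir_ps(itir);  /* assumed helper extracting itir.ps  */
    entry.vadr       = ifa;            /* guest address being mapped         */
    entry.rid        = vrr.rid;        /* region id for that address         */
    entry.cl         = DSIDE_TLB;      /* data-side translation (assumed)    */
    entry.tc         = 1;              /* a TC, as required by this routine  */
    thash_purge_and_insert(hcb, &entry);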
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+// TODO: add sections.
+void thash_purge_all(thash_cb_t *hcb)
+{
+ thash_data_t *hash_table;
+
+#ifdef VTLB_DEBUG
+ extern u64 sanity_check;
+ static u64 statistics_before_purge_all=0;
+ if ( statistics_before_purge_all ) {
+ sanity_check = 1;
+ check_vtlb_sanity(hcb);
+ }
+#endif
+
+ hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+
+ for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+ thash_rem_line(hcb, hash_table);
+ }
+}
+
+
+/*
+ * Look up the hash table and its collision chain to find an entry
+ * covering the address rid:va (TR entries are checked first).
+ *
+ * INPUT:
+ * in: TLB format for both VHPT & TLB.
+ */
+thash_data_t *vtlb_lookup(thash_cb_t *hcb,
+ thash_data_t *in)
+{
+ return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl);
+}
+
+thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb,
+ u64 rid, u64 va,
+ CACHE_LINE_TYPE cl)
+{
+ thash_data_t *hash_table, *cch;
+ u64 tag;
+ ia64_rr vrr;
+
+ ASSERT ( hcb->ht == THASH_VTLB );
+
+ cch = __vtr_lookup(hcb, rid, va, cl);
+ if ( cch ) return cch;
+
+ vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
+ hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps);
+
+ if ( INVALID_ENTRY(hcb, hash_table ) )
+ return NULL;
+
+
+ for (cch=hash_table; cch; cch = cch->next) {
+ if ( __is_translated(cch, rid, va, cl) )
+ return cch;
+ }
+ return NULL;
+}
+
+/*
+ * Lock/Unlock TC if found.
+ * NOTES: Only pages of the preferred size can be handled.
+ * return:
+ * 1: failure
+ * 0: success
+ */
+int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock)
+{
+ thash_data_t *ovl;
+ search_section_t sections;
+
+ sections.tr = 1;
+ sections.tc = 1;
+ ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections);
+ if ( ovl ) {
+ if ( !ovl->tc ) {
+// panic("Oops, TR for lock\n");
+ return 0;
+ }
+ else if ( lock ) {
+ if ( ovl->locked ) {
+ DPRINTK("Oops, already locked entry\n");
+ }
+ ovl->locked = 1;
+ }
+ else if ( !lock ) {
+ if ( !ovl->locked ) {
+ DPRINTK("Oops, already unlocked entry\n");
+ }
+ ovl->locked = 0;
+ }
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Notifier invoked when a TLB entry is deleted from the hash table or its collision chain.
+ * NOTES:
+ * The typical situation is that TLB remove needs to inform
+ * VHPT to remove too.
+ * PARAS:
+ * 1: hcb is TLB object.
+ * 2: The format of entry is always in TLB.
+ *
+ */
+void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry)
+{
+ thash_cb_t *vhpt;
+ search_section_t s_sect;
+
+ s_sect.v = 0;
+ thash_purge_entries(hcb->ts->vhpt, entry, s_sect);
+ machine_tlb_purge(entry->rid, entry->vadr, entry->ps);
+}
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(thash_cb_t *hcb, u64 sz)
+{
+ thash_data_t *hash_table;
+
+ cch_mem_init (hcb);
+ hcb->magic = THASH_CB_MAGIC;
+ hcb->pta.val = hcb->hash;
+ hcb->pta.vf = 1;
+ hcb->pta.ve = 1;
+ hcb->pta.size = sz;
+ hcb->get_rr_fn = vmmu_get_rr;
+ ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 );
+ if ( hcb->ht == THASH_TLB ) {
+ hcb->remove_notifier = tlb_remove_notifier;
+ hcb->find_overlap = vtlb_find_overlap;
+ hcb->next_overlap = vtlb_next_overlap;
+ hcb->rem_hash = rem_vtlb;
+ hcb->ins_hash = vtlb_insert;
+ __init_tr(hcb);
+ }
+ else {
+ hcb->remove_notifier = NULL;
+ hcb->find_overlap = vhpt_find_overlap;
+ hcb->next_overlap = vhpt_next_overlap;
+ hcb->rem_hash = rem_thash;
+ hcb->ins_hash = vhpt_insert;
+ }
+ hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
+
+ for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
+ INVALIDATE_HASH(hcb,hash_table);
+ }
+}
+
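thash_init() only wires up the function pointers, the PTA and the TR arrays; a hedged sketch of the caller-side setup it assumes is below. The real allocation lives in the vmmu initialization code, and placing cch_buf directly after the hash area is an assumption for illustration:

    hcb->ht      = THASH_TLB;               /* or THASH_VHPT                    */
    hcb->vcpu    = v;                       /* owning VCPU                      */
    hcb->hash    = hash_mem;                /* hash-table backing store         */
    hcb->hash_sz = hash_bytes;              /* multiple of sizeof(thash_data_t) */
    hcb->cch_buf = (void *)((u64)hash_mem + hash_bytes);  /* collision-chain pool */
    hcb->cch_sz  = cch_bytes;
    thash_init(hcb, sz);                    /* sz becomes pta.size              */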
+#ifdef VTLB_DEBUG
+static u64 cch_length_statistics[MAX_CCH_LENGTH+1];
+u64 sanity_check=0;
+u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash)
+{
+ thash_data_t *cch;
+ thash_data_t *ovl;
+ search_section_t s_sect;
+ u64 num=0;
+
+ s_sect.v = 0;
+ for (cch=hash; cch; cch=cch->next) {
+ ovl = thash_find_overlap(vhpt, cch, s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ num ++;
+ }
+ if ( num >= MAX_CCH_LENGTH ) {
+ cch_length_statistics[MAX_CCH_LENGTH] ++;
+ }
+ else {
+ cch_length_statistics[num] ++;
+ }
+ return num;
+}
+
+void check_vtlb_sanity(thash_cb_t *vtlb)
+{
+// struct pfn_info *page;
+ u64 hash_num, i, psr;
+ static u64 check_ok_num, check_fail_num,check_invalid;
+// void *vb1, *vb2;
+ thash_data_t *hash, *cch;
+ thash_data_t *ovl;
+ search_section_t s_sect;
+ thash_cb_t *vhpt = vtlb->ts->vhpt;
+ u64 invalid_ratio;
+
+ if ( sanity_check == 0 ) return;
+ sanity_check --;
+ s_sect.v = 0;
+// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
+// if ( page == NULL ) {
+// panic("No enough contiguous memory for init_domain_mm\n");
+// };
+// vb1 = page_to_virt(page);
+// printf("Allocated page=%lp vbase=%lp\n", page, vb1);
+// vb2 = vb1 + vtlb->hash_sz;
+ hash_num = vhpt->hash_sz / sizeof(thash_data_t);
+// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num);
+ printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n",
+ vtlb, vtlb->hash,vtlb->hash_sz,
+ vhpt, vhpt->hash, vhpt->hash_sz);
+ //memcpy(vb1, vtlb->hash, vtlb->hash_sz);
+ //memcpy(vb2, vhpt->hash, vhpt->hash_sz);
+ for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+ cch_length_statistics[i] = 0;
+ }
+
+ local_irq_save(psr);
+
+ hash = vhpt->hash;
+ for (i=0; i < hash_num; i++) {
+ if ( !INVALID_ENTRY(vhpt, hash) ) {
+ for ( cch= hash; cch; cch=cch->next) {
+ cch->checked = 0;
+ }
+ }
+ hash ++;
+ }
+ printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num);
+ check_invalid = 0;
+ check_ok_num=0;
+ hash = vtlb->hash;
+ for ( i=0; i< hash_num; i++ ) {
+ if ( !INVALID_ENTRY(vtlb, hash) ) {
+ check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash);
+ }
+ else {
+ check_invalid++;
+ }
+ hash ++;
+ }
+ printf("Done vtlb entry check, hash=%lp\n", hash);
+ printf("check_ok_num = 0x%lx check_invalid=0x%lx\n",
check_ok_num,check_invalid);
+ invalid_ratio = 1000*check_invalid / hash_num;
+ printf("%02ld.%01ld%% entries are invalid\n",
+ invalid_ratio/10, invalid_ratio % 10 );
+ for (i=0; i<NDTRS; i++) {
+ ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ }
+ printf("Done dTR\n");
+ for (i=0; i<NITRS; i++) {
+ ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect);
+ while ( ovl != NULL ) {
+ ovl->checked = 1;
+ ovl = (vhpt->next_overlap)(vhpt);
+ };
+ }
+ printf("Done iTR\n");
+ check_fail_num = 0;
+ check_invalid = 0;
+ check_ok_num=0;
+ hash = vhpt->hash;
+ for (i=0; i < hash_num; i++) {
+ if ( !INVALID_ENTRY(vhpt, hash) ) {
+ for ( cch= hash; cch; cch=cch->next) {
+ if ( !cch->checked ) {
+ printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash,
cch);
+ check_fail_num ++;
+ }
+ else {
+ check_ok_num++;
+ }
+ }
+ }
+ else {
+ check_invalid ++;
+ }
+ hash ++;
+ }
+ local_irq_restore(psr);
+ printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n",
+ check_ok_num, check_fail_num, check_invalid);
+ //memcpy(vtlb->hash, vb1, vtlb->hash_sz);
+ //memcpy(vhpt->hash, vb2, vhpt->hash_sz);
+ printf("The statistics of collision chain length is listed\n");
+ for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
+ printf("CCH length=%02ld, chain number=%ld\n", i, cch_length_statistics[i]);
+ }
+// free_domheap_pages(page, VCPU_TLB_ORDER);
+ printf("Done check_vtlb\n");
+}
+
+void dump_vtlb(thash_cb_t *vtlb)
+{
+ static u64 dump_vtlb=0;
+ thash_data_t *hash, *cch, *tr;
+ u64 hash_num,i;
+
+ if ( dump_vtlb == 0 ) return;
+ dump_vtlb --;
+ hash_num = vtlb->hash_sz / sizeof(thash_data_t);
+ hash = vtlb->hash;
+
+ printf("Dump vTC\n");
+ for ( i = 0; i < hash_num; i++ ) {
+ if ( !INVALID_ENTRY(vtlb, hash) ) {
+ printf("VTLB at hash=%lp\n", hash);
+ for (cch=hash; cch; cch=cch->next) {
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ cch, cch->vadr, cch->ps, cch->rid);
+ }
+ }
+ hash ++;
+ }
+ printf("Dump vDTR\n");
+ for (i=0; i<NDTRS; i++) {
+ tr = &DTR(vtlb,i);
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ tr, tr->vadr, tr->ps, tr->rid);
+ }
+ printf("Dump vITR\n");
+ for (i=0; i<NITRS; i++) {
+ tr = &ITR(vtlb,i);
+ printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
+ tr, tr->vadr, tr->ps, tr->rid);
+ }
+ printf("End of vTLB dump\n");
+}
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/acpi.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/acpi.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,678 @@
+/*
+ * acpi.c - Architecture-Specific Low-Level ACPI Support
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@xxxxxxxxxxx>
+ * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ * Copyright (C) 2000 Intel Corp.
+ * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@xxxxxxxxx>
+ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ * Copyright (C) 2001 Jenna Hall <jenna.s.hall@xxxxxxxxx>
+ * Copyright (C) 2001 Takayoshi Kochi <t-kochi@xxxxxxxxxxxxx>
+ * Copyright (C) 2002 Erich Focht <efocht@xxxxxxxxxx>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/mmzone.h>
+#include <asm/io.h>
+//#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/system.h>
+#include <asm/numa.h>
+#include <asm/sal.h>
+//#include <asm/cyclone.h>
+
+#define BAD_MADT_ENTRY(entry, end) ( \
+ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
+ ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
+
+#define PREFIX "ACPI: "
+
+void (*pm_idle) (void);
+EXPORT_SYMBOL(pm_idle);
+void (*pm_power_off) (void);
+
+unsigned char acpi_kbd_controller_present = 1;
+unsigned char acpi_legacy_devices;
+
+const char *
+acpi_get_sysname (void)
+{
+/* #ifdef CONFIG_IA64_GENERIC */
+ unsigned long rsdp_phys;
+ struct acpi20_table_rsdp *rsdp;
+ struct acpi_table_xsdt *xsdt;
+ struct acpi_table_header *hdr;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys) {
+ printk(KERN_ERR "ACPI 2.0 RSDP not found, default to
\"dig\"\n");
+ return "dig";
+ }
+
+ rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
+ if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to
\"dig\"\n");
+ return "dig";
+ }
+
+ xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
+ hdr = &xsdt->header;
+ if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
+ printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to
\"dig\"\n");
+ return "dig";
+ }
+
+ if (!strcmp(hdr->oem_id, "HP")) {
+ return "hpzx1";
+ }
+ else if (!strcmp(hdr->oem_id, "SGI")) {
+ return "sn2";
+ }
+
+ return "dig";
+/*
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_HP_ZX1)
+ return "hpzx1";
+# elif defined (CONFIG_IA64_SGI_SN2)
+ return "sn2";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+*/
+}
+
+#ifdef CONFIG_ACPI_BOOT
+
+#define ACPI_MAX_PLATFORM_INTERRUPTS 256
+
+#if 0
+/* Array to record platform interrupt vectors for generic interrupt routing. */
+int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
+ [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
+};
+
+enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
+
+/*
+ * Interrupt routing API for device drivers. Provides interrupt vector for
+ * a generic platform event. Currently only CPEI is implemented.
+ */
+int
+acpi_request_vector (u32 int_type)
+{
+ int vector = -1;
+
+ if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
+ /* corrected platform error interrupt */
+ vector = platform_intr_list[int_type];
+ } else
+ printk(KERN_ERR "acpi_request_vector(): invalid interrupt
type\n");
+ return vector;
+}
+#endif
+char *
+__acpi_map_table (unsigned long phys_addr, unsigned long size)
+{
+ return __va(phys_addr);
+}
+
+/* --------------------------------------------------------------------------
+ Boot-time Table Parsing
+ -------------------------------------------------------------------------- */
+
+static int total_cpus __initdata;
+static int available_cpus __initdata;
+struct acpi_table_madt * acpi_madt __initdata;
+static u8 has_8259;
+
+#if 0
+static int __init
+acpi_parse_lapic_addr_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_addr_ovr *lapic;
+
+ lapic = (struct acpi_table_lapic_addr_ovr *) header;
+
+ if (BAD_MADT_ENTRY(lapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (lapic->address) {
+ iounmap((void *) ipi_base_addr);
+ ipi_base_addr = (unsigned long) ioremap(lapic->address, 0);
+ }
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lsapic *lsapic;
+
+ lsapic = (struct acpi_table_lsapic *) header;
+
+ if (BAD_MADT_ENTRY(lsapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) |
lsapic->eid);
+
+ if (!lsapic->flags.enabled)
+ printk(" disabled");
+ else {
+ printk(" enabled");
+#ifdef CONFIG_SMP
+ smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
+ if (hard_smp_processor_id()
+ == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus])
+ printk(" (BSP)");
+#endif
+ ++available_cpus;
+ }
+
+ printk("\n");
+
+ total_cpus++;
+ return 0;
+}
+
+
+static int __init
+acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_lapic_nmi *lacpi_nmi;
+
+ lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
+
+ if (BAD_MADT_ENTRY(lacpi_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support lapic_nmi entries */
+ return 0;
+}
+
+
+static int __init
+acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_iosapic *iosapic;
+
+ iosapic = (struct acpi_table_iosapic *) header;
+
+ if (BAD_MADT_ENTRY(iosapic, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ iosapic_init(iosapic->address, iosapic->global_irq_base);
+
+ return 0;
+}
+
+
+static int __init
+acpi_parse_plat_int_src (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_plat_int_src *plintsrc;
+ int vector;
+
+ plintsrc = (struct acpi_table_plat_int_src *) header;
+
+ if (BAD_MADT_ENTRY(plintsrc, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /*
+ * Get vector assignment for this interrupt, set attributes,
+ * and program the IOSAPIC routing table.
+ */
+ vector = iosapic_register_platform_intr(plintsrc->type,
+ plintsrc->global_irq,
+ plintsrc->iosapic_vector,
+ plintsrc->eid,
+ plintsrc->id,
+ (plintsrc->flags.polarity == 1)
? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ (plintsrc->flags.trigger == 1)
? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+
+ platform_intr_list[plintsrc->type] = vector;
+ return 0;
+}
+
+
+static int __init
+acpi_parse_int_src_ovr (
+ acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_int_src_ovr *p;
+
+ p = (struct acpi_table_int_src_ovr *) header;
+
+ if (BAD_MADT_ENTRY(p, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ iosapic_override_isa_irq(p->bus_irq, p->global_irq,
+ (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH :
IOSAPIC_POL_LOW,
+ (p->flags.trigger == 1) ? IOSAPIC_EDGE :
IOSAPIC_LEVEL);
+ return 0;
+}
+
+
+static int __init
+acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
+{
+ struct acpi_table_nmi_src *nmi_src;
+
+ nmi_src = (struct acpi_table_nmi_src*) header;
+
+ if (BAD_MADT_ENTRY(nmi_src, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ /* TBD: Support nimsrc entries */
+ return 0;
+}
+/* Hook from generic ACPI tables.c */
+void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strncmp(oem_id, "IBM", 3) &&
+ (!strncmp(oem_table_id, "SERMOW", 6))){
+
+ /* Unfortunately ITC_DRIFT is not yet part of the
+ * official SAL spec, so the ITC_DRIFT bit is not
+ * set by the BIOS on this hardware.
+ */
+ sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
+
+ /*Start cyclone clock*/
+ cyclone_setup(0);
+ }
+}
+
+static int __init
+acpi_parse_madt (unsigned long phys_addr, unsigned long size)
+{
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+
+ /* remember the value for reference after free_initmem() */
+#ifdef CONFIG_ITANIUM
+ has_8259 = 1; /* Firmware on old Itanium systems is broken */
+#else
+ has_8259 = acpi_madt->flags.pcat_compat;
+#endif
+ iosapic_system_init(has_8259);
+
+ /* Get base address of IPI Message Block */
+
+ if (acpi_madt->lapic_address)
+ ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0);
+
+ printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr);
+
+ acpi_madt_oem_check(acpi_madt->header.oem_id,
+ acpi_madt->header.oem_table_id);
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+
+#undef SLIT_DEBUG
+
+#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
+
+static int __initdata srat_num_cpus; /* number of cpus */
+static u32 __initdata pxm_flag[PXM_FLAG_LEN];
+#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
+#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
+/* maps to convert between proximity domain and logical node ID */
+int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS];
+int __initdata nid_to_pxm_map[MAX_NUMNODES];
+static struct acpi_table_slit __initdata *slit_table;
+
+/*
+ * ACPI 2.0 SLIT (System Locality Information Table)
+ * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
+ */
+void __init
+acpi_numa_slit_init (struct acpi_table_slit *slit)
+{
+ u32 len;
+
+ len = sizeof(struct acpi_table_header) + 8
+ + slit->localities * slit->localities;
+ if (slit->header.length != len) {
+ printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d
actual\n",
+ len, slit->header.length);
+ memset(numa_slit, 10, sizeof(numa_slit));
+ return;
+ }
+ slit_table = slit;
+}
+
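The length check above just validates the table size; assuming the standard 36-byte ACPI table header, the arithmetic for a small system is:

    /* 4 localities: len = 36 (header) + 8 (locality count) + 4*4 (matrix) = 60 */
    /* If slit->header.length disagrees, the SLIT is ignored and every          */
    /* node-to-node distance falls back to the default value 10.                */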
+void __init
+acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
+{
+ /* record this node in proximity bitmap */
+ pxm_bit_set(pa->proximity_domain);
+
+ node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
+ /* nid should be overridden as logical node id later */
+ node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
+ srat_num_cpus++;
+}
+
+void __init
+acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
+{
+ unsigned long paddr, size;
+ u8 pxm;
+ struct node_memblk_s *p, *q, *pend;
+
+ pxm = ma->proximity_domain;
+
+ /* fill node memory chunk structure */
+ paddr = ma->base_addr_hi;
+ paddr = (paddr << 32) | ma->base_addr_lo;
+ size = ma->length_hi;
+ size = (size << 32) | ma->length_lo;
+
+ /* Ignore disabled entries */
+ if (!ma->flags.enabled)
+ return;
+
+ /* record this node in proximity bitmap */
+ pxm_bit_set(pxm);
+
+ /* Insertion sort based on base address */
+ pend = &node_memblk[num_node_memblks];
+ for (p = &node_memblk[0]; p < pend; p++) {
+ if (paddr < p->start_paddr)
+ break;
+ }
+ if (p < pend) {
+ for (q = pend - 1; q >= p; q--)
+ *(q + 1) = *q;
+ }
+ p->start_paddr = paddr;
+ p->size = size;
+ p->nid = pxm;
+ num_node_memblks++;
+}
+
+void __init
+acpi_numa_arch_fixup (void)
+{
+ int i, j, node_from, node_to;
+
+ /* If there's no SRAT, fix the phys_id */
+ if (srat_num_cpus == 0) {
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+ return;
+ }
+
+ /* calculate total number of nodes in system from PXM bitmap */
+ numnodes = 0; /* init total nodes in system */
+
+ memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
+ memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
+ for (i = 0; i < MAX_PXM_DOMAINS; i++) {
+ if (pxm_bit_test(i)) {
+ pxm_to_nid_map[i] = numnodes;
+ node_set_online(numnodes);
+ nid_to_pxm_map[numnodes++] = i;
+ }
+ }
+
+ /* set logical node id in memory chunk structure */
+ for (i = 0; i < num_node_memblks; i++)
+ node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
+
+ /* assign memory bank numbers for each chunk on each node */
+ for (i = 0; i < numnodes; i++) {
+ int bank;
+
+ bank = 0;
+ for (j = 0; j < num_node_memblks; j++)
+ if (node_memblk[j].nid == i)
+ node_memblk[j].bank = bank++;
+ }
+
+ /* set logical node id in cpu structure */
+ for (i = 0; i < srat_num_cpus; i++)
+ node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
+
+ printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes);
+ printk(KERN_INFO "Number of memory chunks in system = %d\n",
num_node_memblks);
+
+ if (!slit_table) return;
+ memset(numa_slit, -1, sizeof(numa_slit));
+ for (i=0; i<slit_table->localities; i++) {
+ if (!pxm_bit_test(i))
+ continue;
+ node_from = pxm_to_nid_map[i];
+ for (j=0; j<slit_table->localities; j++) {
+ if (!pxm_bit_test(j))
+ continue;
+ node_to = pxm_to_nid_map[j];
+ node_distance(node_from, node_to) =
+ slit_table->entry[i*slit_table->localities + j];
+ }
+ }
+
+#ifdef SLIT_DEBUG
+ printk("ACPI 2.0 SLIT locality table:\n");
+ for (i = 0; i < numnodes; i++) {
+ for (j = 0; j < numnodes; j++)
+ printk("%03d ", node_distance(i,j));
+ printk("\n");
+ }
+#endif
+}
+#endif /* CONFIG_ACPI_NUMA */
+
+#if 0
+unsigned int
+acpi_register_gsi (u32 gsi, int polarity, int trigger)
+{
+ return acpi_register_irq(gsi, polarity, trigger);
+}
+EXPORT_SYMBOL(acpi_register_gsi);
+static int __init
+acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
+{
+ struct acpi_table_header *fadt_header;
+ struct fadt_descriptor_rev2 *fadt;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ fadt_header = (struct acpi_table_header *) __va(phys_addr);
+ if (fadt_header->revision != 3)
+ return -ENODEV; /* Only deal with ACPI 2.0 FADT */
+
+ fadt = (struct fadt_descriptor_rev2 *) fadt_header;
+
+ if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
+ acpi_kbd_controller_present = 0;
+
+ if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
+ acpi_legacy_devices = 1;
+
+ acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE);
+ return 0;
+}
+#endif
+
+unsigned long __init
+acpi_find_rsdp (void)
+{
+ unsigned long rsdp_phys = 0;
+
+ if (efi.acpi20)
+ rsdp_phys = __pa(efi.acpi20);
+ else if (efi.acpi)
+ printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
+ return rsdp_phys;
+}
+
+#if 0
+int __init
+acpi_boot_init (void)
+{
+
+ /*
+ * MADT
+ * ----
+ * Parse the Multiple APIC Description Table (MADT), if it exists.
+ * Note that this table provides platform SMP configuration
+ * information -- the successor to MPS tables.
+ */
+
+ if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
+ printk(KERN_ERR PREFIX "Can't find MADT\n");
+ goto skip_madt;
+ }
+
+ /* Local APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
+
+ /* I/O APIC */
+
+ if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
+ printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
+
+ /* System-Level Interrupt Routing */
+
+ if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
+ printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
+
+ if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
+ printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
+ skip_madt:
+
+ /*
+ * FADT says whether a legacy keyboard controller is present.
+ * The FADT also contains an SCI_INT line, by which the system
+ * gets interrupts such as power and sleep buttons. If it's not
+ * on a Legacy interrupt, it needs to be setup.
+ */
+ if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
+ printk(KERN_ERR PREFIX "Can't find FADT\n");
+
+#ifdef CONFIG_SMP
+ if (available_cpus == 0) {
+ printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+ printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+ smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
+ available_cpus = 1; /* We've got at least one of these, no? */
+ }
+ smp_boot_data.cpu_count = available_cpus;
+
+ smp_build_cpu_map();
+# ifdef CONFIG_ACPI_NUMA
+ if (srat_num_cpus == 0) {
+ int cpu, i = 1;
+ for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
+ if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
+ node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
+ }
+ build_cpu_to_node_map();
+# endif
+#endif
+ /* Make boot-up look pretty */
+ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
total_cpus);
+ return 0;
+}
+int
+acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
+{
+ int vector;
+
+ if (has_8259 && gsi < 16)
+ *irq = isa_irq_to_vector(gsi);
+ else {
+ vector = gsi_to_vector(gsi);
+ if (vector == -1)
+ return -1;
+
+ *irq = vector;
+ }
+ return 0;
+}
+
+int
+acpi_register_irq (u32 gsi, u32 polarity, u32 trigger)
+{
+ if (has_8259 && gsi < 16)
+ return isa_irq_to_vector(gsi);
+
+ return iosapic_register_intr(gsi,
+ (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
+}
+EXPORT_SYMBOL(acpi_register_irq);
+#endif
+#endif /* CONFIG_ACPI_BOOT */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom0_ops.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/dom0_ops.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,237 @@
+/******************************************************************************
+ * Arch-specific dom0_ops.c
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <public/dom0_ops.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/pdb.h>
+#include <xen/trace.h>
+#include <xen/console.h>
+#include <public/sched_ctl.h>
+
+long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
+{
+ long ret = 0;
+
+ if ( !IS_PRIV(current->domain) )
+ return -EPERM;
+
+ switch ( op->cmd )
+ {
+ case DOM0_GETPAGEFRAMEINFO:
+ {
+ struct pfn_info *page;
+ unsigned long pfn = op->u.getpageframeinfo.pfn;
+ domid_t dom = op->u.getpageframeinfo.domain;
+ struct domain *d;
+
+ ret = -EINVAL;
+
+ if ( unlikely(pfn >= max_page) ||
+ unlikely((d = find_domain_by_id(dom)) == NULL) )
+ break;
+
+ page = &frame_table[pfn];
+
+ if ( likely(get_page(page, d)) )
+ {
+ ret = 0;
+
+ op->u.getpageframeinfo.type = NOTAB;
+
+ if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
+ {
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ default:
+ panic("No such page type\n");
+ break;
+ }
+ }
+
+ put_page(page);
+ }
+
+ put_domain(d);
+
+ copy_to_user(u_dom0_op, op, sizeof(*op));
+ }
+ break;
+
+ case DOM0_GETPAGEFRAMEINFO2:
+ {
+#define GPF2_BATCH 128
+ int n,j;
+ int num = op->u.getpageframeinfo2.num;
+ domid_t dom = op->u.getpageframeinfo2.domain;
+ unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
+ struct domain *d;
+ unsigned long *l_arr;
+ ret = -ESRCH;
+
+ if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
+ break;
+
+ if ( unlikely(num > 1024) )
+ {
+ ret = -E2BIG;
+ break;
+ }
+
+ l_arr = (unsigned long *)alloc_xenheap_page();
+
+ ret = 0;
+ for( n = 0; n < num; )
+ {
+ int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
+
+ if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
+ {
+ ret = -EINVAL;
+ break;
+ }
+
+ for( j = 0; j < k; j++ )
+ {
+ struct pfn_info *page;
+ unsigned long mfn = l_arr[j];
+
+ if ( unlikely(mfn >= max_page) )
+ goto e2_err;
+
+ page = &frame_table[mfn];
+
+ if ( likely(get_page(page, d)) )
+ {
+ unsigned long type = 0;
+
+ switch( page->u.inuse.type_info & PGT_type_mask )
+ {
+ default:
+ panic("No such page type\n");
+ break;
+ }
+
+ if ( page->u.inuse.type_info & PGT_pinned )
+ type |= LPINTAB;
+ l_arr[j] |= type;
+ put_page(page);
+ }
+ else
+ {
+ e2_err:
+ l_arr[j] |= XTAB;
+ }
+
+ }
+
+ if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
+ {
+ ret = -EINVAL;
+ break;
+ }
+
+ n += j;
+ }
+
+ free_xenheap_page((unsigned long)l_arr);
+
+ put_domain(d);
+ }
+ break;
+#ifndef CONFIG_VTI
+ /*
+ * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
+ * it actually allocates and maps pages.
+ */
+ case DOM0_GETMEMLIST:
+ {
+ unsigned long i;
+ struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
+ unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
+ unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
+ unsigned long pfn;
+ unsigned long *buffer = op->u.getmemlist.buffer;
+ struct page *page;
+
+ ret = -EINVAL;
+ if ( d != NULL )
+ {
+ ret = 0;
+
+ for ( i = start_page; i < (start_page + nr_pages); i++ )
+ {
+ page = map_new_domain_page(d, i << PAGE_SHIFT);
+ if ( page == NULL )
+ {
+ ret = -ENOMEM;
+ break;
+ }
+ pfn = page_to_pfn(page);
+ if ( put_user(pfn, buffer) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ buffer++;
+ }
+
+ op->u.getmemlist.num_pfns = i - start_page;
+ copy_to_user(u_dom0_op, op, sizeof(*op));
+
+ put_domain(d);
+ }
+ }
+ break;
+#else
+ case DOM0_GETMEMLIST:
+ {
+ int i;
+ struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
+ unsigned long max_pfns = op->u.getmemlist.max_pfns;
+ unsigned long pfn;
+ unsigned long *buffer = op->u.getmemlist.buffer;
+ struct list_head *list_ent;
+
+ ret = -EINVAL;
+ if (d != NULL) {
+ ret = 0;
+
+ spin_lock(&d->page_alloc_lock);
+ list_ent = d->page_list.next;
+ for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) {
+ pfn = list_entry(list_ent, struct pfn_info, list) -
+ frame_table;
+ if (put_user(pfn, buffer)) {
+ ret = -EFAULT;
+ break;
+ }
+ buffer++;
+ list_ent = frame_table[pfn].list.next;
+ }
+ spin_unlock(&d->page_alloc_lock);
+
+ op->u.getmemlist.num_pfns = i;
+ copy_to_user(u_dom0_op, op, sizeof(*op));
+
+ put_domain(d);
+ }
+ }
+ break;
+#endif // CONFIG_VTI
+ default:
+ ret = -ENOSYS;
+
+ }
+
+ return ret;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom_fw.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/dom_fw.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,688 @@
+/*
+ * Xen domain firmware emulation support
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+
+#include <linux/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <xen/acpi.h>
+
+#include <asm/dom_fw.h>
+
+struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int);
+extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
+extern struct domain *dom0;
+extern unsigned long dom0_start;
+
+extern unsigned long running_on_sim;
+
+
+unsigned long dom_fw_base_mpa = -1;
+unsigned long imva_fw_base = -1;
+
+// return domain (meta)physical address for a given imva
+// this function is a call-back from dom_fw_init
+unsigned long dom_pa(unsigned long imva)
+{
+ if (dom_fw_base_mpa == -1 || imva_fw_base == -1) {
+ printf("dom_pa: uninitialized! (spinning...)\n");
+ while(1);
+ }
+ if (imva - imva_fw_base > PAGE_SIZE) {
+ printf("dom_pa: bad offset! imva=%p, imva_fw_base=%p (spinning...)\n",imva,imva_fw_base);
+ while(1);
+ }
+ return dom_fw_base_mpa + (imva - imva_fw_base);
+}
+
+// builds a hypercall bundle at domain physical address
+void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall)
+{
+ unsigned long imva;
+
+ if (d == dom0) paddr += dom0_start;
+ imva = domain_mpa_to_imva(d,paddr);
+ build_hypercall_bundle(imva,d->arch.breakimm,hypercall,1);
+}
+
+
+// builds a hypercall bundle at domain physical address
+void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall,unsigned long ret)
+{
+ unsigned long imva;
+
+ if (d == dom0) paddr += dom0_start;
+ imva = domain_mpa_to_imva(d,paddr);
+ build_hypercall_bundle(imva,d->arch.breakimm,hypercall,ret);
+}
+
+
+// FIXME: This is really a hack: Forcing the boot parameter block
+// at domain mpaddr 0 page, then grabbing only the low bits of the
+// Xen imva, which is the offset into the page
+unsigned long dom_fw_setup(struct domain *d, char *args, int arglen)
+{
+ struct ia64_boot_param *bp;
+
+ dom_fw_base_mpa = 0;
+ if (d == dom0) dom_fw_base_mpa += dom0_start;
+ imva_fw_base = domain_mpa_to_imva(d,dom_fw_base_mpa);
+ bp = dom_fw_init(d,args,arglen,imva_fw_base,PAGE_SIZE);
+ return dom_pa((unsigned long)bp);
+}
+
+
+/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */
+
+#define MB (1024*1024UL)
+
+#define NUM_EFI_SYS_TABLES 6
+#define PASS_THRU_IOPORT_SPACE
+#ifdef PASS_THRU_IOPORT_SPACE
+# define NUM_MEM_DESCS 4
+#else
+# define NUM_MEM_DESCS 3
+#endif
+
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/* Compute the `struct tm' representation of *T,
+ offset OFFSET seconds east of UTC,
+ and store year, yday, mon, mday, wday, hour, min, sec into *TP.
+ Return nonzero if successful. */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+ const unsigned short int __mon_yday[2][13] =
+ {
+ /* Normal years. */
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ /* Leap years. */
+ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+ long int days, rem, y;
+ const unsigned short int *ip;
+
+ days = t / SECS_PER_DAY;
+ rem = t % SECS_PER_DAY;
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+ tp->hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ tp->minute = rem / 60;
+ tp->second = rem % 60;
+ /* January 1, 1970 was a Thursday. */
+ y = 1970;
+
+# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+# define __isleap(year) \
+ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+ while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long int yg = y + days / 365 - (days % 365 < 0);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+ - LEAPS_THRU_END_OF (y - 1));
+ y = yg;
+ }
+ tp->year = y;
+ ip = __mon_yday[__isleap(y)];
+ for (y = 11; days < (long int) ip[y]; --y)
+ continue;
+ days -= ip[y];
+ tp->month = y + 1;
+ tp->day = days + 1;
+ return 1;
+}
+
+extern struct ia64_pal_retval pal_emulator_static (unsigned long);
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
+
+#ifndef XEN
+static efi_status_t
+fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+ struct {
+ int tv_sec; /* must be 32bits to work */
+ int tv_usec;
+ } tv32bits;
+
+ ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+ memset(tm, 0, sizeof(*tm));
+ offtime(tv32bits.tv_sec, tm);
+
+ if (tc)
+ memset(tc, 0, sizeof(*tc));
+#else
+# error Not implemented yet...
+#endif
+ return EFI_SUCCESS;
+}
+
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
+ ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+# error Not implemented yet...
+#endif
+}
+
+static efi_status_t
+efi_unimplemented (void)
+{
+ return EFI_UNSUPPORTED;
+}
+#endif /* !XEN */
+
+struct sal_ret_values
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+ unsigned long in3, unsigned long in4, unsigned long in5,
+ unsigned long in6, unsigned long in7)
+{
+ long r9 = 0;
+ long r10 = 0;
+ long r11 = 0;
+ long status;
+
+ /*
+ * Don't do a "switch" here since that gives us code that
+ * isn't self-relocatable.
+ */
+ status = 0;
+ if (index == SAL_FREQ_BASE) {
+ if (!running_on_sim)
+ status = ia64_sal_freq_base(in1,&r9,&r10);
+ else switch (in1) {
+ case SAL_FREQ_BASE_PLATFORM:
+ r9 = 200000000;
+ break;
+
+ case SAL_FREQ_BASE_INTERVAL_TIMER:
+ r9 = 700000000;
+ break;
+
+ case SAL_FREQ_BASE_REALTIME_CLOCK:
+ r9 = 1;
+ break;
+
+ default:
+ status = -1;
+ break;
+ }
+ } else if (index == SAL_PCI_CONFIG_READ) {
+ if (current->domain == dom0) {
+ u64 value;
+ // note that args 2&3 are swapped!!
+ status = ia64_sal_pci_config_read(in1,in3,in2,&value);
+ r9 = value;
+ }
+ else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n");
+ } else if (index == SAL_PCI_CONFIG_WRITE) {
+ if (current->domain == dom0) {
+ if (((in1 & ~0xffffffffUL) && (in4 == 0)) ||
+ (in4 > 1) ||
+ (in2 > 8) || (in2 & (in2-1)))
+ printf("*** SAL_PCI_CONF_WRITE?!?(adr=%p,typ=%p,sz=%p,val=%p)\n",in1,in4,in2,in3);
+ // note that args are in a different order!!
+ status = ia64_sal_pci_config_write(in1,in4,in2,in3);
+ }
+ else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n");
+ } else if (index == SAL_SET_VECTORS) {
+ printf("*** CALLED SAL_SET_VECTORS. IGNORED...\n");
+ } else if (index == SAL_GET_STATE_INFO) {
+ printf("*** CALLED SAL_GET_STATE_INFO. IGNORED...\n");
+ } else if (index == SAL_GET_STATE_INFO_SIZE) {
+ printf("*** CALLED SAL_GET_STATE_INFO_SIZE. IGNORED...\n");
+ } else if (index == SAL_CLEAR_STATE_INFO) {
+ printf("*** CALLED SAL_CLEAR_STATE_INFO. IGNORED...\n");
+ } else if (index == SAL_MC_RENDEZ) {
+ printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n");
+ } else if (index == SAL_MC_SET_PARAMS) {
+ printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n");
+ } else if (index == SAL_CACHE_FLUSH) {
+ printf("*** CALLED SAL_CACHE_FLUSH. IGNORED...\n");
+ } else if (index == SAL_CACHE_INIT) {
+ printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n");
+ } else if (index == SAL_UPDATE_PAL) {
+ printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n");
+ } else {
+ printf("*** CALLED SAL_ WITH UNKNOWN INDEX. IGNORED...\n");
+ status = -1;
+ }
+ return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+struct ia64_pal_retval
+xen_pal_emulator(unsigned long index, unsigned long in1,
+ unsigned long in2, unsigned long in3)
+{
+ long r9 = 0;
+ long r10 = 0;
+ long r11 = 0;
+ long status = -1;
+
+#define USE_PAL_EMULATOR
+#ifdef USE_PAL_EMULATOR
+ return pal_emulator_static(index);
+#endif
+ if (running_on_sim) return pal_emulator_static(index);
+ if (index >= PAL_COPY_PAL) {
+ printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+ index);
+ }
+ else switch (index) {
+ case PAL_MEM_ATTRIB:
+ status = ia64_pal_mem_attrib(&r9);
+ break;
+ case PAL_FREQ_BASE:
+ status = ia64_pal_freq_base(&r9);
+ break;
+ case PAL_PROC_GET_FEATURES:
+ status = ia64_pal_proc_get_features(&r9,&r10,&r11);
+ break;
+ case PAL_BUS_GET_FEATURES:
+ status = ia64_pal_bus_get_features(&r9,&r10,&r11);
+ break;
+ case PAL_FREQ_RATIOS:
+ status = ia64_pal_freq_ratios(&r9,&r10,&r11);
+ break;
+ case PAL_PTCE_INFO:
+ {
+ // return hard-coded xen-specific values because ptc.e
+ // is emulated on xen to always flush everything
+ // these values result in only one ptc.e instruction
+ status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0;
+ }
+ break;
+ case PAL_VERSION:
+ status = ia64_pal_version(&r9,&r10);
+ break;
+ case PAL_VM_PAGE_SIZE:
+ status = ia64_pal_vm_page_size(&r9,&r10);
+ break;
+ case PAL_DEBUG_INFO:
+ status = ia64_pal_debug_info(&r9,&r10);
+ break;
+ case PAL_CACHE_SUMMARY:
+ status = ia64_pal_cache_summary(&r9,&r10);
+ break;
+ case PAL_VM_SUMMARY:
+ // FIXME: what should xen return for these, figure out later
+ // For now, linux does the right thing if pal call fails
+ // In particular, rid_size must be set properly!
+ //status = ia64_pal_vm_summary(&r9,&r10);
+ break;
+ case PAL_RSE_INFO:
+ status = ia64_pal_rse_info(&r9,&r10);
+ break;
+ case PAL_VM_INFO:
+ status = ia64_pal_vm_info(in1,in2,&r9,&r10);
+ break;
+ case PAL_REGISTER_INFO:
+ status = ia64_pal_register_info(in1,&r9,&r10);
+ break;
+ case PAL_CACHE_FLUSH:
+ /* FIXME */
+ printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n");
+ BUG();
+ break;
+ case PAL_PERF_MON_INFO:
+ {
+ unsigned long pm_buffer[16];
+ int i;
+ status = ia64_pal_perf_mon_info(pm_buffer,&r9);
+ if (status != 0) {
+ while(1)
+ printk("PAL_PERF_MON_INFO fails ret=%d\n",status);
+ break;
+ }
+ if (copy_to_user((void __user *)in1,pm_buffer,128)) {
+ while(1)
+ printk("xen_pal_emulator: PAL_PERF_MON_INFO "
+ "can't copy to user!!!!\n");
+ status = -1;
+ break;
+ }
+ }
+ break;
+ case PAL_CACHE_INFO:
+ {
+ pal_cache_config_info_t ci;
+ status = ia64_pal_cache_config_info(in1,in2,&ci);
+ if (status != 0) break;
+ r9 = ci.pcci_info_1.pcci1_data;
+ r10 = ci.pcci_info_2.pcci2_data;
+ }
+ break;
+ case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */
+ printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n");
+ break;
+ case PAL_HALT_INFO: /* inappropriate info for guest? */
+ printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n");
+ break;
+ default:
+ printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
+ index);
+ break;
+ }
+ return ((struct ia64_pal_retval) {status, r9, r10, r11});
+}
+
+#define NFUNCPTRS 20
+
+void print_md(efi_memory_desc_t *md)
+{
+#if 1
+ printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+ md->type, md->attribute, md->phys_addr,
+ md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+ md->num_pages >> (20 - EFI_PAGE_SHIFT));
+#endif
+}
+
+#define LSAPIC_NUM 16 // TEMP
+static u32 lsapic_flag=1;
+
+/* Provide only one LP to guest */
+static int
+acpi_update_lsapic (acpi_table_entry_header *header)
+{
+ struct acpi_table_lsapic *lsapic;
+
+ lsapic = (struct acpi_table_lsapic *) header;
+ if (!lsapic)
+ return -EINVAL;
+
+ if (lsapic->flags.enabled && lsapic_flag) {
+ printk("enable lsapic entry: 0x%lx\n", (u64)lsapic);
+ lsapic_flag = 0; /* disable all the following processors */
+ } else if (lsapic->flags.enabled) {
+ printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic);
+ lsapic->flags.enabled = 0;
+ } else
+ printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic);
+
+ return 0;
+}
+
+static int
+acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size)
+{
+ u8 checksum=0;
+ u8* ptr;
+ int len;
+ struct acpi_table_madt* acpi_madt;
+
+ if (!phys_addr || !size)
+ return -EINVAL;
+
+ acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+ acpi_madt->header.checksum=0;
+
+ /* re-calculate MADT checksum */
+ ptr = (u8*)acpi_madt;
+ len = acpi_madt->header.length;
+ while (len>0){
+ checksum = (u8)( checksum + (*ptr++) );
+ len--;
+ }
+ acpi_madt->header.checksum = 0x0 - checksum;
+
+ return 0;
+}
+
+/* base is physical address of acpi table */
+void touch_acpi_table(void)
+{
+ u64 count = 0;
+ count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS);
+ if ( count < 1)
+ printk("Error parsing MADT - no LAPIC entries\n");
+ printk("Total %d lsapic entry\n", count);
+ acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
+
+ return;
+}
+
+
+struct ia64_boot_param *
+dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size)
+{
+ efi_system_table_t *efi_systab;
+ efi_runtime_services_t *efi_runtime;
+ efi_config_table_t *efi_tables;
+ struct ia64_sal_systab *sal_systab;
+ efi_memory_desc_t *efi_memmap, *md;
+ unsigned long *pal_desc, *sal_desc;
+ struct ia64_sal_desc_entry_point *sal_ed;
+ struct ia64_boot_param *bp;
+ unsigned long *pfn;
+ unsigned char checksum = 0;
+ char *cp, *cmd_line, *fw_vendor;
+ int i = 0;
+ unsigned long maxmem = d->max_pages * PAGE_SIZE;
+ unsigned long start_mpaddr = ((d==dom0)?dom0_start:0);
+
+# define MAKE_MD(typ, attr, start, end, abs) \
+ do { \
+ md = efi_memmap + i++; \
+ md->type = typ; \
+ md->pad = 0; \
+ md->phys_addr = abs ? start : start_mpaddr + start; \
+ md->virt_addr = 0; \
+ md->num_pages = (end - start) >> 12; \
+ md->attribute = attr; \
+ print_md(md); \
+ } while (0)
+
+/* FIXME: should check size but for now we have a whole MB to play with.
+ And if stealing code from fw-emu.c, watch out for new fw_vendor on the end!
+ if (fw_mem_size < sizeof(fw_mem_proto)) {
+ printf("sys_fw_init: insufficient space for fw_mem\n");
+ return 0;
+ }
+*/
+ memset(fw_mem, 0, fw_mem_size);
+
+#ifdef XEN
+#else
+ pal_desc = (unsigned long *) &pal_emulator_static;
+ sal_desc = (unsigned long *) &sal_emulator;
+#endif
+
+ cp = fw_mem;
+ efi_systab = (void *) cp; cp += sizeof(*efi_systab);
+ efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+ efi_tables = (void *) cp; cp += NUM_EFI_SYS_TABLES * sizeof(*efi_tables);
+ sal_systab = (void *) cp; cp += sizeof(*sal_systab);
+ sal_ed = (void *) cp; cp += sizeof(*sal_ed);
+ efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+ bp = (void *) cp; cp += sizeof(*bp);
+ pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
+ cmd_line = (void *) cp;
+
+ if (args) {
+ if (arglen >= 1024)
+ arglen = 1023;
+ memcpy(cmd_line, args, arglen);
+ } else {
+ arglen = 0;
+ }
+ cmd_line[arglen] = '\0';
+
+ memset(efi_systab, 0, sizeof(efi_systab));
+ efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+ efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
+ efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+ cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary
+#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary
+
+ memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR));
+ efi_systab->fw_vendor = dom_pa(fw_vendor);
+
+ efi_systab->fw_revision = 1;
+ efi_systab->runtime = (void *) dom_pa(efi_runtime);
+ efi_systab->nr_tables = NUM_EFI_SYS_TABLES;
+ efi_systab->tables = dom_pa(efi_tables);
+
+ efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+ efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+ efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+#define EFI_HYPERCALL_PATCH(tgt,call) do { \
+ dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \
+ tgt = dom_pa(pfn); \
+ *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+ *pfn++ = 0; \
+ } while (0)
+
+ EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME);
+ EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME);
+ EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
+ EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
+ EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
+ EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE);
+ EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
+ EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE);
+ EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
+ EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM);
+
+ efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID;
+ efi_tables[0].table = dom_pa(sal_systab);
+ for (i = 1; i < NUM_EFI_SYS_TABLES; i++) {
+ efi_tables[i].guid = NULL_GUID;
+ efi_tables[i].table = 0;
+ }
+ if (d == dom0) {
+ printf("Domain0 EFI passthrough:");
+ i = 1;
+ if (efi.mps) {
+ efi_tables[i].guid = MPS_TABLE_GUID;
+ efi_tables[i].table = __pa(efi.mps);
+ printf(" MPS=%0xlx",efi_tables[i].table);
+ i++;
+ }
+
+ touch_acpi_table();
+
+ if (efi.acpi20) {
+ efi_tables[i].guid = ACPI_20_TABLE_GUID;
+ efi_tables[i].table = __pa(efi.acpi20);
+ printf(" ACPI 2.0=%0xlx",efi_tables[i].table);
+ i++;
+ }
+ if (efi.acpi) {
+ efi_tables[i].guid = ACPI_TABLE_GUID;
+ efi_tables[i].table = __pa(efi.acpi);
+ printf(" ACPI=%0xlx",efi_tables[i].table);
+ i++;
+ }
+ if (efi.smbios) {
+ efi_tables[i].guid = SMBIOS_TABLE_GUID;
+ efi_tables[i].table = __pa(efi.smbios);
+ printf(" SMBIOS=%0xlx",efi_tables[i].table);
+ i++;
+ }
+ if (efi.hcdp) {
+ efi_tables[i].guid = HCDP_TABLE_GUID;
+ efi_tables[i].table = __pa(efi.hcdp);
+ printf(" HCDP=%0xlx",efi_tables[i].table);
+ i++;
+ }
+ printf("\n");
+ }
+
+ /* fill in the SAL system table: */
+ memcpy(sal_systab->signature, "SST_", 4);
+ sal_systab->size = sizeof(*sal_systab);
+ sal_systab->sal_rev_minor = 1;
+ sal_systab->sal_rev_major = 0;
+ sal_systab->entry_count = 1;
+
+ strcpy(sal_systab->oem_id, "Xen/ia64");
+ strcpy(sal_systab->product_id, "Xen/ia64");
+
+ /* fill in an entry point: */
+ sal_ed->type = SAL_DESC_ENTRY_POINT;
+#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \
+ dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \
+ tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
+ } while (0)
+ FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0);
+ FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1);
+ sal_ed->gp = 0; // will be ignored
+
+ for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+ checksum += *cp;
+
+ sal_systab->checksum = -checksum;
+
+ /* simulate 1MB free memory at physical address zero */
+ i = 0;
+ MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);
+ /* hypercall patches live here, masquerade as reserved PAL memory */
+ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
+ MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0);
+#ifdef PASS_THRU_IOPORT_SPACE
+ if (d == dom0 && !running_on_sim) {
+ /* pass through the I/O port space */
+ efi_memory_desc_t *efi_get_io_md(void);
+ efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md();
+ u32 type;
+ u64 iostart, ioend, ioattr;
+
+ type = ia64_efi_io_md->type;
+ iostart = ia64_efi_io_md->phys_addr;
+ ioend = ia64_efi_io_md->phys_addr +
+ (ia64_efi_io_md->num_pages << 12);
+ ioattr = ia64_efi_io_md->attribute;
+ MAKE_MD(type,ioattr,iostart,ioend, 1);
+ }
+ else
+ MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0);
+#endif
+
+ bp->efi_systab = dom_pa(fw_mem);
+ bp->efi_memmap = dom_pa(efi_memmap);
+ bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_version = 1;
+ bp->command_line = dom_pa(cmd_line);
+ bp->console_info.num_cols = 80;
+ bp->console_info.num_rows = 25;
+ bp->console_info.orig_x = 0;
+ bp->console_info.orig_y = 24;
+ bp->fpswa = 0;
+
+ return bp;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/domain.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/domain.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1103 @@
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
+ *
+ * Copyright (C) 2005 Intel Co
+ * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
+ *
+ * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> Add CONFIG_VTI domain support
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/softirq.h>
+#include <xen/mm.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/mpspec.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+//#include <xen/shadow.h>
+#include <xen/console.h>
+
+#include <xen/elf.h>
+//#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h> /* for MAX_DMA_ADDRESS */
+
+#include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */
+
+#include <asm/vcpu.h> /* for function declarations */
+#include <public/arch-ia64.h>
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmx_vpd.h>
+#include <asm/pal.h>
+#include <public/io/ioreq.h>
+
+#define CONFIG_DOMAIN0_CONTIGUOUS
+unsigned long dom0_start = -1L;
+unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
+//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
+unsigned long dom0_align = 256*1024*1024;
+#ifdef DOMU_BUILD_STAGING
+unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
+unsigned long domU_staging_start;
+unsigned long domU_staging_align = 64*1024;
+unsigned long *domU_staging_area;
+#endif
+
+// initialized by arch/ia64/setup.c:find_initrd()
+unsigned long initrd_start = 0, initrd_end = 0;
+
+#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
+
+//extern int loadelfimage(char *);
+extern int readelfimage_base_and_size(char *, unsigned long,
+ unsigned long *, unsigned long *, unsigned long *);
+
+unsigned long map_domain_page0(struct domain *);
+extern unsigned long dom_fw_setup(struct domain *, char *, int);
+
+/* this belongs in include/asm, but there doesn't seem to be a suitable place */
+void free_perdomain_pt(struct domain *d)
+{
+ printf("free_perdomain_pt: not implemented\n");
+ //free_page((unsigned long)d->mm.perdomain_pt);
+}
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+ hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+ hlt_counter--;
+}
+
+static void default_idle(void)
+{
+ if ( hlt_counter == 0 )
+ {
+ local_irq_disable();
+ if ( !softirq_pending(smp_processor_id()) )
+ safe_halt();
+ //else
+ local_irq_enable();
+ }
+}
+
+void continue_cpu_idle_loop(void)
+{
+ int cpu = smp_processor_id();
+ for ( ; ; )
+ {
+#ifdef IA64
+// __IRQ_STAT(cpu, idle_timestamp) = jiffies
+#else
+ irq_stat[cpu].idle_timestamp = jiffies;
+#endif
+ while ( !softirq_pending(cpu) )
+ default_idle();
+ raise_softirq(SCHEDULE_SOFTIRQ);
+ do_softirq();
+ }
+}
+
+void startup_cpu_idle_loop(void)
+{
+ /* Just some sanity to ensure that the scheduler is set up okay. */
+ ASSERT(current->domain == IDLE_DOMAIN_ID);
+ raise_softirq(SCHEDULE_SOFTIRQ);
+ do_softirq();
+
+ /*
+ * Declares CPU setup done to the boot processor.
+ * Therefore memory barrier to ensure state is visible.
+ */
+ smp_mb();
+#if 0
+//do we have to ensure the idle task has a shared page so that, for example,
+//region registers can be loaded from it. Apparently not...
+ idle0_task.shared_info = (void *)alloc_xenheap_page();
+ memset(idle0_task.shared_info, 0, PAGE_SIZE);
+ /* pin mapping */
+ // FIXME: Does this belong here? Or do only at domain switch time?
+ {
+ /* WARNING: following must be inlined to avoid nested fault */
+ unsigned long psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
+ pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
+ PAGE_SHIFT);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+ }
+#endif
+
+ continue_cpu_idle_loop();
+}
+
+struct vcpu *arch_alloc_vcpu_struct(void)
+{
+ /* Per-vp stack is used here. So we need keep vcpu
+ * same page as per-vp stack */
+ return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
+}
+
+void arch_free_vcpu_struct(struct vcpu *v)
+{
+ free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
+}
+
+static void init_switch_stack(struct vcpu *v)
+{
+ struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ extern void ia64_ret_from_clone;
+
+ memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
+ sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
+ sw->b0 = (unsigned long) &ia64_ret_from_clone;
+ sw->ar_fpsr = FPSR_DEFAULT;
+ v->arch._thread.ksp = (unsigned long) sw - 16;
+ // stay on kernel stack because may get interrupts!
+ // ia64_ret_from_clone (which b0 gets in new_thread) switches
+ // to user stack
+ v->arch._thread.on_ustack = 0;
+ memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
+}
+
+void arch_do_createdomain(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct thread_info *ti = alloc_thread_info(v);
+
+ /* Clear thread_info to clear some important fields, like preempt_count */
+ memset(ti, 0, sizeof(struct thread_info));
+ init_switch_stack(v);
+
+ d->shared_info = (void *)alloc_xenheap_page();
+ if (!d->shared_info) {
+ printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
+ while (1);
+ }
+ memset(d->shared_info, 0, PAGE_SIZE);
+ d->shared_info->vcpu_data[0].arch.privregs =
+ alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
+ printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[0].arch.privregs);
+ memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE);
+ v->vcpu_info = &(d->shared_info->vcpu_data[0]);
+
+ d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
+
+#ifdef CONFIG_VTI
+ /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick
+ * to this solution. Maybe it can be deferred until we know created
+ * one as vmx domain */
+ v->arch.vtlb = init_domain_tlb(v);
+#endif
+
+ /* We may also need emulation rid for region4, though it's unlikely
+ * to see guest issue uncacheable access in metaphysical mode. But
+ * keep such info here may be more sane.
+ */
+ if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL)
+ || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL))
+ BUG();
+ VCPU(v, metaphysical_mode) = 1;
+ v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
+ v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
+ v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
+ v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
+#define DOMAIN_RID_BITS_DEFAULT 18
+ if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
+ BUG();
+ v->arch.starting_rid = d->arch.starting_rid;
+ v->arch.ending_rid = d->arch.ending_rid;
+ // the following will eventually need to be negotiated dynamically
+ d->xen_vastart = XEN_START_ADDR;
+ d->xen_vaend = XEN_END_ADDR;
+ d->shared_info_va = SHAREDINFO_ADDR;
+ d->arch.breakimm = 0x1000;
+ v->arch.breakimm = d->arch.breakimm;
+
+ d->arch.mm = xmalloc(struct mm_struct);
+ if (unlikely(!d->arch.mm)) {
+ printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
+ return -ENOMEM;
+ }
+ memset(d->arch.mm, 0, sizeof(*d->arch.mm));
+ d->arch.mm->pgd = pgd_alloc(d->arch.mm);
+ if (unlikely(!d->arch.mm->pgd)) {
+ printk("Can't allocate pgd for domain %d\n",d->domain_id);
+ return -ENOMEM;
+ }
+}
+
+void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
+{
+ struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+
+ printf("arch_getdomaininfo_ctxt\n");
+ c->regs = *regs;
+ c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
+#if 0
+ if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs,
+ v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) {
+ printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs);
+ return -EFAULT;
+ }
+#endif
+
+ c->shared = v->domain->shared_info->arch;
+}
+
+int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
+{
+ struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+ struct domain *d = v->domain;
+ int i, rc, ret;
+ unsigned long progress = 0;
+
+ printf("arch_set_info_guest\n");
+ if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ return 0;
+
+ if (c->flags & VGCF_VMX_GUEST) {
+ if (!vmx_enabled) {
+ printk("No VMX hardware feature for vmx domain.\n");
+ return -EINVAL;
+ }
+
+ vmx_setup_platform(v, c);
+ }
+
+ *regs = c->regs;
+ new_thread(v, regs->cr_iip, 0, 0);
+
+ v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
+ if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs,
+ c->vcpu.privregs, sizeof(mapped_regs_t))) {
+ printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", c->vcpu.privregs);
+ return -EFAULT;
+ }
+
+ v->arch.domain_itm_last = -1L;
+ d->shared_info->arch = c->shared;
+
+ /* Don't redo final setup */
+ set_bit(_VCPUF_initialised, &v->vcpu_flags);
+ return 0;
+}
+
+void arch_do_boot_vcpu(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ printf("arch_do_boot_vcpu: not implemented\n");
+
+ d->shared_info->vcpu_data[v->vcpu_id].arch.privregs =
+ alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
+ printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs);
+ memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE);
+ return;
+}
+
+void domain_relinquish_resources(struct domain *d)
+{
+ /* FIXME */
+ printf("domain_relinquish_resources: not implemented\n");
+}
+
+// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
+// and linux/arch/ia64/kernel/process.c:kernel_thread()
+void new_thread(struct vcpu *v,
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info)
+{
+ struct domain *d = v->domain;
+ struct pt_regs *regs;
+ struct ia64_boot_param *bp;
+ extern char saved_command_line[];
+
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (d == dom0) start_pc += dom0_start;
+#endif
+
+ regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+ if (VMX_DOMAIN(v)) {
+ /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
+ regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
+ } else {
+ regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
+ | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
+ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
+ regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
+ }
+ regs->cr_iip = start_pc;
+ regs->cr_ifs = 1UL << 63; /* or clear? */
+ regs->ar_fpsr = FPSR_DEFAULT;
+
+ if (VMX_DOMAIN(v)) {
+#ifdef CONFIG_VTI
+ vmx_init_all_rr(v);
+ if (d == dom0)
+ VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);
+ /* Virtual processor context setup */
+ VMX_VPD(v, vpsr) = IA64_PSR_BN;
+ VPD_CR(v, dcr) = 0;
+#endif
+ } else {
+ init_all_rr(v);
+ if (d == dom0)
+ regs->r28 = dom_fw_setup(d,saved_command_line,256L);
+ else {
+ regs->ar_rsc |= (2 << 2); /* force PL2/3 */
+ regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME
+ }
+ VCPU(v, banknum) = 1;
+ VCPU(v, metaphysical_mode) = 1;
+ d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
+ }
+}
+
+static struct page * map_new_domain0_page(unsigned long mpaddr)
+{
+ if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+ printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr);
+printk("map_new_domain0_page: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
+ while(1);
+ }
+ return pfn_to_page((mpaddr >> PAGE_SHIFT));
+}
+
+/* allocate new page for domain and map it to the specified metaphysical addr */
+struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
+{
+ struct mm_struct *mm = d->arch.mm;
+ struct page *p = (struct page *)0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+extern unsigned long vhpt_paddr, vhpt_pend;
+
+ if (!mm->pgd) {
+ printk("map_new_domain_page: domain pgd must exist!\n");
+ return(p);
+ }
+ pgd = pgd_offset(mm,mpaddr);
+ if (pgd_none(*pgd))
+ pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
+
+ pud = pud_offset(pgd, mpaddr);
+ if (pud_none(*pud))
+ pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
+
+ pmd = pmd_offset(pud, mpaddr);
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
+// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
+
+ pte = pte_offset_map(pmd, mpaddr);
+ if (pte_none(*pte)) {
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (d == dom0) p = map_new_domain0_page(mpaddr);
+ else
+#endif
+ {
+ p = alloc_domheap_page(d);
+ // zero out pages for security reasons
+ memset(__va(page_to_phys(p)),0,PAGE_SIZE);
+ }
+ if (unlikely(!p)) {
+printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
+ return(p);
+ }
+if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
+ printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p));
+}
+ set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
+ __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+ }
+ else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+ return p;
+}
+
+/* map a physical address to the specified metaphysical addr */
+void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
+{
+ struct mm_struct *mm = d->arch.mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (!mm->pgd) {
+ printk("map_domain_page: domain pgd must exist!\n");
+ return;
+ }
+ pgd = pgd_offset(mm,mpaddr);
+ if (pgd_none(*pgd))
+ pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
+
+ pud = pud_offset(pgd, mpaddr);
+ if (pud_none(*pud))
+ pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
+
+ pmd = pmd_offset(pud, mpaddr);
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
+// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
+
+ pte = pte_offset_map(pmd, mpaddr);
+ if (pte_none(*pte)) {
+ set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
+ __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
+ }
+ else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr);
+}
+
+void mpafoo(unsigned long mpaddr)
+{
+ extern unsigned long privop_trace;
+ if (mpaddr == 0x3800)
+ privop_trace = 1;
+}
+
+unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
+{
+ struct mm_struct *mm = d->arch.mm;
+ pgd_t *pgd = pgd_offset(mm, mpaddr);
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (d == dom0) {
+ if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+ //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr);
+//printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
+ mpafoo(mpaddr);
+ }
+ pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
+ __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
+ pte = &pteval;
+ return *(unsigned long *)pte;
+ }
+#endif
+tryagain:
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd,mpaddr);
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud,mpaddr);
+ if (pmd_present(*pmd)) {
+ pte = pte_offset_map(pmd,mpaddr);
+ if (pte_present(*pte)) {
+//printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
+ return *(unsigned long *)pte;
+ }
+ }
+ }
+ }
+ /* if lookup fails and mpaddr is "legal", "create" the page */
+ if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
+ if (map_new_domain_page(d,mpaddr)) goto tryagain;
+ }
+ printk("lookup_domain_mpa: bad mpa %p (> %p\n",
+ mpaddr,d->max_pages<<PAGE_SHIFT);
+ mpafoo(mpaddr);
+ return 0;
+}
+
+// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
+#ifndef CONFIG_VTI
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+ unsigned long pte = lookup_domain_mpa(d,mpaddr);
+ unsigned long imva;
+
+ pte &= _PAGE_PPN_MASK;
+ imva = __va(pte);
+ imva |= mpaddr & ~PAGE_MASK;
+ return(imva);
+}
+#else // CONFIG_VTI
+unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
+{
+ unsigned long imva = __gpa_to_mpa(d, mpaddr);
+
+ return __va(imva);
+}
+#endif // CONFIG_VTI
+
+// remove following line if not privifying in memory
+//#define HAVE_PRIVIFY_MEMORY
+#ifndef HAVE_PRIVIFY_MEMORY
+#define privify_memory(x,y) do {} while(0)
+#endif
+
+// see arch/x86/xxx/domain_build.c
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+ return (IS_ELF(*ehdr));
+}
+
+static void copy_memory(void *dst, void *src, int size)
+{
+ int remain;
+
+ if (IS_XEN_ADDRESS(dom0,src)) {
+ memcpy(dst,src,size);
+ }
+ else {
+ printf("About to call __copy_from_user(%p,%p,%d)\n",
+ dst,src,size);
+ while (remain = __copy_from_user(dst,src,size)) {
+ printf("incomplete user copy, %d remain of %d\n",
+ remain,size);
+ dst += size - remain; src += size - remain;
+ size -= remain;
+ }
+ }
+}
+
+void loaddomainelfimage(struct domain *d, unsigned long image_start)
+{
+ char *elfbase = image_start;
+ //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
+ Elf_Ehdr ehdr;
+ Elf_Phdr phdr;
+ int h, filesz, memsz, paddr;
+ unsigned long elfaddr, dom_mpaddr, dom_imva;
+ struct page *p;
+ unsigned long pteval;
+
+ copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
+ for ( h = 0; h < ehdr.e_phnum; h++ ) {
+ copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
+ sizeof(Elf_Phdr));
+ //if ( !is_loadable_phdr(phdr) )
+ if ((phdr.p_type != PT_LOAD)) {
+ continue;
+ }
+ filesz = phdr.p_filesz; memsz = phdr.p_memsz;
+ elfaddr = elfbase + phdr.p_offset;
+ dom_mpaddr = phdr.p_paddr;
+//printf("p_offset: %x, size=%x\n",elfaddr,filesz);
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (d == dom0) {
+ if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) {
+ printf("Domain0 doesn't fit in allocated space!\n");
+ while(1);
+ }
+ dom_imva = __va(dom_mpaddr + dom0_start);
+ copy_memory(dom_imva,elfaddr,filesz);
+ if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
+//FIXME: This test for code seems to find a lot more than objdump -x does
+ if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
+ }
+ else
+#endif
+ while (memsz > 0) {
+#ifdef DOMU_AUTO_RESTART
+ pteval = lookup_domain_mpa(d,dom_mpaddr);
+ if (pteval) dom_imva = __va(pteval & _PFN_MASK);
+ else { printf("loaddomainelfimage: BAD!\n"); while(1); }
+#else
+ p = map_new_domain_page(d,dom_mpaddr);
+ if (unlikely(!p)) BUG();
+ dom_imva = __va(page_to_phys(p));
+#endif
+ if (filesz > 0) {
+ if (filesz >= PAGE_SIZE)
+ copy_memory(dom_imva,elfaddr,PAGE_SIZE);
+ else { // copy partial page, zero the rest of page
+ copy_memory(dom_imva,elfaddr,filesz);
+ memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
+ }
+//FIXME: This test for code seems to find a lot more than objdump -x does
+ if (phdr.p_flags & PF_X)
+ privify_memory(dom_imva,PAGE_SIZE);
+ }
+ else if (memsz > 0) // always zero out entire page
+ memset(dom_imva,0,PAGE_SIZE);
+ memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
+ elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
+ }
+ }
+}
+
+int
+parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
+{
+ Elf_Ehdr ehdr;
+
+ copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
+
+ if ( !elf_sanity_check(&ehdr) ) {
+ printk("ELF sanity check failed.\n");
+ return -EINVAL;
+ }
+
+ if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
+ {
+ printk("ELF program headers extend beyond end of image.\n");
+ return -EINVAL;
+ }
+
+ if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
+ {
+ printk("ELF section headers extend beyond end of image.\n");
+ return -EINVAL;
+ }
+
+#if 0
+ /* Find the section-header strings table. */
+ if ( ehdr.e_shstrndx == SHN_UNDEF )
+ {
+ printk("ELF image has no section-header strings table (shstrtab).\n");
+ return -EINVAL;
+ }
+#endif
+
+ *entry = ehdr.e_entry;
+printf("parsedomainelfimage: entry point = %p\n",*entry);
+
+ return 0;
+}
+
+
+void alloc_dom0(void)
+{
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+ if (platform_is_hp_ski()) {
+ dom0_size = 128*1024*1024; //FIXME: Should be configurable
+ }
+ printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024));
+
+ /* FIXME: The first trunk (say 256M) should always be assigned to
+ * Dom0, since Dom0's physical == machine address for DMA purpose.
+ * Some old version linux, like 2.4, assumes physical memory existing
+ * in 2nd 64M space.
+ */
+ dom0_start = alloc_boot_pages(
+ dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
+ dom0_start <<= PAGE_SHIFT;
+ if (!dom0_start) {
+ printf("construct_dom0: can't allocate contiguous memory size=%p\n",
+ dom0_size);
+ while(1);
+ }
+ printf("alloc_dom0: dom0_start=%p\n",dom0_start);
+#else
+ dom0_start = 0;
+#endif
+
+}
+
+#ifdef DOMU_BUILD_STAGING
+void alloc_domU_staging(void)
+{
+ domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
+ printf("alloc_domU_staging: starting (initializing %d MB...)\n",domU_staging_size/(1024*1024));
+ domU_staging_start = alloc_boot_pages(
+ domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
+ domU_staging_start <<= PAGE_SHIFT;
+ if (!domU_staging_size) {
+ printf("alloc_domU_staging: can't allocate, spinning...\n");
+ while(1);
+ }
+ else domU_staging_area = (unsigned long *)__va(domU_staging_start);
+ printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area);
+
+}
+
+unsigned long
+domU_staging_read_8(unsigned long at)
+{
+ // no way to return errors so just do it
+ return domU_staging_area[at>>3];
+
+}
+
+unsigned long
+domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
+ unsigned long c, unsigned long d)
+{
+ if (at + 32 > domU_staging_size) return -1;
+ if (at & 0x1f) return -1;
+ at >>= 3;
+ domU_staging_area[at++] = a;
+ domU_staging_area[at++] = b;
+ domU_staging_area[at++] = c;
+ domU_staging_area[at] = d;
+ return 0;
+
+}
+#endif
+
+/*
+ * Domain 0 has direct access to all devices absolutely. However
+ * the major point of this stub here, is to allow alloc_dom_mem
+ * handled with order > 0 request. Dom0 requires that bit set to
+ * allocate memory for other domains.
+ */
+void physdev_init_dom0(struct domain *d)
+{
+ set_bit(_DOMF_physdev_access, &d->domain_flags);
+}
+
+extern unsigned long running_on_sim;
+unsigned int vmx_dom0 = 0;
+int construct_dom0(struct domain *d,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ char *dst;
+ int i, rc;
+ unsigned long pfn, mfn;
+ unsigned long nr_pt_pages;
+ unsigned long count;
+ unsigned long alloc_start, alloc_end;
+ struct pfn_info *page = NULL;
+ start_info_t *si;
+ struct vcpu *v = d->vcpu[0];
+
+ struct domain_setup_info dsi;
+ unsigned long p_start;
+ unsigned long pkern_start;
+ unsigned long pkern_entry;
+ unsigned long pkern_end;
+ unsigned long ret, progress = 0;
+
+//printf("construct_dom0: starting\n");
+ /* Sanity! */
+#ifndef CLONE_DOMAIN0
+ if ( d != dom0 )
+ BUG();
+ if ( test_bit(_DOMF_constructed, &d->domain_flags) )
+ BUG();
+#endif
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+ printk("*** LOADING DOMAIN 0 ***\n");
+
+ alloc_start = dom0_start;
+ alloc_end = dom0_start + dom0_size;
+ d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE;
+ image_start = __va(ia64_boot_param->initrd_start);
+ image_len = ia64_boot_param->initrd_size;
+//printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
+//printk("First word of image: %lx\n",*(unsigned long *)image_start);
+
+//printf("construct_dom0: about to call parseelfimage\n");
+ dsi.image_addr = (unsigned long)image_start;
+ dsi.image_len = image_len;
+ rc = parseelfimage(&dsi);
+ if ( rc != 0 )
+ return rc;
+
+#ifdef CONFIG_VTI
+ /* Temp workaround */
+ if (running_on_sim)
+ dsi.xen_section_string = (char *)1;
+
+ /* Check whether dom0 is vti domain */
+ if ((!vmx_enabled) && !dsi.xen_section_string) {
+ printk("Lack of hardware support for unmodified vmx dom0\n");
+ panic("");
+ }
+
+ if (vmx_enabled && !dsi.xen_section_string) {
+ printk("Dom0 is vmx domain!\n");
+ vmx_dom0 = 1;
+ }
+#endif
+
+ p_start = dsi.v_start;
+ pkern_start = dsi.v_kernstart;
+ pkern_end = dsi.v_kernend;
+ pkern_entry = dsi.v_kernentry;
+
+//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
+
+ if ( (p_start & (PAGE_SIZE-1)) != 0 )
+ {
+ printk("Initial guest OS must load to a page boundary.\n");
+ return -EINVAL;
+ }
+
+ printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
+ " Kernel image: %lx->%lx\n"
+ " Entry address: %lx\n"
+ " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
+ pkern_start, pkern_end, pkern_entry);
+
+ if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
+ {
+ printk("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
+ return -ENOMEM;
+ }
+
+ // if high 3 bits of pkern start are non-zero, error
+
+ // if pkern end is after end of metaphysical memory, error
+ // (we should be able to deal with this... later)
+
+
+ //
+
+#if 0
+ strcpy(d->name,"Domain0");
+#endif
+
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+#ifdef CONFIG_VTI
+ /* Construct a frame-allocation list for the initial domain, since these
+ * pages are allocated by boot allocator and pfns are not set properly
+ */
+ for ( mfn = (alloc_start>>PAGE_SHIFT);
+ mfn < (alloc_end>>PAGE_SHIFT);
+ mfn++ )
+ {
+ page = &frame_table[mfn];
+ page_set_owner(page, d);
+ page->u.inuse.type_info = 0;
+ page->count_info = PGC_allocated | 1;
+ list_add_tail(&page->list, &d->page_list);
+
+ /* Construct 1:1 mapping */
+ machine_to_phys_mapping[mfn] = mfn;
+ }
+
+ /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
+ * for dom0
+ */
+ d->arch.pmt = NULL;
+#endif
+
+ /* Copy the OS image. */
+ loaddomainelfimage(d,image_start);
+
+ /* Copy the initial ramdisk. */
+ //if ( initrd_len != 0 )
+ // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+
+ /* Sync d/i cache conservatively */
+ ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+ if (ret != PAL_STATUS_SUCCESS)
+ panic("PAL CACHE FLUSH failed for dom0.\n");
+ printk("Sync i/d cache for dom0 image SUCC\n");
+
+#if 0
+ /* Set up start info area. */
+ //si = (start_info_t *)vstartinfo_start;
+ memset(si, 0, PAGE_SIZE);
+ si->nr_pages = d->tot_pages;
+ si->shared_info = virt_to_phys(d->shared_info);
+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ //si->pt_base = vpt_start;
+ //si->nr_pt_frames = nr_pt_pages;
+ //si->mfn_list = vphysmap_start;
+
+ if ( initrd_len != 0 )
+ {
+ //si->mod_start = vinitrd_start;
+ si->mod_len = initrd_len;
+ printk("Initrd len 0x%lx, start at 0x%08lx\n",
+ si->mod_len, si->mod_start);
+ }
+
+ dst = si->cmd_line;
+ if ( cmdline != NULL )
+ {
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( cmdline[i] == '\0' )
+ break;
+ *dst++ = cmdline[i];
+ }
+ }
+ *dst = '\0';
+
+ zap_low_mappings(); /* Do the same for the idle page tables. */
+#endif
+
+ /* Give up the VGA console if DOM0 is configured to grab it. */
+ if (cmdline != NULL)
+ console_endboot(strstr(cmdline, "tty0") != NULL);
+
+ /* VMX specific construction for Dom0, if hardware supports VMX
+ * and Dom0 is unmodified image
+ */
+ printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
+ if (vmx_dom0)
+ vmx_final_setup_domain(dom0);
+
+ set_bit(_DOMF_constructed, &d->domain_flags);
+
+ new_thread(v, pkern_entry, 0, 0);
+ physdev_init_dom0(d);
+
+ // FIXME: Hack for keyboard input
+#ifdef CLONE_DOMAIN0
+if (d == dom0)
+#endif
+ serial_input_init();
+ if (d == dom0) {
+ VCPU(v, delivery_mask[0]) = -1L;
+ VCPU(v, delivery_mask[1]) = -1L;
+ VCPU(v, delivery_mask[2]) = -1L;
+ VCPU(v, delivery_mask[3]) = -1L;
+ }
+ else __set_bit(0x30, VCPU(v, delivery_mask));
+
+ return 0;
+}
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+int construct_domU(struct domain *d,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ int i, rc;
+ struct vcpu *v = d->vcpu[0];
+ unsigned long pkern_entry;
+
+#ifndef DOMU_AUTO_RESTART
+ if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
+#endif
+
+ printk("*** LOADING DOMAIN %d ***\n",d->domain_id);
+
+ d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size
+ // FIXME: use domain0 command line
+ rc = parsedomainelfimage(image_start, image_len, &pkern_entry);
+ printk("parsedomainelfimage returns %d\n",rc);
+ if ( rc != 0 ) return rc;
+
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+ /* Copy the OS image. */
+ printk("calling loaddomainelfimage(%p,%p)\n",d,image_start);
+ loaddomainelfimage(d,image_start);
+ printk("loaddomainelfimage returns\n");
+
+ set_bit(_DOMF_constructed, &d->domain_flags);
+
+ printk("calling new_thread, entry=%p\n",pkern_entry);
+#ifdef DOMU_AUTO_RESTART
+ v->domain->arch.image_start = image_start;
+ v->domain->arch.image_len = image_len;
+ v->domain->arch.entry = pkern_entry;
+#endif
+ new_thread(v, pkern_entry, 0, 0);
+ printk("new_thread returns\n");
+ __set_bit(0x30, VCPU(v, delivery_mask));
+
+ return 0;
+}
+
+#ifdef DOMU_AUTO_RESTART
+void reconstruct_domU(struct vcpu *v)
+{
+ /* re-copy the OS image to reset data values to original */
+ printk("reconstruct_domU: restarting domain %d...\n",
+ v->domain->domain_id);
+ loaddomainelfimage(v->domain,v->domain->arch.image_start);
+ new_thread(v, v->domain->arch.entry, 0, 0);
+}
+#endif
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+int launch_domainU(unsigned long size)
+{
+#ifdef CLONE_DOMAIN0
+ static int next = CLONE_DOMAIN0+1;
+#else
+ static int next = 1;
+#endif
+
+ struct domain *d = do_createdomain(next,0);
+ if (!d) {
+ printf("launch_domainU: couldn't create\n");
+ return 1;
+ }
+ else next++;
+ if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) {
+ printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
+ d->domain_id,domU_staging_area,size);
+ return 2;
+ }
+ domain_unpause_by_systemcontroller(d);
+}
+
+void machine_restart(char * __unused)
+{
+ if (platform_is_hp_ski()) dummy();
+ printf("machine_restart called: spinning....\n");
+ while(1);
+}
+
+void machine_halt(void)
+{
+ if (platform_is_hp_ski()) dummy();
+ printf("machine_halt called: spinning....\n");
+ while(1);
+}
+
+void dummy_called(char *function)
+{
+ if (platform_is_hp_ski()) asm("break 0;;");
+ printf("dummy called in %s: spinning....\n", function);
+ while(1);
+}
+
+
+#if 0
+void switch_to(struct vcpu *prev, struct vcpu *next)
+{
+ struct vcpu *last;
+
+ __switch_to(prev,next,last);
+ //set_current(next);
+}
+#endif
+
+void domain_pend_keyboard_interrupt(int irq)
+{
+ vcpu_pend_interrupt(dom0->vcpu[0],irq);
+}
+
+void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
+{
+ if ( v->processor == newcpu )
+ return;
+
+ set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
+ v->processor = newcpu;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/grant_table.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/grant_table.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1288 @@
+#ifndef CONFIG_VTI
+// temporarily in arch/ia64 until can merge into common/grant_table.c
+/******************************************************************************
+ * common/grant_table.c
+ *
+ * Mechanism for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Copyright (c) 2005 Christopher Clark
+ * Copyright (c) 2004 K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define GRANT_DEBUG 0
+#define GRANT_DEBUG_VERBOSE 0
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/shadow.h>
+#include <xen/mm.h>
+#ifdef __ia64__
+#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok??
+// FIXME-ia64: need to implement real cmpxchg_user on ia64
+//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 0))
+// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different
+#undef ORDER_GRANT_FRAMES
+//#undef NUM_GRANT_FRAMES
+#define ORDER_GRANT_FRAMES 0
+//#define NUM_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES)
+#endif
+
+#define PIN_FAIL(_lbl, _rc, _f, _a...) \
+ do { \
+ DPRINTK( _f, ## _a ); \
+ rc = (_rc); \
+ goto _lbl; \
+ } while ( 0 )
+
+static inline int
+get_maptrack_handle(
+ grant_table_t *t)
+{
+ unsigned int h;
+ if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
+ return -1;
+ t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
+ t->map_count++;
+ return h;
+}
+
+static inline void
+put_maptrack_handle(
+ grant_table_t *t, int handle)
+{
+ t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
+ t->maptrack_head = handle;
+ t->map_count--;
+}
+
+static int
+__gnttab_activate_grant_ref(
+ struct domain *mapping_d, /* IN */
+ struct vcpu *mapping_ed,
+ struct domain *granting_d,
+ grant_ref_t ref,
+ u16 dev_hst_ro_flags,
+ unsigned long host_virt_addr,
+ unsigned long *pframe ) /* OUT */
+{
+ domid_t sdom;
+ u16 sflags;
+ active_grant_entry_t *act;
+ grant_entry_t *sha;
+ s16 rc = 1;
+ unsigned long frame = 0;
+ int retries = 0;
+
+ /*
+ * Objectives of this function:
+ * . Make the record ( granting_d, ref ) active, if not already.
+ * . Update shared grant entry of owner, indicating frame is mapped.
+ * . Increment the owner act->pin reference counts.
+ * . get_page on shared frame if new mapping.
+ * . get_page_type if this is first RW mapping of frame.
+ * . Add PTE to virtual address space of mapping_d, if necessary.
+ * Returns:
+ * . -ve: error
+ * . 1: ok
+ * . 0: ok and TLB invalidate of host_virt_addr needed.
+ *
+ * On success, *pframe contains mfn.
+ */
+
+ /*
+ * We bound the number of times we retry CMPXCHG on memory locations that
+ * we share with a guest OS. The reason is that the guest can modify that
+ * location at a higher rate than we can read-modify-CMPXCHG, so the guest
+ * could cause us to livelock. There are a few cases where it is valid for
+ * the guest to race our updates (e.g., to change the GTF_readonly flag),
+ * so we allow a few retries before failing.
+ */
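+
+ /*
+ * Concretely: the 16-bit flags sit in the low half and the 16-bit domid in
+ * the high half of one u32 (scombo), so a single cmpxchg updates both fields
+ * atomically; after four failed retries the entry is treated as unstable.
+ */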
+
+ act = &granting_d->grant_table->active[ref];
+ sha = &granting_d->grant_table->shared[ref];
+
+ spin_lock(&granting_d->grant_table->lock);
+
+ if ( act->pin == 0 )
+ {
+ /* CASE 1: Activating a previously inactive entry. */
+
+ sflags = sha->flags;
+ sdom = sha->domid;
+
+ for ( ; ; )
+ {
+ u32 scombo, prev_scombo, new_scombo;
+
+ if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
+ unlikely(sdom != mapping_d->domain_id) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+ sflags, sdom, mapping_d->domain_id);
+
+ /* Merge two 16-bit values into a 32-bit combined update. */
+ /* NB. Endianness! */
+ prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+ new_scombo = scombo | GTF_reading;
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
+ {
+ new_scombo |= GTF_writing;
+ if ( unlikely(sflags & GTF_readonly) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Attempt to write-pin a r/o grant entry.\n");
+ }
+
+ /* NB. prev_scombo is updated in place to seen value. */
+ if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
+ prev_scombo,
+ new_scombo)) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Fault while modifying shared flags and domid.\n");
+
+ /* Did the combined update work (did we see what we expected?). */
+ if ( likely(prev_scombo == scombo) )
+ break;
+
+ if ( retries++ == 4 )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Shared grant entry is unstable.\n");
+
+ /* Didn't see what we expected. Split out the seen flags & dom. */
+ /* NB. Endianness! */
+ sflags = (u16)prev_scombo;
+ sdom = (u16)(prev_scombo >> 16);
+ }
+
+ /* rmb(); */ /* not on x86 */
+
+ frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
+
+#ifdef __ia64__
+// FIXME-ia64: does any error checking need to be done here?
+#else
+ if ( unlikely(!pfn_valid(frame)) ||
+ unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
+ get_page(&frame_table[frame], granting_d) :
+ get_page_and_type(&frame_table[frame], granting_d,
+ PGT_writable_page))) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ clear_bit(_GTF_reading, &sha->flags);
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Could not pin the granted frame (%lx)!\n", frame);
+ }
+#endif
+
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+ GNTPIN_devr_inc : GNTPIN_devw_inc;
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+ GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ act->domid = sdom;
+ act->frame = frame;
+ }
+ else
+ {
+ /* CASE 2: Active modifications to an already active entry. */
+
+ /*
+ * A cheesy check for possible pin-count overflow.
+ * A more accurate check cannot be done with a single comparison.
+ */
+ if ( (act->pin & 0x80808080U) != 0 )
+ PIN_FAIL(unlock_out, ENOSPC,
+ "Risk of counter overflow %08x\n", act->pin);
+
+ frame = act->frame;
+
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
+ !((sflags = sha->flags) & GTF_writing) )
+ {
+ for ( ; ; )
+ {
+ u16 prev_sflags;
+
+ if ( unlikely(sflags & GTF_readonly) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Attempt to write-pin a r/o grant entry.\n");
+
+ prev_sflags = sflags;
+
+ /* NB. prev_sflags is updated in place to seen value. */
+ if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
+ prev_sflags | GTF_writing)) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Fault while modifying shared flags.\n");
+
+ if ( likely(prev_sflags == sflags) )
+ break;
+
+ if ( retries++ == 4 )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Shared grant entry is unstable.\n");
+
+ sflags = prev_sflags;
+ }
+
+#ifdef __ia64__
+// FIXME-ia64: does any error checking need to be done here?
+#else
+ if ( unlikely(!get_page_type(&frame_table[frame],
+ PGT_writable_page)) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Attempt to write-pin a unwritable page.\n");
+ }
+#endif
+ }
+
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+ GNTPIN_devr_inc : GNTPIN_devw_inc;
+
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
+ GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ }
+
+ /*
+ * At this point:
+ * act->pin updated to reflect mapping.
+ * sha->flags updated to indicate to granting domain mapping done.
+ * frame contains the mfn.
+ */
+
+ spin_unlock(&granting_d->grant_table->lock);
+
+#ifdef __ia64__
+// FIXME-ia64: does any error checking need to be done here?
+#else
+ if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
+ {
+ /* Write update into the pagetable. */
+ l1_pgentry_t pte;
+ pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
+ l1e_add_flags(pte,_PAGE_RW);
+ rc = update_grant_va_mapping( host_virt_addr, pte,
+ mapping_d, mapping_ed );
+
+ /*
+ * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
+ * This is done in the outer gnttab_map_grant_ref.
+ */
+
+ if ( rc < 0 )
+ {
+ /* Failure: undo and abort. */
+
+ spin_lock(&granting_d->grant_table->lock);
+
+ if ( dev_hst_ro_flags & GNTMAP_readonly )
+ {
+ act->pin -= GNTPIN_hstr_inc;
+ }
+ else
+ {
+ act->pin -= GNTPIN_hstw_inc;
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
+ }
+ }
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
+ }
+
+ spin_unlock(&granting_d->grant_table->lock);
+ }
+
+ }
+#endif
+
+ *pframe = frame;
+ return rc;
+
+ unlock_out:
+ spin_unlock(&granting_d->grant_table->lock);
+ return rc;
+}
+
+/*
+ * Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated.
+ */
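+/*
+ * The caller (gnttab_map_grant_ref below) batches these: a single new mapping
+ * triggers a one-page flush, more than one triggers a full TLB flush of the
+ * domain's cpumask (on x86; the ia64 equivalent is still a FIXME).
+ */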
+static int
+__gnttab_map_grant_ref(
+ gnttab_map_grant_ref_t *uop,
+ unsigned long *va)
+{
+ domid_t dom;
+ grant_ref_t ref;
+ struct domain *ld, *rd;
+ struct vcpu *led;
+ u16 dev_hst_ro_flags;
+ int handle;
+ unsigned long frame = 0, host_virt_addr;
+ int rc;
+
+ led = current;
+ ld = led->domain;
+
+ /* Bitwise-OR avoids short-circuiting which screws control flow. */
+ if ( unlikely(__get_user(dom, &uop->dom) |
+ __get_user(ref, &uop->ref) |
+ __get_user(host_virt_addr, &uop->host_addr) |
+ __get_user(dev_hst_ro_flags, &uop->flags)) )
+ {
+ DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
+ return -EFAULT; /* don't set status */
+ }
+
+
+ if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
+ unlikely(!__addr_ok(host_virt_addr)))
+ {
+ DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
+ host_virt_addr, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
+ return GNTST_bad_gntref;
+ }
+
+ if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
+ unlikely((dev_hst_ro_flags &
+ (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
+ {
+ DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_gntref, &uop->handle);
+ return GNTST_bad_gntref;
+ }
+
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+ DPRINTK("Could not find domain %d\n", dom);
+ (void)__put_user(GNTST_bad_domain, &uop->handle);
+ return GNTST_bad_domain;
+ }
+
+ /* Get a maptrack handle. */
+ if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
+ {
+ int i;
+ grant_mapping_t *new_mt;
+ grant_table_t *lgt = ld->grant_table;
+
+ /* Grow the maptrack table. */
+ new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
+ if ( new_mt == NULL )
+ {
+ put_domain(rd);
+ DPRINTK("No more map handles available\n");
+ (void)__put_user(GNTST_no_device_space, &uop->handle);
+ return GNTST_no_device_space;
+ }
+
+ memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
+ for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
+ new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
+
+ free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
+ lgt->maptrack = new_mt;
+ lgt->maptrack_order += 1;
+ lgt->maptrack_limit <<= 1;
+
+ printk("Doubled maptrack size\n");
+ handle = get_maptrack_handle(ld->grant_table);
+ }
+
+#if GRANT_DEBUG_VERBOSE
+ DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
+ ref, dom, dev_hst_ro_flags);
+#endif
+
+ if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
+ dev_hst_ro_flags,
+ host_virt_addr, &frame)))
+ {
+ /*
+ * Only make the maptrack live _after_ writing the pte, in case we
+ * overwrite the same frame number, causing a maptrack walk to find it
+ */
+ ld->grant_table->maptrack[handle].domid = dom;
+
+ ld->grant_table->maptrack[handle].ref_and_flags
+ = (ref << MAPTRACK_REF_SHIFT) |
+ (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
+
+ (void)__put_user(frame, &uop->dev_bus_addr);
+
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ *va = host_virt_addr;
+
+ (void)__put_user(handle, &uop->handle);
+ }
+ else
+ {
+ (void)__put_user(rc, &uop->handle);
+ put_maptrack_handle(ld->grant_table, handle);
+ }
+
+ put_domain(rd);
+ return rc;
+}
+
+static long
+gnttab_map_grant_ref(
+ gnttab_map_grant_ref_t *uop, unsigned int count)
+{
+ int i, flush = 0;
+ unsigned long va = 0;
+
+ for ( i = 0; i < count; i++ )
+ if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
+ flush++;
+
+#ifdef __ia64__
+// FIXME-ia64: probably need to do something here to avoid stale mappings?
+#else
+ if ( flush == 1 )
+ flush_tlb_one_mask(current->domain->cpumask, va);
+ else if ( flush != 0 )
+ flush_tlb_mask(current->domain->cpumask);
+#endif
+
+ return 0;
+}
+
+static int
+__gnttab_unmap_grant_ref(
+ gnttab_unmap_grant_ref_t *uop,
+ unsigned long *va)
+{
+ domid_t dom;
+ grant_ref_t ref;
+ u16 handle;
+ struct domain *ld, *rd;
+
+ active_grant_entry_t *act;
+ grant_entry_t *sha;
+ grant_mapping_t *map;
+ u16 flags;
+ s16 rc = 1;
+ unsigned long frame, virt;
+
+ ld = current->domain;
+
+ /* Bitwise-OR avoids short-circuiting which screws control flow. */
+ if ( unlikely(__get_user(virt, &uop->host_addr) |
+ __get_user(frame, &uop->dev_bus_addr) |
+ __get_user(handle, &uop->handle)) )
+ {
+ DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
+ return -EFAULT; /* don't set status */
+ }
+
+ map = &ld->grant_table->maptrack[handle];
+
+ if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
+ unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
+ {
+ DPRINTK("Bad handle (%d).\n", handle);
+ (void)__put_user(GNTST_bad_handle, &uop->status);
+ return GNTST_bad_handle;
+ }
+
+ dom = map->domid;
+ ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+ flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
+
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+ DPRINTK("Could not find domain %d\n", dom);
+ (void)__put_user(GNTST_bad_domain, &uop->status);
+ return GNTST_bad_domain;
+ }
+
+#if GRANT_DEBUG_VERBOSE
+ DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
+ ref, dom, handle);
+#endif
+
+ act = &rd->grant_table->active[ref];
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+ if ( frame == 0 )
+ {
+ frame = act->frame;
+ }
+ else
+ {
+ if ( unlikely(frame != act->frame) )
+ PIN_FAIL(unmap_out, GNTST_general_error,
+ "Bad frame number doesn't match gntref.\n");
+ if ( flags & GNTMAP_device_map )
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
+
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ (void)__put_user(0, &uop->dev_bus_addr);
+
+ /* Frame is now unmapped for device access. */
+ }
+
+ if ( (virt != 0) &&
+ (flags & GNTMAP_host_map) &&
+ ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
+ {
+#ifdef __ia64__
+// FIXME-ia64: does any error checking need to be done here?
+#else
+ l1_pgentry_t *pl1e;
+ unsigned long _ol1e;
+
+ pl1e = &linear_pg_table[l1_linear_offset(virt)];
+
+ if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
+ {
+ DPRINTK("Could not find PTE entry for address %lx\n", virt);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+
+ /*
+ * Check that the virtual address supplied is actually mapped to
+ * act->frame.
+ */
+ if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
+ {
+ DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
+ _ol1e, virt, frame);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+
+ /* Delete pagetable entry. */
+ if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
+ {
+ DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
+ pl1e, virt);
+ rc = -EINVAL;
+ goto unmap_out;
+ }
+#endif
+
+ map->ref_and_flags &= ~GNTMAP_host_map;
+
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
+ : GNTPIN_hstw_inc;
+
+ rc = 0;
+ *va = virt;
+ }
+
+ if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
+ {
+ map->ref_and_flags = 0;
+ put_maptrack_handle(ld->grant_table, handle);
+ }
+
+#ifdef __ia64__
+// FIXME-ia64: any error checking need to be done here? I think not and then
+// this can probably be macro-ized into nothingness
+#else
+ /* If just unmapped a writable mapping, mark as dirtied */
+ if ( unlikely(shadow_mode_log_dirty(rd)) &&
+ !( flags & GNTMAP_readonly ) )
+ mark_dirty(rd, frame);
+#endif
+
+ /* If the last writable mapping has been removed, put_page_type */
+ if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
+ ( !( flags & GNTMAP_readonly ) ) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
+ }
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
+ }
+
+ unmap_out:
+ (void)__put_user(rc, &uop->status);
+ spin_unlock(&rd->grant_table->lock);
+ put_domain(rd);
+ return rc;
+}
+
+static long
+gnttab_unmap_grant_ref(
+ gnttab_unmap_grant_ref_t *uop, unsigned int count)
+{
+ int i, flush = 0;
+ unsigned long va = 0;
+
+ for ( i = 0; i < count; i++ )
+ if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
+ flush++;
+
+#ifdef __ia64__
+// FIXME-ia64: probably need to do something here to avoid stale mappings?
+#else
+ if ( flush == 1 )
+ flush_tlb_one_mask(current->domain->cpumask, va);
+ else if ( flush != 0 )
+ flush_tlb_mask(current->domain->cpumask);
+#endif
+
+ return 0;
+}
+
+static long
+gnttab_setup_table(
+ gnttab_setup_table_t *uop, unsigned int count)
+{
+ gnttab_setup_table_t op;
+ struct domain *d;
+ int i;
+ unsigned long addr;
+
+ if ( count != 1 )
+ return -EINVAL;
+
+ if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+ {
+ DPRINTK("Fault while reading gnttab_setup_table_t.\n");
+ return -EFAULT;
+ }
+
+ if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
+ {
+ DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
+ NR_GRANT_FRAMES);
+ (void)put_user(GNTST_general_error, &uop->status);
+ return 0;
+ }
+
+ if ( op.dom == DOMID_SELF )
+ {
+ op.dom = current->domain->domain_id;
+ }
+ else if ( unlikely(!IS_PRIV(current->domain)) )
+ {
+ (void)put_user(GNTST_permission_denied, &uop->status);
+ return 0;
+ }
+
+ if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+ {
+ DPRINTK("Bad domid %d.\n", op.dom);
+ (void)put_user(GNTST_bad_domain, &uop->status);
+ return 0;
+ }
+
+ if ( op.nr_frames <= NR_GRANT_FRAMES )
+ {
+ ASSERT(d->grant_table != NULL);
+ (void)put_user(GNTST_okay, &uop->status);
+#ifdef __ia64__
+ if (d == dom0) {
+ for ( i = 0; i < op.nr_frames; i++ )
+ (void)put_user(
+ (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
+ &uop->frame_list[i]);
+ } else {
+ /* IA64 hack - need to map it somewhere */
+ addr = (1UL << 40);
+ map_domain_page(d, addr, virt_to_phys(d->grant_table->shared));
+ (void)put_user(addr >> PAGE_SHIFT, &uop->frame_list[0]);
+ }
+#else
+ for ( i = 0; i < op.nr_frames; i++ )
+ (void)put_user(
+ (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
+ &uop->frame_list[i]);
+#endif
+ }
+
+ put_domain(d);
+ return 0;
+}
+
+#if GRANT_DEBUG
+static int
+gnttab_dump_table(gnttab_dump_table_t *uop)
+{
+ grant_table_t *gt;
+ gnttab_dump_table_t op;
+ struct domain *d;
+ u32 shared_mfn;
+ active_grant_entry_t *act;
+ grant_entry_t sha_copy;
+ grant_mapping_t *maptrack;
+ int i;
+
+
+ if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
+ {
+ DPRINTK("Fault while reading gnttab_dump_table_t.\n");
+ return -EFAULT;
+ }
+
+ if ( op.dom == DOMID_SELF )
+ {
+ op.dom = current->domain->domain_id;
+ }
+
+ if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
+ {
+ DPRINTK("Bad domid %d.\n", op.dom);
+ (void)put_user(GNTST_bad_domain, &uop->status);
+ return 0;
+ }
+
+ ASSERT(d->grant_table != NULL);
+ gt = d->grant_table;
+ (void)put_user(GNTST_okay, &uop->status);
+
+ shared_mfn = virt_to_phys(d->grant_table->shared);
+
+ DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
+ op.dom, shared_mfn);
+
+ ASSERT(d->grant_table->active != NULL);
+ ASSERT(d->grant_table->shared != NULL);
+ ASSERT(d->grant_table->maptrack != NULL);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ sha_copy = gt->shared[i];
+
+ if ( sha_copy.flags )
+ {
+ DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
+ "dom:(%hu) frame:(%lx)\n",
+ op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
+ }
+ }
+
+ spin_lock(&gt->lock);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ act = &gt->active[i];
+
+ if ( act->pin )
+ {
+ DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
+ "dom:(%hu) frame:(%lx)\n",
+ op.dom, i, act->pin, act->domid, act->frame);
+ }
+ }
+
+ for ( i = 0; i < gt->maptrack_limit; i++ )
+ {
+ maptrack = &gt->maptrack[i];
+
+ if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
+ {
+ DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
+ "dom:(%hu)\n",
+ op.dom, i,
+ maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
+ maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
+ maptrack->domid);
+ }
+ }
+
+ spin_unlock(&gt->lock);
+
+ put_domain(d);
+ return 0;
+}
+#endif
+
+long
+do_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ long rc;
+
+ if ( count > 512 )
+ return -EINVAL;
+
+ LOCK_BIGLOCK(current->domain);
+
+ rc = -EFAULT;
+ switch ( cmd )
+ {
+ case GNTTABOP_map_grant_ref:
+ if ( unlikely(!array_access_ok(
+ uop, count, sizeof(gnttab_map_grant_ref_t))) )
+ goto out;
+ rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
+ break;
+ case GNTTABOP_unmap_grant_ref:
+ if ( unlikely(!array_access_ok(
+ uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
+ goto out;
+ rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
+ break;
+ case GNTTABOP_setup_table:
+ rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
+ break;
+#if GRANT_DEBUG
+ case GNTTABOP_dump_table:
+ rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
+ break;
+#endif
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+out:
+ UNLOCK_BIGLOCK(current->domain);
+
+ return rc;
+}
+
+int
+gnttab_check_unmap(
+ struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
+{
+ /* Called when put_page is invoked on a page belonging to a foreign domain.
+ * Instead of decrementing the frame table ref count, locate the grant
+ * table entry, if any, and if found, decrement that count.
+ * Called a _lot_ at domain creation because pages mapped by priv domains
+ * also traverse this.
+ */
+
+ /* Note: If the same frame is mapped multiple times, and then one of
+ * the ptes is overwritten, which maptrack handle gets invalidated?
+ * Advice: Don't do it. Explicitly unmap.
+ */
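+ /*
+ * The lookup below is a linear scan of the local domain's maptrack table:
+ * the first live entry whose active grant frame matches (and whose host pin
+ * count is non-zero) has its host pin dropped, mirroring the unmap path.
+ */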
+
+ unsigned int handle, ref, refcount;
+ grant_table_t *lgt, *rgt;
+ active_grant_entry_t *act;
+ grant_mapping_t *map;
+ int found = 0;
+
+ lgt = ld->grant_table;
+
+#if GRANT_DEBUG_VERBOSE
+ if ( ld->domain_id != 0 )
+ {
+ DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
+ rd->domain_id, ld->domain_id, frame, readonly);
+ }
+#endif
+
+ /* Fast exit if we're not mapping anything using grant tables */
+ if ( lgt->map_count == 0 )
+ return 0;
+
+ if ( get_domain(rd) == 0 )
+ {
+ DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
+ rd->domain_id);
+ return 0;
+ }
+
+ rgt = rd->grant_table;
+
+ for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
+ {
+ map = &lgt->maptrack[handle];
+
+ if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
+ ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
+ {
+ ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
+ act = &rgt->active[ref];
+
+ spin_lock(&rgt->lock);
+
+ if ( act->frame != frame )
+ {
+ spin_unlock(&rgt->lock);
+ continue;
+ }
+
+ refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
+ : GNTPIN_hstw_mask );
+ if ( refcount == 0 )
+ {
+ spin_unlock(&rgt->lock);
+ continue;
+ }
+
+ /* gotcha */
+ DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
+ rd->domain_id, ld->domain_id, frame, readonly);
+
+ if ( readonly )
+ act->pin -= GNTPIN_hstr_inc;
+ else
+ {
+ act->pin -= GNTPIN_hstw_inc;
+
+ /* any more granted writable mappings? */
+ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
+ {
+ clear_bit(_GTF_writing, &rgt->shared[ref].flags);
+ put_page_type(&frame_table[frame]);
+ }
+ }
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &rgt->shared[ref].flags);
+ put_page(&frame_table[frame]);
+ }
+ spin_unlock(&rgt->lock);
+
+ clear_bit(GNTMAP_host_map, &map->ref_and_flags);
+
+ if ( !(map->ref_and_flags & GNTMAP_device_map) )
+ put_maptrack_handle(lgt, handle);
+
+ found = 1;
+ break;
+ }
+ }
+ put_domain(rd);
+
+ return found;
+}
+
+int
+gnttab_prepare_for_transfer(
+ struct domain *rd, struct domain *ld, grant_ref_t ref)
+{
+ grant_table_t *rgt;
+ grant_entry_t *sha;
+ domid_t sdom;
+ u16 sflags;
+ u32 scombo, prev_scombo;
+ int retries = 0;
+ unsigned long target_pfn;
+
+ DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->domain_id, ld->domain_id, ref);
+
+ if ( unlikely((rgt = rd->grant_table) == NULL) ||
+ unlikely(ref >= NR_GRANT_ENTRIES) )
+ {
+ DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
+ rd->domain_id, ref);
+ return 0;
+ }
+
+ spin_lock(&rgt->lock);
+
+ sha = &rgt->shared[ref];
+
+ sflags = sha->flags;
+ sdom = sha->domid;
+
+ for ( ; ; )
+ {
+ target_pfn = sha->frame;
+
+ if ( unlikely(target_pfn >= max_page ) )
+ {
+ DPRINTK("Bad pfn (%lx)\n", target_pfn);
+ goto fail;
+ }
+
+ if ( unlikely(sflags != GTF_accept_transfer) ||
+ unlikely(sdom != ld->domain_id) )
+ {
+ DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
+ sflags, sdom, ld->domain_id);
+ goto fail;
+ }
+
+ /* Merge two 16-bit values into a 32-bit combined update. */
+ /* NB. Endianness! */
+ prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
+
+ /* NB. prev_scombo is updated in place to seen value. */
+ if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
+ prev_scombo | GTF_transfer_committed)) )
+ {
+ DPRINTK("Fault while modifying shared flags and domid.\n");
+ goto fail;
+ }
+
+ /* Did the combined update work (did we see what we expected?). */
+ if ( likely(prev_scombo == scombo) )
+ break;
+
+ if ( retries++ == 4 )
+ {
+ DPRINTK("Shared grant entry is unstable.\n");
+ goto fail;
+ }
+
+ /* Didn't see what we expected. Split out the seen flags & dom. */
+ /* NB. Endianness! */
+ sflags = (u16)prev_scombo;
+ sdom = (u16)(prev_scombo >> 16);
+ }
+
+ spin_unlock(&rgt->lock);
+ return 1;
+
+ fail:
+ spin_unlock(&rgt->lock);
+ return 0;
+}
+
+void
+gnttab_notify_transfer(
+ struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
+{
+ grant_entry_t *sha;
+ unsigned long pfn;
+
+ DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->domain_id, ld->domain_id, ref);
+
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+#ifdef __ia64__
+// FIXME-ia64: does any error checking need to be done here?
+#else
+ pfn = sha->frame;
+
+ if ( unlikely(pfn >= max_page ) )
+ DPRINTK("Bad pfn (%lx)\n", pfn);
+ else
+ {
+ machine_to_phys_mapping[frame] = pfn;
+
+ if ( unlikely(shadow_mode_log_dirty(ld)))
+ mark_dirty(ld, frame);
+
+ if (shadow_mode_translate(ld))
+ __phys_to_machine_mapping[pfn] = frame;
+ }
+#endif
+ sha->frame = __mfn_to_gpfn(rd, frame);
+ sha->domid = rd->domain_id;
+ wmb();
+ sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
+
+ spin_unlock(&rd->grant_table->lock);
+
+ return;
+}
+
+int
+grant_table_create(
+ struct domain *d)
+{
+ grant_table_t *t;
+ int i;
+
+ if ( (t = xmalloc(grant_table_t)) == NULL )
+ goto no_mem;
+
+ /* Simple stuff. */
+ memset(t, 0, sizeof(*t));
+ spin_lock_init(&t->lock);
+
+ /* Active grant table. */
+ if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
+ == NULL )
+ goto no_mem;
+ memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
+
+ /* Tracking of mapped foreign frames table */
+ if ( (t->maptrack = alloc_xenheap_page()) == NULL )
+ goto no_mem;
+ t->maptrack_order = 0;
+ t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
+ memset(t->maptrack, 0, PAGE_SIZE);
+ for ( i = 0; i < t->maptrack_limit; i++ )
+ t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
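+
+ /*
+ * The initial maptrack table is a single xenheap page; __gnttab_map_grant_ref
+ * doubles it on demand when the free list runs out.
+ */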
+
+ /* Shared grant table. */
+ t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
+ if ( t->shared == NULL )
+ goto no_mem;
+ memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
+
+#ifdef __ia64__
+// I don't think there's anything to do here on ia64?...
+#else
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ {
+ SHARE_PFN_WITH_DOMAIN(
+ virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
+ machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
+ INVALID_M2P_ENTRY;
+ }
+#endif
+
+ /* Okay, install the structure. */
+ wmb(); /* avoid races with lock-free access to d->grant_table */
+ d->grant_table = t;
+ return 0;
+
+ no_mem:
+ if ( t != NULL )
+ {
+ xfree(t->active);
+ if ( t->maptrack != NULL )
+ free_xenheap_page(t->maptrack);
+ xfree(t);
+ }
+ return -ENOMEM;
+}
+
+void
+gnttab_release_dev_mappings(grant_table_t *gt)
+{
+ grant_mapping_t *map;
+ domid_t dom;
+ grant_ref_t ref;
+ u16 handle;
+ struct domain *ld, *rd;
+ unsigned long frame;
+ active_grant_entry_t *act;
+ grant_entry_t *sha;
+
+ ld = current->domain;
+
+ for ( handle = 0; handle < gt->maptrack_limit; handle++ )
+ {
+ map = &gt->maptrack[handle];
+
+ if ( map->ref_and_flags & GNTMAP_device_map )
+ {
+ dom = map->domid;
+ ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+
+ DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
+ handle, ref,
+ map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
+
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+
+ printk(KERN_WARNING "Grant release: No dom%d\n", dom);
+ continue;
+ }
+
+ act = &rd->grant_table->active[ref];
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+ if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
+ {
+ frame = act->frame;
+
+ if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
+ ( (act->pin & GNTPIN_devw_mask) > 0 ) )
+ {
+ clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
+ }
+
+ act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
+
+ if ( act->pin == 0 )
+ {
+ clear_bit(_GTF_reading, &sha->flags);
+ map->ref_and_flags = 0;
+ put_page(&frame_table[frame]);
+ }
+ else
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ }
+
+ spin_unlock(&rd->grant_table->lock);
+
+ put_domain(rd);
+ }
+ }
+}
+
+
+void
+grant_table_destroy(
+ struct domain *d)
+{
+ grant_table_t *t;
+
+ if ( (t = d->grant_table) != NULL )
+ {
+ /* Free memory relating to this grant table. */
+ d->grant_table = NULL;
+ free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES);
+ free_xenheap_page(t->maptrack);
+ xfree(t->active);
+ xfree(t);
+ }
+}
+
+void
+grant_table_init(
+ void)
+{
+ /* Nothing. */
+}
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hpsimserial.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hpsimserial.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,23 @@
+/*
+ * HP Ski simulator serial I/O
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <xen/serial.h>
+#include "hpsim_ssc.h"
+
+static void hp_ski_putc(struct serial_port *port, char c)
+{
+ ia64_ssc(c,0,0,0,SSC_PUTCHAR);
+}
+
+static struct uart_driver hp_ski = { .putc = hp_ski_putc };
+
+void hpsim_serial_init(void)
+{
+ serial_register_uart(0, &hp_ski, 0);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hypercall.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hypercall.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,182 @@
+/*
+ * Hypercall implementations
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h> /* FOR struct ia64_sal_retval */
+
+#include <asm/vcpu.h>
+#include <asm/dom_fw.h>
+
+extern unsigned long translate_domain_mpaddr(unsigned long);
+extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64);
+extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
+unsigned long idle_when_pending = 0;
+unsigned long pal_halt_light_count = 0;
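+// idle_when_pending counts PAL_HALT_LIGHT calls that arrive with a deliverable
+// interrupt but pending_interruption unset; pal_halt_light_count counts halts
+// that actually yield the CPU (see the PAL_HALT_LIGHT case below).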
+
+int
+ia64_hypercall (struct pt_regs *regs)
+{
+ struct vcpu *v = (struct vcpu *) current;
+ struct ia64_sal_retval x;
+ struct ia64_pal_retval y;
+ unsigned long *tv, *tc;
+ int pi;
+
+ switch (regs->r2) {
+ case FW_HYPERCALL_PAL_CALL:
+ //printf("*** PAL hypercall: index=%d\n",regs->r28);
+ //FIXME: This should call a C routine
+#if 0
+ // This is very conservative, but avoids a possible
+ // (and deadly) freeze in paravirtualized domains due
+ // to a yet-to-be-found bug where pending_interruption
+ // is zero when it shouldn't be. Since PAL is called
+ // in the idle loop, this should resolve it
+ VCPU(v,pending_interruption) = 1;
+#endif
+ if (regs->r28 == PAL_HALT_LIGHT) {
+#define SPURIOUS_VECTOR 15
+ pi = vcpu_check_pending_interrupts(v);
+ if (pi != SPURIOUS_VECTOR) {
+ if (!VCPU(v,pending_interruption))
+ idle_when_pending++;
+ vcpu_pend_unspecified_interrupt(v);
+//printf("idle w/int#%d pending!\n",pi);
+//this shouldn't happen, but it apparently does quite a bit! so don't
+//allow it to happen... i.e. if a domain has an interrupt pending and
+//it tries to halt itself because it thinks it is idle, just return here
+//as deliver_pending_interrupt is called on the way out and will deliver it
+ }
+ else {
+ pal_halt_light_count++;
+ do_sched_op(SCHEDOP_yield);
+ }
+ //break;
+ }
+ else if (regs->r28 >= PAL_COPY_PAL) { /* FIXME */
+ printf("stacked PAL hypercalls not supported\n");
+ regs->r8 = -1;
+ break;
+ }
+ else y = xen_pal_emulator(regs->r28,regs->r29,
+ regs->r30,regs->r31);
+ regs->r8 = y.status; regs->r9 = y.v0;
+ regs->r10 = y.v1; regs->r11 = y.v2;
+ break;
+ case FW_HYPERCALL_SAL_CALL:
+ x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33),
+ vcpu_get_gr(v,34),vcpu_get_gr(v,35),
+ vcpu_get_gr(v,36),vcpu_get_gr(v,37),
+ vcpu_get_gr(v,38),vcpu_get_gr(v,39));
+ regs->r8 = x.status; regs->r9 = x.v0;
+ regs->r10 = x.v1; regs->r11 = x.v2;
+ break;
+ case FW_HYPERCALL_EFI_RESET_SYSTEM:
+ printf("efi.reset_system called ");
+ if (current->domain == dom0) {
+ printf("(by dom0)\n ");
+ (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
+ }
+#ifdef DOMU_AUTO_RESTART
+ else {
+ reconstruct_domU(current);
+ return 0; // don't increment ip!
+ }
+#else
+ printf("(not supported for non-0 domain)\n");
+ regs->r8 = EFI_UNSUPPORTED;
+#endif
+ break;
+ case FW_HYPERCALL_EFI_GET_TIME:
+ tv = vcpu_get_gr(v,32);
+ tc = vcpu_get_gr(v,33);
+ //printf("efi_get_time(%p,%p) called...",tv,tc);
+ tv = __va(translate_domain_mpaddr(tv));
+ if (tc) tc = __va(translate_domain_mpaddr(tc));
+ regs->r8 = (*efi.get_time)(tv,tc);
+ //printf("and returns %lx\n",regs->r8);
+ break;
+ case FW_HYPERCALL_EFI_SET_TIME:
+ case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
+ case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
+ // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
+ // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
+ // POINTER ARGUMENTS WILL BE VIRTUAL!!
+ case FW_HYPERCALL_EFI_GET_VARIABLE:
+ // FIXME: need fixes in efi.h from 2.6.9
+ case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
+ case FW_HYPERCALL_EFI_SET_VARIABLE:
+ case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
+ // FIXME: need fixes in efi.h from 2.6.9
+ regs->r8 = EFI_UNSUPPORTED;
+ break;
+ case 0xffff: // test dummy hypercall
+ regs->r8 = dump_privop_counts_to_user(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33));
+ break;
+ case 0xfffe: // test dummy hypercall
+ regs->r8 = zero_privop_counts_to_user(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33));
+ break;
+ case 0xfffd: // test dummy hypercall
+ regs->r8 = launch_domainU(
+ vcpu_get_gr(v,32));
+ break;
+ case 0xfffc: // test dummy hypercall
+ regs->r8 = domU_staging_write_32(
+ vcpu_get_gr(v,32),
+ vcpu_get_gr(v,33),
+ vcpu_get_gr(v,34),
+ vcpu_get_gr(v,35),
+ vcpu_get_gr(v,36));
+ break;
+ case 0xfffb: // test dummy hypercall
+ regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32));
+ break;
+
+ case __HYPERVISOR_dom0_op:
+ regs->r8 = do_dom0_op(regs->r14);
+ break;
+
+ case __HYPERVISOR_dom_mem_op:
+#ifdef CONFIG_VTI
+ regs->r8 = do_dom_mem_op(regs->r14, regs->r15, regs->r16, regs->r17, regs->r18);
+#else
+ /* we don't handle reservations; just return success */
+ regs->r8 = regs->r16;
+#endif
+ break;
+
+ case __HYPERVISOR_event_channel_op:
+ regs->r8 = do_event_channel_op(regs->r14);
+ break;
+
+#ifndef CONFIG_VTI
+ case __HYPERVISOR_grant_table_op:
+ regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16);
+ break;
+#endif
+
+ case __HYPERVISOR_console_io:
+ regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16);
+ break;
+
+ default:
+ printf("unknown hypercall %x\n", regs->r2);
+ regs->r8 = (unsigned long)-1;
+ }
+ return 1;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hyperprivop.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/hyperprivop.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1592 @@
+/*
+ * arch/ia64/kernel/hyperprivop.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <public/arch-ia64.h>
+
+#if 1 // change to 0 to turn off all fast paths
+#define FAST_HYPERPRIVOPS
+#define FAST_HYPERPRIVOP_CNT
+#define FAST_REFLECT_CNT
+//#define FAST_TICK
+#define FAST_BREAK
+#define FAST_ACCESS_REFLECT
+#define FAST_RFI
+#define FAST_SSM_I
+#define FAST_PTC_GA
+#undef RFI_TO_INTERRUPT // not working yet
+#endif
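+
+// Each FAST_* switch above only enables an assembly fast path; when a switch
+// is off (or one of the checks below fails) the code branches back to the
+// slower C handlers via dispatch_break_fault/dispatch_reflection.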
+
+#ifdef CONFIG_SMP
+#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
+#undef FAST_PTC_GA
+#endif
+
+// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
+//#define HANDLE_AR_UNAT
+
+// FIXME: This is defined in include/asm-ia64/hw_irq.h but this
+// doesn't appear to be include'able from assembly?
+#define IA64_TIMER_VECTOR 0xef
+
+// Should be included from common header file (also in process.c)
+// NO PSR_CLR IS DIFFERENT! (CPL)
+#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
+#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
+// note IA64_PSR_PK removed from following, why is this necessary?
+#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
+ IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
+ IA64_PSR_IT | IA64_PSR_BN)
+
+#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
+ IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
+ IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
+ IA64_PSR_MC | IA64_PSR_IS | \
+ IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
+ IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
+
+// Note: not hand-scheduled for now
+// Registers at entry
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC_OFS
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+GLOBAL_ENTRY(fast_hyperprivop)
+#ifndef FAST_HYPERPRIVOPS // see beginning of file
+ br.sptk.many dispatch_break_fault ;;
+#endif
+ // HYPERPRIVOP_SSM_I?
+ // assumes domain interrupts pending, so just do it
+ cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
+(p7) br.sptk.many hyper_ssm_i;;
+
+ // FIXME. This algorithm gives up (goes to the slow path) if there
+ // are ANY interrupts pending, even if they are currently
+ // undeliverable. This should be improved later...
+ adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r20=[r20] ;;
+ cmp.eq p7,p0=r0,r20
+(p7) br.cond.sptk.many 1f
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
+ adds r21=IA64_VCPU_IRR0_OFFSET,r20;
+ adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
+ ld8 r23=[r21],16; ld8 r24=[r22],16;;
+ ld8 r21=[r21]; ld8 r22=[r22];;
+ or r23=r23,r24; or r21=r21,r22;;
+ or r20=r23,r21;;
+1: // when we get to here r20=~=interrupts pending
+
+ // HYPERPRIVOP_RFI?
+ cmp.eq p7,p6=XEN_HYPER_RFI,r17
+(p7) br.sptk.many hyper_rfi;;
+
+ // HYPERPRIVOP_GET_IVR?
+ cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
+(p7) br.sptk.many hyper_get_ivr;;
+
+ cmp.ne p7,p0=r20,r0
+(p7) br.spnt.many dispatch_break_fault ;;
+
+ // HYPERPRIVOP_COVER?
+ cmp.eq p7,p6=XEN_HYPER_COVER,r17
+(p7) br.sptk.many hyper_cover;;
+
+ // HYPERPRIVOP_SSM_DT?
+ cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
+(p7) br.sptk.many hyper_ssm_dt;;
+
+ // HYPERPRIVOP_RSM_DT?
+ cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
+(p7) br.sptk.many hyper_rsm_dt;;
+
+ // HYPERPRIVOP_GET_TPR?
+ cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
+(p7) br.sptk.many hyper_get_tpr;;
+
+ // HYPERPRIVOP_SET_TPR?
+ cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
+(p7) br.sptk.many hyper_set_tpr;;
+
+ // HYPERPRIVOP_EOI?
+ cmp.eq p7,p6=XEN_HYPER_EOI,r17
+(p7) br.sptk.many hyper_eoi;;
+
+ // HYPERPRIVOP_SET_ITM?
+ cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
+(p7) br.sptk.many hyper_set_itm;;
+
+ // HYPERPRIVOP_SET_RR?
+ cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
+(p7) br.sptk.many hyper_set_rr;;
+
+ // HYPERPRIVOP_GET_RR?
+ cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
+(p7) br.sptk.many hyper_get_rr;;
+
+ // HYPERPRIVOP_PTC_GA?
+ cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
+(p7) br.sptk.many hyper_ptc_ga;;
+
+ // HYPERPRIVOP_ITC_D?
+ cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
+(p7) br.sptk.many hyper_itc_d;;
+
+ // HYPERPRIVOP_ITC_I?
+ cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
+(p7) br.sptk.many hyper_itc_i;;
+
+ // HYPERPRIVOP_THASH?
+ cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7) br.sptk.many hyper_thash;;
+
+ // if not one of the above, give up for now and do it the slow way
+ br.sptk.many dispatch_break_fault ;;
+
+
+// give up for now if: ipsr.be==1, ipsr.pp==1
+// from reflect_interruption, don't need to:
+// - printf first extint (debug only)
+// - check for interrupt collection enabled (routine will force on)
+// - set ifa (not valid for extint)
+// - set iha (not valid for extint)
+// - set itir (not valid for extint)
+// DO need to
+// - increment the HYPER_SSM_I fast_hyperprivop counter
+// - set shared_mem iip to instruction after HYPER_SSM_I
+// - set cr.iip to guest iva+0x3000
+// - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
+// be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
+// i = shared_mem interrupt_delivery_enabled
+// ic = shared_mem interrupt_collection_enabled
+// ri = instruction after HYPER_SSM_I
+// all other bits unchanged from real cr.ipsr
+// - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
+// - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
+// and isr.ri to cr.isr.ri (all other bits zero)
+// - cover and set shared_mem precover_ifs to cr.ifs
+// ^^^ MISSED THIS FOR fast_break??
+// - set shared_mem ifs and incomplete_regframe to 0
+// - set shared_mem interrupt_delivery_enabled to 0
+// - set shared_mem interrupt_collection_enabled to 0
+// - set r31 to SHAREDINFO_ADDR
+// - virtual bank switch 0
+// maybe implement later
+// - verify that there really IS a deliverable interrupt pending
+// - set shared_mem iva
+// needs to be done but not implemented (in reflect_interruption)
+// - set shared_mem iipa
+// don't know for sure
+// - set shared_mem unat
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+ENTRY(hyper_ssm_i)
+#ifndef FAST_SSM_I
+ br.spnt.few dispatch_break_fault ;;
+#endif
+ // give up for now if: ipsr.be==1, ipsr.pp==1
+ mov r30=cr.ipsr;;
+ mov r29=cr.iip;;
+ extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0
+(p7) br.sptk.many dispatch_break_fault ;;
+ extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0
+(p7) br.sptk.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ // set shared_mem iip to instruction after HYPER_SSM_I
+ extr.u r20=r30,41,2 ;;
+ cmp.eq p6,p7=2,r20 ;;
+(p6) mov r20=0
+(p6) adds r29=16,r29
+(p7) adds r20=1,r20 ;;
+ dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet
+ adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r29 ;;
+ // set shared_mem isr
+ extr.u r16=r16,38,1;; // grab cr.isr.ir bit
+ dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
+ dep r16=r20,r16,41,2 ;; // deposit cr.isr.ri
+ adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r16 ;;
+ // set cr.ipsr
+ mov r29=r30 ;;
+ movl r28=DELIVER_PSR_SET;;
+ movl r27=~DELIVER_PSR_CLR;;
+ or r29=r29,r28;;
+ and r29=r29,r27;;
+ mov cr.ipsr=r29;;
+ // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+ extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
+ cmp.eq p6,p7=3,r29;;
+(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+ ;;
+ // FOR SSM_I ONLY, also turn on psr.i and psr.ic
+ movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
+ movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+ or r30=r30,r28;;
+ and r30=r30,r27;;
+ adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r30 ;;
+ // set shared_mem interrupt_delivery_enabled to 0
+ // set shared_mem interrupt_collection_enabled to 0
+ st8 [r18]=r0;;
+ // cover and set shared_mem precover_ifs to cr.ifs
+ // set shared_mem ifs and incomplete_regframe to 0
+ cover ;;
+ mov r20=cr.ifs;;
+ adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r21]=r0 ;;
+ adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r0 ;;
+ adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r20 ;;
+ // leave cr.ifs alone for later rfi
+ // set iip to go to domain IVA break instruction vector
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
+ adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+ ld8 r23=[r22];;
+ movl r24=0x3000;;
+ add r24=r24,r23;;
+ mov cr.iip=r24;;
+ // OK, now all set to go except for switch to virtual bank0
+ mov r30=r2; mov r29=r3;;
+ adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+ adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ bsw.1;;
+ // FIXME?: ar.unat is not really handled correctly,
+ // but may not matter if the OS is NaT-clean
+ .mem.offset 0,0; st8.spill [r2]=r16,16;
+ .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r18,16;
+ .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r20,16;
+ .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r22,16;
+ .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r24,16;
+ .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r26,16;
+ .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r28,16;
+ .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r30,16;
+ .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+ movl r31=XSI_IPSR;;
+ bsw.0 ;;
+ mov r2=r30; mov r3=r29;;
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r0 ;;
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+
+// reflect domain clock interrupt
+// r31 == pr
+// r30 == cr.ivr
+// r29 == rp
+GLOBAL_ENTRY(fast_tick_reflect)
+#ifndef FAST_TICK // see beginning of file
+ br.cond.sptk.many rp;;
+#endif
+ mov r28=IA64_TIMER_VECTOR;;
+ cmp.ne p6,p0=r28,r30
+(p6) br.cond.spnt.few rp;;
+ movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+ ld8 r26=[r20];;
+ mov r27=ar.itc;;
+ adds r27=200,r27;; // safety margin
+ cmp.ltu p6,p0=r26,r27
+(p6) br.cond.spnt.few rp;;
+ mov r17=cr.ipsr;;
+ // slow path if: ipsr.be==1, ipsr.pp==1
+ extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
+ cmp.ne p6,p0=r21,r0
+(p6) br.cond.spnt.few rp;;
+ extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
+ cmp.ne p6,p0=r21,r0
+(p6) br.cond.spnt.few rp;;
+ // definitely have a domain tick
+ mov cr.eoi=r0;;
+ mov rp=r29;;
+ mov cr.itm=r26;; // ensure next tick
+#ifdef FAST_REFLECT_CNT
+ movl r20=fast_reflect_count+((0x3000>>8)*8);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ // vcpu_pend_timer(current)
+ movl r18=XSI_PSR_IC;;
+ adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r20];;
+ cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
+(p6) br.cond.spnt.few fast_tick_reflect_done;;
+ tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
+(p6) br.cond.spnt.few fast_tick_reflect_done;;
+ extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
+ extr.u r26=r20,6,2;; // r26 has irr index of itv.vector
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r19=[r19];;
+ adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
+ adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
+ ld8 r24=[r22];;
+ ld8 r23=[r23];;
+ cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
+(p6) br.cond.spnt.few fast_tick_reflect_done;;
+ // set irr bit
+ adds r21=IA64_VCPU_IRR0_OFFSET,r19;
+ shl r26=r26,3;;
+ add r21=r21,r26;;
+ mov r25=1;;
+ shl r22=r25,r27;;
+ ld8 r23=[r21];;
+ or r22=r22,r23;;
+ st8 [r21]=r22;;
+ // set PSCB(pending_interruption)!
+ adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r25;;
+
+ // if interrupted at pl0, we're done
+ extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
+ cmp.eq p6,p0=r16,r0;;
+(p6) br.cond.spnt.few fast_tick_reflect_done;;
+ // if guest vpsr.i is off, we're done
+ adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r21=[r21];;
+ cmp.eq p6,p0=r21,r0
+(p6) br.cond.spnt.few fast_tick_reflect_done;;
+
+ // OK, we have a clock tick to deliver to the active domain!
+ // so deliver to iva+0x3000
+ // r17 == cr.ipsr
+ // r18 == XSI_PSR_IC
+ // r19 == IA64_KR(CURRENT)
+ // r31 == pr
+ mov r16=cr.isr;;
+ mov r29=cr.iip;;
+ adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r29 ;;
+ // set shared_mem isr
+ extr.u r16=r16,38,1;; // grab cr.isr.ir bit
+ dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
+ extr.u r20=r17,41,2 ;; // get ipsr.ri
+ dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
+ adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r16 ;;
+ // set cr.ipsr (make sure cpl==2!)
+ mov r29=r17 ;;
+ movl r28=DELIVER_PSR_SET;;
+ movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+ or r29=r29,r28;;
+ and r29=r29,r27;;
+ mov cr.ipsr=r29;;
+ // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
+ extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
+ cmp.eq p6,p7=3,r29;;
+(p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
+(p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
+ ;;
+ movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
+ movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
+ dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
+ or r17=r17,r28;;
+ and r17=r17,r27;;
+ ld4 r16=[r18],4;;
+ cmp.ne p6,p0=r16,r0;;
+(p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
+ ld4 r16=[r18],-4;;
+ cmp.ne p6,p0=r16,r0;;
+(p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
+ adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r17 ;;
+ // set shared_mem interrupt_delivery_enabled to 0
+ // set shared_mem interrupt_collection_enabled to 0
+ st8 [r18]=r0;;
+ // cover and set shared_mem precover_ifs to cr.ifs
+ // set shared_mem ifs and incomplete_regframe to 0
+ cover ;;
+ mov r20=cr.ifs;;
+ adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r21]=r0 ;;
+ adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r0 ;;
+ adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r20 ;;
+ // leave cr.ifs alone for later rfi
+ // set iip to go to domain IVA break instruction vector
+ adds r22=IA64_VCPU_IVA_OFFSET,r19;;
+ ld8 r23=[r22];;
+ movl r24=0x3000;;
+ add r24=r24,r23;;
+ mov cr.iip=r24;;
+ // OK, now all set to go except for switch to virtual bank0
+ mov r30=r2; mov r29=r3;;
+#ifdef HANDLE_AR_UNAT
+ mov r28=ar.unat;
+#endif
+ adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+ adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ bsw.1;;
+ .mem.offset 0,0; st8.spill [r2]=r16,16;
+ .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r18,16;
+ .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r20,16;
+ .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r22,16;
+ .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r24,16;
+ .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r26,16;
+ .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r28,16;
+ .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r30,16;
+ .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+#ifdef HANDLE_AR_UNAT
+ // bank0 regs have no NaT bit, so ensure they are NaT clean
+ mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
+ mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
+ mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
+ mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
+#endif
+ bsw.0 ;;
+ mov r2=r30; mov r3=r29;;
+#ifdef HANDLE_AR_UNAT
+ mov ar.unat=r28;
+#endif
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r0 ;;
+fast_tick_reflect_done:
+ mov pr=r31,-1 ;;
+ rfi
+END(fast_tick_reflect)
+
+// reflect domain breaks directly to domain
+// r16 == cr.isr
+// r17 == cr.iim
+// r18 == XSI_PSR_IC
+// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
+// r31 == pr
+GLOBAL_ENTRY(fast_break_reflect)
+#ifndef FAST_BREAK // see beginning of file
+ br.sptk.many dispatch_break_fault ;;
+#endif
+ mov r30=cr.ipsr;;
+ mov r29=cr.iip;;
+ extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0 ;;
+(p7) br.spnt.few dispatch_break_fault ;;
+ extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0 ;;
+(p7) br.spnt.few dispatch_break_fault ;;
+#if 1 /* special handling in case running on simulator */
+ movl r20=first_break;;
+ ld4 r23=[r20];;
+ movl r21=0x80001;
+ movl r22=0x80002;;
+ cmp.ne p7,p0=r23,r0;;
+(p7) br.spnt.few dispatch_break_fault ;;
+ cmp.eq p7,p0=r21,r17;
+(p7) br.spnt.few dispatch_break_fault ;;
+ cmp.eq p7,p0=r22,r17;
+(p7) br.spnt.few dispatch_break_fault ;;
+#endif
+ movl r20=0x2c00;
+ // save iim in shared_info
+ adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r17;;
+ // fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+// r16 == cr.isr
+// r18 == XSI_PSR_IC
+// r20 == offset into ivt
+// r29 == iip
+// r30 == ipsr
+// r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+ movl r22=fast_reflect_count;
+ shr r23=r20,5;;
+ add r22=r22,r23;;
+ ld8 r21=[r22];;
+ adds r21=1,r21;;
+ st8 [r22]=r21;;
+#endif
+ // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
+ adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r29;;
+ // set shared_mem isr
+ adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r16 ;;
+ // set cr.ipsr
+ mov r29=r30 ;;
+ movl r28=DELIVER_PSR_SET;;
+ movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+ or r29=r29,r28;;
+ and r29=r29,r27;;
+ mov cr.ipsr=r29;;
+ // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
+ extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
+ cmp.eq p6,p7=3,r29;;
+(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
+(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
+ ;;
+ movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
+ movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
+ or r30=r30,r28;;
+ and r30=r30,r27;;
+ // also set shared_mem ipsr.i and ipsr.ic appropriately
+ ld8 r24=[r18];;
+ extr.u r22=r24,32,32
+ cmp4.eq p6,p7=r24,r0;;
+(p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
+(p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
+ cmp4.eq p6,p7=r22,r0;;
+(p6) dep r30=0,r30,IA64_PSR_I_BIT,1
+(p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
+ adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r30 ;;
+ // set shared_mem interrupt_delivery_enabled to 0
+ // set shared_mem interrupt_collection_enabled to 0
+ st8 [r18]=r0;;
+ // cover and set shared_mem precover_ifs to cr.ifs
+ // set shared_mem ifs and incomplete_regframe to 0
+ cover ;;
+ mov r24=cr.ifs;;
+ adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r21]=r0 ;;
+ adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r0 ;;
+ adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r24 ;;
+ // vpsr.i = vpsr.ic = 0 on delivery of interruption
+ st8 [r18]=r0;;
+ // FIXME: need to save iipa and isr to be arch-compliant
+ // set iip to go to domain IVA break instruction vector
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
+ adds r22=IA64_VCPU_IVA_OFFSET,r22;;
+ ld8 r23=[r22];;
+ add r20=r20,r23;;
+ mov cr.iip=r20;;
+ // OK, now all set to go except for switch to virtual bank0
+ mov r30=r2; mov r29=r3;;
+#ifdef HANDLE_AR_UNAT
+ mov r28=ar.unat;
+#endif
+ adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+ adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ bsw.1;;
+ .mem.offset 0,0; st8.spill [r2]=r16,16;
+ .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r18,16;
+ .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r20,16;
+ .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r22,16;
+ .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r24,16;
+ .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r26,16;
+ .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r28,16;
+ .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r30,16;
+ .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+#ifdef HANDLE_AR_UNAT
+ // bank0 regs have no NaT bit, so ensure they are NaT clean
+ mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
+ mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
+ mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
+ mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
+#endif
+ movl r31=XSI_IPSR;;
+ bsw.0 ;;
+ mov r2=r30; mov r3=r29;;
+#ifdef HANDLE_AR_UNAT
+ mov ar.unat=r28;
+#endif
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r0 ;;
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+// r16 == isr
+// r17 == ifa
+// r19 == reflect number (only pass-thru to dispatch_reflection)
+// r20 == offset into ivt
+// r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+#ifndef FAST_ACCESS_REFLECT // see beginning of file
+ br.spnt.few dispatch_reflection ;;
+#endif
+ mov r30=cr.ipsr;;
+ mov r29=cr.iip;;
+ extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0
+(p7) br.spnt.few dispatch_reflection ;;
+ extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+ cmp.ne p7,p0=r21,r0
+(p7) br.spnt.few dispatch_reflection ;;
+ extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+ cmp.eq p7,p0=r21,r0
+(p7) br.spnt.few dispatch_reflection ;;
+ movl r18=XSI_PSR_IC;;
+ ld8 r21=[r18];;
+ cmp.eq p7,p0=r0,r21
+(p7) br.spnt.few dispatch_reflection ;;
+ // set shared_mem ifa, FIXME: should we validate it?
+ mov r17=cr.ifa;;
+ adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r21]=r17 ;;
+ // get rr[ifa] and save to itir in shared memory (extra bits ignored)
+ shr.u r22=r17,61
+ adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
+ adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+ shladd r22=r22,3,r21;;
+ ld8 r22=[r22];;
+ st8 [r23]=r22;;
+ br.cond.sptk.many fast_reflect;;
+
+
+// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
+ENTRY(hyper_rfi)
+#ifndef FAST_RFI
+ br.spnt.few dispatch_break_fault ;;
+#endif
+ // if no interrupts pending, proceed
+ mov r30=r0
+ cmp.eq p7,p0=r20,r0
+(p7) br.sptk.many 1f
+ ;;
+ adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r21=[r20];; // r21 = vcr.ipsr
+ extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
+ mov r30=r22
+ // r30 determines whether we might deliver an immediate extint
+1:
+ adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r21=[r20];; // r21 = vcr.ipsr
+ extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
+ // if turning on psr.be, give up for now and do it the slow way
+ cmp.ne p7,p0=r22,r0
+(p7) br.spnt.few dispatch_break_fault ;;
+ // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
+ movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
+ and r22=r20,r21
+ ;;
+ cmp.ne p7,p0=r22,r20
+(p7) br.spnt.few dispatch_break_fault ;;
+ // if was in metaphys mode, do it the slow way (FIXME later?)
+ adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r20=[r20];;
+ cmp.ne p7,p0=r20,r0
+(p7) br.spnt.few dispatch_break_fault ;;
+ // if domain hasn't already done virtual bank switch
+ // do it the slow way (FIXME later?)
+#if 0
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r20=[r20];;
+ cmp.eq p7,p0=r20,r0
+(p7) br.spnt.few dispatch_break_fault ;;
+#endif
+ // validate vcr.iip, if in Xen range, do it the slow way
+ adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r22=[r20];;
+ movl r23=XEN_VIRT_SPACE_LOW
+ movl r24=XEN_VIRT_SPACE_HIGH ;;
+ cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) &&
+(p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high)
+(p7) br.spnt.few dispatch_break_fault ;;
+#ifndef RFI_TO_INTERRUPT // see beginning of file
+ cmp.ne p6,p0=r30,r0
+(p6) br.cond.spnt.few dispatch_break_fault ;;
+#endif
+
+1: // OK now, let's do an rfi.
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
+ ld8 r23=[r20];;
+ adds r23=1,r23;;
+ st8 [r20]=r23;;
+#endif
+#ifdef RFI_TO_INTERRUPT
+ // maybe do an immediate interrupt delivery?
+ cmp.ne p6,p0=r30,r0
+(p6) br.cond.spnt.few rfi_check_extint;;
+#endif
+
+just_do_rfi:
+ // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
+ mov cr.iip=r22;;
+ adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r0 ;;
+ adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r20];;
+ dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
+ mov cr.ifs=r20 ;;
+ // ipsr.cpl = (vcr.ipsr.cpl == 0) ? 2 : 3;
+ dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
+ // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
+ mov r19=r0 ;;
+ extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
+ cmp.ne p7,p6=r23,r0 ;;
+ // not done yet
+(p7) dep r19=-1,r19,32,1
+ extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
+ cmp.ne p7,p6=r23,r0 ;;
+(p7) dep r19=-1,r19,0,1 ;;
+ st8 [r18]=r19 ;;
+ // force on psr.ic, i, dt, rt, it, bn
+ movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
+ ;;
+ or r21=r21,r20
+ ;;
+ mov cr.ipsr=r21
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r21=[r20];;
+ cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
+(p7) br.cond.spnt.few 1f;
+ // OK, now all set to go except for switch to virtual bank1
+ mov r22=1;; st4 [r20]=r22;
+ mov r30=r2; mov r29=r3;;
+ adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+ adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ bsw.1;;
+ // FIXME?: ar.unat is not really handled correctly,
+ // but may not matter if the OS is NaT-clean
+ .mem.offset 0,0; ld8.fill r16=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
+ .mem.offset 0,0; ld8.fill r18=[r2],16 ;
+ .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r20=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r22=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r24=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r26=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r28=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
+ .mem.offset 8,0; ld8.fill r30=[r2],16 ;
+ .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
+ bsw.0 ;;
+ mov r2=r30; mov r3=r29;;
+1: mov pr=r31,-1
+ ;;
+ rfi
+ ;;
+
+#ifdef RFI_TO_INTERRUPT
+GLOBAL_ENTRY(rfi_check_extint)
+ //br.sptk.many dispatch_break_fault ;;
+
+ // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
+ // make sure none of these get trashed in case going to just_do_rfi
+ movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r30=[r30];;
+ adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
+ mov r25=192
+ adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
+ ld8 r23=[r16];;
+ cmp.eq p6,p0=r23,r0;;
+(p6) adds r16=-8,r16;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r16];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r16=-8,r16;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r16];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r16=-8,r16;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r16];;
+(p6) cmp.eq p6,p0=r23,r0;;
+ cmp.eq p6,p0=r23,r0
+(p6) br.cond.spnt.few just_do_rfi; // this is actually an error
+ // r16 points to non-zero element of irr, r23 has value
+ // r24 points to corr element of insvc, r25 has elt*64
+ ld8 r26=[r24];;
+ cmp.geu p6,p0=r26,r23
+(p6) br.cond.spnt.many just_do_rfi;
+
+ // not masked by insvc, get vector number
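+ // the or/shr cascade below smears the highest set bit of r23 down through
+ // all lower bits; roughly, in C:
+ //   x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; x |= x>>32;
+ //   bit = 63 - popcnt(~x);  // index of the most significant set bit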
+ shr.u r26=r23,1;;
+ or r26=r23,r26;;
+ shr.u r27=r26,2;;
+ or r26=r26,r27;;
+ shr.u r27=r26,4;;
+ or r26=r26,r27;;
+ shr.u r27=r26,8;;
+ or r26=r26,r27;;
+ shr.u r27=r26,16;;
+ or r26=r26,r27;;
+ shr.u r27=r26,32;;
+ or r26=r26,r27;;
+ andcm r26=0xffffffffffffffff,r26;;
+ popcnt r26=r26;;
+ sub r26=63,r26;;
+ // r26 now contains the bit index (mod 64)
+ mov r27=1;;
+ shl r27=r27,r26;;
+ // r27 now contains the (within the proper word) bit mask
+ add r26=r25,r26
+ // r26 now contains the vector [0..255]
+ adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r20] ;;
+ extr.u r28=r20,16,1
+ extr.u r29=r20,4,4 ;;
+ cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
+(p6) br.cond.spnt.few just_do_rfi;;
+ shl r29=r29,4;;
+ adds r29=15,r29;;
+ cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
+(p6) br.cond.spnt.few just_do_rfi;;
+
+// this doesn't work yet (dies early after getting to user mode)
+// but happens relatively infrequently, so fix it later.
+// NOTE that these will be counted incorrectly for now (for privcnt output)
+GLOBAL_ENTRY(rfi_with_interrupt)
+#if 1
+ br.sptk.many dispatch_break_fault ;;
+#endif
+
+ // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
+ // r18 == XSI_PSR_IC
+ // r21 == vipsr (ipsr in shared_mem)
+ // r30 == IA64_KR(CURRENT)
+ // r31 == pr
+ mov r17=cr.ipsr;;
+ mov r16=cr.isr;;
+ // set shared_mem isr
+ extr.u r16=r16,38,1;; // grab cr.isr.ir bit
+ dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
+ extr.u r20=r21,41,2 ;; // get v(!)psr.ri
+ dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
+ adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r22]=r16 ;;
+ // set cr.ipsr (make sure cpl==2!)
+ mov r29=r17 ;;
+ movl r28=DELIVER_PSR_SET;;
+ movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
+ or r29=r29,r28;;
+ and r29=r29,r27;;
+ mov cr.ipsr=r29;;
+ // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
+ // set shared_mem interrupt_delivery_enabled to 0
+ // set shared_mem interrupt_collection_enabled to 0
+ st8 [r18]=r0;;
+ // cover and set shared_mem precover_ifs to cr.ifs
+ // set shared_mem ifs and incomplete_regframe to 0
+#if 0
+ cover ;;
+ mov r20=cr.ifs;;
+ adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r22]=r0 ;;
+ adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r22]=r0 ;;
+ adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r22]=r20 ;;
+ // leave cr.ifs alone for later rfi
+#else
+ adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r22]=r0 ;;
+ adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r22];;
+ st8 [r22]=r0 ;;
+ adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st8 [r22]=r20 ;;
+#endif
+ // set iip to go to domain IVA break instruction vector
+ adds r22=IA64_VCPU_IVA_OFFSET,r30;;
+ ld8 r23=[r22];;
+ movl r24=0x3000;;
+ add r24=r24,r23;;
+ mov cr.iip=r24;;
+#if 0
+ // OK, now all set to go except for switch to virtual bank0
+ mov r30=r2; mov r29=r3;;
+ adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
+ adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
+ bsw.1;;
+ // FIXME: need to handle ar.unat!
+ .mem.offset 0,0; st8.spill [r2]=r16,16;
+ .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r18,16;
+ .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r20,16;
+ .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r22,16;
+ .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r24,16;
+ .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r26,16;
+ .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r28,16;
+ .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+ .mem.offset 0,0; st8.spill [r2]=r30,16;
+ .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
+ movl r31=XSI_IPSR;;
+ bsw.0 ;;
+ mov r2=r30; mov r3=r29;;
+#else
+ bsw.1;;
+ movl r31=XSI_IPSR;;
+ bsw.0 ;;
+#endif
+ adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
+ st4 [r20]=r0 ;;
+ mov pr=r31,-1 ;;
+ rfi
+#endif // RFI_TO_INTERRUPT
+
+ENTRY(hyper_cover)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
+ cover ;;
+ adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
+ mov r30=cr.ifs;;
+ adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
+ ld4 r21=[r20] ;;
+ cmp.eq p6,p7=r21,r0 ;;
+(p6) st8 [r22]=r30;;
+(p7) st4 [r20]=r0;;
+ mov cr.ifs=r0;;
+ // adjust return address to skip over break instruction
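+ // i.e. advance ipsr.ri to the next instruction slot; if already in
+ // slot 2, wrap ri to 0 and move iip to the next 16-byte bundle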
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+
+// return from metaphysical mode (meta=1) to virtual mode (meta=0)
+ENTRY(hyper_ssm_dt)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r21=[r20];;
+ cmp.eq p7,p0=r21,r0 // meta==0?
+(p7) br.spnt.many 1f ;; // already in virtual mode
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
+ adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
+ ld4 r23=[r22];;
+ mov rr[r0]=r23;;
+ srlz.i;;
+ st4 [r20]=r0 ;;
+ // adjust return address to skip over break instruction
+1: extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+
+// go to metaphysical mode (meta=1) from virtual mode (meta=0)
+ENTRY(hyper_rsm_dt)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld4 r21=[r20];;
+ cmp.ne p7,p0=r21,r0 // meta==0?
+(p7) br.spnt.many 1f ;; // already in metaphysical mode
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
+ adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
+ ld4 r23=[r22];;
+ mov rr[r0]=r23;;
+ srlz.i;;
+ adds r21=1,r0 ;;
+ st4 [r20]=r21 ;;
+ // adjust return address to skip over break instruction
+1: extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+
+ENTRY(hyper_get_tpr)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r8=[r20];;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_get_tpr)
+
+// if we get to here, there are no interrupts pending so we
+// can change virtual tpr to any value without fear of provoking
+// (or accidentally missing) delivering an interrupt
+ENTRY(hyper_set_tpr)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ movl r27=0xff00;;
+ adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ andcm r8=r8,r27;;
+ st8 [r20]=r8;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_set_tpr)
+
+ENTRY(hyper_get_ivr)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
+ ld8 r21=[r22];;
+ adds r21=1,r21;;
+ st8 [r22]=r21;;
+#endif
+ mov r8=15;;
+ // when we get to here r20=~=interrupts pending
+ cmp.eq p7,p0=r20,r0;;
+(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
+(p7) st4 [r20]=r0;;
+(p7) br.spnt.many 1f ;;
+ movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r30=[r30];;
+ adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
+ mov r25=192
+ adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
+ ld8 r23=[r22];;
+ cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) adds r24=-8,r24;;
+(p6) adds r25=-64,r25;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+ cmp.eq p6,p0=r23,r0
+(p6) br.cond.spnt.few 1f; // this is actually an error
+ // r22 points to non-zero element of irr, r23 has value
+ // r24 points to corr element of insvc, r25 has elt*64
+ ld8 r26=[r24];;
+ cmp.geu p6,p0=r26,r23
+(p6) br.cond.spnt.many 1f;
+ // not masked by insvc, get vector number
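+ // same most-significant-bit scan as in rfi_check_extint above:
+ // smear the top bit down with or/shr, then bit = 63 - popcnt(~x)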
+ shr.u r26=r23,1;;
+ or r26=r23,r26;;
+ shr.u r27=r26,2;;
+ or r26=r26,r27;;
+ shr.u r27=r26,4;;
+ or r26=r26,r27;;
+ shr.u r27=r26,8;;
+ or r26=r26,r27;;
+ shr.u r27=r26,16;;
+ or r26=r26,r27;;
+ shr.u r27=r26,32;;
+ or r26=r26,r27;;
+ andcm r26=0xffffffffffffffff,r26;;
+ popcnt r26=r26;;
+ sub r26=63,r26;;
+ // r26 now contains the bit index (mod 64)
+ mov r27=1;;
+ shl r27=r27,r26;;
+ // r27 now contains the (within the proper word) bit mask
+ add r26=r25,r26
+ // r26 now contains the vector [0..255]
+ adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r20] ;;
+ extr.u r28=r20,16,1
+ extr.u r29=r20,4,4 ;;
+ cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS
+(p6) br.cond.spnt.few 1f;
+ shl r29=r29,4;;
+ adds r29=15,r29;;
+ cmp.ge p6,p0=r29,r26
+(p6) br.cond.spnt.few 1f;
+ // OK, have an unmasked vector to process/return
+ ld8 r25=[r24];;
+ or r25=r25,r27;;
+ st8 [r24]=r25;;
+ ld8 r25=[r22];;
+ andcm r25=r25,r27;;
+ st8 [r22]=r25;;
+ mov r8=r26;;
+ // if it's a clock tick, remember itm to avoid delivering it twice
+ adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
+ ld8 r20=[r20];;
+ extr.u r20=r20,0,8;;
+ cmp.eq p6,p0=r20,r8
+ adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
+ adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
+ ld8 r23=[r23];;
+(p6) st8 [r22]=r23;;
+ // all done
+1: mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_get_ivr)
+
+ENTRY(hyper_eoi)
+ // when we get to here r20=~=interrupts pending
+ cmp.ne p7,p0=r20,r0
+(p7) br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
+ adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
+ ld8 r23=[r22];;
+ cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+(p6) adds r22=-8,r22;;
+(p6) ld8 r23=[r22];;
+(p6) cmp.eq p6,p0=r23,r0;;
+ cmp.eq p6,p0=r23,r0
+(p6) br.cond.spnt.few 1f; // this is actually an error
+ // r22 points to non-zero element of insvc, r23 has value
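+ // the scan below isolates the highest in-service bit (the interrupt
+ // being EOIed) and clears it in insvc; same or/shr + popcnt trick as above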
+ shr.u r24=r23,1;;
+ or r24=r23,r24;;
+ shr.u r25=r24,2;;
+ or r24=r24,r25;;
+ shr.u r25=r24,4;;
+ or r24=r24,r25;;
+ shr.u r25=r24,8;;
+ or r24=r24,r25;;
+ shr.u r25=r24,16;;
+ or r24=r24,r25;;
+ shr.u r25=r24,32;;
+ or r24=r24,r25;;
+ andcm r24=0xffffffffffffffff,r24;;
+ popcnt r24=r24;;
+ sub r24=63,r24;;
+ // r24 now contains the bit index
+ mov r25=1;;
+ shl r25=r25,r24;;
+ andcm r23=r23,r25;;
+ st8 [r22]=r23;;
+1: mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_eoi)
+
+ENTRY(hyper_set_itm)
+ // when we get to here r20=~=interrupts pending
+ cmp.ne p7,p0=r20,r0
+(p7) br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+ ld8 r21=[r20];;
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
+ adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
+ st8 [r20]=r8;;
+ cmp.geu p6,p0=r21,r8;;
+(p6) mov r21=r8;;
+ // now "safe set" cr.itm=r21
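+ // roughly: delta = 100; write cr.itm = t; if ar.itc has already passed t,
+ // retry with t += delta and delta doubled, so cr.itm is never left
+ // pointing into the past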
+ mov r23=100;;
+2: mov cr.itm=r21;;
+ srlz.d;;
+ mov r22=ar.itc ;;
+ cmp.leu p6,p0=r21,r22;;
+ add r21=r21,r23;;
+ shl r23=r23,1;;
+(p6) br.cond.spnt.few 2b;;
+1: mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_set_itm)
+
+ENTRY(hyper_get_rr)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ extr.u r25=r8,61,3;;
+ adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+ shl r25=r25,3;;
+ add r20=r20,r25;;
+ ld8 r8=[r20];;
+1: mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_get_rr)
+
+ENTRY(hyper_set_rr)
+ extr.u r25=r8,61,3;;
+ cmp.leu p7,p0=7,r25 // punt on setting rr7
+(p7) br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ extr.u r26=r9,8,24 // r26 = r9.rid
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
+ adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
+ ld4 r22=[r21];;
+ adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
+ ld4 r23=[r21];;
+ adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
+ add r22=r26,r22;;
+ cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
+(p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
+ // r21=starting_rid
+ adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+ shl r25=r25,3;;
+ add r20=r20,r25;;
+ st8 [r20]=r9;; // store away exactly what was passed
+ // but adjust value actually placed in rr[r8]
+ // r22 contains adjusted rid, "mangle" it (see regionreg.c)
+ // and set ps to PAGE_SHIFT and ve to 1
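+ // roughly, in C (rid = adjusted 24-bit rid in r22):
+ //   rr = ((rid & 0x0000ff) << 24) | ((rid & 0x00ff00) << 8)
+ //      | ((rid & 0xff0000) >> 8)  | (PAGE_SHIFT << 2) | 1 /* ve */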
+ extr.u r27=r22,0,8
+ extr.u r28=r22,8,8
+ extr.u r29=r22,16,8;;
+ dep.z r23=PAGE_SHIFT,2,6;;
+ dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
+ dep r23=r27,r23,24,8;;
+ dep r23=r28,r23,16,8;;
+ dep r23=r29,r23,8,8
+ cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
+(p6) st4 [r24]=r23
+ mov rr[r8]=r23;;
+ // done, mosey on back
+1: mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_set_rr)
+
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+// r18 == XSI_PSR_IC
+// r31 == pr
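+// A rough sketch of what it computes (the short-format VHPT hash), with
+// mask = (1UL << pta.size) - 1:
+//   thash(vadr) = (vadr & (7UL << 61))                        // region bits
+//               | (pta_base & ~mask)                          // VHPT base
+//               | (((vadr >> rr[vadr >> 61].ps) << 3) & mask) // hashed offset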
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ shr.u r20 = r8, 61
+ addl r25 = 1, r0
+ movl r17 = 0xe000000000000000
+ ;;
+ and r21 = r17, r8 // VHPT_Addr1
+ ;;
+ shladd r28 = r20, 3, r18
+ adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+ ;;
+ adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+ addl r28 = 32767, r0
+ ld8 r24 = [r19] // pta
+ ;;
+ ld8 r23 = [r27] // rrs[vadr>>61]
+ extr.u r26 = r24, 2, 6
+ ;;
+ extr.u r22 = r23, 2, 6
+ shl r30 = r25, r26
+ ;;
+ shr.u r19 = r8, r22
+ shr.u r29 = r24, 15
+ ;;
+ adds r17 = -1, r30
+ ;;
+ shladd r27 = r19, 3, r0
+ extr.u r26 = r17, 15, 46
+ ;;
+ andcm r24 = r29, r26
+ and r19 = r28, r27
+ shr.u r25 = r27, 15
+ ;;
+ and r23 = r26, r25
+ ;;
+ or r22 = r24, r23
+ ;;
+ dep.z r20 = r22, 15, 46
+ ;;
+ or r16 = r20, r21
+ ;;
+ or r8 = r19, r16
+ // done, update iip/ipsr to next instruction
+ mov r24=cr.ipsr
+ mov r25=cr.iip;;
+ extr.u r26=r24,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r25=16,r25
+(p7) adds r26=1,r26
+ ;;
+ dep r24=r26,r24,41,2
+ ;;
+ mov cr.ipsr=r24
+ mov cr.iip=r25
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_thash)
+
+ENTRY(hyper_ptc_ga)
+#ifndef FAST_PTC_GA
+ br.spnt.few dispatch_break_fault ;;
+#endif
+ // FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+ movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+ ld8 r21=[r20];;
+ adds r21=1,r21;;
+ st8 [r20]=r21;;
+#endif
+ mov r28=r8
+ extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2)
+ mov r20=1
+ shr.u r24=r8,61
+ addl r27=56,r0 // PAGE_SHIFT<<2 (for ptc.ga)
+ movl r26=0x8000000000000000 // INVALID_TI_TAG
+ mov r30=ar.lc
+ ;;
+ shl r19=r20,r19
+ cmp.eq p7,p0=7,r24
+(p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7
+ ;;
+ cmp.le p7,p0=r19,r0 // skip flush if size<=0
+(p7) br.cond.dpnt 2f ;;
+ extr.u r24=r19,0,PAGE_SHIFT
+ shr.u r23=r19,PAGE_SHIFT ;; // repeat loop for n pages
+ cmp.ne p7,p0=r24,r0 ;;
+(p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? extra iter
+ mov ar.lc=r23
+ movl r29=PAGE_SIZE;;
+1:
+ thash r25=r28 ;;
+ adds r25=16,r25 ;;
+ ld8 r24=[r25] ;;
+ // FIXME: should check if tag matches, not just blow it away
+ or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1
+ st8 [r25]=r24
+ ptc.ga r28,r27 ;;
+ srlz.i ;;
+ add r28=r29,r28
+ br.cloop.sptk.few 1b
+ ;;
+2:
+ mov ar.lc=r30 ;;
+ mov r29=cr.ipsr
+ mov r30=cr.iip;;
+ movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r27=[r27];;
+ adds r25=IA64_VCPU_DTLB_OFFSET,r27
+ adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+ ld8 r24=[r25]
+ ld8 r27=[r26] ;;
+ and r24=-2,r24
+ and r27=-2,r27 ;;
+ st8 [r25]=r24 // set 1-entry i/dtlb as not present
+ st8 [r26]=r27 ;;
+ // increment to point to next instruction
+ extr.u r26=r29,41,2 ;;
+ cmp.eq p6,p7=2,r26 ;;
+(p6) mov r26=0
+(p6) adds r30=16,r30
+(p7) adds r26=1,r26
+ ;;
+ dep r29=r26,r29,41,2
+ ;;
+ mov cr.ipsr=r29
+ mov cr.iip=r30
+ mov pr=r31,-1 ;;
+ rfi
+ ;;
+END(hyper_ptc_ga)
+
+ENTRY(hyper_itc_d)
+ br.spnt.many dispatch_break_fault ;;
+END(hyper_itc_d)
+
+ENTRY(hyper_itc_i)
+ br.spnt.many dispatch_break_fault ;;
+END(hyper_itc_i)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/idle0_task.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/idle0_task.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,58 @@
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/desc.h>
+
+#define INIT_MM(name) \
+{ \
+ .pgd = swapper_pg_dir, \
+ .mm_users = ATOMIC_INIT(2), \
+ .mm_count = ATOMIC_INIT(1), \
+ .page_table_lock = SPIN_LOCK_UNLOCKED, \
+ .mmlist = LIST_HEAD_INIT(name.mmlist), \
+}
+
+#define IDLE0_EXEC_DOMAIN(_ed,_d) \
+{ \
+ processor: 0, \
+ mm: 0, \
+ thread: INIT_THREAD, \
+ domain: (_d) \
+}
+
+#define IDLE0_DOMAIN(_t) \
+{ \
+ domain_id: IDLE_DOMAIN_ID, \
+ domain_flags:DOMF_idle_domain, \
+ refcnt: ATOMIC_INIT(1) \
+}
+
+struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_SYMBOL(init_mm);
+
+struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);
+#if 0
+struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
+ &idle0_domain);
+#endif
+
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is properly aligned due to the way process stacks are
+ * handled. This is done by having a special ".data.init_task" section...
+ */
+union {
+ struct {
+ struct domain task;
+ } s;
+ unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
+} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));
+// = {{
+ ;
+//.task = IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
+//};
+//};
+
+EXPORT_SYMBOL(init_task);
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/irq.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/irq.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1503 @@
+/*
+ * linux/arch/ia64/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ *
+ * Copyright (C) Ashok Raj<ashok.raj@xxxxxxxxx>, Intel Corporation 2004
+ *
+ * 4/14/2004: Added code to handle cpu migration and do safe irq
+ * migration without losing interrupts for iosapic
+ * architecture.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/signal.h>
+#endif
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#ifndef XEN
+#include <linux/random.h>
+#include <linux/cpu.h>
+#endif
+#include <linux/ctype.h>
+#ifndef XEN
+#include <linux/smp_lock.h>
+#endif
+#include <linux/init.h>
+#ifndef XEN
+#include <linux/kernel_stat.h>
+#endif
+#include <linux/irq.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+#include <linux/seq_file.h>
+#ifndef XEN
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#endif
+
+#include <asm/atomic.h>
+#ifndef XEN
+#include <asm/cpu.h>
+#endif
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#ifndef XEN
+#include <asm/tlbflush.h>
+#endif
+#include <asm/delay.h>
+#include <asm/irq.h>
+
+#ifdef XEN
+#include <xen/event.h>
+#define _irq_desc irq_desc
+#define irq_descp(irq) &irq_desc[irq]
+#define apicid_to_phys_cpu_present(x) 1
+#endif
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = {
+ [0 ... NR_IRQS-1] = {
+ .status = IRQ_DISABLED,
+ .handler = &no_irq_type,
+ .lock = SPIN_LOCK_UNLOCKED
+ }
+};
+
+/*
+ * This is updated when the user sets irq affinity via /proc
+ */
+cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
+
+#ifdef CONFIG_IA64_GENERIC
+irq_desc_t * __ia64_irq_desc (unsigned int irq)
+{
+ return _irq_desc + irq;
+}
+
+ia64_vector __ia64_irq_to_vector (unsigned int irq)
+{
+ return (ia64_vector) irq;
+}
+
+unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
+{
+ return (unsigned int) vec;
+}
+#endif
+
+static void register_irq_proc (unsigned int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+#ifdef XEN
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+#else
+irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
+{ return IRQ_NONE; }
+#endif
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesn't deserve
+ * a generic callback i think.
+ */
+#ifdef CONFIG_X86
+ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+#endif
+#endif
+#ifdef CONFIG_IA64
+ printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none disable_none
+#define end_none enable_none
+
+struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+#ifndef XEN
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int j, i = *(loff_t *) v;
+ struct irqaction * action;
+ irq_desc_t *idesc;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_puts(p, " ");
+ for (j=0; j<NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ idesc = irq_descp(i);
+ spin_lock_irqsave(&idesc->lock, flags);
+ action = idesc->action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", idesc->handler->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&idesc->lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_puts(p, "NMI: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", nmi_count(j));
+ seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_puts(p, "LOC: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
+ seq_putc(p, '\n');
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+ }
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+inline void synchronize_irq(unsigned int irq)
+{
+#ifndef XEN
+ struct irq_desc *desc = irq_desc + irq;
+
+ while (desc->status & IRQ_INPROGRESS)
+ cpu_relax();
+#endif
+}
+EXPORT_SYMBOL(synchronize_irq);
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq,
+ struct pt_regs *regs, struct irqaction *action)
+{
+ int status = 1; /* Force the "do bottom halves" bit */
+ int retval = 0;
+
+#ifndef XEN
+ if (!(action->flags & SA_INTERRUPT))
+#endif
+ local_irq_enable();
+
+#ifdef XEN
+ action->handler(irq, action->dev_id, regs);
+#else
+ do {
+ status |= action->flags;
+ retval |= action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+#endif
+ local_irq_disable();
+ return retval;
+}
+
+#ifndef XEN
+static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+ struct irqaction *action;
+
+ if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
+ printk(KERN_ERR "irq event %d: bogus return value %x\n",
+ irq, action_ret);
+ } else {
+ printk(KERN_ERR "irq %d: nobody cared!\n", irq);
+ }
+ dump_stack();
+ printk(KERN_ERR "handlers:\n");
+ action = desc->action;
+ do {
+ printk(KERN_ERR "[<%p>]", action->handler);
+ print_symbol(" (%s)",
+ (unsigned long)action->handler);
+ printk("\n");
+ action = action->next;
+ } while (action);
+}
+
+static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+ static int count = 100;
+
+ if (count) {
+ count--;
+ __report_bad_irq(irq, desc, action_ret);
+ }
+}
+#endif
+
+static int noirqdebug;
+
+static int __init noirqdebug_setup(char *str)
+{
+ noirqdebug = 1;
+ printk("IRQ lockup detection disabled\n");
+ return 1;
+}
+
+__setup("noirqdebug", noirqdebug_setup);
+
+/*
+ * If 99,900 of the previous 100,000 interrupts have not been handled then
+ * assume that the IRQ is stuck in some manner. Drop a diagnostic and try to
+ * turn the IRQ off.
+ *
+ * (The other 100-of-100,000 interrupts may have been a correctly-functioning
+ * device sharing an IRQ with the failing one)
+ *
+ * Called under desc->lock
+ */
+#ifndef XEN
+static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+{
+ if (action_ret != IRQ_HANDLED) {
+ desc->irqs_unhandled++;
+ if (action_ret != IRQ_NONE)
+ report_bad_irq(irq, desc, action_ret);
+ }
+
+ desc->irq_count++;
+ if (desc->irq_count < 100000)
+ return;
+
+ desc->irq_count = 0;
+ if (desc->irqs_unhandled > 99900) {
+ /*
+ * The interrupt is stuck
+ */
+ __report_bad_irq(irq, desc, action_ret);
+ /*
+ * Now kill the IRQ
+ */
+ printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ desc->irqs_unhandled = 0;
+}
+#endif
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+/**
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ *
+ * This function may be called from IRQ context.
+ */
+
+inline void disable_irq_nosync(unsigned int irq)
+{
+ irq_desc_t *desc = irq_descp(irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(disable_irq_nosync);
+
+/**
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are
+ * nested.
+ * This function waits for any pending IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while
+ * holding a resource the IRQ handler may need you will deadlock.
+ *
+ * This function may be called - with care - from IRQ context.
+ */
+
+void disable_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_descp(irq);
+
+ disable_irq_nosync(irq);
+ if (desc->action)
+ synchronize_irq(irq);
+}
+EXPORT_SYMBOL(disable_irq);
+
+/**
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
+ *
+ * Undoes the effect of one call to disable_irq(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ *
+ * This function may be called from IRQ context.
+ */
+
+void enable_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_descp(irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+#ifndef XEN
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
+ }
+#endif
+ desc->handler->enable(irq);
+ /* fall-through */
+ }
+ default:
+ desc->depth--;
+ break;
+ case 0:
+ printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n",
+ irq, (void *) __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+EXPORT_SYMBOL(enable_irq);
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+#ifndef XEN
+ kstat_this_cpu.irqs[irq]++;
+#endif
+ if (desc->status & IRQ_PER_CPU) {
+ irqreturn_t action_ret;
+
+ /*
+ * No locking required for CPU-local interrupts:
+ */
+ desc->handler->ack(irq);
+ action_ret = handle_IRQ_event(irq, regs, desc->action);
+#ifndef XEN
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret);
+#endif
+ desc->handler->end(irq);
+ return 1;
+ }
+
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+ * REPLAY is when Linux resends an IRQ that was dropped earlier
+ * WAITING is used by probe to mark irqs that are being tested
+ */
+#ifdef XEN
+ status = desc->status & ~IRQ_REPLAY;
+#else
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+#endif
+ status |= IRQ_PENDING; /* we _want_ to handle it */
+
+ /*
+ * If the IRQ is disabled for whatever reason, we cannot
+ * use the action we have.
+ */
+ action = NULL;
+ if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
+ }
+ desc->status = status;
+
+ /*
+ * If there is no IRQ handler or it was disabled, exit early.
+ * Since we set PENDING, if another processor is handling
+ * a different instance of this same irq, the other processor
+ * will take care of it.
+ */
+ if (unlikely(!action))
+ goto out;
+
+ /*
+ * Edge triggered interrupts need to remember
+ * pending events.
+ * This applies to any hw interrupts that allow a second
+ * instance of the same irq to arrive while we are in do_IRQ
+ * or in the handler. But the code here only handles the _second_
+ * instance of the irq, not the third or fourth. So it is mostly
+ * useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
+ */
+ for (;;) {
+ irqreturn_t action_ret;
+
+ spin_unlock(&desc->lock);
+
+ action_ret = handle_IRQ_event(irq, regs, action);
+
+ spin_lock(&desc->lock);
+#ifndef XEN
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret);
+#endif
+ if (likely(!(desc->status & IRQ_PENDING)))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+
+out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+ return 1;
+}
+
+/**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ *
+ * SA_SAMPLE_RANDOM The interrupt can be used for entropy
+ *
+ */
+
+int request_irq(unsigned int irq,
+ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+{
+ int retval;
+ struct irqaction * action;
+
+#if 1
+ /*
+ * Sanity-check: shared interrupts should REALLY pass in
+ * a real dev-ID, otherwise we'll have trouble later trying
+ * to figure out which interrupt is which (messes up the
+ * interrupt freeing logic etc).
+ */
+ if (irqflags & SA_SHIRQ) {
+ if (!dev_id)
+ printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname);
+ }
+#endif
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = xmalloc(struct irqaction);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+#ifndef XEN
+ action->flags = irqflags;
+ action->mask = 0;
+#endif
+ action->name = devname;
+#ifndef XEN
+ action->next = NULL;
+#endif
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ xfree(action);
+ return retval;
+}
+
+EXPORT_SYMBOL(request_irq);
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function must not be called from interrupt context.
+ */
+
+#ifdef XEN
+void free_irq(unsigned int irq)
+#else
+void free_irq(unsigned int irq, void *dev_id)
+#endif
+{
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
+ return;
+
+ desc = irq_descp(irq);
+ spin_lock_irqsave(&desc->lock,flags);
+#ifdef XEN
+ if (desc->action) {
+ struct irqaction * action = desc->action;
+ desc->action = NULL;
+#else
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+#endif
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+#ifndef XEN
+ }
+#endif
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ /* Wait to make sure it's not being used on another CPU */
+ synchronize_irq(irq);
+ xfree(action);
+ return;
+ }
+ printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
+#ifndef XEN
+ return;
+ }
+#endif
+}
+
+EXPORT_SYMBOL(free_irq);
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+#ifndef XEN
+unsigned long probe_irq_on(void)
+{
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long delay;
+
+ down(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_descp(i);
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action)
+ desc->handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger. */
+ for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+ /* about 20ms delay */ barrier();
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_descp(i);
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger
+ */
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+ /* about 100ms delay */ barrier();
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+}
+
+EXPORT_SYMBOL(probe_irq_on);
+
+/**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < 16; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ return mask & val;
+}
+EXPORT_SYMBOL(probe_irq_mask);
+
+/**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ * there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ * BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+int probe_irq_off(unsigned long val)
+{
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_descp(i);
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+}
+
+EXPORT_SYMBOL(probe_irq_off);
+#endif
+
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_descp(irq);
+
+#ifndef XEN
+ if (desc->handler == &no_irq_type)
+ return -ENOSYS;
+ /*
+ * Some drivers like serial.c use request_irq() heavily,
+ * so we have to be careful not to interfere with a
+ * running system.
+ */
+ if (new->flags & SA_SAMPLE_RANDOM) {
+ /*
+ * This function might sleep, we want to call it first,
+ * outside of the atomic block.
+ * Yes, this might clear the entropy pool if the wrong
+ * driver is attempted to be loaded, without actually
+ * installing a new handler. But is this really a problem?
+ * Only the sysadmin is able to do this.
+ */
+ rand_initialize_irq(irq);
+ }
+
+ if (new->flags & SA_PERCPU_IRQ) {
+ desc->status |= IRQ_PER_CPU;
+ desc->handler = &irq_type_ia64_lsapic;
+ }
+#endif
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+#ifdef XEN
+ if (1) {
+ /* Can't share interrupts unless both agree to */
+#else
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+#endif
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+#ifndef XEN
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+#endif
+ }
+
+ *p = new;
+
+#ifndef XEN
+ if (!shared) {
+#else
+ {
+#endif
+ desc->depth = 0;
+#ifdef XEN
+ desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
+#else
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
+#endif
+ desc->handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifndef XEN
+ register_irq_proc(irq);
+#endif
+ return 0;
+}
+
+#ifndef XEN
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#ifdef CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
+
+static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
+
+void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
+{
+ cpumask_t mask = CPU_MASK_NONE;
+
+ cpu_set(cpu_logical_id(hwid), mask);
+
+ if (irq < NR_IRQS) {
+ irq_affinity[irq] = mask;
+ irq_redir[irq] = (char) (redir & 0xff);
+ }
+}
+
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = sprintf(page, "%s", irq_redir[(long)data] ? "r " : "");
+
+ len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]);
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ unsigned int irq = (unsigned long) data;
+ int full_count = count, err;
+ cpumask_t new_value, tmp;
+# define R_PREFIX_LEN 16
+ char rbuf[R_PREFIX_LEN];
+ int rlen;
+ int prelen;
+ irq_desc_t *desc = irq_descp(irq);
+ unsigned long flags;
+
+ if (!desc->handler->set_affinity)
+ return -EIO;
+
+ /*
+ * If string being written starts with a prefix of 'r' or 'R'
+ * and some limited number of spaces, set IA64_IRQ_REDIRECTED.
+ * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't
+ * all be trimmed as part of prelen, the untrimmed spaces will
+ * cause the hex parsing to fail, and this write() syscall will
+ * fail with EINVAL.
+ */
+
+ if (!count)
+ return -EINVAL;
+ rlen = min(sizeof(rbuf)-1, count);
+ if (copy_from_user(rbuf, buffer, rlen))
+ return -EFAULT;
+ rbuf[rlen] = 0;
+ prelen = 0;
+ if (tolower(*rbuf) == 'r') {
+ prelen = strspn(rbuf, "Rr ");
+ irq |= IA64_IRQ_REDIRECTED;
+ }
+
+ err = cpumask_parse(buffer+prelen, count-prelen, new_value);
+ if (err)
+ return err;
+
+ /*
+ * Do not allow disabling IRQs completely - it's a too easy
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+ cpus_and(tmp, new_value, cpu_online_map);
+ if (cpus_empty(tmp))
+ return -EINVAL;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ pending_irq_cpumask[irq] = new_value;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ return full_count;
+}
+
+void move_irq(int irq)
+{
+ /* note - we hold desc->lock */
+ cpumask_t tmp;
+ irq_desc_t *desc = irq_descp(irq);
+
+ if (!cpus_empty(pending_irq_cpumask[irq])) {
+ cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+ if (unlikely(!cpus_empty(tmp))) {
+ desc->handler->set_affinity(irq, pending_irq_cpumask[irq]);
+ }
+ cpus_clear(pending_irq_cpumask[irq]);
+ }
+}
+
+
+#endif /* CONFIG_SMP */
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+unsigned int vectors_in_migration[NR_IRQS];
+
+/*
+ * Since cpu_online_map is already updated, we just need to check for
+ * affinity that has zeros
+ */
+static void migrate_irqs(void)
+{
+ cpumask_t mask;
+ irq_desc_t *desc;
+ int irq, new_cpu;
+
+ for (irq=0; irq < NR_IRQS; irq++) {
+ desc = irq_descp(irq);
+
+ /*
+ * No handling for now.
+ * TBD: Implement a disable function so we can now
+ * tell CPU not to respond to these local intr sources.
+ * such as ITV,CPEI,MCA etc.
+ */
+ if (desc->status == IRQ_PER_CPU)
+ continue;
+
+ cpus_and(mask, irq_affinity[irq], cpu_online_map);
+ if (any_online_cpu(mask) == NR_CPUS) {
+ /*
+ * Save it for phase 2 processing
+ */
+ vectors_in_migration[irq] = irq;
+
+ new_cpu = any_online_cpu(cpu_online_map);
+ mask = cpumask_of_cpu(new_cpu);
+
+ /*
+ * All three are essential, currently WARN_ON.. maybe panic?
+ */
+ if (desc->handler && desc->handler->disable &&
+ desc->handler->enable && desc->handler->set_affinity) {
+ desc->handler->disable(irq);
+ desc->handler->set_affinity(irq, mask);
+ desc->handler->enable(irq);
+ } else {
+ WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
+ !(desc->handler->enable) || !(desc->handler->set_affinity)));
+ }
+ }
+ }
+}
+
+void fixup_irqs(void)
+{
+ unsigned int irq;
+ extern void ia64_process_pending_intr(void);
+
+ ia64_set_itv(1<<16);
+ /*
+ * Phase 1: Locate irq's bound to this cpu and
+ * relocate them for cpu removal.
+ */
+ migrate_irqs();
+
+ /*
+ * Phase 2: Perform interrupt processing for all entries reported in
+ * local APIC.
+ */
+ ia64_process_pending_intr();
+
+ /*
+ * Phase 3: Now handle any interrupts not captured in local APIC.
+ * This is to account for cases where the device interrupted while the
+ * rte was being disabled and re-programmed.
+ */
+ for (irq=0; irq < NR_IRQS; irq++) {
+ if (vectors_in_migration[irq]) {
+ vectors_in_migration[irq]=0;
+ do_IRQ(irq, NULL);
+ }
+ }
+
+ /*
+ * Now let processor die. We do irq disable and max_xtp() to
+ * ensure there is no more interrupts routed to this processor.
+ * But the local timer interrupt can have 1 pending which we
+ * take care in timer_interrupt().
+ */
+ max_xtp();
+ local_irq_disable();
+}
+#endif
+
+#ifndef XEN
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ cpumask_t *mask = (cpumask_t *)data;
+ unsigned long full_count = count, err;
+ cpumask_t new_value;
+
+ err = cpumask_parse(buffer, count, new_value);
+ if (err)
+ return err;
+
+ *mask = new_value;
+ return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+ char name [MAX_NAMELEN];
+
+ if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || irq_dir[irq])
+ return;
+
+ memset(name, 0, MAX_NAMELEN);
+ sprintf(name, "%d", irq);
+
+ /* create /proc/irq/1234 */
+ irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#ifdef CONFIG_SMP
+ {
+ struct proc_dir_entry *entry;
+
+ /* create /proc/irq/1234/smp_affinity */
+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+ if (entry) {
+ entry->nlink = 1;
+ entry->data = (void *)(long)irq;
+ entry->read_proc = irq_affinity_read_proc;
+ entry->write_proc = irq_affinity_write_proc;
+ }
+
+ smp_affinity_entry[irq] = entry;
+ }
+#endif
+}
+
+cpumask_t prof_cpu_mask = CPU_MASK_ALL;
+
+void init_irq_proc (void)
+{
+ struct proc_dir_entry *entry;
+ int i;
+
+ /* create /proc/irq */
+ root_irq_dir = proc_mkdir("irq", 0);
+
+ /* create /proc/irq/prof_cpu_mask */
+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+ if (!entry)
+ return;
+
+ entry->nlink = 1;
+ entry->data = (void *)&prof_cpu_mask;
+ entry->read_proc = prof_cpu_mask_read_proc;
+ entry->write_proc = prof_cpu_mask_write_proc;
+
+ /*
+ * Create entries for all existing IRQs.
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ if (irq_descp(i)->handler == &no_irq_type)
+ continue;
+ register_irq_proc(i);
+ }
+}
+#endif
+
+
+#ifdef XEN
+/*
+ * HANDLING OF GUEST-BOUND PHYSICAL IRQS
+ */
+
+#define IRQ_MAX_GUESTS 7
+typedef struct {
+ u8 nr_guests;
+ u8 in_flight;
+ u8 shareable;
+ struct domain *guest[IRQ_MAX_GUESTS];
+} irq_guest_action_t;
+
+static void __do_IRQ_guest(int irq)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ struct domain *d;
+ int i;
+
+ for ( i = 0; i < action->nr_guests; i++ )
+ {
+ d = action->guest[i];
+ if ( !test_and_set_bit(irq, &d->pirq_mask) )
+ action->in_flight++;
+ send_guest_pirq(d, irq);
+ }
+}
+
+int pirq_guest_unmask(struct domain *d)
+{
+ irq_desc_t *desc;
+ int i, j, pirq;
+ u32 m;
+ shared_info_t *s = d->shared_info;
+
+ for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ )
+ {
+ m = d->pirq_mask[i];
+ while ( (j = ffs(m)) != 0 )
+ {
+ m &= ~(1 << --j);
+ pirq = (i << 5) + j;
+ desc = &irq_desc[pirq];
+ spin_lock_irq(&desc->lock);
+ if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
+ test_and_clear_bit(pirq, &d->pirq_mask) &&
+ (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
+ desc->handler->end(pirq);
+ spin_unlock_irq(&desc->lock);
+ }
+ }
+
+ return 0;
+}
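The scan in pirq_guest_unmask() above recovers each pending pirq as (word_index << 5) + bit_index from 32-bit mask words. A small, self-contained C model of just that arithmetic (uses the C library ffs(); the real code applies Xen's ffs to d->pirq_mask):

/* Hedged model of the bit scan above; mask values are arbitrary examples. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int mask[2] = { 0x00000009, 0x80000000 };

	for (unsigned int i = 0; i < 2; i++) {
		unsigned int m = mask[i];
		int j;
		while ((j = ffs(m)) != 0) {
			m &= ~(1u << --j);
			printf("pending pirq %u\n", (i << 5) + j);	/* 0, 3, 63 */
		}
	}
	return 0;
}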
+
+int pirq_guest_bind(struct vcpu *d, int irq, int will_share)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ unsigned long flags;
+ int rc = 0;
+
+ if ( !IS_CAPABLE_PHYSDEV(d->domain) )
+ return -EPERM;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( !(desc->status & IRQ_GUEST) )
+ {
+ if ( desc->action != NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
+ irq, desc->action->name);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action = xmalloc(irq_guest_action_t);
+ if ( (desc->action = (struct irqaction *)action) == NULL )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ action->nr_guests = 0;
+ action->in_flight = 0;
+ action->shareable = will_share;
+
+ desc->depth = 0;
+ desc->status |= IRQ_GUEST;
+ desc->status &= ~IRQ_DISABLED;
+ desc->handler->startup(irq);
+
+ /* Attempt to bind the interrupt target to the correct CPU. */
+#if 0 /* FIXME CONFIG_SMP ??? */
+ if ( desc->handler->set_affinity != NULL )
+ desc->handler->set_affinity(
+ irq, apicid_to_phys_cpu_present(d->processor));
+#endif
+ }
+ else if ( !will_share || !action->shareable )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
+ irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if ( action->nr_guests == IRQ_MAX_GUESTS )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
+ rc = -EBUSY;
+ goto out;
+ }
+
+ action->guest[action->nr_guests++] = d;
+
+ out:
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return rc;
+}
+
+int pirq_guest_unbind(struct domain *d, int irq)
+{
+ irq_desc_t *desc = &irq_desc[irq];
+ irq_guest_action_t *action;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+ (--action->in_flight == 0) )
+ desc->handler->end(irq);
+
+ if ( action->nr_guests == 1 )
+ {
+ desc->action = NULL;
+ xfree(action);
+ desc->depth = 1;
+ desc->status |= IRQ_DISABLED;
+ desc->status &= ~IRQ_GUEST;
+ desc->handler->shutdown(irq);
+ }
+ else
+ {
+ i = 0;
+ while ( action->guest[i] != d )
+ i++;
+ memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+ action->nr_guests--;
+ }
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
+
+#endif
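The in_flight counter ties the three routines above together: __do_IRQ_guest() increments it once per guest that newly receives the pirq, and the physical line is only ended once every such guest has unmasked (pirq_guest_unmask) or unbound (pirq_guest_unbind). A hedged, self-contained C model of that bookkeeping, with plain integers standing in for Xen's domain and bitmap types:

/* Illustrative model only; "guest" is just an index here. */
#include <stdio.h>

#define NGUESTS 3

static unsigned int pending_mask;	/* one bit per guest with the pirq pending */
static int in_flight;

static void deliver_to_guests(void)
{
	for (int g = 0; g < NGUESTS; g++)
		if (!(pending_mask & (1u << g))) {	/* test_and_set_bit() analogue */
			pending_mask |= 1u << g;
			in_flight++;
		}
}

static void guest_unmasks(int g)
{
	if (pending_mask & (1u << g)) {		/* test_and_clear_bit() analogue */
		pending_mask &= ~(1u << g);
		if (--in_flight == 0)
			printf("all guests acked: desc->handler->end(irq)\n");
	}
}

int main(void)
{
	deliver_to_guests();
	for (int g = 0; g < NGUESTS; g++)
		guest_unmasks(g);
	return 0;
}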
+
+#ifdef XEN
+#ifdef IA64
+// this is a temporary hack until real console input is implemented
+irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs *regs)
+{
+	domain_pend_keyboard_interrupt(irq);
+	return IRQ_HANDLED;	/* declared irqreturn_t, so return a value */
+}
+
+void serial_input_init(void)
+{
+ int retval;
+ int irq = 0x30; // FIXME
+
+	retval = request_irq(irq, guest_forward_keyboard_input,
+	                     SA_INTERRUPT, "siminput", NULL);
+	if (retval) {
+		printk("serial_input_init: broken request_irq call\n");
+		while (1);
+	}
+}
+#endif
+#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/ivt.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/ivt.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1975 @@
+
+#ifdef XEN
+//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled??
+// these are all hacked out for now as the entire IVT
+// will eventually be replaced... just want to use it
+// for startup code to handle TLB misses
+//#define ia64_leave_kernel 0
+//#define ia64_ret_from_syscall 0
+//#define ia64_handle_irq 0
+//#define ia64_fault 0
+#define ia64_illegal_op_fault 0
+#define ia64_prepare_handle_unaligned 0
+#define ia64_bad_break 0
+#define ia64_trace_syscall 0
+#define sys_call_table 0
+#define sys_ni_syscall 0
+#include <asm/vhpt.h>
+#endif
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@xxxxxxxxxx>
+ * David Mosberger <davidm@xxxxxxxxxx>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
+ * Fenghua Yu <fenghua.yu@xxxxxxxxx>
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT.
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
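As a sanity check on the layout described above, the sizes work out exactly: 20 long entries of 64 bundles plus 48 short entries of 16 bundles, at 16 bytes per IA-64 bundle, give the required 32KB. A hedged compile-time check in C (not part of the patch):

/* 20*64 + 48*16 = 2048 bundles; 2048 * 16 bytes = 32768 bytes = 32KB. */
enum { IVT_BYTES = (20 * 64 + 48 * 16) * 16 };
typedef char ivt_size_is_32k[(IVT_BYTES == 32 * 1024) ? 1 : -1];  /* compile-time assert */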
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#if 1
+# define PSR_DEFAULT_BITS psr.ac
+#else
+# define PSR_DEFAULT_BITS 0
+#endif
+
+#if 0
+ /*
+  * This lets you track the last eight faults that occurred on the CPU.
+  * Make sure ar.k2 isn't needed for something else before enabling this...
+  */
+# define DBG_FAULT(i)	mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;; mov ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#define MINSTATE_VIRT /* needed by minstate.h */
+#include "minstate.h"
+
+#define FAULT(n)							\
+	mov r31=pr;							\
+	mov r19=n;;		/* prepare to save predicates */	\
+	br.sptk.many dispatch_to_fault_handler
+
+#ifdef XEN
+#define REFLECT(n)							\
+	mov r31=pr;							\
+	mov r19=n;;		/* prepare to save predicates */	\
+	br.sptk.many dispatch_reflection
+#endif
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+ DBG_FAULT(0)
+	/*
+	 * The VHPT vector is invoked when the TLB entry for the virtual page table
+	 * is missing.  This happens only as a result of a previous
+	 * (the "original") TLB miss, which may either be caused by an instruction
+	 * fetch or a data access (or non-access).
+	 *
+	 * What we do here is normal TLB miss handling for the _original_ miss,
+	 * followed by inserting the TLB entry for the virtual page table page that
+	 * the VHPT walker was attempting to access.  The latter gets inserted as
+	 * long as both L1 and L2 have valid mappings for the faulting address.
+	 * The TLB entry for the original miss gets inserted only if
+	 * the L3 entry indicates that the page is present.
+	 *
+	 * do_page_fault gets invoked in the following cases:
+	 *	- the faulting virtual address uses unimplemented address bits
+	 *	- the faulting virtual address has no L1, L2, or L3 mapping
+	 */
+ mov r16=cr.ifa // get address that caused the
TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+ movl r18=PAGE_SHIFT
+ mov r25=cr.itir
+#endif
+ ;;
+ rsm psr.dt // use physical addressing for
data
+ mov r31=pr // save the predicate registers
+#ifdef XEN
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
+ mov r19=IA64_KR(PT_BASE) // get page table base address
+#endif
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into
r17
+ ;;
+ shr r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+ extr.u r26=r25,2,6
+ ;;
+ cmp.ne p8,p0=r18,r26
+ sub r27=r26,r18
+ ;;
+(p8) dep r25=r18,r25,2,6
+(p8) shr r22=r22,r27
+#endif
+ ;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
+ shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the
faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in
place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at
swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) <<
7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all
zeroes?
+ shr.u r18=r22,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page
table entry
+ ;;
+(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
+ dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page
table entry
+ ;;
+(p7) ld8 r18=[r21] // read the L3 PTE
+ mov r19=cr.isr // cr.isr bit 0 tells us if
this is an insn miss
+ ;;
+(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
+ mov r22=cr.iha // get the VHPT address that
caused the TLB miss
+ ;; // avoid RAW on p7
+(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB
miss?
+ dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page
address
+ ;;
+(p10) itc.i r18 // insert the instruction TLB
entry
+(p11) itc.d r18 // insert the data TLB entry
+(p6) br.cond.spnt.many page_fault // handle bad address/page not
present (page fault)
+ mov cr.ifa=r22
+
+#ifdef CONFIG_HUGETLB_PAGE
+(p8) mov cr.itir=r25 // change to default page-size
for VHPT
+#endif
+
+	/*
+	 * Now compute and insert the TLB entry for the virtual page table.  We never
+	 * execute in a page table page, so there is no need to set the exception
+	 * deferral bit.
+	 */
+ adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+ ;;
+(p7) itc.d r24
+ ;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	/*
+	 * Re-check the L2 and L3 page-table entries.  If they changed, we may
+	 * have received a ptc.g between reading the page table and the "itc".
+	 * If so, flush the entry we inserted and retry.
+	 */
+ ld8 r25=[r21] // read L3 PTE again
+ ld8 r26=[r17] // read L2 entry again
+ ;;
+ cmp.ne p6,p7=r26,r20 // did L2 entry change
+ mov r27=PAGE_SHIFT<<2
+ ;;
+(p6) ptc.l r22,r27 // purge PTE page translation
+(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
+ ;;
+(p6) ptc.l r16,r27 // purge translation
+#endif
+
+ mov pr=r31,-1 // restore predicate registers
+ rfi
+END(vhpt_miss)
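The comment at the top of vhpt_miss describes a three-level (L1/L2/L3) walk in which a missing entry at any level, or a cleared present bit in the L3 PTE, sends the fault to page_fault. A hedged C rendering of that walk, with flat "unsigned long" levels and a 0x1 present bit as illustrative stand-ins for the kernel's pgd/pmd/pte types:

#include <stddef.h>

/* Sketch only: returns the PTE the handler would insert with itc.i/itc.d,
 * or NULL where the assembly branches to page_fault. */
static unsigned long *walk_sketch(unsigned long *l1, unsigned long i1,
				  unsigned long i2, unsigned long i3)
{
	unsigned long *l2, *l3;

	if (l1[i1] == 0)
		return NULL;			/* no L1 mapping -> page_fault */
	l2 = (unsigned long *)l1[i1];
	if (l2[i2] == 0)
		return NULL;			/* no L2 mapping -> page_fault */
	l3 = (unsigned long *)l2[i2];
	if (!(l3[i3] & 0x1))
		return NULL;			/* page not present -> page_fault */
	return &l3[i3];				/* PTE to insert into the TLB */
}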
+
+ .org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+ DBG_FAULT(1)
+#ifdef XEN
+ VHPT_CCHAIN_LOOKUP(itlb_miss,i)
+#ifdef VHPT_GLOBAL
+ br.cond.sptk page_fault
+ ;;
+#endif
+#endif
+ /*
+ * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+.itlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault
continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.i r18
+ ;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if
same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(itlb_miss)
+
+ .org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+ DBG_FAULT(2)
+#ifdef XEN
+ VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
+#ifdef VHPT_GLOBAL
+ br.cond.sptk page_fault
+ ;;
+#endif
+#endif
+ /*
+ * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
+ */
+ mov r16=cr.ifa // get virtual address
+ mov r29=b0 // save b0
+ mov r31=pr // save predicates
+dtlb_fault:
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault
continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
+ ;;
+ mov b0=r29
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt page_fault
+ ;;
+ itc.d r18
+ ;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+ dv_serialize_data
+
+ ld8 r19=[r17] // read L3 PTE again and see if
same
+ mov r20=PAGE_SHIFT<<2 // setup page size for purge
+ ;;
+ cmp.ne p7,p0=r18,r19
+ ;;
+(p7) ptc.l r16,r20
+#endif
+ mov pr=r31,-1
+ rfi
+END(dtlb_miss)
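Both itlb_miss and dtlb_miss above share the same SMP guard: after inserting the translation they re-read the PTE and purge the insertion if it changed underneath them. A hedged C-style rendering of that pattern; install_tlb() and purge_tlb() are hypothetical stand-ins for the itc and ptc.l instructions:

extern void install_tlb(unsigned long pte);			/* itc.i / itc.d */
extern void purge_tlb(unsigned long vaddr, unsigned long size);	/* ptc.l        */

static void insert_checked(volatile unsigned long *ptep, unsigned long vaddr,
			   unsigned long page_size)
{
	unsigned long pte = *ptep;		/* first read  (ld8 r18=[r17]) */

	install_tlb(pte);			/* may race with a remote ptc.g */
	if (*ptep != pte)			/* re-read     (ld8 r19=[r17]) */
		purge_tlb(vaddr, page_size);	/* entry was stale: drop it */
}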
+
+ .org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+ DBG_FAULT(3)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
+// br.cond.sptk page_fault
+// ;;
+//#endif
+#endif
+#ifdef XEN
+ mov r31=pr
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+late_alt_itlb_miss:
+ movl r17=PAGE_KERNEL
+ mov r21=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+#else
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=PAGE_KERNEL
+ mov r21=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r31=pr
+ ;;
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into
r21
+ ;;
+ cmp.gt p8,p0=6,r22 // user mode
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk .itlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+#ifdef XEN
+ shr.u r18=r16,55 // move address bit 59 to bit 4
+ ;;
+ and r18=0x10,r18 // bit 4=address-bit(59)
+#else
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+#endif
+ cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to
region 6
+(p8) br.cond.spnt page_fault
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_itlb_miss)
+
+ .org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+ DBG_FAULT(4)
+#ifdef XEN
+//#ifdef VHPT_GLOBAL
+// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
+// br.cond.sptk page_fault
+// ;;
+//#endif
+#endif
+#ifdef XEN
+ mov r31=pr
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+late_alt_dtlb_miss:
+ movl r17=PAGE_KERNEL
+ mov r20=cr.isr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r21=cr.ipsr
+ ;;
+#else
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into
r21
+ ;;
+ cmp.gt p8,p0=6,r22 // access to region 0-5
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
+(p8) mov r29=b0 // save b0
+(p8) br.cond.dptk dtlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
+ tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
+#ifdef XEN
+ shr.u r18=r16,55 // move address bit 59 to bit 4
+ and r19=r19,r16 // clear ed, reserved bits, and
PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ and r18=0x10,r18 // bit 4=address-bit(59)
+#else
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ and r19=r19,r16 // clear ed, reserved bits, and
PTE control bits
+ tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+#endif
+ cmp.ne p8,p0=r0,r23
+(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
+(p8) br.cond.spnt page_fault
+#ifdef XEN
+ ;;
+ // Test for Xen address, if not handle via page_fault
+ // note that 0xf000 (cached) and 0xe800 (uncached) addresses
+ // should be OK.
+ extr.u r22=r16,59,5;;
+ cmp.eq p8,p0=0x1e,r22
+(p8) br.cond.spnt 1f;;
+ cmp.ne p8,p0=0x1d,r22
+(p8) br.cond.sptk page_fault ;;
+1:
+#endif
+
+ dep r21=-1,r21,IA64_PSR_ED_BIT,1
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to
region 6
+(p6) mov cr.ipsr=r21
+ ;;
+(p7) itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(alt_dtlb_miss)
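In the XEN paths of alt_itlb_miss and alt_dtlb_miss above, address bit 59 (rather than Linux's bit 61) is moved down to PTE bit 4, so 0xe800...-style identity mappings come out uncacheable while 0xf000...-style ones stay cached. A hedged one-line helper showing the same extraction; the mask and shift are copied from the code, the function itself is illustrative:

static unsigned long uncached_attr_bit(unsigned long vaddr)
{
	/* shr.u r18=r16,55 ; and r18=0x10,r18  --  bit 59 -> bit 4 */
	return (vaddr >> 55) & 0x10;
}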
+
+ .org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+ /*
+	 * In the absence of kernel bugs, we get here when the virtually mapped linear
+	 * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+	 * Access-bit, or Data Access-bit faults).  If the DTLB entry for the virtual
+	 * page table is missing, a nested TLB miss fault is triggered and control is
+	 * transferred to this point.  When this happens, we look up the pte for the
+	 * faulting address by walking the page table in physical mode and return to
+	 * the continuation point passed in register r30 (or call page_fault if the
+	 * address is not mapped).
+ *
+ * Input: r16: faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Output: r17: physical address of L3 PTE of faulting address
+ * r29: saved b0
+ * r30: continuation address
+ * r31: saved pr
+ *
+ * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
+ */
+ rsm psr.dt // switch to using physical
data addressing
+#ifdef XEN
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
+ mov r19=IA64_KR(PT_BASE) // get the page table base
address
+#endif
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ shr.u r17=r16,61 // get the region number into
r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in
region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting
address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in
place
+
+ srlz.d
+ LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at
swapper_pg_dir
+
+ .pred.rel "mutex", p6, p7
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) <<
7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all
zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page
table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page
table entry
+(p6) br.cond.spnt page_fault
+ mov b0=r30
+ br.sptk.many b0 // return to continuation point
+END(nested_dtlb_miss)
+
+ .org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+#ifdef XEN
+ REFLECT(6)
+#endif
+ DBG_FAULT(6)
+ FAULT(6)
+END(ikey_miss)
+
+
+	//-----------------------------------------------------------------------------------
+	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+ ssm psr.dt
+ ;;
+ srlz.i
+ ;;
+ SAVE_MIN_WITH_COVER
+#ifdef XEN
+ alloc r15=ar.pfs,0,0,4,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out3=cr.itir
+#else
+ alloc r15=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+#endif
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+	srlz.i				// guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r14=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+END(page_fault)
+
+ .org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+#ifdef XEN
+ REFLECT(7)
+#endif
+ DBG_FAULT(7)
+ FAULT(7)
+END(dkey_miss)
+
+ .org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+#ifdef XEN
+ REFLECT(8)
+#endif
+ DBG_FAULT(8)
+ /*
+	 * What we do here is to simply turn on the dirty bit in the PTE.  We need to
+	 * update both the page-table and the TLB entry.  To efficiently access the PTE,
+	 * we address it through the virtual page table.  Most likely, the TLB entry for
+	 * the relevant virtual page table page is still present in the TLB so we can
+	 * normally do this without additional TLB misses.  In case the necessary virtual
+	 * page table TLB entry isn't present, we take a nested TLB miss hit where we
+	 * look up the physical address of the L3 PTE and then continue at label 1 below.
+ */
+ mov r16=cr.ifa // get the address that caused
the fault
+ movl r30=1f // load continuation point in
case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of
L3 PTE
+ mov r29=b0 // save b0 in case of nested
fault
+ mov r31=pr // save pr
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed
bits
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+ ;;
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed
bits
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov pr=r31,-1 // restore pr
+ rfi
+END(dirty_bit)
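The SMP path of dirty_bit above sets the dirty and accessed bits with a single compare-and-exchange (cmpxchg8.acq against ar.ccv) so that a racing update of the PTE is detected, in which case the freshly installed TLB entry is purged. A hedged C analogue; __sync_val_compare_and_swap is a GCC builtin used purely for illustration, and the bit values are illustrative even though they match ia64's _PAGE_A/_PAGE_D:

#define SK_PAGE_A	0x020UL
#define SK_PAGE_D	0x040UL

/* Returns non-zero if the PTE was updated without interference. */
static int mark_dirty_sketch(unsigned long *ptep)
{
	unsigned long old = *ptep;
	unsigned long new = old | SK_PAGE_D | SK_PAGE_A;

	/* succeeds only if nobody changed the PTE since we read it */
	return __sync_val_compare_and_swap(ptep, old, new) == old;
}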
+
+ .org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+#ifdef XEN
+ mov r31=pr;
+ mov r16=cr.isr
+ mov r17=cr.ifa
+ mov r19=9
+ movl r20=0x2400
+ br.sptk.many fast_access_reflect;;
+#endif
+ DBG_FAULT(9)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused
the fault
+ movl r30=1f // load continuation point in
case of nested fault
+ mov r31=pr // save predicates
+#ifdef CONFIG_ITANIUM
+ /*
+ * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+ */
+ mov r17=cr.ipsr
+ ;;
+ mov r18=cr.iip
+ tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
+ ;;
+(p6) mov r16=r18 // if so, use cr.iip instead of
cr.ifa
+#endif /* CONFIG_ITANIUM */
+ ;;
+ thash r17=r16 // compute virtual address of
L3 PTE
+ mov r29=b0 // save b0 in case of nested
fault)
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the accessed bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.i r25 // install updated PTE
+ ;;
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+ dv_serialize_data
+
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
+ ;;
+(p7) ptc.l r16,r24
+ mov b0=r29 // restore b0
+ mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+ ;;
+1: ld8 r18=[r17]
+ ;;
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18 // install updated PTE
+#endif /* !CONFIG_SMP */
+ mov pr=r31,-1
+ rfi
+END(iaccess_bit)
+
+ .org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+#ifdef XEN
+ mov r31=pr;
+ mov r16=cr.isr
+ mov r17=cr.ifa
+ mov r19=10
+ movl r20=0x2800
+ br.sptk.many fast_access_reflect;;
+#endif
+ DBG_FAULT(10)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused
the fault
+ movl r30=1f // load continuation point in
case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of
L3 PTE
+ mov r31=pr
+ mov r29=b0 // save b0 in case of nested
fault)
+#ifdef CONFIG_SMP
+ mov r28=ar.ccv // save ar.ccv
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ mov ar.ccv=r18 // set compare value for cmpxchg
+ or r25=_PAGE_A,r18 // set the dirty bit
+ ;;
+ cmpxchg8.acq r26=[r17],r25,ar.ccv
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ cmp.eq p6,p7=r26,r18
+ ;;
+(p6) itc.d r25 // install updated PTE
+	/*
+	 * Tell the assembler's dependency-violation checker that the above
+	 * "itc" instructions cannot possibly affect the following loads:
+	 */
+ dv_serialize_data
+ ;;
+ ld8 r18=[r17] // read PTE again
+ ;;
+ cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
+ ;;
+(p7) ptc.l r16,r24
+ mov ar.ccv=r28
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
+#endif
+ mov b0=r29 // restore b0
+ mov pr=r31,-1
+ rfi
+END(daccess_bit)
+
+ .org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+	/*
+	 * The streamlined system call entry/exit paths only save/restore the initial
+	 * part of pt_regs.  This implies that the callers of system-calls must adhere
+	 * to the normal procedure calling conventions.
+	 *
+	 *   Registers to be saved & restored:
+	 *	CR registers: cr.ipsr, cr.iip, cr.ifs
+	 *	AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+	 *	others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+	 *   Registers to be restored only:
+	 *	r8-r11: output value from the system call.
+	 *
+	 * During system call exit, scratch registers (including r15) are modified/cleared
+	 * to prevent leaking bits from kernel to user level.
+	 */
+ DBG_FAULT(11)
+#ifdef XEN
+ mov r16=cr.isr
+ mov r17=cr.iim
+ mov r31=pr
+ ;;
+ movl r18=XSI_PSR_IC
+ ;;
+ ld8 r19=[r18]
+ ;;
+	cmp.eq p7,p0=r0,r17		// is this a pseudo-cover?
+(p7) br.spnt.many dispatch_privop_fault
+ ;;
+ // if vpsr.ic is off, we have a hyperprivop
+ // A hyperprivop is hand-coded assembly with psr.ic off
+ // which means no calls, no use of r1-r15 and no memory accesses
+ // except to pinned addresses!
+ cmp4.eq p7,p0=r0,r19
+(p7) br.sptk.many fast_hyperprivop
+ ;;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22 = [r22]
+ ;;
+ adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;;
+ ld4 r23=[r22];;
+ cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm?
+(p6) br.spnt.many dispatch_break_fault
+ ;;
+ br.sptk.many fast_break_reflect
+ ;;
+#endif
+ movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r16=[r16]
+ mov r17=cr.iim
+ mov r18=__IA64_BREAK_SYSCALL
+ mov r21=ar.fpsr
+ mov r29=cr.ipsr
+ mov r19=b6
+ mov r25=ar.unat
+ mov r27=ar.rsc
+ mov r26=ar.pfs
+ mov r28=cr.iip
+#ifndef XEN
+ mov r31=pr // prepare to save predicates
+#endif
+ mov r20=r1
+ ;;
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+ cmp.eq p0,p7=r18,r17 // is this a system call? (p7
<- false, if so)
+(p7) br.cond.spnt non_syscall
+ ;;
+ ld1 r17=[r16] // load
current->thread.on_ustack flag
+ st1 [r16]=r0 // clear
current->thread.on_ustack flag
+ add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for
MINSTATE_START_SAVE_MIN_VIRT
+ ;;
+ invala
+
+ /* adjust return address so we skip over the break instruction: */
+
+ extr.u r8=r29,41,2 // extract ei field from cr.ipsr
+ ;;
+ cmp.eq p6,p7=2,r8 // isr.ei==2?
+ mov r2=r1 // setup r2 for
ia64_syscall_setup
+ ;;
+(p6) mov r8=0 // clear ei to 0
+(p6) adds r28=16,r28 // switch cr.iip to next bundle
cr.ipsr.ei wrapped
+(p7) adds r8=1,r8 // increment ei to next slot
+ ;;
+ cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode
already?
+ dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
+ ;;
+
+ // switch from user to kernel RBS:
+ MINSTATE_START_SAVE_MIN_VIRT
+ br.call.sptk.many b7=ia64_syscall_setup
+ ;;
+ MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ mov r3=NR_syscalls - 1
+ ;;
+(p15) ssm psr.i // restore psr.i
+ // p10==true means out registers are more than 8 or r15's Nat is true
+(p10) br.cond.spnt.many ia64_ret_from_syscall
+ ;;
+ movl r16=sys_call_table
+
+ adds r15=-1024,r15 // r15 contains the syscall
number---subtract 1024
+ movl r2=ia64_ret_from_syscall
+ ;;
+ shladd r20=r15,3,r16 // r20 = sys_call_table +
8*(syscall-1024)
+ cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall <
1024 + NR_syscalls) ?
+ mov rp=r2 // set the real return addr
+ ;;
+(p6) ld8 r20=[r20] // load address of syscall
entry point
+(p7) movl r20=sys_ni_syscall
+
+ add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ ld4 r2=[r2] // r2 =
current_thread_info()->flags
+ ;;
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ cmp.eq p8,p0=r2,r0
+ mov b6=r20
+ ;;
+(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.cond.sptk ia64_trace_syscall
+ // NOT REACHED
+END(break_fault)
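The XEN portion of break_fault above routes a break instruction down one of four paths: a zero immediate is a pseudo-cover, vpsr.ic off means a hand-coded hyperprivop, a match against the domain's reserved breakimm is a hypercall, and anything else is reflected to the guest. A hedged decision-tree rendering in C; the enum names are illustrative, the real code branches straight to the assembly fast paths:

enum break_route { PRIVOP_FAULT, FAST_HYPERPRIVOP, BREAK_HYPERCALL, REFLECT_TO_GUEST };

static enum break_route classify_break(unsigned long iim, int vpsr_ic_on,
				       unsigned long xen_breakimm)
{
	if (iim == 0)
		return PRIVOP_FAULT;		/* pseudo-cover -> dispatch_privop_fault */
	if (!vpsr_ic_on)
		return FAST_HYPERPRIVOP;	/* hand-coded hyperprivop path */
	if (iim == xen_breakimm)
		return BREAK_HYPERCALL;		/* dispatch_break_fault */
	return REFLECT_TO_GUEST;		/* fast_break_reflect */
}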
+
+ .org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+ DBG_FAULT(12)
+ mov r31=pr // prepare to save predicates
+ ;;
+#ifdef XEN
+ mov r30=cr.ivr // pass cr.ivr as first arg
+	// FIXME: this is a hack... use cpuinfo.ksoftirqd because it's
+	// not used anywhere else and we need a place to stash ivr, and
+	// there are no registers available that SAVE_MIN/REST leaves unused
+ movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+ st8 [r29]=r30;;
+ movl r28=slow_interrupt;;
+ mov r29=rp;;
+ mov rp=r28;;
+ br.cond.sptk.many fast_tick_reflect
+ ;;
+slow_interrupt:
+ mov rp=r29;;
+#endif
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef XEN
+ movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+ ld8 out0=[out0];;
+#else
+ mov out0=cr.ivr // pass cr.ivr as first arg
+#endif
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_handle_irq
+END(interrupt)
+
+ .org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ DBG_FAULT(13)
+ FAULT(13)
+
+#ifdef XEN
+	// There is no particular reason for this code to be here, other than
+	// that there happens to be space here that would go unused otherwise.
+	// If this fault ever gets "unreserved", simply move the following code
+	// to a more suitable spot...
+
+GLOBAL_ENTRY(dispatch_break_fault)
+ SAVE_MIN_WITH_COVER
+ ;;
+dispatch_break_fault_post_save:
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_break
+END(dispatch_break_fault)
+#endif
+
+ .org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ DBG_FAULT(14)
+ FAULT(14)
+
+ /*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply move the following code
+	 * to a more suitable spot...
+ *
+ * ia64_syscall_setup() is a separate subroutine so that it can
+ * allocate stacked registers so it can safely demine any
+ * potential NaT values from the input registers.
+ *
+ * On entry:
+ * - executing on bank 0 or bank 1 register set (doesn't matter)
+ * - r1: stack pointer
+ * - r2: current task pointer
+ * - r3: preserved
+ * - r11: original contents (saved ar.pfs to be saved)
+ * - r12: original contents (sp to be saved)
+ * - r13: original contents (tp to be saved)
+ * - r15: original contents (syscall # to be saved)
+ * - r18: saved bsp (after switching to kernel stack)
+ * - r19: saved b6
+ * - r20: saved r1 (gp)
+ * - r21: saved ar.fpsr
+ * - r22: kernel's register backing store base (krbs_base)
+ * - r23: saved ar.bspstore
+ * - r24: saved ar.rnat
+ * - r25: saved ar.unat
+ * - r26: saved ar.pfs
+ * - r27: saved ar.rsc
+ * - r28: saved cr.iip
+ * - r29: saved cr.ipsr
+ * - r31: saved pr
+ * - b0: original contents (to be saved)
+ * On exit:
+ * - executing on bank 1 registers
+ * - psr.ic enabled, interrupts restored
+ * - p10: TRUE if syscall is invoked with more than 8 out
+ * registers or r15's Nat is true
+ * - r1: kernel's gp
+ * - r3: preserved (same as on entry)
+ * - r8: -EINVAL if p10 is true
+ * - r12: points to kernel stack
+ * - r13: points to current task
+ * - p15: TRUE if interrupts need to be re-enabled
+ * - ar.fpsr: set to kernel settings
+ */
+GLOBAL_ENTRY(ia64_syscall_setup)
+#ifndef XEN
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+#endif
+ st8 [r1]=r19 // save b6
+ add r16=PT(CR_IPSR),r1 // initialize first base pointer
+ add r17=PT(R11),r1 // initialize second base
pointer
+ ;;
+ alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
+ st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
+ tnat.nz p8,p0=in0
+
+ st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
+ tnat.nz p9,p0=in1
+(pKStk) mov r18=r0 // make sure r18 isn't
NaT
+ ;;
+
+ st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
+ st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
+ mov r28=b0 // save b0 (2 cyc)
+ ;;
+
+ st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
+ dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
+(p8) mov in0=-1
+ ;;
+
+ st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
+ extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
+ and r8=0x7f,r19 // A // get sof of ar.pfs
+
+ st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+ tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9) mov in1=-1
+ ;;
+
+(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
+ tnat.nz p10,p0=in2
+ add r11=8,r11
+ ;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat
field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
+ tnat.nz p11,p0=in3
+ ;;
+(p10) mov in2=-1
+ tnat.nz p12,p0=in4 // [I0]
+(p11) mov in3=-1
+ ;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
+ shl r18=r18,16 // compute ar.rsc to be used
for "loadrs"
+ ;;
+ st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
+ st8 [r17]=r28,PT(R1)-PT(B0) // save b0
+ tnat.nz p13,p0=in5 // [I0]
+ ;;
+ st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for
"loadrs"
+ st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
+(p12) mov in4=-1
+ ;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
+(p13) mov in5=-1
+ ;;
+ st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
+ tnat.nz p14,p0=in6
+ cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
+ ;;
+ stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see
handle_syscall_error)
+(p9) tnat.nz p10,p0=r15
+ adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes
of scratch)
+
+ st8.spill [r17]=r15 // save r15
+ tnat.nz p8,p0=in7
+ nop.i 0
+
+ mov r13=r2 // establish `current'
+ movl r1=__gp // establish kernel global
pointer
+ ;;
+(p14) mov in6=-1
+(p8) mov in7=-1
+ nop.i 0
+
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ movl r17=FPSR_DEFAULT
+ ;;
+ mov.m ar.fpsr=r17 // set ar.fpsr to kernel
default value
+(p10) mov r8=-EINVAL
+ br.ret.sptk.many b7
+END(ia64_syscall_setup)
+
+ .org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ DBG_FAULT(15)
+ FAULT(15)
+
+ /*
+ * Squatting in this space ...
+ *
+	 * This special case dispatcher for illegal operation faults allows preserved
+	 * registers to be modified through a callback function (asm only) that is
+	 * handed back from the fault handler in r8.  Up to three arguments can be
+	 * passed to the callback function by returning an aggregate with the callback
+	 * as its first element, followed by the arguments.
+ */
+ENTRY(dispatch_illegal_op_fault)
+ SAVE_MIN_WITH_COVER
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
+ mov out0=ar.ec
+ ;;
+ SAVE_REST
+ ;;
+ br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+ alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r9
+ mov out1=r10
+ mov out2=r11
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ mov b6=r8
+ ;;
+ cmp.ne p6,p0=0,r8
+(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
+ br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
+ .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ DBG_FAULT(16)
+ FAULT(16)
+
+#ifdef XEN
+	// There is no particular reason for this code to be here, other than
+	// that there happens to be space here that would go unused otherwise.
+	// If this fault ever gets "unreserved", simply move the following code
+	// to a more suitable spot...
+
+ENTRY(dispatch_privop_fault)
+ SAVE_MIN_WITH_COVER
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first
in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.itir
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_privop
+END(dispatch_privop_fault)
+#endif
+
+
+ .org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ DBG_FAULT(17)
+ FAULT(17)
+
+ENTRY(non_syscall)
+ SAVE_MIN_WITH_COVER
+
+	// There is no particular reason for this code to be here, other than
+	// that there happens to be space here that would go unused otherwise.
+	// If this fault ever gets "unreserved", simply move the following code
+	// to a more suitable spot...
+
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=cr.iim
+ add out1=16,sp
+ adds r3=8,r2 // set up second base pointer for
SAVE_REST
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ movl r15=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+ br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore
return addr
+END(non_syscall)
+
+ .org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ DBG_FAULT(18)
+ FAULT(18)
+
+ /*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply move the following code
+	 * to a more suitable spot...
+ */
+
+ENTRY(dispatch_unaligned_handler)
+ SAVE_MIN_WITH_COVER
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first
in insn group!)
+ mov out0=cr.ifa
+ adds out1=16,sp
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+ .org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ DBG_FAULT(19)
+ FAULT(19)
+
+ /*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply move the following code
+	 * to a more suitable spot...
+ */
+
+ENTRY(dispatch_to_fault_handler)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: fault vector number (e.g., 24 for General Exception)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=r15
+ mov out1=cr.isr
+ mov out2=cr.ifa
+ mov out3=cr.iim
+ mov out4=cr.itir
+ ;;
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+ srlz.i // guarantee that interruption
collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
for SAVE_REST
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+#ifdef XEN
+ REFLECT(20)
+#endif
+ DBG_FAULT(20)
+ mov r16=cr.ifa
+ rsm psr.dt
+ /*
+	 * The Linux page fault handler doesn't expect non-present pages to be in
+	 * the TLB.  Flush the existing entry now, so we meet that expectation.
+ */
+ mov r17=PAGE_SHIFT<<2
+ ;;
+ ptc.l r16,r17
+ ;;
+ mov r31=pr
+ srlz.d
+ br.sptk.many page_fault
+END(page_not_present)
+
+ .org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+#ifdef XEN
+ REFLECT(21)
+#endif
+ DBG_FAULT(21)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(key_permission)
+
+ .org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+#ifdef XEN
+ REFLECT(22)
+#endif
+ DBG_FAULT(22)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(iaccess_rights)
+
+ .org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+#ifdef XEN
+ mov r31=pr;
+ mov r16=cr.isr
+ mov r17=cr.ifa
+ mov r19=23
+ movl r20=0x5300
+ br.sptk.many fast_access_reflect;;
+#endif
+ DBG_FAULT(23)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.sptk.many page_fault
+END(daccess_rights)
+
+ .org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+ DBG_FAULT(24)
+ mov r16=cr.isr
+ mov r31=pr
+ ;;
+#ifdef XEN
+ cmp4.ge p6,p0=0x20,r16
+(p6) br.sptk.many dispatch_privop_fault
+#else
+ cmp4.eq p6,p0=0,r16
+(p6) br.sptk.many dispatch_illegal_op_fault
+#endif
+ ;;
+ mov r19=24 // fault number
+ br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+ .org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+#ifdef XEN
+ REFLECT(25)
+#endif
+ DBG_FAULT(25)
+ rsm psr.dfh // ensure we can access fph
+ ;;
+ srlz.d
+ mov r31=pr
+ mov r19=25
+ br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+ .org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+#ifdef XEN
+ REFLECT(26)
+#endif
+ DBG_FAULT(26)
+ FAULT(26)
+END(nat_consumption)
+
+ .org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+#ifdef XEN
+ // this probably need not reflect...
+ REFLECT(27)
+#endif
+ DBG_FAULT(27)
+ /*
+	 * A [f]chk.[as] instruction needs to take the branch to the recovery code
+	 * but this part of the architecture is not implemented in hardware on some
+	 * CPUs, such as Itanium.  Thus, in general we need to emulate the behavior.
+	 * IIM contains the relative target (not yet sign extended).  So after sign
+	 * extending it we simply add it to IIP.  We also need to reset the EI field
+	 * of the IPSR to zero, i.e., the slot to restart into.
+	 *
+	 * cr.iim contains zero_ext(imm21)
+ */
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4)
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi // and go back
+END(speculation_vector)
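The emulation described above amounts to: sign-extend the 21-bit immediate from cr.iim (scaled to a bundle offset), add it to cr.iip, and clear the EI slot field of cr.ipsr. A hedged C rendering, assuming GCC's arithmetic right shift of signed values:

static void emulate_chk_branch(unsigned long *iip, unsigned long *ipsr,
			       unsigned long iim)
{
	/* shl r18,43 then shr r18,39: sign-extend imm21, net scale by 16 bytes */
	long off = (long)(iim << 43) >> 39;

	*iip += off;			/* retarget to the recovery code */
	*ipsr &= ~(3UL << 41);		/* clear EI: restart in slot 0 */
}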
+
+ .org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ DBG_FAULT(28)
+ FAULT(28)
+
+ .org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+#ifdef XEN
+ REFLECT(29)
+#endif
+ DBG_FAULT(29)
+ FAULT(29)
+END(debug_vector)
+
+ .org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+#ifdef XEN
+ REFLECT(30)
+#endif
+ DBG_FAULT(30)
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+ ;;
+ br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+ .org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+#ifdef XEN
+ REFLECT(31)
+#endif
+ DBG_FAULT(31)
+ FAULT(31)
+END(unsupported_data_reference)
+
+ .org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+#ifdef XEN
+ REFLECT(32)
+#endif
+ DBG_FAULT(32)
+ FAULT(32)
+END(floating_point_fault)
+
+ .org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+#ifdef XEN
+ REFLECT(33)
+#endif
+ DBG_FAULT(33)
+ FAULT(33)
+END(floating_point_trap)
+
+ .org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+#ifdef XEN
+ REFLECT(34)
+#endif
+ DBG_FAULT(34)
+ FAULT(34)
+END(lower_privilege_trap)
+
+ .org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+#ifdef XEN
+ REFLECT(35)
+#endif
+ DBG_FAULT(35)
+ FAULT(35)
+END(taken_branch_trap)
+
+ .org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+#ifdef XEN
+ REFLECT(36)
+#endif
+ DBG_FAULT(36)
+ FAULT(36)
+END(single_step_trap)
+
+ .org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ DBG_FAULT(37)
+ FAULT(37)
+
+ .org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ DBG_FAULT(38)
+ FAULT(38)
+
+ .org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ DBG_FAULT(39)
+ FAULT(39)
+
+ .org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ DBG_FAULT(40)
+ FAULT(40)
+
+ .org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ DBG_FAULT(41)
+ FAULT(41)
+
+ .org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ DBG_FAULT(42)
+ FAULT(42)
+
+ .org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ DBG_FAULT(43)
+ FAULT(43)
+
+ .org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ DBG_FAULT(44)
+ FAULT(44)
+
+ .org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+#ifdef XEN
+ REFLECT(45)
+#endif
+ DBG_FAULT(45)
+ FAULT(45)
+END(ia32_exception)
+
+ .org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(ia32_intercept)
+#ifdef XEN
+ REFLECT(46)
+#endif
+ DBG_FAULT(46)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+ mov r16=cr.isr
+ ;;
+ extr.u r17=r16,16,8 // get ISR.code
+ mov r18=ar.eflag
+ mov r19=cr.iim // old eflag value
+ ;;
+ cmp.ne p6,p0=2,r17
+(p6) br.cond.spnt 1f // not a system flag fault
+ xor r16=r18,r19
+ ;;
+ extr.u r17=r16,18,1 // get the eflags.ac bit
+ ;;
+ cmp.eq p6,p0=0,r17
+(p6) br.cond.spnt 1f // eflags.ac bit didn't change
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi
+
+1:
+#endif // CONFIG_IA32_SUPPORT
+ FAULT(46)
+END(ia32_intercept)
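Under CONFIG_IA32_SUPPORT, ia32_intercept above swallows the intercept (rfi) only when it is a system-flag fault (ISR.code == 2) and the EFLAGS AC bit actually changed between the old value in cr.iim and the new one in ar.eflag; otherwise it falls through to FAULT(46). A hedged helper showing just the AC-bit comparison done by the xor/extr.u pair:

static int ac_bit_changed(unsigned long old_eflags, unsigned long new_eflags)
{
	/* xor r16=r18,r19 ; extr.u r17=r16,18,1 -- did EFLAGS bit 18 flip? */
	return ((old_eflags ^ new_eflags) >> 18) & 1;
}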
+
+ .org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+ENTRY(ia32_interrupt)
+#ifdef XEN
+ REFLECT(47)
+#endif
+ DBG_FAULT(47)
+#ifdef CONFIG_IA32_SUPPORT
+ mov r31=pr
+ br.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+END(ia32_interrupt)
+
+ .org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ DBG_FAULT(48)
+ FAULT(48)
+
+ .org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ DBG_FAULT(49)
+ FAULT(49)
+
+ .org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ DBG_FAULT(50)
+ FAULT(50)
+
+ .org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ DBG_FAULT(51)
+ FAULT(51)
+
+ .org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ DBG_FAULT(52)
+ FAULT(52)
+
+ .org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ DBG_FAULT(53)
+ FAULT(53)
+
+ .org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ DBG_FAULT(54)
+ FAULT(54)
+
+ .org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ DBG_FAULT(55)
+ FAULT(55)
+
+ .org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ DBG_FAULT(56)
+ FAULT(56)
+
+ .org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ DBG_FAULT(57)
+ FAULT(57)
+
+ .org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ DBG_FAULT(58)
+ FAULT(58)
+
+ .org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ DBG_FAULT(59)
+ FAULT(59)
+
+ .org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ DBG_FAULT(60)
+ FAULT(60)
+
+ .org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ DBG_FAULT(61)
+ FAULT(61)
+
+ .org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ DBG_FAULT(62)
+ FAULT(62)
+
+ .org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ DBG_FAULT(63)
+ FAULT(63)
+
+ .org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ DBG_FAULT(64)
+ FAULT(64)
+
+ .org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ DBG_FAULT(65)
+ FAULT(65)
+
+ .org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ DBG_FAULT(66)
+ FAULT(66)
+
+ .org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ DBG_FAULT(67)
+ FAULT(67)
+
+#ifdef XEN
+ .org ia64_ivt+0x8000
+GLOBAL_ENTRY(dispatch_reflection)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out4=r15
+ mov out0=cr.ifa
+ adds out1=16,sp
+ mov out2=cr.isr
+ mov out3=cr.iim
+// mov out3=cr.itir
+
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+	srlz.i			// guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_reflection
+END(dispatch_reflection)
+
+#define SAVE_MIN_COVER_DONE DO_SAVE_MIN(,mov r30=cr.ifs,)
+
+// same as dispatch_break_fault except cover has already been done
+GLOBAL_ENTRY(dispatch_slow_hyperprivop)
+ SAVE_MIN_COVER_DONE
+ ;;
+ br.sptk.many dispatch_break_fault_post_save
+END(dispatch_slow_hyperprivop)
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ /*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply move the following code to a more
+	 * suitable spot...
+ */
+
+ // IA32 interrupt entry point
+
+ENTRY(dispatch_to_ia32_handler)
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | PSR_DEFAULT_BITS
+ ;;
+	srlz.i			// guarantee that interruption collection is on
+ ;;
+(p15) ssm psr.i
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15
+(p6) br.call.dpnt.many b6=non_ia32_syscall
+
+	adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp	// 16 byte hole per SW conventions
+ adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+ ;;
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ld8 r8=[r14] // get r8
+ ;;
+	st8 [r15]=r8		// save original EAX in r1 (IA32 procs don't use the GP)
+ ;;
+	alloc r15=ar.pfs,0,0,6,0	// must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == eax (syscall number)
+ mov r15=IA32_NR_syscalls
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+ ;;
+ ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
+ ;;
+ ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
+ adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+ ld4 out4=[r14] // r15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
+ ld4 r2=[r2] // r2 = current_thread_info()->flags
+ ;;
+ ld8 r16=[r16]
+ and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ ;;
+ mov b6=r16
+ movl r15=ia32_ret_from_syscall
+ cmp.eq p8,p0=r2,r0
+ ;;
+ mov rp=r15
+(p8) br.call.sptk.many b6=b6
+ br.cond.sptk ia32_trace_syscall
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.many rp=ia32_bad_interrupt
+.ret1: movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp
+END(dispatch_to_ia32_handler)
+
+#endif /* CONFIG_IA32_SUPPORT */
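
For reference, the table lookup that dispatch_to_ia32_handler performs above can be read as the C sketch below. This is illustrative only and not part of the patch: ia32_syscall_table and IA32_NR_syscalls are the symbols the assembly already references, while ia32_syscall_fn and the argument names are invented here, and the assumption that entry 0 is the ni_syscall stub follows the "force ni_syscall" comment above.

typedef long (*ia32_syscall_fn)(unsigned int ebx, unsigned int ecx,
				unsigned int edx, unsigned int esi,
				unsigned int edi, unsigned int ebp);

long ia32_dispatch_sketch(unsigned int eax, unsigned int ebx, unsigned int ecx,
			  unsigned int edx, unsigned int esi, unsigned int edi,
			  unsigned int ebp)
{
	extern ia32_syscall_fn ia32_syscall_table[];	/* referenced above */
	ia32_syscall_fn fn;

	/* "(p6) shladd r16=r8,3,r16": scale eax into the table only when it
	 * is a valid syscall number; otherwise r16 keeps pointing at entry 0,
	 * which (assumption) is the ni_syscall stub. */
	if (eax < IA32_NR_syscalls)
		fn = ia32_syscall_table[eax];
	else
		fn = ia32_syscall_table[0];

	/* arguments arrive in the i386 order the loads above set up:
	 * ebx, ecx, edx, esi, edi, ebp -> out0..out5 */
	return fn(ebx, ecx, edx, esi, edi, ebp);
}
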
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/mm_init.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/mm_init.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,549 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#ifdef XEN
+#include <xen/sched.h>
+#endif
+#include <linux/bootmem.h>
+#include <linux/efi.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#ifndef XEN
+#include <linux/personality.h>
+#endif
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#ifndef XEN
+#include <linux/proc_fs.h>
+#endif
+
+#ifndef XEN
+#include <asm/a.out.h>
+#endif
+#include <asm/bitops.h>
+#include <asm/dma.h>
+#ifndef XEN
+#include <asm/ia32.h>
+#endif
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/numa.h>
+#include <asm/patch.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/tlb.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/mca.h>
+
+#ifndef XEN
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+#endif
+
+extern void ia64_tlb_init (void);
+
+unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+unsigned long vmalloc_end = VMALLOC_END_INIT;
+EXPORT_SYMBOL(vmalloc_end);
+struct page *vmem_map;
+EXPORT_SYMBOL(vmem_map);
+#endif
+
+static int pgt_cache_water[2] = { 25, 50 };
+
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+
+#ifdef XEN
+void *high_memory;
+EXPORT_SYMBOL(high_memory);
+
+/////////////////////////////////////////////
+// following from linux-2.6.7/mm/mmap.c
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware. The expected
+ * behavior is in parens:
+ *
+ * map_type prot
+ * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
+ * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (yes) yes w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (copy) copy w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+ __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+ __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
+
+void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+ printf("insert_vm_struct: called, not implemented yet\n");
+}
+
+/////////////////////////////////////////////
+//following from linux/mm/memory.c
+
+#ifndef __ARCH_HAS_4LEVEL_HACK
+/*
+ * Allocate page upper directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level or three-level page table, this ends up actually being
+ * entirely optimized away.
+ */
+pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ pud_t *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pud_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pgd_present(*pgd)) {
+ pud_free(new);
+ goto out;
+ }
+ pgd_populate(mm, pgd, new);
+ out:
+ return pud_offset(pgd, address);
+}
+
+/*
+ * Allocate page middle directory.
+ *
+ * We've already handled the fast-path in-line, and we own the
+ * page table lock.
+ *
+ * On a two-level page table, this ends up actually being entirely
+ * optimized away.
+ */
+pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ pmd_t *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pmd_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pud_present(*pud)) {
+ pmd_free(new);
+ goto out;
+ }
+ pud_populate(mm, pud, new);
+ out:
+ return pmd_offset(pud, address);
+}
+#endif
+
+pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+ if (!pmd_present(*pmd)) {
+ struct page *new;
+
+ spin_unlock(&mm->page_table_lock);
+ new = pte_alloc_one(mm, address);
+ spin_lock(&mm->page_table_lock);
+ if (!new)
+ return NULL;
+
+ /*
+ * Because we dropped the lock, we should re-check the
+ * entry, as somebody else could have populated it..
+ */
+ if (pmd_present(*pmd)) {
+ pte_free(new);
+ goto out;
+ }
+ inc_page_state(nr_page_table_pages);
+ pmd_populate(mm, pmd, new);
+ }
+out:
+ return pte_offset_map(pmd, address);
+}
+/////////////////////////////////////////////
+#endif /* XEN */
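
The three allocators above all follow the same race-handling pattern: drop mm->page_table_lock around the (possibly sleeping) allocation, retake it, and re-check whether another CPU populated the entry in the meantime. A condensed sketch follows; the level_* names are placeholders for the level-specific helpers, not real functions.

void *page_table_alloc_sketch(struct mm_struct *mm, void *parent,
			      unsigned long address)
{
	void *new;

	spin_unlock(&mm->page_table_lock);	/* the allocation may sleep */
	new = level_alloc_one(mm, address);	/* placeholder helper */
	spin_lock(&mm->page_table_lock);
	if (!new)
		return NULL;

	/* While the lock was dropped another CPU may have populated the
	 * entry; if so, free our copy and fall through to the existing one. */
	if (level_present(parent))		/* placeholder helper */
		level_free_one(new);		/* placeholder helper */
	else
		level_populate(mm, parent, new);	/* placeholder helper */

	return level_offset(parent, address);	/* placeholder helper */
}
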
+
+#if 0
+void
+update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+{
+ unsigned long addr;
+ struct page *page;
+
+ if (!pte_exec(pte))
+ return; /* not an executable page... */
+
+ page = pte_page(pte);
+	/* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
+ addr = (unsigned long) page_address(page);
+
+ if (test_bit(PG_arch_1, &page->flags))
+		return;				/* i-cache is already coherent with d-cache */
+
+ flush_icache_range(addr, addr + PAGE_SIZE);
+ set_bit(PG_arch_1, &page->flags); /* mark page as clean */
+}
+#endif
+
+inline void
+ia64_set_rbs_bot (void)
+{
+#ifdef XEN
+ unsigned stack_size = MAX_USER_STACK_SIZE;
+#else
+ unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16;
+#endif
+
+ if (stack_size > MAX_USER_STACK_SIZE)
+ stack_size = MAX_USER_STACK_SIZE;
+ current->arch._thread.rbs_bot = STACK_TOP - stack_size;
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to setup the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+#ifdef XEN
+printf("ia64_init_addr_space: called, not implemented\n");
+#else
+ struct vm_area_struct *vma;
+
+ ia64_set_rbs_bot();
+
+ /*
+	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
+	 * the problem.  When the process attempts to write to the register backing store
+ * for the first time, it will get a SEGFAULT in this case.
+ */
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ memset(vma, 0, sizeof(*vma));
+ vma->vm_mm = current->mm;
+ vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK;
+ vma->vm_end = vma->vm_start + PAGE_SIZE;
+ vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
+		vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
+ insert_vm_struct(current->mm, vma);
+ }
+
+	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
+ if (!(current->personality & MMAP_PAGE_ZERO)) {
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ memset(vma, 0, sizeof(*vma));
+ vma->vm_mm = current->mm;
+ vma->vm_end = PAGE_SIZE;
+			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
+			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+ insert_vm_struct(current->mm, vma);
+ }
+ }
+#endif
+}
+
+void
+setup_gate (void)
+{
+ printk("setup_gate not-implemented.\n");
+}
+
+void __devinit
+ia64_mmu_init (void *my_cpu_data)
+{
+ unsigned long psr, pta, impl_va_bits;
+ extern void __devinit tlb_init (void);
+ int cpu;
+
+#ifdef CONFIG_DISABLE_VHPT
+# define VHPT_ENABLE_BIT 0
+#else
+# define VHPT_ENABLE_BIT 1
+#endif
+
+ /* Pin mapping for percpu area into TLB */
+ psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
+ pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
+ PERCPU_PAGE_SHIFT);
+
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+
+ /*
+	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
+	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
+	 * virtual address space are implemented but if we pick a large enough page size
+	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
+	 * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
+	 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
+	 * problem in practice.  Alternatively, we could truncate the top of the mapped
+	 * address space to not permit mappings that would overlap with the VMLPT.
+ * --davidm 00/12/06
+ */
+# define pte_bits 3
+# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
+ /*
+	 * The virtual page table has to cover the entire implemented address space within
+	 * a region even though not all of this space may be mappable.  The reason for
+	 * this is that the Access bit and Dirty bit fault handlers perform
+	 * non-speculative accesses to the virtual page table, so the address range of the
+	 * virtual page table itself needs to be covered by the virtual page table.
+ */
+# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
+# define POW2(n) (1ULL << (n))
+
+ impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+
+ if (impl_va_bits < 51 || impl_va_bits > 61)
+		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+
+#ifdef XEN
+ vhpt_init();
+#endif
+#if 0
+ /* place the VMLPT at the end of each page-table mapped region: */
+ pta = POW2(61) - POW2(vmlpt_bits);
+
+ if (POW2(mapped_space_bits) >= pta)
+		panic("mm/init: overlap between virtually mapped linear page table and "
+ "mapped kernel space!");
+ /*
+ * Set the (virtually mapped linear) page table address. Bit
+ * 8 selects between the short and long format, bits 2-7 the
+ * size of the table, and bit 0 whether the VHPT walker is
+ * enabled.
+ */
+ ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
+#endif
+ ia64_tlb_init();
+
+#ifdef CONFIG_HUGETLB_PAGE
+ ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+ ia64_srlz_d();
+#endif
+
+ cpu = smp_processor_id();
+
+#ifndef XEN
+ /* mca handler uses cr.lid as key to pick the right entry */
+ ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
+
+	/* insert this percpu data information into our list for MCA recovery purposes */
+	ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
+	/* Also save per-cpu tlb flush recipe for use in physical mode mca handler */
+ ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
+ ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
+ ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
+ ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
+ ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
+#endif
+}
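
The #if 0 block above programs cr.pta exactly as its comment describes (bit 0 walker enable, bits 7:2 table size, bit 8 format). A small illustrative helper, assuming the same PAGE_SHIFT and pte_bits/vmlpt_bits definitions used above:

/* Illustrative only -- mirrors the commented-out PTA computation above. */
static unsigned long make_pta_sketch(unsigned long impl_va_bits, int vhpt_on)
{
	unsigned long pte_bits = 3;	/* log2(sizeof(pte)) */
	unsigned long vmlpt_bits = impl_va_bits - PAGE_SHIFT + pte_bits;
	unsigned long pta = (1UL << 61) - (1UL << vmlpt_bits);	/* VMLPT at end of region */

	return pta
		| (0UL << 8)		/* short-format VHPT */
		| (vmlpt_bits << 2)	/* log2 of the table size */
		| (vhpt_on ? 1 : 0);	/* bit 0: VHPT walker enable */
}

For example, with impl_va_bits = 51 and 16KB pages (PAGE_SHIFT = 14), vmlpt_bits is 40 and the VMLPT occupies the top 2^40 bytes of each region.
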
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+
+int
+create_mem_map_page_table (u64 start, u64 end, void *arg)
+{
+ unsigned long address, start_page, end_page;
+ struct page *map_start, *map_end;
+ int node;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ start_page = (unsigned long) map_start & PAGE_MASK;
+ end_page = PAGE_ALIGN((unsigned long) map_end);
+ node = paddr_to_nid(__pa(start));
+
+ for (address = start_page; address < end_page; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ if (pgd_none(*pgd))
+			pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pmd = pmd_offset(pgd, address);
+
+ if (pmd_none(*pmd))
+			pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pte = pte_offset_kernel(pmd, address);
+
+ if (pte_none(*pte))
+			set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
+ PAGE_KERNEL));
+ }
+ return 0;
+}
+
+struct memmap_init_callback_data {
+ struct page *start;
+ struct page *end;
+ int nid;
+ unsigned long zone;
+};
+
+static int
+virtual_memmap_init (u64 start, u64 end, void *arg)
+{
+ struct memmap_init_callback_data *args;
+ struct page *map_start, *map_end;
+
+ args = (struct memmap_init_callback_data *) arg;
+
+ map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
+ map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
+
+ if (map_start < args->start)
+ map_start = args->start;
+ if (map_end > args->end)
+ map_end = args->end;
+
+ /*
+	 * We have to initialize "out of bounds" struct page elements that fit completely
+	 * on the same pages that were allocated for the "in bounds" elements because they
+ * may be referenced later (and found to be "reserved").
+ */
+	map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
+	map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
+ / sizeof(struct page));
+
+ if (map_start < map_end)
+		memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
+ args->nid, args->zone, page_to_pfn(map_start));
+ return 0;
+}
+
+void
+memmap_init (struct page *start, unsigned long size, int nid,
+ unsigned long zone, unsigned long start_pfn)
+{
+ if (!vmem_map)
+ memmap_init_zone(start, size, nid, zone, start_pfn);
+ else {
+ struct memmap_init_callback_data args;
+
+ args.start = start;
+ args.end = start + size;
+ args.nid = nid;
+ args.zone = zone;
+
+ efi_memmap_walk(virtual_memmap_init, &args);
+ }
+}
+
+int
+ia64_pfn_valid (unsigned long pfn)
+{
+ char byte;
+ struct page *pg = pfn_to_page(pfn);
+
+ return (__get_user(byte, (char *) pg) == 0)
+ && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
+ || (__get_user(byte, (char *) (pg + 1) - 1) == 0));
+}
+EXPORT_SYMBOL(ia64_pfn_valid);
+
+int
+find_largest_hole (u64 start, u64 end, void *arg)
+{
+ u64 *max_gap = arg;
+
+ static u64 last_end = PAGE_OFFSET;
+
+ /* NOTE: this algorithm assumes efi memmap table is ordered */
+
+#ifdef XEN
+//printf("find_largest_hole: start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg);
+#endif
+ if (*max_gap < (start - last_end))
+ *max_gap = start - last_end;
+ last_end = end;
+#ifdef XEN
+//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end);
+#endif
+ return 0;
+}
+#endif /* CONFIG_VIRTUAL_MEM_MAP */
+
+static int
+count_reserved_pages (u64 start, u64 end, void *arg)
+{
+ unsigned long num_reserved = 0;
+ unsigned long *count = arg;
+
+ for (; start < end; start += PAGE_SIZE)
+ if (PageReserved(virt_to_page(start)))
+ ++num_reserved;
+ *count += num_reserved;
+ return 0;
+}
+
+/*
+ * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
+ * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
+ * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
+ * useful for performance testing, but conceivably could also come in handy for debugging
+ * purposes.
+ */
+
+static int nolwsys;
+
+static int __init
+nolwsys_setup (char *s)
+{
+ nolwsys = 1;
+ return 1;
+}
+
+__setup("nolwsys", nolwsys_setup);
+
+void
+mem_init (void)
+{
+#ifdef CONFIG_PCI
+ /*
+	 * This needs to be called _after_ the command line has been parsed but _before_
+	 * any drivers that may need the PCI DMA interface are initialized or bootmem has
+ * been freed.
+ */
+ platform_dma_init();
+#endif
+
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/pcdp.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/pcdp.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,120 @@
+/*
+ * Parse the EFI PCDP table to locate the console device.
+ *
+ * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P.
+ * Khalid Aziz <khalid.aziz@xxxxxx>
+ * Alex Williamson <alex.williamson@xxxxxx>
+ * Bjorn Helgaas <bjorn.helgaas@xxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/console.h>
+#include <linux/efi.h>
+#include <linux/serial.h>
+#ifdef XEN
+#include <linux/errno.h>
+#endif
+#include "pcdp.h"
+
+static int __init
+setup_serial_console(struct pcdp_uart *uart)
+{
+#ifdef XEN
+ extern struct ns16550_defaults ns16550_com1;
+ ns16550_com1.baud = uart->baud;
+ ns16550_com1.io_base = uart->addr.address;
+ if (uart->bits)
+ ns16550_com1.data_bits = uart->bits;
+ return 0;
+#else
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+ int mmio;
+ static char options[64];
+
+ mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY);
+ snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d",
+ mmio ? "mmio" : "io", uart->addr.address, uart->baud,
+ uart->bits ? uart->bits : 8);
+
+ return early_serial_console_init(options);
+#else
+ return -ENODEV;
+#endif
+#endif
+}
+
+#ifndef XEN
+static int __init
+setup_vga_console(struct pcdp_vga *vga)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
+ if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) {
+		printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n");
+ return -ENODEV;
+ }
+
+ conswitchp = &vga_con;
+ printk(KERN_INFO "PCDP: VGA console\n");
+ return 0;
+#else
+ return -ENODEV;
+#endif
+}
+#endif
+
+int __init
+efi_setup_pcdp_console(char *cmdline)
+{
+ struct pcdp *pcdp;
+ struct pcdp_uart *uart;
+ struct pcdp_device *dev, *end;
+ int i, serial = 0;
+
+ pcdp = efi.hcdp;
+ if (!pcdp)
+ return -ENODEV;
+
+#ifndef XEN
+ printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp));
+#endif
+
+ if (strstr(cmdline, "console=hcdp")) {
+ if (pcdp->rev < 3)
+ serial = 1;
+ } else if (strstr(cmdline, "console=")) {
+#ifndef XEN
+ printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n");
+#endif
+ return -ENODEV;
+ }
+
+ if (pcdp->rev < 3 && efi_uart_console_only())
+ serial = 1;
+
+ for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) {
+ if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) {
+ if (uart->type == PCDP_CONSOLE_UART) {
+ return setup_serial_console(uart);
+ }
+ }
+ }
+
+#ifndef XEN
+ end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length);
+ for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts);
+ dev < end;
+ dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) {
+ if (dev->flags & PCDP_PRIMARY_CONSOLE) {
+ if (dev->type == PCDP_CONSOLE_VGA) {
+				return setup_vga_console((struct pcdp_vga *) dev);
+ }
+ }
+ }
+#endif
+
+ return -ENODEV;
+}
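
For context, a typical early-boot call might look like the sketch below (hypothetical caller, not part of this patch); a zero return means the PCDP described a usable UART and ns16550_com1 has been primed with its baud rate and I/O base.

/* Hypothetical caller -- illustrative only. */
void pcdp_console_probe_sketch(char *cmdline)
{
	if (efi_setup_pcdp_console(cmdline) == 0)
		return;		/* ns16550_com1 now holds the PCDP UART settings */

	/* Otherwise fall back to whatever an explicit "console=" option
	 * selected, or to the build-time serial defaults. */
}
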
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/privop.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/privop.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1130 @@
+/*
+ * Privileged operation "API" handling functions.
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <asm/privop.h>
+#include <asm/vcpu.h>
+#include <asm/processor.h>
+#include <asm/delay.h> // Debug only
+//#include <debug.h>
+
+long priv_verbose=0;
+
+/**************************************************************************
+Hypercall bundle creation
+**************************************************************************/
+
+
+void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret)
+{
+ INST64_A5 slot0;
+ INST64_I19 slot1;
+ INST64_B4 slot2;
+ IA64_BUNDLE bundle;
+
+	// slot0: mov r2 = hypnum (low 20 bits)
+ slot0.inst = 0;
+ slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9;
+ slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7;
+ slot0.imm5c = hypnum >> 16; slot0.s = 0;
+ // slot1: break brkimm
+ slot1.inst = 0;
+ slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0;
+ slot1.imm20 = brkimm; slot1.i = brkimm >> 20;
+ // if ret slot2: br.ret.sptk.many rp
+ // else slot2: br.cond.sptk.many rp
+ slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0;
+ slot2.wh = 0; slot2.d = 0; slot2.major = 0x0;
+ if (ret) {
+ slot2.btype = 4; slot2.x6 = 0x21;
+ }
+ else {
+ slot2.btype = 0; slot2.x6 = 0x20;
+ }
+
+ bundle.i64[0] = 0; bundle.i64[1] = 0;
+ bundle.template = 0x11;
+ bundle.slot0 = slot0.inst; bundle.slot2 = slot2.inst;
+ bundle.slot1a = slot1.inst; bundle.slot1b = slot1.inst >> 18;
+
+ *imva++ = bundle.i64[0]; *imva = bundle.i64[1];
+}
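
build_hypercall_bundle hand-packs a 128-bit bundle; the slot1a/slot1b split above exists because slot 1 straddles the two 64-bit words. A sketch of the layout it assumes (template in bits 4:0, slot 0 in bits 45:5, slot 1 in bits 86:46, slot 2 in bits 127:87), written out as plain shifts:

/* Illustrative only -- equivalent to the field assignments above. */
static void pack_bundle_sketch(unsigned long w[2], unsigned long template,
			       unsigned long slot0, unsigned long slot1,
			       unsigned long slot2)
{
	const unsigned long MASK_41 = (1UL << 41) - 1;

	w[0] = (template & 0x1f)
	     | ((slot0 & MASK_41) << 5)
	     | (slot1 << 46);			/* low 18 bits of slot 1 */
	w[1] = ((slot1 & MASK_41) >> 18)	/* remaining 23 bits of slot 1 */
	     | ((slot2 & MASK_41) << 23);
}

priv_handle_op later undoes exactly this packing when it extracts the 41-bit instruction for the faulting slot.
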
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT priv_rfi(VCPU *vcpu, INST64 inst)
+{
+ return vcpu_rfi(vcpu);
+}
+
+IA64FAULT priv_bsw0(VCPU *vcpu, INST64 inst)
+{
+ return vcpu_bsw0(vcpu);
+}
+
+IA64FAULT priv_bsw1(VCPU *vcpu, INST64 inst)
+{
+ return vcpu_bsw1(vcpu);
+}
+
+IA64FAULT priv_cover(VCPU *vcpu, INST64 inst)
+{
+ return vcpu_cover(vcpu);
+}
+
+IA64FAULT priv_ptc_l(VCPU *vcpu, INST64 inst)
+{
+ UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+ UINT64 addr_range;
+
+ addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+ return vcpu_ptc_l(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptc_e(VCPU *vcpu, INST64 inst)
+{
+ UINT src = inst.M28.r3;
+
+ // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64)
+ if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64)));
+ return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src));
+}
+
+IA64FAULT priv_ptc_g(VCPU *vcpu, INST64 inst)
+{
+ UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+ UINT64 addr_range;
+
+ addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+ return vcpu_ptc_g(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptc_ga(VCPU *vcpu, INST64 inst)
+{
+ UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+ UINT64 addr_range;
+
+ addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+ return vcpu_ptc_ga(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptr_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+ UINT64 addr_range;
+
+ addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+ return vcpu_ptr_d(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_ptr_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
+ UINT64 addr_range;
+
+ addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
+ return vcpu_ptr_i(vcpu,vadr,addr_range);
+}
+
+IA64FAULT priv_tpa(VCPU *vcpu, INST64 inst)
+{
+ UINT64 padr;
+ UINT fault;
+ UINT src = inst.M46.r3;
+
+ // NOTE: tpa with source gr > 63 is emulated as a ttag rx=r(y-64)
+ if (src > 63)
+ fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr);
+ else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M46.r1, padr);
+ else return fault;
+}
+
+IA64FAULT priv_tak(VCPU *vcpu, INST64 inst)
+{
+ UINT64 key;
+ UINT fault;
+ UINT src = inst.M46.r3;
+
+ // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64)
+ if (src > 63)
+ fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key);
+ else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M46.r1, key);
+ else return fault;
+}
+
+/************************************
+ * Insert translation register/cache
+************************************/
+
+IA64FAULT priv_itr_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+
+ //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ pte = vcpu_get_gr(vcpu,inst.M42.r2);
+ slot = vcpu_get_gr(vcpu,inst.M42.r3);
+
+ return (vcpu_itr_d(vcpu,slot,pte,itir,ifa));
+}
+
+IA64FAULT priv_itr_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte, slot;
+
+ //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ pte = vcpu_get_gr(vcpu,inst.M42.r2);
+ slot = vcpu_get_gr(vcpu,inst.M42.r3);
+
+ return (vcpu_itr_i(vcpu,slot,pte,itir,ifa));
+}
+
+IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte;
+
+ //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ pte = vcpu_get_gr(vcpu,inst.M41.r2);
+
+ return (vcpu_itc_d(vcpu,pte,itir,ifa));
+}
+
+IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst)
+{
+ UINT64 fault, itir, ifa, pte;
+
+ //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
+ return(IA64_ILLOP_FAULT);
+ pte = vcpu_get_gr(vcpu,inst.M41.r2);
+
+ return (vcpu_itc_i(vcpu,pte,itir,ifa));
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+*************************************/
+
+IA64FAULT priv_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
+{
+ // I27 and M30 are identical for these fields
+ UINT64 ar3 = inst.M30.ar3;
+ UINT64 imm = vcpu_get_gr(vcpu,inst.M30.imm);
+ return (vcpu_set_ar(vcpu,ar3,imm));
+}
+
+IA64FAULT priv_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
+{
+ // I26 and M29 are identical for these fields
+ UINT64 ar3 = inst.M29.ar3;
+
+ if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr
+ UINT64 val;
+ if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT)
+ return vcpu_set_gr(vcpu, inst.M29.r2-64, val);
+ else return IA64_ILLOP_FAULT;
+ }
+ else {
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M29.r2);
+ return (vcpu_set_ar(vcpu,ar3,r2));
+ }
+}
+
+/********************************
+ * Moves to privileged registers
+********************************/
+
+IA64FAULT priv_mov_to_pkr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_pkr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_rr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_rr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_dbr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_dbr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_ibr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_ibr(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_pmc(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_pmc(vcpu,r3,r2));
+}
+
+IA64FAULT priv_mov_to_pmd(VCPU *vcpu, INST64 inst)
+{
+ UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
+ UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
+ return (vcpu_set_pmd(vcpu,r3,r2));
+}
+
+unsigned long to_cr_cnt[128] = { 0 };
+
+IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2);
+ to_cr_cnt[inst.M32.cr3]++;
+ switch (inst.M32.cr3) {
+ case 0: return vcpu_set_dcr(vcpu,val);
+ case 1: return vcpu_set_itm(vcpu,val);
+ case 2: return vcpu_set_iva(vcpu,val);
+ case 8: return vcpu_set_pta(vcpu,val);
+ case 16:return vcpu_set_ipsr(vcpu,val);
+ case 17:return vcpu_set_isr(vcpu,val);
+ case 19:return vcpu_set_iip(vcpu,val);
+ case 20:return vcpu_set_ifa(vcpu,val);
+ case 21:return vcpu_set_itir(vcpu,val);
+ case 22:return vcpu_set_iipa(vcpu,val);
+ case 23:return vcpu_set_ifs(vcpu,val);
+ case 24:return vcpu_set_iim(vcpu,val);
+ case 25:return vcpu_set_iha(vcpu,val);
+ case 64:return vcpu_set_lid(vcpu,val);
+ case 65:return IA64_ILLOP_FAULT;
+ case 66:return vcpu_set_tpr(vcpu,val);
+ case 67:return vcpu_set_eoi(vcpu,val);
+ case 68:return IA64_ILLOP_FAULT;
+ case 69:return IA64_ILLOP_FAULT;
+ case 70:return IA64_ILLOP_FAULT;
+ case 71:return IA64_ILLOP_FAULT;
+ case 72:return vcpu_set_itv(vcpu,val);
+ case 73:return vcpu_set_pmv(vcpu,val);
+ case 74:return vcpu_set_cmcv(vcpu,val);
+ case 80:return vcpu_set_lrr0(vcpu,val);
+ case 81:return vcpu_set_lrr1(vcpu,val);
+ default: return IA64_ILLOP_FAULT;
+ }
+}
+
+IA64FAULT priv_rsm(VCPU *vcpu, INST64 inst)
+{
+ UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+ return vcpu_reset_psr_sm(vcpu,imm24);
+}
+
+IA64FAULT priv_ssm(VCPU *vcpu, INST64 inst)
+{
+ UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
+ return vcpu_set_psr_sm(vcpu,imm24);
+}
+
+/**
+ * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
+ */
+IA64FAULT priv_mov_to_psr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val = vcpu_get_gr(vcpu, inst.M35.r2);
+ return vcpu_set_psr_l(vcpu,val);
+}
+
+/**********************************
+ * Moves from privileged registers
+ **********************************/
+
+IA64FAULT priv_mov_from_rr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+
+ if (inst.M43.r1 > 63) { // privified mov from cpuid
+ fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
+ }
+ else {
+ fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ }
+ return fault;
+}
+
+IA64FAULT priv_mov_from_pkr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+
+ fault = vcpu_get_pkr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ else return fault;
+}
+
+IA64FAULT priv_mov_from_dbr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+
+ fault = vcpu_get_dbr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ else return fault;
+}
+
+IA64FAULT priv_mov_from_ibr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+
+ fault = vcpu_get_ibr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ else return fault;
+}
+
+IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst)
+{
+ UINT64 val;
+ IA64FAULT fault;
+
+ if (inst.M43.r1 > 63) { // privified mov from pmd
+ fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
+ }
+ else {
+ fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
+ if (fault == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, inst.M43.r1, val);
+ }
+ return fault;
+}
+
+unsigned long from_cr_cnt[128] = { 0 };
+
+#define cr_get(cr) \
+ ((fault = vcpu_get_##cr(vcpu,&val)) == IA64_NO_FAULT) ? \
+ vcpu_set_gr(vcpu, tgt, val) : fault;
+
+IA64FAULT priv_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 tgt = inst.M33.r1;
+ UINT64 val;
+ IA64FAULT fault;
+
+ from_cr_cnt[inst.M33.cr3]++;
+ switch (inst.M33.cr3) {
+ case 0: return cr_get(dcr);
+ case 1: return cr_get(itm);
+ case 2: return cr_get(iva);
+ case 8: return cr_get(pta);
+ case 16:return cr_get(ipsr);
+ case 17:return cr_get(isr);
+ case 19:return cr_get(iip);
+ case 20:return cr_get(ifa);
+ case 21:return cr_get(itir);
+ case 22:return cr_get(iipa);
+ case 23:return cr_get(ifs);
+ case 24:return cr_get(iim);
+ case 25:return cr_get(iha);
+ case 64:return cr_get(lid);
+ case 65:return cr_get(ivr);
+ case 66:return cr_get(tpr);
+ case 67:return vcpu_set_gr(vcpu,tgt,0L);
+ case 68:return cr_get(irr0);
+ case 69:return cr_get(irr1);
+ case 70:return cr_get(irr2);
+ case 71:return cr_get(irr3);
+ case 72:return cr_get(itv);
+ case 73:return cr_get(pmv);
+ case 74:return cr_get(cmcv);
+ case 80:return cr_get(lrr0);
+ case 81:return cr_get(lrr1);
+ default: return IA64_ILLOP_FAULT;
+ }
+ return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT priv_mov_from_psr(VCPU *vcpu, INST64 inst)
+{
+ UINT64 tgt = inst.M33.r1;
+ UINT64 val;
+ IA64FAULT fault;
+
+ if ((fault = vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
+ return vcpu_set_gr(vcpu, tgt, val);
+ else return fault;
+}
+
+/**************************************************************************
+Privileged operation decode and dispatch routines
+**************************************************************************/
+
+IA64_SLOT_TYPE slot_types[0x20][3] = {
+ {M, I, I}, {M, I, I}, {M, I, I}, {M, I, I},
+ {M, I, ILLEGAL}, {M, I, ILLEGAL},
+ {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+ {M, M, I}, {M, M, I}, {M, M, I}, {M, M, I},
+ {M, F, I}, {M, F, I},
+ {M, M, F}, {M, M, F},
+ {M, I, B}, {M, I, B},
+ {M, B, B}, {M, B, B},
+ {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+ {B, B, B}, {B, B, B},
+ {M, M, B}, {M, M, B},
+ {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
+ {M, F, B}, {M, F, B},
+ {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}
+};
+
+// pointer to privileged emulation function
+typedef IA64FAULT (*PPEFCN)(VCPU *vcpu, INST64 inst);
+
+PPEFCN Mpriv_funcs[64] = {
+ priv_mov_to_rr, priv_mov_to_dbr, priv_mov_to_ibr, priv_mov_to_pkr,
+ priv_mov_to_pmc, priv_mov_to_pmd, 0, 0,
+ 0, priv_ptc_l, priv_ptc_g, priv_ptc_ga,
+ priv_ptr_d, priv_ptr_i, priv_itr_d, priv_itr_i,
+ priv_mov_from_rr, priv_mov_from_dbr, priv_mov_from_ibr, priv_mov_from_pkr,
+ priv_mov_from_pmc, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, priv_tpa, priv_tak,
+ 0, 0, 0, 0,
+ priv_mov_from_cr, priv_mov_from_psr, 0, 0,
+ 0, 0, 0, 0,
+ priv_mov_to_cr, priv_mov_to_psr, priv_itc_d, priv_itc_i,
+ 0, 0, 0, 0,
+ priv_ptc_e, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+struct {
+ unsigned long mov_to_ar_imm;
+ unsigned long mov_to_ar_reg;
+ unsigned long mov_from_ar;
+ unsigned long ssm;
+ unsigned long rsm;
+ unsigned long rfi;
+ unsigned long bsw0;
+ unsigned long bsw1;
+ unsigned long cover;
+ unsigned long fc;
+ unsigned long cpuid;
+ unsigned long Mpriv_cnt[64];
+} privcnt = { 0 };
+
+unsigned long privop_trace = 0;
+
+IA64FAULT
+priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl)
+{
+ IA64_BUNDLE bundle;
+ IA64_BUNDLE __get_domain_bundle(UINT64);
+ int slot;
+ IA64_SLOT_TYPE slot_type;
+ INST64 inst;
+ PPEFCN pfunc;
+ unsigned long ipsr = regs->cr_ipsr;
+ UINT64 iip = regs->cr_iip;
+ int x6;
+
+ // make a local copy of the bundle containing the privop
+#if 1
+ bundle = __get_domain_bundle(iip);
+ if (!bundle.i64[0] && !bundle.i64[1])
+#else
+ if (__copy_from_user(&bundle,iip,sizeof(bundle)))
+#endif
+ {
+//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip);
+ return vcpu_force_data_miss(vcpu,regs->cr_iip);
+ }
+#if 0
+ if (iip==0xa000000100001820) {
+ static int firstpagefault = 1;
+ if (firstpagefault) {
+			printf("*** First time to domain page fault!\n"); firstpagefault=0;
+ }
+ }
+#endif
+ if (privop_trace) {
+ static long i = 400;
+ //if (i > 0) printf("privop @%p\n",iip);
+ if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
+ iip,ia64_get_itc(),ia64_get_itm());
+ i--;
+ }
+ slot = ((struct ia64_psr *)&ipsr)->ri;
+ if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41;
+ else if (slot == 1)
+ inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41;
+ else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41;
+ else printf("priv_handle_op: illegal slot: %d\n", slot);
+
+ slot_type = slot_types[bundle.template][slot];
+ if (priv_verbose) {
+		printf("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
+ iip, (UINT64)inst.inst, slot, slot_type);
+ }
+ if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) {
+ // break instr for privified cover
+ }
+ else if (privlvl != 2) return (IA64_ILLOP_FAULT);
+ switch (slot_type) {
+ case M:
+ if (inst.generic.major == 0) {
+#if 0
+ if (inst.M29.x6 == 0 && inst.M29.x3 == 0) {
+ privcnt.cover++;
+ return priv_cover(vcpu,inst);
+ }
+#endif
+ if (inst.M29.x3 != 0) break;
+ if (inst.M30.x4 == 8 && inst.M30.x2 == 2) {
+ privcnt.mov_to_ar_imm++;
+ return priv_mov_to_ar_imm(vcpu,inst);
+ }
+ if (inst.M44.x4 == 6) {
+ privcnt.ssm++;
+ return priv_ssm(vcpu,inst);
+ }
+ if (inst.M44.x4 == 7) {
+ privcnt.rsm++;
+ return priv_rsm(vcpu,inst);
+ }
+ break;
+ }
+ else if (inst.generic.major != 1) break;
+ x6 = inst.M29.x6;
+ if (x6 == 0x2a) {
+ if (inst.M29.r2 > 63 && inst.M29.ar3 < 8)
+ privcnt.mov_from_ar++; // privified mov from kr
+ else privcnt.mov_to_ar_reg++;
+ return priv_mov_to_ar_reg(vcpu,inst);
+ }
+ if (inst.M29.x3 != 0) break;
+ if (!(pfunc = Mpriv_funcs[x6])) break;
+ if (x6 == 0x1e || x6 == 0x1f) { // tpa or tak are "special"
+ if (inst.M46.r3 > 63) {
+ if (x6 == 0x1e) x6 = 0x1b;
+ else x6 = 0x1a;
+ }
+ }
+ if (x6 == 52 && inst.M28.r3 > 63)
+ privcnt.fc++;
+ else if (x6 == 16 && inst.M43.r3 > 63)
+ privcnt.cpuid++;
+ else privcnt.Mpriv_cnt[x6]++;
+ return (*pfunc)(vcpu,inst);
+ break;
+ case B:
+ if (inst.generic.major != 0) break;
+ if (inst.B8.x6 == 0x08) {
+ IA64FAULT fault;
+ privcnt.rfi++;
+ fault = priv_rfi(vcpu,inst);
+			if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS;
+ return fault;
+ }
+ if (inst.B8.x6 == 0x0c) {
+ privcnt.bsw0++;
+ return priv_bsw0(vcpu,inst);
+ }
+ if (inst.B8.x6 == 0x0d) {
+ privcnt.bsw1++;
+ return priv_bsw1(vcpu,inst);
+ }
+ if (inst.B8.x6 == 0x0) { // break instr for privified cover
+ privcnt.cover++;
+ return priv_cover(vcpu,inst);
+ }
+ break;
+ case I:
+ if (inst.generic.major != 0) break;
+#if 0
+ if (inst.I26.x6 == 0 && inst.I26.x3 == 0) {
+ privcnt.cover++;
+ return priv_cover(vcpu,inst);
+ }
+#endif
+ if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3
+ if (inst.I26.x6 == 0x2a) {
+ if (inst.I26.r2 > 63 && inst.I26.ar3 < 8)
+ privcnt.mov_from_ar++; // privified mov from kr
+ else privcnt.mov_to_ar_reg++;
+ return priv_mov_to_ar_reg(vcpu,inst);
+ }
+ if (inst.I27.x6 == 0x0a) {
+ privcnt.mov_to_ar_imm++;
+ return priv_mov_to_ar_imm(vcpu,inst);
+ }
+ break;
+ default:
+ break;
+ }
+ //printf("We who are about do die salute you\n");
+	printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=%p\n",
+ iip, (UINT64)inst.inst, slot, slot_type, ipsr);
+ //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
+ //thread_mozambique("privop fault\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+/** Emulate a privileged operation.
+ *
+ * This should probably return 0 on success and the "trap number"
+ * (e.g. illegal operation for bad register, priv op for an
+ * instruction that isn't allowed, etc.) on "failure"
+ *
+ * @param vcpu virtual cpu
+ * @param isrcode interrupt service routine code
+ * @return fault
+ */
+IA64FAULT
+priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr)
+{
+ IA64FAULT fault;
+ UINT64 ipsr = regs->cr_ipsr;
+ UINT64 isrcode = (isr >> 4) & 0xf;
+ int privlvl;
+
+ // handle privops masked as illops? and breaks (6)
+ if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) {
+ printf("priv_emulate: isrcode != 0 or 1 or 2\n");
+ printf("priv_emulate: returning ILLOP, not implemented!\n");
+ while (1);
+ return IA64_ILLOP_FAULT;
+ }
+ //if (isrcode != 1 && isrcode != 2) return 0;
+ vcpu_set_regs(vcpu,regs);
+ privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT;
+ // its OK for a privified-cover to be executed in user-land
+ fault = priv_handle_op(vcpu,regs,privlvl);
+	if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!!
+ // update iip/ipsr to point to the next instruction
+ (void)vcpu_increment_iip(vcpu);
+ }
+ if (fault == IA64_ILLOP_FAULT)
+ printf("priv_emulate: priv_handle_op fails, isr=%p\n",isr);
+ return fault;
+}
+
+
+// FIXME: Move these to include/public/arch-ia64?
+#define HYPERPRIVOP_RFI 0x1
+#define HYPERPRIVOP_RSM_DT 0x2
+#define HYPERPRIVOP_SSM_DT 0x3
+#define HYPERPRIVOP_COVER 0x4
+#define HYPERPRIVOP_ITC_D 0x5
+#define HYPERPRIVOP_ITC_I 0x6
+#define HYPERPRIVOP_SSM_I 0x7
+#define HYPERPRIVOP_GET_IVR 0x8
+#define HYPERPRIVOP_GET_TPR 0x9
+#define HYPERPRIVOP_SET_TPR 0xa
+#define HYPERPRIVOP_EOI 0xb
+#define HYPERPRIVOP_SET_ITM 0xc
+#define HYPERPRIVOP_THASH 0xd
+#define HYPERPRIVOP_PTC_GA 0xe
+#define HYPERPRIVOP_ITR_D 0xf
+#define HYPERPRIVOP_GET_RR 0x10
+#define HYPERPRIVOP_SET_RR 0x11
+#define HYPERPRIVOP_MAX 0x11
+
+char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
+ 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
+ "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d",
+ "=rr", "rr=",
+ 0
+};
+
+unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
+
+/* hyperprivops are generally executed in assembly (with physical psr.ic off)
+ * so this code is primarily used for debugging them */
+int
+ia64_hyperprivop(unsigned long iim, REGS *regs)
+{
+	struct vcpu *v = (struct vcpu *) current;
+ INST64 inst;
+ UINT64 val;
+ UINT64 itir, ifa;
+
+// FIXME: Handle faults appropriately for these
+ if (!iim || iim > HYPERPRIVOP_MAX) {
+ printf("bad hyperprivop; ignored\n");
+ printf("iim=%d, iip=%p\n",iim,regs->cr_iip);
+ return 1;
+ }
+ slow_hyperpriv_cnt[iim]++;
+ switch(iim) {
+ case HYPERPRIVOP_RFI:
+ (void)vcpu_rfi(v);
+ return 0; // don't update iip
+ case HYPERPRIVOP_RSM_DT:
+ (void)vcpu_reset_psr_dt(v);
+ return 1;
+ case HYPERPRIVOP_SSM_DT:
+ (void)vcpu_set_psr_dt(v);
+ return 1;
+ case HYPERPRIVOP_COVER:
+ (void)vcpu_cover(v);
+ return 1;
+ case HYPERPRIVOP_ITC_D:
+ (void)vcpu_get_itir(v,&itir);
+ (void)vcpu_get_ifa(v,&ifa);
+ (void)vcpu_itc_d(v,regs->r8,itir,ifa);
+ return 1;
+ case HYPERPRIVOP_ITC_I:
+ (void)vcpu_get_itir(v,&itir);
+ (void)vcpu_get_ifa(v,&ifa);
+ (void)vcpu_itc_i(v,regs->r8,itir,ifa);
+ return 1;
+ case HYPERPRIVOP_SSM_I:
+ (void)vcpu_set_psr_i(v);
+ return 1;
+ case HYPERPRIVOP_GET_IVR:
+ (void)vcpu_get_ivr(v,&val);
+ regs->r8 = val;
+ return 1;
+ case HYPERPRIVOP_GET_TPR:
+ (void)vcpu_get_tpr(v,&val);
+ regs->r8 = val;
+ return 1;
+ case HYPERPRIVOP_SET_TPR:
+ (void)vcpu_set_tpr(v,regs->r8);
+ return 1;
+ case HYPERPRIVOP_EOI:
+ (void)vcpu_set_eoi(v,0L);
+ return 1;
+ case HYPERPRIVOP_SET_ITM:
+ (void)vcpu_set_itm(v,regs->r8);
+ return 1;
+ case HYPERPRIVOP_THASH:
+ (void)vcpu_thash(v,regs->r8,&val);
+ regs->r8 = val;
+ return 1;
+ case HYPERPRIVOP_PTC_GA:
+ (void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2)));
+ return 1;
+ case HYPERPRIVOP_ITR_D:
+ (void)vcpu_get_itir(v,&itir);
+ (void)vcpu_get_ifa(v,&ifa);
+ (void)vcpu_itr_d(v,regs->r8,regs->r9,itir,ifa);
+ return 1;
+ case HYPERPRIVOP_GET_RR:
+ (void)vcpu_get_rr(v,regs->r8,&val);
+ regs->r8 = val;
+ return 1;
+ case HYPERPRIVOP_SET_RR:
+ (void)vcpu_set_rr(v,regs->r8,regs->r9);
+ return 1;
+ }
+ return 0;
+}
+
+
+/**************************************************************************
+Privileged operation instrumentation routines
+**************************************************************************/
+
+char *Mpriv_str[64] = {
+ "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr",
+ "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>",
+ "<0x08>", "ptc_l", "ptc_g", "ptc_ga",
+ "ptr_d", "ptr_i", "itr_d", "itr_i",
+ "mov_from_rr", "mov_from_dbr", "mov_from_ibr", "mov_from_pkr",
+ "mov_from_pmc", "<0x15>", "<0x16>", "<0x17>",
+ "<0x18>", "<0x19>", "privified-thash", "privified-ttag",
+ "<0x1c>", "<0x1d>", "tpa", "tak",
+ "<0x20>", "<0x21>", "<0x22>", "<0x23>",
+ "mov_from_cr", "mov_from_psr", "<0x26>", "<0x27>",
+ "<0x28>", "<0x29>", "<0x2a>", "<0x2b>",
+ "mov_to_cr", "mov_to_psr", "itc_d", "itc_i",
+ "<0x30>", "<0x31>", "<0x32>", "<0x33>",
+ "ptc_e", "<0x35>", "<0x36>", "<0x37>",
+ "<0x38>", "<0x39>", "<0x3a>", "<0x3b>",
+ "<0x3c>", "<0x3d>", "<0x3e>", "<0x3f>"
+};
+
+#define RS "Rsvd"
+char *cr_str[128] = {
+ "dcr","itm","iva",RS,RS,RS,RS,RS,
+ "pta",RS,RS,RS,RS,RS,RS,RS,
+ "ipsr","isr",RS,"iip","ifa","itir","iipa","ifs",
+ "iim","iha",RS,RS,RS,RS,RS,RS,
+ RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+ RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+ "lid","ivr","tpr","eoi","irr0","irr1","irr2","irr3",
+ "itv","pmv","cmcv",RS,RS,RS,RS,RS,
+ "lrr0","lrr1",RS,RS,RS,RS,RS,RS,
+ RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+ RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
+ RS,RS,RS,RS,RS,RS,RS,RS
+};
+
+// FIXME: should use snprintf to ensure no buffer overflow
+int dump_privop_counts(char *buf)
+{
+ int i, j;
+ UINT64 sum = 0;
+ char *s = buf;
+
+ // this is ugly and should probably produce sorted output
+ // but it will have to do for now
+ sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg;
+ sum += privcnt.ssm; sum += privcnt.rsm;
+ sum += privcnt.rfi; sum += privcnt.bsw0;
+ sum += privcnt.bsw1; sum += privcnt.cover;
+ for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i];
+ s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum);
+ if (privcnt.mov_to_ar_imm)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_imm,
+ "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum);
+ if (privcnt.mov_to_ar_reg)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg,
+ "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum);
+ if (privcnt.mov_from_ar)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar,
+			"privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum);
+ if (privcnt.ssm)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm,
+ "ssm", (privcnt.ssm*100L)/sum);
+ if (privcnt.rsm)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm,
+ "rsm", (privcnt.rsm*100L)/sum);
+ if (privcnt.rfi)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi,
+ "rfi", (privcnt.rfi*100L)/sum);
+ if (privcnt.bsw0)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0,
+ "bsw0", (privcnt.bsw0*100L)/sum);
+ if (privcnt.bsw1)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1,
+ "bsw1", (privcnt.bsw1*100L)/sum);
+ if (privcnt.cover)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover,
+ "cover", (privcnt.cover*100L)/sum);
+ if (privcnt.fc)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc,
+ "privified-fc", (privcnt.fc*100L)/sum);
+ if (privcnt.cpuid)
+ s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid,
+ "privified-getcpuid", (privcnt.cpuid*100L)/sum);
+ for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) {
+ if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n");
+ else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i],
+ Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum);
+ if (i == 0x24) { // mov from CR
+ s += sprintf(s," [");
+ for (j=0; j < 128; j++) if (from_cr_cnt[j]) {
+ if (!cr_str[j])
+ s += sprintf(s,"PRIVSTRING NULL!!\n");
+				s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]);
+ }
+ s += sprintf(s,"]\n");
+ }
+ else if (i == 0x2c) { // mov to CR
+ s += sprintf(s," [");
+ for (j=0; j < 128; j++) if (to_cr_cnt[j]) {
+ if (!cr_str[j])
+ s += sprintf(s,"PRIVSTRING NULL!!\n");
+				s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]);
+ }
+ s += sprintf(s,"]\n");
+ }
+ }
+ return s - buf;
+}
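
The FIXME above notes that these dumpers write with unbounded sprintf; a bounded variant of the per-counter line would look roughly like this (sketch only; "len" is whatever space the caller can guarantee):

/* Sketch of a length-checked replacement for the sprintf pattern above. */
static int dump_one_count_sketch(char *buf, int len, const char *name,
				 unsigned long cnt, unsigned long sum)
{
	int n = snprintf(buf, len, "%10lu %s [%lu%%]\n",
			 cnt, name, sum ? (cnt * 100UL) / sum : 0);

	/* snprintf reports what it would have written; clamp so the caller
	 * can keep appending without running past the buffer. */
	if (n < 0)
		return 0;
	return (n >= len) ? (len > 0 ? len - 1 : 0) : n;
}
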
+
+int zero_privop_counts(char *buf)
+{
+ int i, j;
+ char *s = buf;
+
+ // this is ugly and should probably produce sorted output
+ // but it will have to do for now
+ privcnt.mov_to_ar_imm = 0; privcnt.mov_to_ar_reg = 0;
+ privcnt.mov_from_ar = 0;
+ privcnt.ssm = 0; privcnt.rsm = 0;
+ privcnt.rfi = 0; privcnt.bsw0 = 0;
+ privcnt.bsw1 = 0; privcnt.cover = 0;
+ privcnt.fc = 0; privcnt.cpuid = 0;
+ for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0;
+ for (j=0; j < 128; j++) from_cr_cnt[j] = 0;
+ for (j=0; j < 128; j++) to_cr_cnt[j] = 0;
+ s += sprintf(s,"All privop statistics zeroed\n");
+ return s - buf;
+}
+
+#ifdef PRIVOP_ADDR_COUNT
+
+extern struct privop_addr_count privop_addr_counter[];
+
+void privop_count_addr(unsigned long iip, int inst)
+{
+ struct privop_addr_count *v = &privop_addr_counter[inst];
+ int i;
+
+ for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) {
+ if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
+ else if (v->addr[i] == iip) { v->count[i]++; return; }
+ }
+	v->overflow++;
+}
+
+int dump_privop_addrs(char *buf)
+{
+ int i,j;
+ char *s = buf;
+ s += sprintf(s,"Privop addresses:\n");
+ for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
+ struct privop_addr_count *v = &privop_addr_counter[i];
+ s += sprintf(s,"%s:\n",v->instname);
+ for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) {
+ if (!v->addr[j]) break;
+ s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]);
+ }
+ if (v->overflow)
+ s += sprintf(s," other #%ld\n",v->overflow);
+ }
+ return s - buf;
+}
+
+void zero_privop_addrs(void)
+{
+ int i,j;
+ for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
+ struct privop_addr_count *v = &privop_addr_counter[i];
+ for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+ v->addr[j] = v->count[j] = 0;
+ v->overflow = 0;
+ }
+}
+#endif
+
+extern unsigned long dtlb_translate_count;
+extern unsigned long tr_translate_count;
+extern unsigned long phys_translate_count;
+extern unsigned long vhpt_translate_count;
+extern unsigned long lazy_cover_count;
+extern unsigned long idle_when_pending;
+extern unsigned long pal_halt_light_count;
+extern unsigned long context_switch_count;
+
+int dump_misc_stats(char *buf)
+{
+ char *s = buf;
+ s += sprintf(s,"Virtual TR translations: %d\n",tr_translate_count);
+ s += sprintf(s,"Virtual VHPT translations: %d\n",vhpt_translate_count);
+ s += sprintf(s,"Virtual DTLB translations: %d\n",dtlb_translate_count);
+ s += sprintf(s,"Physical translations: %d\n",phys_translate_count);
+ s += sprintf(s,"Idle when pending: %d\n",idle_when_pending);
+	s += sprintf(s,"PAL_HALT_LIGHT (no pending): %d\n",pal_halt_light_count);
+ s += sprintf(s,"context switches: %d\n",context_switch_count);
+ s += sprintf(s,"Lazy covers: %d\n",lazy_cover_count);
+ return s - buf;
+}
+
+void zero_misc_stats(void)
+{
+ dtlb_translate_count = 0;
+ tr_translate_count = 0;
+ phys_translate_count = 0;
+ vhpt_translate_count = 0;
+ lazy_cover_count = 0;
+ pal_halt_light_count = 0;
+ idle_when_pending = 0;
+ context_switch_count = 0;
+}
+
+int dump_hyperprivop_counts(char *buf)
+{
+ int i;
+ char *s = buf;
+ unsigned long total = 0;
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i];
+ s += sprintf(s,"Slow hyperprivops (total %d):\n",total);
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+ if (slow_hyperpriv_cnt[i])
+ s += sprintf(s,"%10d %s\n",
+ slow_hyperpriv_cnt[i], hyperpriv_str[i]);
+ total = 0;
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i];
+ s += sprintf(s,"Fast hyperprivops (total %d):\n",total);
+ for (i = 1; i <= HYPERPRIVOP_MAX; i++)
+ if (fast_hyperpriv_cnt[i])
+ s += sprintf(s,"%10d %s\n",
+ fast_hyperpriv_cnt[i], hyperpriv_str[i]);
+ return s - buf;
+}
+
+void zero_hyperprivop_counts(void)
+{
+ int i;
+ for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0;
+ for (i = 0; i <= HYPERPRIVOP_MAX; i++) fast_hyperpriv_cnt[i] = 0;
+}
+
+#define TMPBUFLEN 8*1024
+int dump_privop_counts_to_user(char __user *ubuf, int len)
+{
+ char buf[TMPBUFLEN];
+ int n = dump_privop_counts(buf);
+
+ n += dump_hyperprivop_counts(buf + n);
+ n += dump_reflect_counts(buf + n);
+#ifdef PRIVOP_ADDR_COUNT
+ n += dump_privop_addrs(buf + n);
+#endif
+ n += dump_misc_stats(buf + n);
+ if (len < TMPBUFLEN) return -1;
+ if (__copy_to_user(ubuf,buf,n)) return -1;
+ return n;
+}
+
+int zero_privop_counts_to_user(char __user *ubuf, int len)
+{
+ char buf[TMPBUFLEN];
+ int n = zero_privop_counts(buf);
+
+ zero_hyperprivop_counts();
+#ifdef PRIVOP_ADDR_COUNT
+ zero_privop_addrs();
+#endif
+ zero_misc_stats();
+ zero_reflect_counts();
+ if (len < TMPBUFLEN) return -1;
+ if (__copy_to_user(ubuf,buf,n)) return -1;
+ return n;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/process.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/process.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,749 @@
+/*
+ * Miscellaneous process/domain related routines
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <asm/ptrace.h>
+#include <xen/delay.h>
+
+#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
+#include <asm/sal.h> /* FOR struct ia64_sal_retval */
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+//#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <asm/regionreg.h>
+#include <asm/privop.h>
+#include <asm/vcpu.h>
+#include <asm/ia64_int.h>
+#include <asm/dom_fw.h>
+#include "hpsim_ssc.h"
+
+extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
+extern struct ia64_sal_retval pal_emulator_static(UINT64);
+extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
+
+extern unsigned long dom0_start, dom0_size;
+
+#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
+// note IA64_PSR_PK removed from following, why is this necessary?
+#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
+ IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
+ IA64_PSR_IT | IA64_PSR_BN)
+
+#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
+ IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
+ IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
+ IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \
+ IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
+ IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
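+// DELIVER_PSR_SET/CLR list the psr bits that reflect_interruption() below
+// forces on/off in regs->cr_ipsr before resuming the guest at its handler:
+// interrupt collection, interrupts, translation and register bank 1 are
+// turned on, while single-step, debug and IA-32 related bits are cleared and
+// the cpl field is rewritten via IA64_PSR_CPL / IA64_PSR_CPL1.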
+
+#define PSCB(x,y) VCPU(x,y)
+#define PSCBX(x,y) x->arch.y
+
+extern unsigned long vcpu_verbose;
+
+long do_iopl(domid_t domain, unsigned int new_io_pl)
+{
+ dummy();
+ return 0;
+}
+
+void schedule_tail(struct vcpu *next)
+{
+ unsigned long rr7;
+ //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
+ //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
+#ifdef CONFIG_VTI
+ /* rr7 will be postponed to last point when resuming back to guest */
+ vmx_load_all_rr(current);
+#else // CONFIG_VTI
+ if (rr7 = load_region_regs(current)) {
+ printk("schedule_tail: change to rr7 not yet implemented\n");
+ }
+#endif // CONFIG_VTI
+}
+
+void tdpfoo(void) { }
+
+// given a domain virtual address, pte and pagesize, extract the metaphysical
+// address, convert the pte for a physical address for (possibly different)
+// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use
+// PAGE_SIZE!)
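+// Sketch (hypothetical numbers, not taken from this patch): with itir.ps = 14
+// (a 16KB guest mapping) the mask is 0x3fff, so the metaphysical address is
+// the guest pte's ppn bits above bit 14 combined with the low 14 bits of the
+// faulting address; lookup_domain_mpa() then supplies the machine ppn that
+// replaces the guest ppn in the returned pte.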
+unsigned long translate_domain_pte(unsigned long pteval,
+ unsigned long address, unsigned long itir)
+{
+ struct domain *d = current->domain;
+ unsigned long mask, pteval2, mpaddr;
+ unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+ extern struct domain *dom0;
+ extern unsigned long dom0_start, dom0_size;
+
+ // FIXME address had better be pre-validated on insert
+ mask = (1L << ((itir >> 2) & 0x3f)) - 1;
+ mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
+ if (d == dom0) {
+ if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+ //printk("translate_domain_pte: out-of-bounds dom0 mpaddr %p! itc=%lx...\n",mpaddr,ia64_get_itc());
+ tdpfoo();
+ }
+ }
+ else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) {
+ printf("translate_domain_pte: bad mpa=%p (> %p),vadr=%p,pteval=%p,itir=%p\n",
+ mpaddr,d->max_pages<<PAGE_SHIFT,address,pteval,itir);
+ tdpfoo();
+ }
+ pteval2 = lookup_domain_mpa(d,mpaddr);
+ pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
+ pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
+ pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
+ return pteval2;
+}
+
+// given a current domain metaphysical address, return the physical address
+unsigned long translate_domain_mpaddr(unsigned long mpaddr)
+{
+ extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+ unsigned long pteval;
+
+ if (current->domain == dom0) {
+ if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
+ printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr);
+ tdpfoo();
+ }
+ }
+ pteval = lookup_domain_mpa(current->domain,mpaddr);
+ return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
+}
+
+unsigned long slow_reflect_count[0x80] = { 0 };
+unsigned long fast_reflect_count[0x80] = { 0 };
+
+#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++;
+
+void zero_reflect_counts(void)
+{
+ int i;
+ for (i=0; i<0x80; i++) slow_reflect_count[i] = 0;
+ for (i=0; i<0x80; i++) fast_reflect_count[i] = 0;
+}
+
+int dump_reflect_counts(char *buf)
+{
+ int i,j,cnt;
+ char *s = buf;
+
+ s += sprintf(s,"Slow reflections by vector:\n");
+ for (i = 0, j = 0; i < 0x80; i++) {
+ if (cnt = slow_reflect_count[i]) {
+ s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
+ if ((j++ & 3) == 3) s += sprintf(s,"\n");
+ }
+ }
+ if (j & 3) s += sprintf(s,"\n");
+ s += sprintf(s,"Fast reflections by vector:\n");
+ for (i = 0, j = 0; i < 0x80; i++) {
+ if (cnt = fast_reflect_count[i]) {
+ s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
+ if ((j++ & 3) == 3) s += sprintf(s,"\n");
+ }
+ }
+ if (j & 3) s += sprintf(s,"\n");
+ return s - buf;
+}
+
+void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long itiriim, struct pt_regs *regs, unsigned long vector)
+{
+ unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long);
+ unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long);
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
+
+ if (vector == IA64_EXTINT_VECTOR) {
+
+ extern unsigned long vcpu_verbose, privop_trace;
+ static int first_extint = 1;
+ if (first_extint) {
+ printf("Delivering first extint to domain: ifa=%p, isr=%p, itir=%p, iip=%p\n",ifa,isr,itiriim,regs->cr_iip);
+ //privop_trace = 1; vcpu_verbose = 1;
+ first_extint = 0;
+ }
+ }
+ if (!PSCB(v,interrupt_collection_enabled)) {
+ if (!(PSCB(v,ipsr) & IA64_PSR_DT)) {
+ panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n");
+ }
+ vector &= ~0xf;
+ if (vector != IA64_DATA_TLB_VECTOR &&
+ vector != IA64_ALT_DATA_TLB_VECTOR &&
+ vector != IA64_VHPT_TRANS_VECTOR) {
+panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
+ vector,regs->cr_ipsr,regs->cr_iip,ifa,isr,PSCB(v,iip));
+
+ }
+//printf("Delivering NESTED DATA TLB fault\n");
+ vector = IA64_DATA_NESTED_TLB_VECTOR;
+ regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
+ regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
+// NOTE: nested trap must NOT pass PSCB address
+ //regs->r31 = (unsigned long) &PSCB(v);
+ inc_slow_reflect_count(vector);
+ return;
+
+ }
+ if ((vector & 0xf) == IA64_FORCED_IFA)
+ ifa = PSCB(v,tmp[0]);
+ vector &= ~0xf;
+ PSCB(v,ifa) = ifa;
+ if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt TLB miss */
+ vcpu_thash(v,ifa,&PSCB(current,iha));
+ PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed?
+ PSCB(v,precover_ifs) = regs->cr_ifs;
+ vcpu_bsw0(v);
+ PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr);
+ if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+ PSCB(v,iim) = itiriim;
+ else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa);
+ PSCB(v,isr) = isr; // this is unnecessary except for interrupts!
+ PSCB(v,iip) = regs->cr_iip;
+ PSCB(v,ifs) = 0;
+ PSCB(v,incomplete_regframe) = 0;
+
+ regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
+ regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
+#ifdef CONFIG_SMP
+#warning "SMP FIXME: sharedinfo doesn't handle smp yet, need page per vcpu"
+#endif
+ regs->r31 = &(((mapped_regs_t *)SHARED_ARCHINFO_ADDR)->ipsr);
+
+ PSCB(v,interrupt_delivery_enabled) = 0;
+ PSCB(v,interrupt_collection_enabled) = 0;
+
+ inc_slow_reflect_count(vector);
+}
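+// Delivery sketch (illustrative offsets only): for an external interrupt the
+// IVT offset is 0x3000, so the guest resumes at (guest iva + 0x3000) with the
+// low 8 bits masked off, its interruption state (iip/ipsr/isr/ifa/itir/iim)
+// captured in the vcpu's shared PSCB area, and virtual psr.ic and psr.i left
+// off until the guest handler re-enables them.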
+
+void foodpi(void) {}
+
+unsigned long pending_false_positive = 0;
+
+// ONLY gets called from ia64_leave_kernel
+// ONLY call with interrupts disabled?? (else might miss one?)
+// NEVER successful if already reflecting a trap/fault because psr.i==0
+void deliver_pending_interrupt(struct pt_regs *regs)
+{
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
+ // FIXME: Will this work properly if doing an RFI???
+ if (!is_idle_task(d) && user_mode(regs)) {
+ //vcpu_poke_timer(v);
+ if (vcpu_deliverable_interrupts(v)) {
+ unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
+ if (vcpu_timer_pending_early(v))
+printf("*#*#*#* about to deliver early timer to domain
%d!!!\n",v->domain->domain_id);
+ reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
+ }
+ else if (PSCB(v,pending_interruption))
+ ++pending_false_positive;
+ }
+}
+unsigned long lazy_cover_count = 0;
+
+int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
+{
+ if (!PSCB(v,interrupt_collection_enabled)) {
+ PSCB(v,ifs) = regs->cr_ifs;
+ PSCB(v,incomplete_regframe) = 1;
+ regs->cr_ifs = 0;
+ lazy_cover_count++;
+ return(1); // retry same instruction with cr.ifs off
+ }
+ return(0);
+}
+
+void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
+{
+ unsigned long iip = regs->cr_iip;
+ // FIXME should validate address here
+ unsigned long pteval;
+ unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
+ IA64FAULT fault;
+
+ if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return;
+ if ((isr & IA64_ISR_SP)
+     || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
+ {
+ /*
+ * This fault was due to a speculative load or lfetch.fault, set the "ed"
+ * bit in the psr to ensure forward progress. (Target register will get a
+ * NaT for ld.s, lfetch will be canceled.)
+ */
+ ia64_psr(regs)->ed = 1;
+ return;
+ }
+
+ fault = vcpu_translate(current,address,is_data,&pteval,&itir);
+ if (fault == IA64_NO_FAULT)
+ {
+ pteval = translate_domain_pte(pteval,address,itir);
+ vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f);
+ return;
+ }
+ else if (IS_VMM_ADDRESS(iip))
+ {
+ if (!ia64_done_with_exception(regs)) {
+ // should never happen. If it does, region 0 addr may
+ // indicate a bad xen pointer
+ printk("*** xen_handle_domain_access: exception table"
+ " lookup failed, iip=%p, addr=%p, spinning...\n",
+ iip,address);
+ panic_domain(regs,"*** xen_handle_domain_access: exception table"
+ " lookup failed, iip=%p, addr=%p, spinning...\n",
+ iip,address);
+ }
+ return;
+ }
+
+ reflect_interruption(address, isr, 0, regs, fault);
+}
+
+void
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ unsigned long iim, unsigned long itir, unsigned long arg5,
+ unsigned long arg6, unsigned long arg7, unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long code, error = isr;
+ char buf[128];
+ int result, sig;
+ static const char *reason[] = {
+ "IA-64 Illegal Operation fault",
+ "IA-64 Privileged Operation fault",
+ "IA-64 Privileged Register fault",
+ "IA-64 Reserved Register/Field fault",
+ "Disabled Instruction Set Transition fault",
+ "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+ "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+ "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+ };
+#if 0
+printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
+ vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
+#endif
+
+ if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
+ /*
+ * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
+ * the lfetch.
+ */
+ ia64_psr(regs)->ed = 1;
+ printf("ia64_fault: handled lfetch.fault\n");
+ return;
+ }
+
+ switch (vector) {
+ case 24: /* General Exception */
+ code = (isr >> 4) & 0xf;
+ sprintf(buf, "General Exception: %s%s", reason[code],
+ (code == 3) ? ((isr & (1UL << 37))
+ ? " (RSE access)" : " (data access)") : "");
+ if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+ printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
+ current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri,
+ regs->pr);
+# endif
+ printf("ia64_fault: returning on hazard\n");
+ return;
+ }
+ break;
+
+ case 25: /* Disabled FP-Register */
+ if (isr & 2) {
+ //disabled_fph_fault(regs);
+ //return;
+ }
+ sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+ break;
+
+ case 26: /* NaT Consumption */
+ if (user_mode(regs)) {
+ void *addr;
+
+ if (((isr >> 4) & 0xf) == 2) {
+ /* NaT page consumption */
+ //sig = SIGSEGV;
+ //code = SEGV_ACCERR;
+ addr = (void *) ifa;
+ } else {
+ /* register NaT consumption */
+ //sig = SIGILL;
+ //code = ILL_ILLOPN;
+ addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ }
+ //siginfo.si_signo = sig;
+ //siginfo.si_code = code;
+ //siginfo.si_errno = 0;
+ //siginfo.si_addr = addr;
+ //siginfo.si_imm = vector;
+ //siginfo.si_flags = __ISR_VALID;
+ //siginfo.si_isr = isr;
+ //force_sig_info(sig, &siginfo, current);
+ //return;
+ } //else if (ia64_done_with_exception(regs))
+ //return;
+ sprintf(buf, "NaT consumption");
+ break;
+
+ case 31: /* Unsupported Data Reference */
+ if (user_mode(regs)) {
+ //siginfo.si_signo = SIGILL;
+ //siginfo.si_code = ILL_ILLOPN;
+ //siginfo.si_errno = 0;
+ //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ //siginfo.si_imm = vector;
+ //siginfo.si_flags = __ISR_VALID;
+ //siginfo.si_isr = isr;
+ //force_sig_info(SIGILL, &siginfo, current);
+ //return;
+ }
+ sprintf(buf, "Unsupported data reference");
+ break;
+
+ case 29: /* Debug */
+ case 35: /* Taken Branch Trap */
+ case 36: /* Single Step Trap */
+ //if (fsys_mode(current, regs)) {}
+ switch (vector) {
+ case 29:
+ //siginfo.si_code = TRAP_HWBKPT;
+#ifdef CONFIG_ITANIUM
+ /*
+ * Erratum 10 (IFA may contain incorrect address) now has
+ * "NoFix" status. There are no plans for fixing this.
+ */
+ if (ia64_psr(regs)->is == 0)
+ ifa = regs->cr_iip;
+#endif
+ break;
+ case 35: ifa = 0; break;
+ case 36: ifa = 0; break;
+ //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
+ //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
+ }
+ //siginfo.si_signo = SIGTRAP;
+ //siginfo.si_errno = 0;
+ //siginfo.si_addr = (void *) ifa;
+ //siginfo.si_imm = 0;
+ //siginfo.si_flags = __ISR_VALID;
+ //siginfo.si_isr = isr;
+ //force_sig_info(SIGTRAP, &siginfo, current);
+ //return;
+
+ case 32: /* fp fault */
+ case 33: /* fp trap */
+ //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
+ //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+ //siginfo.si_signo = SIGFPE;
+ //siginfo.si_errno = 0;
+ //siginfo.si_code = FPE_FLTINV;
+ //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ //siginfo.si_flags = __ISR_VALID;
+ //siginfo.si_isr = isr;
+ //siginfo.si_imm = 0;
+ //force_sig_info(SIGFPE, &siginfo, current);
+ //}
+ //return;
+ sprintf(buf, "FP fault/trap");
+ break;
+
+ case 34:
+ if (isr & 0x2) {
+ /* Lower-Privilege Transfer Trap */
+ /*
+ * Just clear PSR.lp and then return immediately: all the
+ * interesting work (e.g., signal delivery is done in the kernel
+ * exit path).
+ */
+ //ia64_psr(regs)->lp = 0;
+ //return;
+ sprintf(buf, "Lower-Privilege Transfer trap");
+ } else {
+ /* Unimplemented Instr. Address Trap */
+ if (user_mode(regs)) {
+ //siginfo.si_signo = SIGILL;
+ //siginfo.si_code = ILL_BADIADDR;
+ //siginfo.si_errno = 0;
+ //siginfo.si_flags = 0;
+ //siginfo.si_isr = 0;
+ //siginfo.si_imm = 0;
+ //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ //force_sig_info(SIGILL, &siginfo, current);
+ //return;
+ }
+ sprintf(buf, "Unimplemented Instruction Address fault");
+ }
+ break;
+
+ case 45:
+ printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
+ printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
+ regs->cr_iip, ifa, isr);
+ //force_sig(SIGSEGV, current);
+ break;
+
+ case 46:
+ printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
+ printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
+ regs->cr_iip, ifa, isr, iim);
+ //force_sig(SIGSEGV, current);
+ return;
+
+ case 47:
+ sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+ break;
+
+ default:
+ sprintf(buf, "Fault %lu", vector);
+ break;
+ }
+ //die_if_kernel(buf, regs, error);
+printk("ia64_fault: %s: reflecting\n",buf);
+reflect_interruption(ifa,isr,iim,regs,IA64_GENEX_VECTOR);
+//while(1);
+ //force_sig(SIGILL, current);
+}
+
+unsigned long running_on_sim = 0;
+
+void
+do_ssc(unsigned long ssc, struct pt_regs *regs)
+{
+ extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
+ unsigned long arg0, arg1, arg2, arg3, retval;
+ char buf[2];
+/**/ static int last_fd, last_count; // FIXME FIXME FIXME
+/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP
+/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat;
+ extern unsigned long vcpu_verbose, privop_trace;
+
+ arg0 = vcpu_get_gr(current,32);
+ switch(ssc) {
+ case SSC_PUTCHAR:
+ buf[0] = arg0;
+ buf[1] = '\0';
+ printf(buf);
+ break;
+ case SSC_GETCHAR:
+ retval = ia64_ssc(0,0,0,0,ssc);
+ vcpu_set_gr(current,8,retval);
+ break;
+ case SSC_WAIT_COMPLETION:
+ if (arg0) { // metaphysical address
+
+ arg0 = translate_domain_mpaddr(arg0);
+/**/ stat = (struct ssc_disk_stat *)__va(arg0);
+///**/ if (stat->fd == last_fd) stat->count = last_count;
+/**/ stat->count = last_count;
+//if (last_count >= PAGE_SIZE) printf("ssc_wait: stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count);
+///**/ retval = ia64_ssc(arg0,0,0,0,ssc);
+/**/ retval = 0;
+ }
+ else retval = -1L;
+ vcpu_set_gr(current,8,retval);
+ break;
+ case SSC_OPEN:
+ arg1 = vcpu_get_gr(current,33); // access rights
+if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. (ignoring...)\n"); arg0 = 0; }
+ if (arg0) { // metaphysical address
+ arg0 = translate_domain_mpaddr(arg0);
+ retval = ia64_ssc(arg0,arg1,0,0,ssc);
+ }
+ else retval = -1L;
+ vcpu_set_gr(current,8,retval);
+ break;
+ case SSC_WRITE:
+ case SSC_READ:
+//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n");
+ arg1 = vcpu_get_gr(current,33);
+ arg2 = vcpu_get_gr(current,34);
+ arg3 = vcpu_get_gr(current,35);
+ if (arg2) { // metaphysical address of descriptor
+ struct ssc_disk_req *req;
+ unsigned long mpaddr, paddr;
+ long len;
+
+ arg2 = translate_domain_mpaddr(arg2);
+ req = (struct ssc_disk_req *)__va(arg2);
+ req->len &= 0xffffffffL; // avoid strange bug
+ len = req->len;
+/**/ last_fd = arg1;
+/**/ last_count = len;
+ mpaddr = req->addr;
+//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx ",last_fd,mpaddr,len);
+ retval = 0;
+ if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) {
+ // do partial page first
+ req->addr = translate_domain_mpaddr(mpaddr);
+ req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK);
+ len -= req->len; mpaddr += req->len;
+ retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+ arg3 += req->len; // file offset
+/**/ last_stat.fd = last_fd;
+/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
+//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval);
+ }
+ if (retval >= 0) while (len > 0) {
+ req->addr = translate_domain_mpaddr(mpaddr);
+ req->len = (len > PAGE_SIZE) ? PAGE_SIZE : len;
+ len -= PAGE_SIZE; mpaddr += PAGE_SIZE;
+ retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+ arg3 += req->len; // file offset
+// TEMP REMOVED AGAIN arg3 += req->len; // file offset
+/**/ last_stat.fd = last_fd;
+/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
+//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval);
+ }
+ // set it back to the original value
+ req->len = last_count;
+ }
+ else retval = -1L;
+ vcpu_set_gr(current,8,retval);
+//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval);
+ break;
+ case SSC_CONNECT_INTERRUPT:
+ arg1 = vcpu_get_gr(current,33);
+ arg2 = vcpu_get_gr(current,34);
+ arg3 = vcpu_get_gr(current,35);
+ if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not implemented on hardware. (ignoring...)\n"); break; }
+ (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc);
+ break;
+ case SSC_NETDEV_PROBE:
+ vcpu_set_gr(current,8,-1L);
+ break;
+ default:
+ printf("ia64_handle_break: bad ssc code %lx, iip=%p, b0=%p... spinning\n",ssc,regs->cr_iip,regs->b0);
+ while(1);
+ break;
+ }
+ vcpu_increment_iip(current);
+}
+
+int first_break = 1;
+
+void
+ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
+{
+ struct domain *d = (struct domain *) current->domain;
+ struct vcpu *v = (struct vcpu *) current;
+ extern unsigned long running_on_sim;
+
+ if (first_break) {
+ if (platform_is_hp_ski()) running_on_sim = 1;
+ else running_on_sim = 0;
+ first_break = 0;
+ }
+ if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
+ if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
+ else do_ssc(vcpu_get_gr(current,36), regs);
+ }
+ else if (iim == d->arch.breakimm) {
+ if (ia64_hypercall(regs))
+ vcpu_increment_iip(current);
+ }
+ else if (!PSCB(v,interrupt_collection_enabled)) {
+ if (ia64_hyperprivop(iim,regs))
+ vcpu_increment_iip(current);
+ }
+ else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR);
+}
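+// Break dispatch summary: iim 0x80001/0x80002 are HP-Ski simulator SSC calls,
+// iim == d->arch.breakimm is the Xen hypercall break, any other break taken
+// with the virtual psr.ic off is handled as a hyperprivop, and everything
+// else is reflected to the guest as a break fault.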
+
+void
+ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir)
+{
+ IA64FAULT vector;
+ struct domain *d = current->domain;
+ struct vcpu *v = current;
+ // FIXME: no need to pass itir in to this routine as we need to
+ // compute the virtual itir anyway (based on domain's RR.ps)
+ // AND ACTUALLY reflect_interruption doesn't use it anyway!
+ itir = vcpu_get_itir_on_fault(v,ifa);
+ vector = priv_emulate(current,regs,isr);
+ if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) {
+ reflect_interruption(ifa,isr,itir,regs,vector);
+ }
+}
+
+#define INTR_TYPE_MAX 10
+UINT64 int_counts[INTR_TYPE_MAX];
+
+void
+ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector)
+{
+ struct domain *d = (struct domain *) current->domain;
+ struct vcpu *v = (struct vcpu *) current;
+ unsigned long check_lazy_cover = 0;
+ unsigned long psr = regs->cr_ipsr;
+ unsigned long itir = vcpu_get_itir_on_fault(v,ifa);
+
+ if (!(psr & IA64_PSR_CPL)) {
+ printk("ia64_handle_reflection: reflecting with priv=0!!\n");
+ }
+ // FIXME: no need to pass itir in to this routine as we need to
+ // compute the virtual itir anyway (based on domain's RR.ps)
+ // AND ACTUALLY reflect_interruption doesn't use it anyway!
+ itir = vcpu_get_itir_on_fault(v,ifa);
+ switch(vector) {
+ case 8:
+ vector = IA64_DIRTY_BIT_VECTOR; break;
+ case 9:
+ vector = IA64_INST_ACCESS_BIT_VECTOR; break;
+ case 10:
+ check_lazy_cover = 1;
+ vector = IA64_DATA_ACCESS_BIT_VECTOR; break;
+ case 20:
+ check_lazy_cover = 1;
+ vector = IA64_PAGE_NOT_PRESENT_VECTOR; break;
+ case 22:
+ vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break;
+ case 23:
+ check_lazy_cover = 1;
+ vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break;
+ case 25:
+ vector = IA64_DISABLED_FPREG_VECTOR;
+ break;
+ case 26:
+printf("*** NaT fault... attempting to handle as privop\n");
+printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr);
+ vector = priv_emulate(v,regs,isr);
+ if (vector == IA64_NO_FAULT) {
+printf("*** Handled privop masquerading as NaT fault\n");
+ return;
+ }
+ vector = IA64_NAT_CONSUMPTION_VECTOR; break;
+ case 27:
+//printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc());
+ itir = iim;
+ vector = IA64_SPECULATION_VECTOR; break;
+ case 30:
+ // FIXME: Should we handle unaligned refs in Xen??
+ vector = IA64_UNALIGNED_REF_VECTOR; break;
+ default:
+ printf("ia64_handle_reflection: unhandled vector=0x%lx\n",vector);
+ while(vector);
+ return;
+ }
+ if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return;
+ reflect_interruption(ifa,isr,itir,regs,vector);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/regionreg.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/regionreg.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,376 @@
+/*
+ * Region register and region id management
+ *
+ * Copyright (C) 2001-2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ * Bret Mckee (bret.mckee@xxxxxx)
+ *
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/regionreg.h>
+#include <asm/vhpt.h>
+#include <asm/vcpu.h>
+extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info);
+
+
+#define IA64_MIN_IMPL_RID_BITS (IA64_MIN_IMPL_RID_MSB+1)
+#define IA64_MAX_IMPL_RID_BITS 24
+
+#define MIN_RIDS (1 << IA64_MIN_IMPL_RID_BITS)
+#define MIN_RID_MAX (MIN_RIDS - 1)
+#define MIN_RID_MASK (MIN_RIDS - 1)
+#define MAX_RIDS (1 << (IA64_MAX_IMPL_RID_BITS))
+#define MAX_RID (MAX_RIDS - 1)
+#define MAX_RID_BLOCKS (1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
+#define RIDS_PER_RIDBLOCK MIN_RIDS
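+// RID space sketch: the 2^IA64_MAX_IMPL_RID_BITS region ids are carved into
+// MAX_RID_BLOCKS blocks of MIN_RIDS ids each; block 0 is kept for Xen and
+// metaphysical mappings (allocate_rid_range() below starts scanning at block
+// n_rid_blocks) and every other block is owned by at most one domain,
+// tracked in ridblock_owner[].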
+
+#if 0
+// following already defined in include/asm-ia64/gcc_intrin.h
+// it should probably be ifdef'd out from there to ensure all region
+// register usage is encapsulated in this file
+static inline unsigned long
+ia64_get_rr (unsigned long rr)
+{
+ unsigned long r;
+ __asm__ __volatile__ (";;mov %0=rr[%1];;":"=r"(r):"r"(rr):"memory");
+ return r;
+}
+
+static inline void
+ia64_set_rr (unsigned long rr, unsigned long rrv)
+{
+ __asm__ __volatile__ (";;mov rr[%0]=%1;;"::"r"(rr),"r"(rrv):"memory");
+}
+#endif
+
+// use this to allocate a rid out of the "Xen reserved rid block"
+unsigned long allocate_reserved_rid(void)
+{
+ static unsigned long currentrid = XEN_DEFAULT_RID;
+ unsigned long t = currentrid;
+
+ unsigned long max = RIDS_PER_RIDBLOCK;
+
+ if (++currentrid >= max) return(-1UL);
+ return t;
+}
+
+
+// returns -1 if none available
+unsigned long allocate_metaphysical_rr(void)
+{
+ ia64_rr rrv;
+
+ rrv.rid = allocate_reserved_rid();
+ rrv.ps = PAGE_SHIFT;
+ rrv.ve = 0;
+ return rrv.rrval;
+}
+
+int deallocate_metaphysical_rid(unsigned long rid)
+{
+ // fix this when the increment allocation mechanism is fixed.
+ return 1;
+}
+
+/*************************************
+ Region Block setup/management
+*************************************/
+
+static int implemented_rid_bits = 0;
+static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 };
+
+void get_impl_rid_bits(void)
+{
+ // FIXME (call PAL)
+//#ifdef CONFIG_MCKINLEY
+ implemented_rid_bits = IA64_MAX_IMPL_RID_BITS;
+//#else
+//#error "rid ranges won't work on Merced"
+//#endif
+ if (implemented_rid_bits <= IA64_MIN_IMPL_RID_BITS || implemented_rid_bits > IA64_MAX_IMPL_RID_BITS)
+ BUG();
+}
+
+
+/*
+ * Allocate a power-of-two-sized chunk of region id space -- one or more
+ * "rid blocks"
+ */
+int allocate_rid_range(struct domain *d, unsigned long ridbits)
+{
+ int i, j, n_rid_blocks;
+
+ if (implemented_rid_bits == 0) get_impl_rid_bits();
+
+ if (ridbits >= IA64_MAX_IMPL_RID_BITS)
+ ridbits = IA64_MAX_IMPL_RID_BITS - 1;
+
+ if (ridbits < IA64_MIN_IMPL_RID_BITS)
+ ridbits = IA64_MIN_IMPL_RID_BITS;
+
+ // convert to rid_blocks and find one
+ n_rid_blocks = ridbits - IA64_MIN_IMPL_RID_BITS + 1;
+
+ // skip over block 0, reserved for "meta-physical mappings (and Xen)"
+ for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) {
+ if (ridblock_owner[i] == NULL) {
+ for (j = i; j < i + n_rid_blocks; ++j) {
+ if (ridblock_owner[j]) break;
+ }
+ if (ridblock_owner[j] == NULL) break;
+ }
+ }
+
+ if (i >= MAX_RID_BLOCKS) return 0;
+
+ // found an unused block:
+ // (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits)
+ // mark this block as owned
+ for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d;
+
+ // setup domain struct
+ d->arch.rid_bits = ridbits;
+ d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS; d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
+printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n",
+d,d->arch.starting_rid, d->arch.ending_rid);
+
+ return 1;
+}
+
+
+int deallocate_rid_range(struct domain *d)
+{
+ int i;
+ int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS;
+ int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS;
+
+ return 1; // KLUDGE ALERT
+ //
+ // not all domains will have allocated RIDs (physical mode loaders for instance)
+ //
+ if (d->arch.rid_bits == 0) return 1;
+
+#ifdef DEBUG
+ for (i = rid_block_start; i < rid_block_end; ++i) {
+ ASSERT(ridblock_owner[i] == d);
+ }
+#endif
+
+ for (i = rid_block_start; i < rid_block_end; ++i)
+ ridblock_owner[i] = NULL;
+
+ d->arch.rid_bits = 0;
+ d->arch.starting_rid = 0;
+ d->arch.ending_rid = 0;
+ return 1;
+}
+
+
+static inline void
+set_rr_no_srlz(unsigned long rr, unsigned long rrval)
+{
+ ia64_set_rr(rr, vmMangleRID(rrval));
+}
+
+void
+set_rr(unsigned long rr, unsigned long rrval)
+{
+ ia64_set_rr(rr, vmMangleRID(rrval));
+ ia64_srlz_d();
+}
+
+unsigned long
+get_rr(unsigned long rr)
+{
+ return vmUnmangleRID(ia64_get_rr(rr));
+}
+
+static inline int validate_page_size(unsigned long ps)
+{
+ switch(ps) {
+ case 12: case 13: case 14: case 16: case 18:
+ case 20: case 22: case 24: case 26: case 28:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+// validates and changes a single region register
+// in the currently executing domain
+// Passing a value of -1 is a (successful) no-op
+// NOTE: DOES NOT SET VCPU's rrs[x] value!!
+int set_one_rr(unsigned long rr, unsigned long val)
+{
+ struct vcpu *v = current;
+ unsigned long rreg = REGION_NUMBER(rr);
+ ia64_rr rrv, newrrv, memrrv;
+ unsigned long newrid;
+
+ if (val == -1) return 1;
+
+ rrv.rrval = val;
+ newrrv.rrval = 0;
+ newrid = v->arch.starting_rid + rrv.rid;
+
+ if (newrid > v->arch.ending_rid) {
+ printk("can't set rr%d to %lx, starting_rid=%lx,"
+ "ending_rid=%lx, val=%lx\n", rreg, newrid,
+ v->arch.starting_rid,v->arch.ending_rid,val);
+ return 0;
+ }
+
+#ifdef CONFIG_VTI
+ memrrv.rrval = rrv.rrval;
+ if (rreg == 7) {
+ newrrv.rid = newrid;
+ newrrv.ve = VHPT_ENABLED_REGION_7;
+ newrrv.ps = IA64_GRANULE_SHIFT;
+ ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
+ v->vcpu_info->arch.privregs);
+ }
+ else {
+ newrrv.rid = newrid;
+ // FIXME? region 6 needs to be uncached for EFI to work
+ if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7;
+ else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6;
+ newrrv.ps = PAGE_SHIFT;
+ if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
+ set_rr(rr,newrrv.rrval);
+ }
+#else
+ memrrv.rrval = rrv.rrval;
+ newrrv.rid = newrid;
+ newrrv.ve = 1; // VHPT now enabled for region 7!!
+ newrrv.ps = PAGE_SHIFT;
+ if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
+ if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
+ v->vcpu_info->arch.privregs);
+ else set_rr(rr,newrrv.rrval);
+#endif
+ return 1;
+}
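+// Note: the guest-visible rid is offset into the domain's allocated range
+// (newrid = starting_rid + rrv.rid); physicalize_rid()/virtualize_rid()
+// further down apply the same offset in each direction when converting rr
+// values between the guest's view and the machine's.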
+
+// set rr0 to the passed rid (for metaphysical mode so don't use domain offset)
+int set_metaphysical_rr0(void)
+{
+ struct vcpu *v = current;
+ ia64_rr rrv;
+
+// rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING
+ set_rr(0,v->arch.metaphysical_rr0);
+}
+
+// validates/changes region registers 0-6 in the currently executing domain
+// Note that this is the one and only SP API (other than executing a privop)
+// for a domain to use to change region registers
+int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3,
+ u64 rr4, u64 rr5, u64 rr6, u64 rr7)
+{
+ if (!set_one_rr(0x0000000000000000L, rr0)) return 0;
+ if (!set_one_rr(0x2000000000000000L, rr1)) return 0;
+ if (!set_one_rr(0x4000000000000000L, rr2)) return 0;
+ if (!set_one_rr(0x6000000000000000L, rr3)) return 0;
+ if (!set_one_rr(0x8000000000000000L, rr4)) return 0;
+ if (!set_one_rr(0xa000000000000000L, rr5)) return 0;
+ if (!set_one_rr(0xc000000000000000L, rr6)) return 0;
+ if (!set_one_rr(0xe000000000000000L, rr7)) return 0;
+ return 1;
+}
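+// The constants above are the region bases: each of the eight 2^61-byte
+// virtual regions is selected by address bits 63:61, so region n starts at
+// n << 61 (the bases step by 0x2000000000000000), which is what set_one_rr()
+// recovers via REGION_NUMBER().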
+
+void init_all_rr(struct vcpu *v)
+{
+ ia64_rr rrv;
+
+ rrv.rrval = 0;
+ rrv.rrval = v->domain->arch.metaphysical_rr0;
+ rrv.ps = PAGE_SHIFT;
+ rrv.ve = 1;
+if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); }
+ VCPU(v,rrs[0]) = -1;
+ VCPU(v,rrs[1]) = rrv.rrval;
+ VCPU(v,rrs[2]) = rrv.rrval;
+ VCPU(v,rrs[3]) = rrv.rrval;
+ VCPU(v,rrs[4]) = rrv.rrval;
+ VCPU(v,rrs[5]) = rrv.rrval;
+ rrv.ve = 0;
+ VCPU(v,rrs[6]) = rrv.rrval;
+// v->shared_info->arch.rrs[7] = rrv.rrval;
+}
+
+
+/* XEN/ia64 INTERNAL ROUTINES */
+
+unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval)
+{
+ ia64_rr rrv;
+
+ rrv.rrval = rrval;
+ rrv.rid += v->arch.starting_rid;
+ return rrv.rrval;
+}
+
+unsigned long
+virtualize_rid(struct vcpu *v, unsigned long rrval)
+{
+ ia64_rr rrv;
+
+ rrv.rrval = rrval;
+ rrv.rid -= v->arch.starting_rid;
+ return rrv.rrval;
+}
+
+// loads a thread's region register (0-6) state into
+// the real physical region registers. Returns the
+// (possibly mangled) bits to store into rr7
+// iff it is different than what is currently in physical
+// rr7 (because we have to go to assembly and physical mode
+// to change rr7). If no change to rr7 is required, returns 0.
+//
+unsigned long load_region_regs(struct vcpu *v)
+{
+ unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7;
+ // TODO: These probably should be validated
+ unsigned long bad = 0;
+
+ if (VCPU(v,metaphysical_mode)) {
+ ia64_rr rrv;
+
+ rrv.rrval = 0;
+ rrv.rid = v->domain->arch.metaphysical_rr0;
+ rrv.ps = PAGE_SHIFT;
+ rrv.ve = 1;
+ rr0 = rrv.rrval;
+ set_rr_no_srlz(0x0000000000000000L, rr0);
+ ia64_srlz_d();
+ }
+ else {
+ rr0 = VCPU(v,rrs[0]);
+ if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1;
+ }
+ rr1 = VCPU(v,rrs[1]);
+ rr2 = VCPU(v,rrs[2]);
+ rr3 = VCPU(v,rrs[3]);
+ rr4 = VCPU(v,rrs[4]);
+ rr5 = VCPU(v,rrs[5]);
+ rr6 = VCPU(v,rrs[6]);
+ rr7 = VCPU(v,rrs[7]);
+ if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2;
+ if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4;
+ if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8;
+ if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10;
+ if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20;
+ if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40;
+ if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80;
+ if (bad) {
+ panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad);
+ }
+ return 0;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/sn_console.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/sn_console.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,84 @@
+/*
+ * C-Brick Serial Port (and console) driver for SGI Altix machines.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#include <asm/acpi.h>
+#include <asm/sn/sn_sal.h>
+#include <xen/serial.h>
+
+void sn_putc(struct serial_port *, char);
+
+static struct uart_driver sn_sal_console = {
+ .putc = sn_putc,
+};
+
+/**
+ * early_sn_setup - early setup routine for SN platforms
+ *
+ * pulled from arch/ia64/sn/kernel/setup.c
+ */
+static void __init early_sn_setup(void)
+{
+ efi_system_table_t *efi_systab;
+ efi_config_table_t *config_tables;
+ struct ia64_sal_systab *sal_systab;
+ struct ia64_sal_desc_entry_point *ep;
+ char *p;
+ int i, j;
+
+ /*
+ * Parse enough of the SAL tables to locate the SAL entry point. Since console
+ * IO on SN2 is done via SAL calls, early_printk won't work without this.
+ *
+ * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
+ * Any changes to those files may have to be made here as well.
+ */
+ efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
+ config_tables = __va(efi_systab->tables);
+ for (i = 0; i < efi_systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
+ 0) {
+ sal_systab = __va(config_tables[i].table);
+ p = (char *)(sal_systab + 1);
+ for (j = 0; j < sal_systab->entry_count; j++) {
+ if (*p == SAL_DESC_ENTRY_POINT) {
+ ep = (struct ia64_sal_desc_entry_point
+ *)p;
+ ia64_sal_handler_init(__va
+ (ep->sal_proc),
+ __va(ep->gp));
+ return;
+ }
+ p += SAL_DESC_SIZE(*p);
+ }
+ }
+ }
+ /* Uh-oh, SAL not available?? */
+ printk(KERN_ERR "failed to find SAL entry point\n");
+}
+
+/**
+ * sn_serial_console_early_setup - Sets up early console output support
+ *
+ * pulled from drivers/serial/sn_console.c
+ */
+int __init sn_serial_console_early_setup(void)
+{
+ if (strcmp("sn2",acpi_get_sysname()))
+ return -1;
+
+ early_sn_setup(); /* Find SAL entry points */
+ serial_register_uart(0, &sn_sal_console, NULL);
+
+ return 0;
+}
+
+/*
+ * sn_putc - Send a character to the console, polled or interrupt mode
+ */
+void sn_putc(struct serial_port *port, char c)
+{
+ return ia64_sn_console_putc(c);
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vcpu.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/vcpu.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,1843 @@
+/*
+ * Virtualized CPU functions
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/vmx_vcpu.h>
+
+typedef union {
+ struct ia64_psr ia64_psr;
+ unsigned long i64;
+} PSR;
+
+//typedef struct pt_regs REGS;
+//typedef struct domain VCPU;
+
+// this def for vcpu_regs won't work if kernel stack is present
+#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs)
+#define PSCB(x,y) VCPU(x,y)
+#define PSCBX(x,y) x->arch.y
+
+#define TRUE 1
+#define FALSE 0
+#define IA64_PTA_SZ_BIT 2
+#define IA64_PTA_VF_BIT 8
+#define IA64_PTA_BASE_BIT 15
+#define IA64_PTA_LFMT (1UL << IA64_PTA_VF_BIT)
+#define IA64_PTA_SZ(x) (x##UL << IA64_PTA_SZ_BIT)
+
+#define STATIC
+
+#ifdef PRIVOP_ADDR_COUNT
+struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = {
+ { "=ifa", { 0 }, { 0 }, 0 },
+ { "thash", { 0 }, { 0 }, 0 },
+ 0
+};
+extern void privop_count_addr(unsigned long addr, int inst);
+#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst)
+#else
+#define PRIVOP_COUNT_ADDR(x,y) do {} while (0)
+#endif
+
+unsigned long dtlb_translate_count = 0;
+unsigned long tr_translate_count = 0;
+unsigned long phys_translate_count = 0;
+
+unsigned long vcpu_verbose = 0;
+#define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
+
+extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa);
+extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa);
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+
+UINT64
+vcpu_get_gr(VCPU *vcpu, unsigned reg)
+{
+ REGS *regs = vcpu_regs(vcpu);
+ UINT64 val;
+
+ if (!reg) return 0;
+ getreg(reg,&val,0,regs); // FIXME: handle NATs later
+ return val;
+}
+
+// returns:
+// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+// IA64_NO_FAULT otherwise
+IA64FAULT
+vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value)
+{
+ REGS *regs = vcpu_regs(vcpu);
+ long sof = (regs->cr_ifs) & 0x7f;
+
+ if (!reg) return IA64_ILLOP_FAULT;
+ if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+ setreg(reg,value,0,regs); // FIXME: handle NATs later
+ return IA64_NO_FAULT;
+}
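+// r0 is hard-wired to zero, so vcpu_get_gr() returns 0 for it and
+// vcpu_set_gr() faults; the sof check rejects writes beyond the current
+// register frame (static r1-r31 plus the sof stacked registers starting at
+// r32, with sof taken from cr.ifs).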
+
+/**************************************************************************
+ VCPU privileged application register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ if (reg == 44) return (vcpu_set_itc(vcpu,val));
+ else if (reg == 27) return (IA64_ILLOP_FAULT);
+ else if (reg == 24)
+ printf("warning: setting ar.eflg is a no-op; no IA-32 support\n");
+ else if (reg > 7) return (IA64_ILLOP_FAULT);
+ else PSCB(vcpu,krs[reg]) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
+{
+ if (reg == 24)
+ printf("warning: getting ar.eflg is a no-op; no IA-32 support\n");
+ else if (reg > 7) return (IA64_ILLOP_FAULT);
+ else *val = PSCB(vcpu,krs[reg]);
+ return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU processor status register access routines
+**************************************************************************/
+
+void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode)
+{
+ /* only do something if mode changes */
+ if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) {
+ if (newmode) set_metaphysical_rr0();
+ else if (PSCB(vcpu,rrs[0]) != -1)
+ set_one_rr(0, PSCB(vcpu,rrs[0]));
+ PSCB(vcpu,metaphysical_mode) = newmode;
+ }
+}
+
+IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu)
+{
+ vcpu_set_metaphysical_mode(vcpu,TRUE);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+ struct ia64_psr psr, imm, *ipsr;
+ REGS *regs = vcpu_regs(vcpu);
+
+ //PRIVOP_COUNT_ADDR(regs,_RSM);
+ // TODO: All of these bits need to be virtualized
+ // TODO: Only allowed for current vcpu
+ __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+ ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+ imm = *(struct ia64_psr *)&imm24;
+ // interrupt flag
+ if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0;
+ if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 0;
+ // interrupt collection flag
+ //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0;
+ // just handle psr.up and psr.pp for now
+ if (imm24 & ~(IA64_PSR_BE | IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP
+ | IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT
+ | IA64_PSR_DFL | IA64_PSR_DFH))
+ return (IA64_ILLOP_FAULT);
+ if (imm.dfh) ipsr->dfh = 0;
+ if (imm.dfl) ipsr->dfl = 0;
+ if (imm.pp) { ipsr->pp = 0; psr.pp = 0; }
+ if (imm.up) { ipsr->up = 0; psr.up = 0; }
+ if (imm.sp) { ipsr->sp = 0; psr.sp = 0; }
+ if (imm.be) ipsr->be = 0;
+ if (imm.dt) vcpu_set_metaphysical_mode(vcpu,TRUE);
+ __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+ return IA64_NO_FAULT;
+}
+
+extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu);
+#define SPURIOUS_VECTOR 0xf
+
+IA64FAULT vcpu_set_psr_dt(VCPU *vcpu)
+{
+ vcpu_set_metaphysical_mode(vcpu,FALSE);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_i(VCPU *vcpu)
+{
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ PSCB(vcpu,interrupt_collection_enabled) = 1;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+ struct ia64_psr psr, imm, *ipsr;
+ REGS *regs = vcpu_regs(vcpu);
+ UINT64 mask, enabling_interrupts = 0;
+
+ //PRIVOP_COUNT_ADDR(regs,_SSM);
+ // TODO: All of these bits need to be virtualized
+ __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+ imm = *(struct ia64_psr *)&imm24;
+ ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+ // just handle psr.sp,pp and psr.i,ic (and user mask) for now
+ mask = IA64_PSR_PP|IA64_PSR_SP|IA64_PSR_I|IA64_PSR_IC|IA64_PSR_UM |
+ IA64_PSR_DT|IA64_PSR_DFL|IA64_PSR_DFH;
+ if (imm24 & ~mask) return (IA64_ILLOP_FAULT);
+ if (imm.dfh) ipsr->dfh = 1;
+ if (imm.dfl) ipsr->dfl = 1;
+ if (imm.pp) { ipsr->pp = 1; psr.pp = 1; }
+ if (imm.sp) { ipsr->sp = 1; psr.sp = 1; }
+ if (imm.i) {
+ if (!PSCB(vcpu,interrupt_delivery_enabled)) {
+//printf("vcpu_set_psr_sm: psr.ic 0->1 ");
+ enabling_interrupts = 1;
+ }
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ }
+ if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
+ // TODO: do this faster
+ if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
+ if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
+ if (imm.ac) { ipsr->ac = 1; psr.ac = 1; }
+ if (imm.up) { ipsr->up = 1; psr.up = 1; }
+ if (imm.be) {
+ printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE);
+ __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+#if 0 // now done with deliver_pending_interrupts
+ if (enabling_interrupts) {
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) {
+//printf("with interrupts pending\n");
+ return IA64_EXTINT_VECTOR;
+ }
+//else printf("but nothing pending\n");
+ }
+#endif
+ if (enabling_interrupts &&
+ vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+ struct ia64_psr psr, newpsr, *ipsr;
+ REGS *regs = vcpu_regs(vcpu);
+ UINT64 enabling_interrupts = 0;
+
+ // TODO: All of these bits need to be virtualized
+ __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+ newpsr = *(struct ia64_psr *)&val;
+ ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+ // just handle psr.up and psr.pp for now
+ //if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP)) return
(IA64_ILLOP_FAULT);
+ // however trying to set other bits can't be an error as it is in ssm
+ if (newpsr.dfh) ipsr->dfh = 1;
+ if (newpsr.dfl) ipsr->dfl = 1;
+ if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; }
+ if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
+ if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; }
+ if (newpsr.i) {
+ if (!PSCB(vcpu,interrupt_delivery_enabled))
+ enabling_interrupts = 1;
+ PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ }
+ if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
+ if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
+ if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
+ if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; }
+ if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
+ if (newpsr.dt && newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE);
+ else vcpu_set_metaphysical_mode(vcpu,TRUE);
+ if (newpsr.be) {
+ printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ //__asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
+#if 0 // now done with deliver_pending_interrupts
+ if (enabling_interrupts) {
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ return IA64_EXTINT_VECTOR;
+ }
+#endif
+ if (enabling_interrupts &&
+ vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval)
+{
+ UINT64 psr;
+ struct ia64_psr newpsr;
+
+ // TODO: This needs to return a "filtered" view of
+ // the psr, not the actual psr. Probably the psr needs
+ // to be a field in regs (in addition to ipsr).
+ __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
+ newpsr = *(struct ia64_psr *)&psr;
+ if (newpsr.cpl == 2) newpsr.cpl = 0;
+ if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1;
+ else newpsr.i = 0;
+ if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1;
+ else newpsr.ic = 0;
+ *pval = *(unsigned long *)&newpsr;
+ return IA64_NO_FAULT;
+}
+
+BOOLEAN vcpu_get_psr_ic(VCPU *vcpu)
+{
+ return !!PSCB(vcpu,interrupt_collection_enabled);
+}
+
+BOOLEAN vcpu_get_psr_i(VCPU *vcpu)
+{
+ return !!PSCB(vcpu,interrupt_delivery_enabled);
+}
+
+UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
+{
+ UINT64 dcr = PSCBX(vcpu,dcr);
+ PSR psr = {0};
+
+ //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr);
+ psr.i64 = prevpsr;
+ psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
+ psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1;
+ psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled);
+ psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled);
+ psr.ia64_psr.bn = PSCB(vcpu,banknum);
+ psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1;
+ if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! fool domain
+ // psr.pk = 1;
+ //printf("returns 0x%016lx...",psr.i64);
+ return psr.i64;
+}
+
+/**************************************************************************
+ VCPU control register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval)
+{
+extern unsigned long privop_trace;
+//privop_trace=0;
+//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip));
+ // Reads of cr.dcr on Xen always have the sign bit set, so
+ // a domain can differentiate whether it is running on SP or not
+ *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCBX(vcpu,iva) & ~0x7fffL;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,pta);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval)
+{
+ //REGS *regs = vcpu_regs(vcpu);
+ //*pval = regs->cr_ipsr;
+ *pval = PSCB(vcpu,ipsr);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,isr);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval)
+{
+ //REGS *regs = vcpu_regs(vcpu);
+ //*pval = regs->cr_iip;
+ *pval = PSCB(vcpu,iip);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval)
+{
+ UINT64 val = PSCB(vcpu,ifa);
+ REGS *regs = vcpu_regs(vcpu);
+ PRIVOP_COUNT_ADDR(regs,_GET_IFA);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr)
+{
+ ia64_rr rr;
+
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+ return(rr.ps);
+}
+
+unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
+{
+ ia64_rr rr;
+
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+ return(rr.rid);
+}
+
+unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa)
+{
+ ia64_rr rr;
+
+ rr.rrval = 0;
+ rr.ps = vcpu_get_rr_ps(vcpu,ifa);
+ rr.rid = vcpu_get_rr_rid(vcpu,ifa);
+ return (rr.rrval);
+}
+
+
+IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
+{
+ UINT64 val = PSCB(vcpu,itir);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
+{
+ UINT64 val = PSCB(vcpu,iipa);
+ // SP entry code does not save iipa yet nor does it get
+ // properly delivered in the pscb
+ printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
+{
+ //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs;
+ //*pval = PSCB(vcpu,regs).cr_ifs;
+ *pval = PSCB(vcpu,ifs);
+ PSCB(vcpu,incomplete_regframe) = 0;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval)
+{
+ UINT64 val = PSCB(vcpu,iim);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval)
+{
+ //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval);
+ UINT64 val = PSCB(vcpu,iha);
+ REGS *regs = vcpu_regs(vcpu);
+ PRIVOP_COUNT_ADDR(regs,_THASH);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+ // Reads of cr.dcr on SP always have the sign bit set, so
+ // a domain can differentiate whether it is running on SP or not
+ // Thus, writes of DCR should ignore the sign bit
+//verbose("vcpu_set_dcr: called\n");
+ PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
+{
+ PSCBX(vcpu,iva) = val & ~0x7fffL;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val)
+{
+ if (val & IA64_PTA_LFMT) {
+ printf("*** No support for VHPT long format yet!!\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT;
+ if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT;
+ PSCB(vcpu,pta) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,ipsr) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,isr) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,iip) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_increment_iip(VCPU *vcpu)
+{
+ REGS *regs = vcpu_regs(vcpu);
+ struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+ if (ipsr->ri == 2) { ipsr->ri=0; regs->cr_iip += 16; }
+ else ipsr->ri++;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,ifa) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,itir) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val)
+{
+ // SP entry code does not save iipa yet nor does it get
+ // properly delivered in the pscb
+ printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
+ PSCB(vcpu,iipa) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val)
+{
+ //REGS *regs = vcpu_regs(vcpu);
+ PSCB(vcpu,ifs) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,iim) = val;
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val)
+{
+ PSCB(vcpu,iha) = val;
+ return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU interrupt control register access routines
+**************************************************************************/
+
+void vcpu_pend_unspecified_interrupt(VCPU *vcpu)
+{
+ PSCB(vcpu,pending_interruption) = 1;
+}
+
+void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
+{
+ if (vector & ~0xff) {
+ printf("vcpu_pend_interrupt: bad vector\n");
+ return;
+ }
+#ifdef CONFIG_VTI
+ if ( VMX_DOMAIN(vcpu) ) {
+ set_bit(vector,VPD_CR(vcpu,irr));
+ } else
+#endif // CONFIG_VTI
+ {
+ /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */
+ if (test_bit(vector,PSCBX(vcpu,irr))) {
+//printf("vcpu_pend_interrupt: overrun\n");
+ }
+ set_bit(vector,PSCBX(vcpu,irr));
+ PSCB(vcpu,pending_interruption) = 1;
+ }
+
+#if 0
+ /* Keir: I think you should unblock when an interrupt is pending. */
+ {
+ int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
+ vcpu_unblock(vcpu);
+ if ( running )
+ smp_send_event_check_cpu(vcpu->processor);
+ }
+#endif
+}
+
+void early_tick(VCPU *vcpu)
+{
+ UINT64 *p = &PSCBX(vcpu,irr[3]);
+ printf("vcpu_check_pending: about to deliver early tick\n");
+ printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p);
+}
+
+#define IA64_TPR_MMI 0x10000
+#define IA64_TPR_MIC 0x000f0
+
+/* checks to see if a VCPU has any unmasked pending interrupts
+ * if so, returns the highest, else returns SPURIOUS_VECTOR */
+/* NOTE: Since this gets called from vcpu_get_ivr() and the
+ * semantics of "mov rx=cr.ivr" ignore the setting of the psr.i bit,
+ * this routine also ignores pscb.interrupt_delivery_enabled
+ * and this must be checked independently; see vcpu_deliverable_interrupts() */
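+/* Masking sketch: irr[] is scanned from the top word down; a pending vector
+ * is only returned if no equal-or-higher vector is already in insvc[],
+ * tpr.mmi is clear, and the vector lies above the class masked by tpr.mic
+ * (vectors up to (tpr.mic << 4) + 15 are masked). */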
+UINT64 vcpu_check_pending_interrupts(VCPU *vcpu)
+{
+ UINT64 *p, *q, *r, bits, bitnum, mask, i, vector;
+
+ p = &PSCBX(vcpu,irr[3]);
+ /* q = &PSCB(vcpu,delivery_mask[3]); */
+ r = &PSCBX(vcpu,insvc[3]);
+ for (i = 3; ; p--, q--, r--, i--) {
+ bits = *p /* & *q */;
+ if (bits) break; // got a potential interrupt
+ if (*r) {
+ // nothing in this word which is pending+inservice
+ // but there is one inservice which masks lower
+ return SPURIOUS_VECTOR;
+ }
+ if (i == 0) {
+ // checked all bits... nothing pending+inservice
+ return SPURIOUS_VECTOR;
+ }
+ }
+ // have a pending,deliverable interrupt... see if it is masked
+ bitnum = ia64_fls(bits);
+//printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...",bitnum);
+ vector = bitnum+(i*64);
+ mask = 1L << bitnum;
+//printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...",vector);
+ if (*r >= mask) {
+ // masked by equal inservice
+//printf("but masked by equal inservice\n");
+ return SPURIOUS_VECTOR;
+ }
+ if (PSCB(vcpu,tpr) & IA64_TPR_MMI) {
+ // tpr.mmi is set
+//printf("but masked by tpr.mmi\n");
+ return SPURIOUS_VECTOR;
+ }
+ if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) {
+ //tpr.mic masks class
+//printf("but masked by tpr.mic\n");
+ return SPURIOUS_VECTOR;
+ }
+
+//printf("returned to caller\n");
+#if 0
+if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ UINT64 now = ia64_get_itc();
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+ if (now < itm) early_tick(vcpu);
+
+}
+#endif
+ return vector;
+}
+
+UINT64 vcpu_deliverable_interrupts(VCPU *vcpu)
+{
+ return (vcpu_get_psr_i(vcpu) &&
+ vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR);
+}
+
+UINT64 vcpu_deliverable_timer(VCPU *vcpu)
+{
+ return (vcpu_get_psr_i(vcpu) &&
+ vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv));
+}
+
+IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+ //TODO: Implement this
+ printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n");
+ //*pval = 0;
+ *pval = ia64_getreg(_IA64_REG_CR_LID);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
+{
+ int i;
+ UINT64 vector, mask;
+
+#define HEARTBEAT_FREQ 16 // period in seconds
+#ifdef HEARTBEAT_FREQ
+#define N_DOMS 16 // number of domains tracked
+ static long count[N_DOMS] = { 0 };
+ static long nonclockcount[N_DOMS] = { 0 };
+ REGS *regs = vcpu_regs(vcpu);
+ unsigned domid = vcpu->domain->domain_id;
+#endif
+#ifdef IRQ_DEBUG
+ static char firstivr = 1;
+ static char firsttime[256];
+ if (firstivr) {
+ int i;
+ for (i=0;i<256;i++) firsttime[i]=1;
+ firstivr=0;
+ }
+#endif
+
+ vector = vcpu_check_pending_interrupts(vcpu);
+ if (vector == SPURIOUS_VECTOR) {
+ PSCB(vcpu,pending_interruption) = 0;
+ *pval = vector;
+ return IA64_NO_FAULT;
+ }
+#ifdef HEARTBEAT_FREQ
+ if (domid >= N_DOMS) domid = N_DOMS-1;
+ if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) {
+ printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
+ domid, count[domid], nonclockcount[domid]);
+ //count[domid] = 0;
+ //dump_runq();
+ }
+ }
+ else nonclockcount[domid]++;
+#endif
+ // now have an unmasked, pending, deliverable vector!
+ // getting ivr has "side effects"
+#ifdef IRQ_DEBUG
+ if (firsttime[vector]) {
+ printf("*** First get_ivr on vector=%d,itc=%lx\n",
+ vector,ia64_get_itc());
+ firsttime[vector]=0;
+ }
+#endif
+ i = vector >> 6;
+ mask = 1L << (vector & 0x3f);
+//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector);
+ PSCBX(vcpu,insvc[i]) |= mask;
+ PSCBX(vcpu,irr[i]) &= ~mask;
+ //PSCB(vcpu,pending_interruption)--;
+ *pval = vector;
+ // if delivering a timer interrupt, remember domain_itm
+ if (vector == (PSCB(vcpu,itv) & 0xff)) {
+ PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm);
+ }
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,tpr);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = 0L; // reads of eoi always return 0
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+ printk("vcpu_get_irr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+ *pval = vcpu->irr[0];
+ return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+ printk("vcpu_get_irr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+ *pval = vcpu->irr[1];
+ return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+ printk("vcpu_get_irr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+ *pval = vcpu->irr[2];
+ return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
+{
+#ifndef IRR_USE_FIXED
+ printk("vcpu_get_irr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+ *pval = vcpu->irr[3];
+ return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,itv);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,pmv);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
+{
+ *pval = PSCB(vcpu,cmcv);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval)
+{
+ // fix this when setting values other than m-bit is supported
+ printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n");
+ *pval = (1L << 16);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval)
+{
+ // fix this when setting values other than m-bit is supported
+ printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n");
+ *pval = (1L << 16);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val)
+{
+ printf("vcpu_set_lid: Setting cr.lid is unsupported\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val)
+{
+ if (val & 0xff00) return IA64_RSVDREG_FAULT;
+ PSCB(vcpu,tpr) = val;
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
+{
+ UINT64 *p, bits, vec, bitnum;
+ int i;
+
+ p = &PSCBX(vcpu,insvc[3]);
+ for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
+ if (i < 0) {
+ printf("Trying to EOI interrupt when none are in-service.\r\n");
+ return;
+ }
+ bitnum = ia64_fls(bits);
+ vec = bitnum + (i*64);
+ /* clear the correct bit */
+ bits &= ~(1L << bitnum);
+ *p = bits;
+ /* clearing an eoi bit may unmask another pending interrupt... */
+ if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled...
+ // worry about this later... Linux only calls eoi
+ // with interrupts disabled
+ printf("Trying to EOI interrupt with interrupts enabled\r\n");
+ }
+ if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
+ PSCB(vcpu,pending_interruption) = 1;
+//printf("YYYYY vcpu_set_eoi: Successful\n");
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val)
+{
+ if (!(val & (1L << 16))) {
+ printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ // no place to save this state but nothing to do anyway
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val)
+{
+ if (!(val & (1L << 16))) {
+ printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ // no place to save this state but nothing to do anyway
+ return (IA64_NO_FAULT);
+}
+
+// parameter is a time interval specified in cycles
+void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles)
+{
+ PSCBX(vcpu,xen_timer_interval) = cycles;
+ vcpu_set_next_timer(vcpu);
+ printf("vcpu_enable_timer(%d): interval set to %d cycles\n",
+ PSCBX(vcpu,xen_timer_interval));
+ __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask));
+}
+
+IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val)
+{
+extern unsigned long privop_trace;
+//privop_trace=1;
+ if (val & 0xef00) return (IA64_ILLOP_FAULT);
+ PSCB(vcpu,itv) = val;
+ if (val & 0x10000) {
+printf("**** vcpu_set_itv(%d): vitm=%lx, setting to
0\n",val,PSCBX(vcpu,domain_itm));
+ PSCBX(vcpu,domain_itm) = 0;
+ }
+ else vcpu_enable_timer(vcpu,1000000L);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val)
+{
+ if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
+ PSCB(vcpu,pmv) = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val)
+{
+ if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
+ PSCB(vcpu,cmcv) = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU temporary register access routines
+**************************************************************************/
+UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index)
+{
+ if (index > 7) return 0;
+ return PSCB(vcpu,tmp[index]);
+}
+
+void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val)
+{
+ if (index <= 7) PSCB(vcpu,tmp[index]) = val;
+}
+
+/**************************************************************************
+Interval timer routines
+**************************************************************************/
+
+BOOLEAN vcpu_timer_disabled(VCPU *vcpu)
+{
+ UINT64 itv = PSCB(vcpu,itv);
+ return(!itv || !!(itv & 0x10000));
+}
+
+BOOLEAN vcpu_timer_inservice(VCPU *vcpu)
+{
+ UINT64 itv = PSCB(vcpu,itv);
+ return (test_bit(itv, PSCBX(vcpu,insvc)));
+}
+
+BOOLEAN vcpu_timer_expired(VCPU *vcpu)
+{
+ unsigned long domain_itm = PSCBX(vcpu,domain_itm);
+ unsigned long now = ia64_get_itc();
+
+ if (!domain_itm) return FALSE;
+ if (now < domain_itm) return FALSE;
+ if (vcpu_timer_disabled(vcpu)) return FALSE;
+ return TRUE;
+}
+
+void vcpu_safe_set_itm(unsigned long val)
+{
+ unsigned long epsilon = 100;
+ UINT64 now = ia64_get_itc();
+
+ local_irq_disable();
+ while (1) {
+//printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now);
+ ia64_set_itm(val);
+ if (val > (now = ia64_get_itc())) break;
+ val = now + epsilon;
+ epsilon <<= 1;
+ }
+ local_irq_enable();
+}
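vcpu_safe_set_itm has to program a match value against a counter that keeps running underneath it, so it retries with an exponentially growing slack until the match is definitely in the future. A standalone sketch of that retry loop, with read_counter()/write_match() as assumed stand-ins for ia64_get_itc()/ia64_set_itm():

    #include <stdint.h>

    extern uint64_t read_counter(void);   /* assumed: the free-running ITC */
    extern void write_match(uint64_t);    /* assumed: programs the ITM match */

    void safe_set_match(uint64_t val)
    {
            uint64_t slack = 100;
            uint64_t now;

            for (;;) {
                    write_match(val);
                    now = read_counter();
                    if (val > now)          /* match still ahead of the counter: done */
                            break;
                    val = now + slack;      /* counter already passed it: push it out */
                    slack <<= 1;            /* and widen the margin for the next try */
            }
    }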
+
+void vcpu_set_next_timer(VCPU *vcpu)
+{
+ UINT64 d = PSCBX(vcpu,domain_itm);
+ //UINT64 s = PSCBX(vcpu,xen_itm);
+ UINT64 s = local_cpu_data->itm_next;
+ UINT64 now = ia64_get_itc();
+ //UINT64 interval = PSCBX(vcpu,xen_timer_interval);
+
+ /* gloss over the wraparound problem for now... we know it exists
+ * but it doesn't matter right now */
+
+#if 0
+ /* ensure at least next SP tick is in the future */
+ if (!interval) PSCBX(vcpu,xen_itm) = now +
+#if 0
+ (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE :
+ DEFAULT_CLOCK_RATE);
+#else
+ 3000000;
+//printf("vcpu_set_next_timer: HACK!\n");
+#endif
+#if 0
+ if (PSCBX(vcpu,xen_itm) < now)
+ while (PSCBX(vcpu,xen_itm) < now + (interval>>1))
+ PSCBX(vcpu,xen_itm) += interval;
+#endif
+#endif
+
+ if (is_idle_task(vcpu->domain)) {
+ printf("****** vcpu_set_next_timer called during idle!!\n");
+ }
+ //s = PSCBX(vcpu,xen_itm);
+ if (d && (d > now) && (d < s)) {
+ vcpu_safe_set_itm(d);
+ //using_domain_as_itm++;
+ }
+ else {
+ vcpu_safe_set_itm(s);
+ //using_xen_as_itm++;
+ }
+}
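The choice in vcpu_set_next_timer boils down to: use the guest's deadline only when it is still in the future and earlier than Xen's own next tick, otherwise fall back to Xen's tick. As a one-function sketch (the names are illustrative, not the patch's interface):

    #include <stdint.h>

    /* d = guest's domain_itm, s = Xen's itm_next, now = current ITC */
    uint64_t pick_next_deadline(uint64_t d, uint64_t s, uint64_t now)
    {
            if (d && d > now && d < s)
                    return d;       /* guest timer fires first */
            return s;               /* otherwise keep Xen's own tick */
    }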
+
+IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val)
+{
+ UINT now = ia64_get_itc();
+
+ //if (val < now) val = now + 1000;
+//printf("*** vcpu_set_itm: called with %lx\n",val);
+ PSCBX(vcpu,domain_itm) = val;
+ vcpu_set_next_timer(vcpu);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
+{
+
+ UINT64 oldnow = ia64_get_itc();
+ UINT64 olditm = PSCBX(vcpu,domain_itm);
+ unsigned long d = olditm - oldnow;
+ unsigned long x = local_cpu_data->itm_next - oldnow;
+
+ UINT64 newnow = val, min_delta;
+
+#define DISALLOW_SETTING_ITC_FOR_NOW
+#ifdef DISALLOW_SETTING_ITC_FOR_NOW
+printf("vcpu_set_itc: Setting ar.itc is currently disabled\n");
+#else
+ local_irq_disable();
+ if (olditm) {
+printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
+ PSCBX(vcpu,domain_itm) = newnow + d;
+ }
+ local_cpu_data->itm_next = newnow + x;
+ d = PSCBX(vcpu,domain_itm);
+ x = local_cpu_data->itm_next;
+
+ ia64_set_itc(newnow);
+ if (d && (d > newnow) && (d < x)) {
+ vcpu_safe_set_itm(d);
+ //using_domain_as_itm++;
+ }
+ else {
+ vcpu_safe_set_itm(x);
+ //using_xen_as_itm++;
+ }
+ local_irq_enable();
+#endif
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval)
+{
+ //FIXME: Implement this
+ printf("vcpu_get_itm: Getting cr.itm is unsupported... continuing\n");
+ return (IA64_NO_FAULT);
+ //return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval)
+{
+ //TODO: Implement this
+ printf("vcpu_get_itc: Getting ar.itc is unsupported\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+void vcpu_pend_timer(VCPU *vcpu)
+{
+ UINT64 itv = PSCB(vcpu,itv) & 0xff;
+
+ if (vcpu_timer_disabled(vcpu)) return;
+ //if (vcpu_timer_inservice(vcpu)) return;
+ if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
+ // already delivered an interrupt for this so
+ // don't deliver another
+ return;
+ }
+#if 0
+ // attempt to flag "timer tick before it's due" source
+ {
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+ UINT64 now = ia64_get_itc();
+ if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n");
+ }
+#endif
+ vcpu_pend_interrupt(vcpu, itv);
+}
+
+// returns true if ready to deliver a timer interrupt too early
+UINT64 vcpu_timer_pending_early(VCPU *vcpu)
+{
+ UINT64 now = ia64_get_itc();
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+
+ if (vcpu_timer_disabled(vcpu)) return 0;
+ if (!itm) return 0;
+ return (vcpu_deliverable_timer(vcpu) && (now < itm));
+}
+
+//FIXME: This is a hack because everything dies if a timer tick is lost
+void vcpu_poke_timer(VCPU *vcpu)
+{
+ UINT64 itv = PSCB(vcpu,itv) & 0xff;
+ UINT64 now = ia64_get_itc();
+ UINT64 itm = PSCBX(vcpu,domain_itm);
+ UINT64 irr;
+
+ if (vcpu_timer_disabled(vcpu)) return;
+ if (!itm) return;
+ if (itv != 0xefL) {
+ printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv);
+ while(1);
+ }
+ // using 0xef instead of itv so can get real irr
+ if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) {
+ if (!test_bit(0xefL,PSCBX(vcpu,irr))) {
+ irr = ia64_getreg(_IA64_REG_CR_IRR3);
+ if (irr & (1L<<(0xef-0xc0))) return;
+if (now-itm>0x800000)
+printf("*** poking timer:
now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm());
+ vcpu_pend_timer(vcpu);
+ }
+ }
+}
+
+
+/**************************************************************************
+Privileged operation emulation routines
+**************************************************************************/
+
+IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa)
+{
+ PSCB(vcpu,tmp[0]) = ifa; // save ifa in vcpu structure, then specify IA64_FORCED_IFA
+ return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA;
+}
+
+
+IA64FAULT vcpu_rfi(VCPU *vcpu)
+{
+ // TODO: Only allowed for current vcpu
+ PSR psr;
+ UINT64 int_enable, regspsr = 0;
+ UINT64 ifs;
+ REGS *regs = vcpu_regs(vcpu);
+ extern void dorfirfi(void);
+
+ psr.i64 = PSCB(vcpu,ipsr);
+ if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2;
+ if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1;
+ int_enable = psr.ia64_psr.i;
+ if (psr.ia64_psr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
+ if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE);
+ else vcpu_set_metaphysical_mode(vcpu,TRUE);
+ psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1;
+ psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1;
+ psr.ia64_psr.bn = 1;
+ //psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken
+ if (psr.ia64_psr.be) {
+ printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
+ return (IA64_ILLOP_FAULT);
+ }
+ PSCB(vcpu,incomplete_regframe) = 0; // is this necessary?
+ ifs = PSCB(vcpu,ifs);
+ //if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs !=
regs->cr_ifs) {
+ //if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
+ if (ifs & regs->cr_ifs & 0x8000000000000000L) {
+ // TODO: validate PSCB(vcpu,iip)
+ // TODO: PSCB(vcpu,ipsr) = psr;
+ PSCB(vcpu,ipsr) = psr.i64;
+ // now set up the trampoline
+ regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!!
+ __asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory");
+ regs->cr_ipsr = regspsr & ~(IA64_PSR_I | IA64_PSR_IC |
IA64_PSR_BN);
+ }
+ else {
+ regs->cr_ipsr = psr.i64;
+ regs->cr_iip = PSCB(vcpu,iip);
+ }
+ PSCB(vcpu,interrupt_collection_enabled) = 1;
+ vcpu_bsw1(vcpu);
+ PSCB(vcpu,interrupt_delivery_enabled) = int_enable;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_cover(VCPU *vcpu)
+{
+ // TODO: Only allowed for current vcpu
+ REGS *regs = vcpu_regs(vcpu);
+
+ if (!PSCB(vcpu,interrupt_collection_enabled)) {
+ if (!PSCB(vcpu,incomplete_regframe))
+ PSCB(vcpu,ifs) = regs->cr_ifs;
+ else PSCB(vcpu,incomplete_regframe) = 0;
+ }
+ regs->cr_ifs = 0;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
+{
+ UINT64 pta = PSCB(vcpu,pta);
+ UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT;
+ UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1);
+ UINT64 Mask = (1L << pta_sz) - 1;
+ UINT64 Mask_60_15 = (Mask >> 15) & 0x3fffffffffff;
+ UINT64 compMask_60_15 = ~Mask_60_15;
+ //UINT64 rr_ps = RR_TO_PS(get_rr(vadr));
+ UINT64 rr_ps = vcpu_get_rr_ps(vcpu,vadr);
+ UINT64 VHPT_offset = (vadr >> rr_ps) << 3;
+ UINT64 VHPT_addr1 = vadr & 0xe000000000000000L;
+ UINT64 VHPT_addr2a =
+ ((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15;
+ UINT64 VHPT_addr2b =
+ ((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;;
+ UINT64 VHPT_addr3 = VHPT_offset & 0x7fff;
+ UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) |
+ VHPT_addr3;
+
+#if 0
+ if (VHPT_addr1 == 0xe000000000000000L) {
+ printf("vcpu_thash: thash unsupported with rr7 @%lx\n",
+ PSCB(vcpu,iip));
+ return (IA64_ILLOP_FAULT);
+ }
+#endif
+//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr);
+ *pval = VHPT_addr;
+ return (IA64_NO_FAULT);
+}
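vcpu_thash assembles the short-format VHPT address from three pieces: the region bits 63..61 of the faulting address, a middle field (bits 60..15) where the PTA base and the shifted VPN offset are merged under the PTA-size mask, and the low 15 bits of the offset. A compact sketch of the same computation with pta_base/pta_size/rr_ps passed in as plain parameters (illustrative, not the patch's interface):

    #include <stdint.h>

    uint64_t short_vhpt_hash(uint64_t vadr, uint64_t pta_base,
                             unsigned pta_size, unsigned rr_ps)
    {
            uint64_t mask_60_15 = (((1UL << pta_size) - 1) >> 15) & 0x3fffffffffffUL;
            uint64_t off = (vadr >> rr_ps) << 3;              /* 8-byte VHPT entries */
            uint64_t region = vadr & 0xe000000000000000UL;    /* keep bits 63..61 */
            uint64_t hi = ((pta_base >> 15) & 0x3fffffffffffUL) & ~mask_60_15;
            uint64_t lo = ((off >> 15) & 0x3fffffffffffUL) & mask_60_15;

            return region | ((hi | lo) << 15) | (off & 0x7fff);
    }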
+
+IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+ printf("vcpu_ttag: ttag instruction unsupported\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+#define itir_ps(itir) ((itir >> 2) & 0x3f)
+#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1))
+
+unsigned long vhpt_translate_count = 0;
+
+IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir)
+{
+ unsigned long pta, pta_mask, iha, pte, ps;
+ TR_ENTRY *trp;
+ ia64_rr rr;
+
+ if (!(address >> 61)) {
+ if (!PSCB(vcpu,metaphysical_mode)) {
+ REGS *regs = vcpu_regs(vcpu);
+ unsigned long viip = PSCB(vcpu,iip);
+ unsigned long vipsr = PSCB(vcpu,ipsr);
+ unsigned long iip = regs->cr_iip;
+ unsigned long ipsr = regs->cr_ipsr;
+ printk("vcpu_translate: bad address %p, viip=%p,
vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr);
+ }
+
+ *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX;
+ *itir = PAGE_SHIFT << 2;
+ phys_translate_count++;
+ return IA64_NO_FAULT;
+ }
+
+ /* check translation registers */
+ if ((trp = match_tr(vcpu,address))) {
+ tr_translate_count++;
+ *pteval = trp->page_flags;
+ *itir = trp->itir;
+ return IA64_NO_FAULT;
+ }
+
+ /* check 1-entry TLB */
+ if ((trp = match_dtlb(vcpu,address))) {
+ dtlb_translate_count++;
+ *pteval = trp->page_flags;
+ *itir = trp->itir;
+ return IA64_NO_FAULT;
+ }
+
+ /* check guest VHPT */
+ pta = PSCB(vcpu,pta);
+ rr.rrval = PSCB(vcpu,rrs)[address>>61];
+ if (rr.ve && (pta & IA64_PTA_VE))
+ {
+ if (pta & IA64_PTA_VF)
+ {
+ /* long format VHPT - not implemented */
+ return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+ }
+ else
+ {
+ /* short format VHPT */
+
+ /* avoid recursively walking VHPT */
+ pta_mask = (itir_mask(pta) << 3) >> 3;
+ if (((address ^ pta) & pta_mask) == 0)
+ return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+
+ vcpu_thash(vcpu, address, &iha);
+ if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 0)
+ return IA64_VHPT_TRANS_VECTOR;
+
+ /*
+ * Optimisation: this VHPT walker aborts on not-present pages
+ * instead of inserting a not-present translation, this allows
+ * vectoring directly to the miss handler.
+ */
+ if (pte & _PAGE_P)
+ {
+ *pteval = pte;
+ *itir = vcpu_get_itir_on_fault(vcpu,address);
+ vhpt_translate_count++;
+ return IA64_NO_FAULT;
+ }
+ return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
+ }
+ }
+ return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR);
+}
+
+IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
+{
+ UINT64 pteval, itir, mask;
+ IA64FAULT fault;
+
+ fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir);
+ if (fault == IA64_NO_FAULT)
+ {
+ mask = itir_mask(itir);
+ *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
+ return (IA64_NO_FAULT);
+ }
+ else
+ {
+ PSCB(vcpu,tmp[0]) = vadr; // save ifa in vcpu structure, then specify IA64_FORCED_IFA
+ return (fault | IA64_FORCED_IFA);
+ }
+}
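vcpu_tpa turns the translation into a physical address by letting the itir page-size field decide the split point: page-frame bits come from the pte, offset bits from the virtual address. A sketch of just that assembly step (the PPN_MASK value below is an assumption standing in for _PAGE_PPN_MASK, matching ia64's 50 implemented physical address bits):

    #include <stdint.h>

    #define PPN_MASK 0x0003fffffffff000UL   /* assumed _PAGE_PPN_MASK */

    uint64_t assemble_paddr(uint64_t pteval, uint64_t itir, uint64_t vadr)
    {
            unsigned ps = (itir >> 2) & 0x3f;    /* itir_ps(): log2 page size */
            uint64_t mask = ~((1UL << ps) - 1);  /* itir_mask() */

            return (pteval & PPN_MASK & mask) | (vadr & ~mask);
    }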
+
+IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
+{
+ printf("vcpu_tak: tak instruction unsupported\n");
+ return (IA64_ILLOP_FAULT);
+ // HACK ALERT: tak does a thash for now
+ //return vcpu_thash(vcpu,vadr,key);
+}
+
+/**************************************************************************
+ VCPU debug breakpoint register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: unimplemented DBRs return a reserved register fault
+ // TODO: Should set Logical CPU state, not just physical
+ ia64_set_dbr(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: unimplemented IBRs return a reserved register fault
+ // TODO: Should set Logical CPU state, not just physical
+ ia64_set_ibr(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // TODO: unimplemented DBRs return a reserved register fault
+ UINT64 val = ia64_get_dbr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // TODO: unimplemented IBRs return a reserved register fault
+ UINT64 val = ia64_get_ibr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU performance monitor register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: Should set Logical CPU state, not just physical
+ // NOTE: Writes to unimplemented PMC registers are discarded
+ ia64_set_pmc(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ // TODO: Should set Logical CPU state, not just physical
+ // NOTE: Writes to unimplemented PMD registers are discarded
+ ia64_set_pmd(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // NOTE: Reads from unimplemented PMC registers return zero
+ UINT64 val = (UINT64)ia64_get_pmc(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // NOTE: Reads from unimplemented PMD registers return zero
+ UINT64 val = (UINT64)ia64_get_pmd(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU banked general register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_bsw0(VCPU *vcpu)
+{
+ // TODO: Only allowed for current vcpu
+ REGS *regs = vcpu_regs(vcpu);
+ unsigned long *r = &regs->r16;
+ unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+ unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
+ int i;
+
+ if (PSCB(vcpu,banknum)) {
+ for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; }
+ PSCB(vcpu,banknum) = 0;
+ }
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_bsw1(VCPU *vcpu)
+{
+ // TODO: Only allowed for current vcpu
+ REGS *regs = vcpu_regs(vcpu);
+ unsigned long *r = &regs->r16;
+ unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
+ unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
+ int i;
+
+ if (!PSCB(vcpu,banknum)) {
+ for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; }
+ PSCB(vcpu,banknum) = 1;
+ }
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU cpuid access routines
+**************************************************************************/
+
+
+IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ // FIXME: This could get called as a result of a rsvd-reg fault
+ // if reg > 3
+ switch(reg) {
+ case 0:
+ memcpy(pval,"Xen/ia64",8);
+ break;
+ case 1:
+ *pval = 0;
+ break;
+ case 2:
+ *pval = 0;
+ break;
+ case 3:
+ *pval = ia64_get_cpuid(3);
+ break;
+ case 4:
+ *pval = ia64_get_cpuid(4);
+ break;
+ default:
+ if (reg > (ia64_get_cpuid(3) & 0xff))
+ return IA64_RSVDREG_FAULT;
+ *pval = ia64_get_cpuid(reg);
+ break;
+ }
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU region register access routines
+**************************************************************************/
+
+unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr)
+{
+ ia64_rr rr;
+
+ rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
+ return(rr.ve);
+}
+
+IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+ PSCB(vcpu,rrs)[reg>>61] = val;
+ // warning: set_one_rr() does it "live"
+ set_one_rr(reg,val);
+ return (IA64_NO_FAULT);
+}
+
+IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+ UINT64 val = PSCB(vcpu,rrs)[reg>>61];
+ *pval = val;
+ return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+#ifndef PKR_USE_FIXED
+ printk("vcpu_get_pkr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+ UINT64 val = (UINT64)ia64_get_pkr(reg);
+ *pval = val;
+ return (IA64_NO_FAULT);
+#endif
+}
+
+IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+#ifndef PKR_USE_FIXED
+ printk("vcpu_set_pkr: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+#else
+// if (reg >= NPKRS) return (IA64_ILLOP_FAULT);
+ vcpu->pkrs[reg] = val;
+ ia64_set_pkr(reg,val);
+ return (IA64_NO_FAULT);
+#endif
+}
+
+/**************************************************************************
+ VCPU translation register access routines
+**************************************************************************/
+
+static void vcpu_purge_tr_entry(TR_ENTRY *trp)
+{
+ trp->p = 0;
+}
+
+static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+ UINT64 ps;
+
+ trp->itir = itir;
+ trp->rid = virtualize_rid(current, get_rr(ifa) & RR_RID_MASK);
+ trp->p = 1;
+ ps = trp->ps;
+ trp->page_flags = pte;
+ if (trp->pl < 2) trp->pl = 2;
+ trp->vadr = ifa & ~0xfff;
+ if (ps > 12) { // "ignore" relevant low-order bits
+ trp->ppn &= ~((1UL<<(ps-12))-1);
+ trp->vadr &= ~((1UL<<ps)-1);
+ }
+}
+
+TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count)
+{
+ unsigned long rid = (get_rr(ifa) & RR_RID_MASK);
+ int i;
+
+ for (i = 0; i < count; i++, trp++) {
+ if (!trp->p) continue;
+ if (physicalize_rid(vcpu,trp->rid) != rid) continue;
+ if (ifa < trp->vadr) continue;
+ if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
+ //if (trp->key && !match_pkr(vcpu,trp->key)) continue;
+ return trp;
+ }
+ return 0;
+}
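The match test above accepts a TR entry when it is present, its physicalized RID equals the RID of the faulting region, and the address lands inside the entry's 2^ps-byte window. A simplified sketch of that predicate, written with the straightforward inclusive/exclusive bounds (the struct here is illustrative, not TR_ENTRY):

    #include <stdbool.h>
    #include <stdint.h>

    struct tr_ent {
            bool present;
            uint64_t rid;
            uint64_t vadr;     /* start of the mapped window */
            unsigned ps;       /* log2 of the window size */
    };

    bool tr_hits(const struct tr_ent *t, uint64_t rid, uint64_t ifa)
    {
            if (!t->present || t->rid != rid)
                    return false;
            return ifa >= t->vadr && ifa < t->vadr + (1UL << t->ps);
    }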
+
+TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa)
+{
+ TR_ENTRY *trp;
+
+ trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS);
+ if (trp) return trp;
+ trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS);
+ if (trp) return trp;
+ return 0;
+}
+
+IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte,
+ UINT64 itir, UINT64 ifa)
+{
+ TR_ENTRY *trp;
+
+ if (slot >= NDTRS) return IA64_RSVDREG_FAULT;
+ trp = &PSCBX(vcpu,dtrs[slot]);
+//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
+ vcpu_set_tr_entry(trp,pte,itir,ifa);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte,
+ UINT64 itir, UINT64 ifa)
+{
+ TR_ENTRY *trp;
+
+ if (slot >= NITRS) return IA64_RSVDREG_FAULT;
+ trp = &PSCBX(vcpu,itrs[slot]);
+//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
+ vcpu_set_tr_entry(trp,pte,itir,ifa);
+ return IA64_NO_FAULT;
+}
+
+/**************************************************************************
+ VCPU translation cache access routines
+**************************************************************************/
+
+void foobar(void) { /*vcpu_verbose = 1;*/ }
+
+extern struct domain *dom0;
+
+void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
+{
+ unsigned long psr;
+ unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
+
+ // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
+ // FIXME, must be inlined or potential for nested fault here!
+ if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) {
+ printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n");
+ //FIXME: kill domain here
+ while(1);
+ }
+ psr = ia64_clear_ic();
+ ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
+ ia64_set_psr(psr);
+ // ia64_srlz_i(); // no srls req'd, will rfi later
+#ifdef VHPT_GLOBAL
+ if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) {
+ // FIXME: this is dangerous... vhpt_flush_address ensures these
+ // addresses never get flushed. More work needed if this
+ // ever happens.
+//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
+ if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps);
+ else vhpt_insert(vaddr,pte,logps<<2);
+ }
+ // even if domain pagesize is larger than PAGE_SIZE, just put
+ // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
+ else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
+#endif
+ if (IorD & 0x4) return; // don't place in 1-entry TLB
+ if (IorD & 0x1) {
+ vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
+ PSCBX(vcpu,itlb_pte) = mp_pte;
+ }
+ if (IorD & 0x2) {
+ vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
+ PSCBX(vcpu,dtlb_pte) = mp_pte;
+ }
+}
+
+// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check
+// the physical address contained for correctness
+TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa)
+{
+ TR_ENTRY *trp;
+
+ if ((trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)))
+ return (&vcpu->arch.dtlb);
+ return 0UL;
+}
+
+IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+ unsigned long pteval, logps = (itir >> 2) & 0x3f;
+ unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
+
+ if (logps < PAGE_SHIFT) {
+ printf("vcpu_itc_d: domain trying to use smaller page size!\n");
+ //FIXME: kill domain here
+ while(1);
+ }
+ //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+ pteval = translate_domain_pte(pte,ifa,itir);
+ if (!pteval) return IA64_ILLOP_FAULT;
+ vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
+{
+ unsigned long pteval, logps = (itir >> 2) & 0x3f;
+ unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
+
+ // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
+ if (logps < PAGE_SHIFT) {
+ printf("vcpu_itc_i: domain trying to use smaller page size!\n");
+ //FIXME: kill domain here
+ while(1);
+ }
+ //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
+ pteval = translate_domain_pte(pte,ifa,itir);
+ // FIXME: what to do if bad physical address? (machine check?)
+ if (!pteval) return IA64_ILLOP_FAULT;
+ vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
+{
+ printk("vcpu_ptc_l: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+}
+
+// At privlvl=0, fc performs no access rights or protection key checks, while
+// at privlvl!=0, fc performs access rights checks as if it were a 1-byte
+// read but no protection key check. Thus in order to avoid an unexpected
+// access rights fault, we have to translate the virtual address to a
+// physical address (possibly via a metaphysical address) and do the fc
+// on the physical address, which is guaranteed to flush the same cache line
+IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr)
+{
+ // TODO: Only allowed for current vcpu
+ UINT64 mpaddr, paddr;
+ IA64FAULT fault;
+ unsigned long translate_domain_mpaddr(unsigned long);
+ IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *);
+
+ fault = vcpu_tpa(vcpu, vadr, &mpaddr);
+ if (fault == IA64_NO_FAULT) {
+ paddr = translate_domain_mpaddr(mpaddr);
+ ia64_fc(__va(paddr));
+ }
+ return fault;
+}
+
+int ptce_count = 0;
+IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
+{
+ // Note that this only needs to be called once, i.e. the
+ // architected loop to purge the entire TLB should use
+ // base = stride1 = stride2 = 0, count0 = count1 = 1
+
+#ifdef VHPT_GLOBAL
+ vhpt_flush(); // FIXME: This is overdoing it
+#endif
+ local_flush_tlb_all();
+ // just invalidate the "whole" tlb
+ vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
+{
+ printk("vcpu_ptc_g: called, not implemented yet\n");
+ return IA64_ILLOP_FAULT;
+}
+
+IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+ extern void ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
+ // FIXME: validate not flushing Xen addresses
+ // if (Xen address) return(IA64_ILLOP_FAULT);
+ // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
+//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
+#ifdef VHPT_GLOBAL
+ vhpt_flush_address(vadr,addr_range);
+#endif
+ ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
+ vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
+ return IA64_NO_FAULT;
+}
+
+IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+ printf("vcpu_ptr_d: Purging TLB is unsupported\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
+{
+ printf("vcpu_ptr_i: Purging TLB is unsupported\n");
+ return (IA64_ILLOP_FAULT);
+}
+
+void vcpu_set_regs(VCPU *vcpu, REGS *regs)
+{
+ vcpu->arch.regs = regs;
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vhpt.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/vhpt.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,151 @@
+/*
+ * Initialize VHPT support.
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/vhpt.h>
+
+unsigned long vhpt_paddr, vhpt_pend, vhpt_pte;
+
+void vhpt_flush(void)
+{
+ struct vhpt_lf_entry *v = (void *)VHPT_ADDR;
+ int i, cnt = 0;
+#if 0
+static int firsttime = 2;
+
+if (firsttime) firsttime--;
+else {
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+printf("vhpt_flush: *********************************************\n");
+}
+#endif
+ for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
+ v->itir = 0;
+ v->CChain = 0;
+ v->page_flags = 0;
+ v->ti_tag = INVALID_TI_TAG;
+ }
+ // initialize cache too???
+}
+
+#ifdef VHPT_GLOBAL
+void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
+{
+ unsigned long ps;
+ struct vhpt_lf_entry *vlfe;
+
+ if ((vadr >> 61) == 7) {
+ // no vhpt for region 7 yet, see vcpu_itc_no_srlz
+ printf("vhpt_flush_address: region 7, spinning...\n");
+ while(1);
+ }
+#if 0
+ // this only seems to occur at shutdown, but it does occur
+ if ((!addr_range) || addr_range & (addr_range - 1)) {
+ printf("vhpt_flush_address: weird range, spinning...\n");
+ while(1);
+ }
+//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
+#endif
+ while ((long)addr_range > 0) {
+ vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
+ // FIXME: for now, just blow it away even if it belongs to
+ // another domain. Later, use ttag to check for match
+//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
+//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
+//}
+ vlfe->ti_tag |= INVALID_TI_TAG;
+ addr_range -= PAGE_SIZE;
+ vadr += PAGE_SIZE;
+ }
+}
+#endif
+
+void vhpt_map(void)
+{
+ unsigned long psr;
+
+ psr = ia64_clear_ic();
+ ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2);
+ ia64_set_psr(psr);
+ ia64_srlz_i();
+}
+
+void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps)
+{
+ unsigned long mask = (1L << logps) - 1;
+ extern long running_on_sim;
+ int i;
+
+ if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
+ // if this happens, we may want to revisit this algorithm
+ printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
+ while(1);
+ }
+ if (logps-PAGE_SHIFT > 2) {
+ // FIXME: Should add counter here to see how often this
+ // happens (e.g. for 16MB pages!) and determine if it
+ // is a performance problem. On a quick look, it takes
+ // about 39000 instrs for a 16MB page and it seems to occur
+ // only a few times/second, so OK for now.
+ // An alternate solution would be to just insert the one
+ // 16KB in the vhpt (but with the full mapping)?
+ //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
+ //"va=%p, pa=%p, pa-masked=%p\n",
+ //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
+ //(pte&_PFN_MASK)&~mask);
+ }
+ vaddr &= ~mask;
+ pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
+ for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) {
+ vhpt_insert(vaddr,pte,logps<<2);
+ vaddr += PAGE_SIZE;
+ }
+}
+
+void vhpt_init(void)
+{
+ unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva;
+#if !VHPT_ENABLED
+ return;
+#endif
+ // allocate a huge chunk of physical memory.... how???
+ vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
+ vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
+ printf("vhpt_init: vhpt size=%p,
align=%p\n",vhpt_total_size,vhpt_alignment);
+ /* This allocation only holds true if vhpt table is unique for
+ * all domains. Or else later new vhpt table should be allocated
+ * from the domain heap when each domain is created. Assume the xen buddy
+ * allocator can provide naturally aligned pages by order?
+ */
+ vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT);
+ if (!vhpt_imva) {
+ printf("vhpt_init: can't allocate VHPT!\n");
+ while(1);
+ }
+ vhpt_paddr = __pa(vhpt_imva);
+ vhpt_pend = vhpt_paddr + vhpt_total_size - 1;
+ printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend);
+ vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL));
+ vhpt_map();
+ ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
+ VHPT_ENABLED);
+ vhpt_flush();
+}
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xen.lds.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xen.lds.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,251 @@
+#include <linux/config.h>
+
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+
+#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(phys_start)
+jiffies = jiffies_64;
+PHDRS {
+ code PT_LOAD;
+ percpu PT_LOAD;
+ data PT_LOAD;
+}
+SECTIONS
+{
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.exit.text)
+ *(.exit.data)
+ *(.exitcall.exit)
+ *(.IA_64.unwind.exit.text)
+ *(.IA_64.unwind_info.exit.text)
+ }
+
+ v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+ phys_start = _start - LOAD_OFFSET;
+
+ code : { } :code
+ . = KERNEL_START;
+
+ _text = .;
+ _stext = .;
+
+ .text : AT(ADDR(.text) - LOAD_OFFSET)
+ {
+ *(.text.ivt)
+ *(.text)
+ SCHED_TEXT
+ LOCK_TEXT
+ *(.gnu.linkonce.t*)
+ }
+ .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
+ { *(.text2) }
+#ifdef CONFIG_SMP
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
+ { *(.text.lock) }
+#endif
+ _etext = .;
+
+ /* Read-only data */
+
+ /* Exception table */
+ . = ALIGN(16);
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
+ {
+ __start___ex_table = .;
+ *(__ex_table)
+ __stop___ex_table = .;
+ }
+
+ .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(.data.patch.vtop)
+ __end___vtop_patchlist = .;
+ }
+
+ .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
+ {
+ __start___mckinley_e9_bundles = .;
+ *(.data.patch.mckinley_e9)
+ __end___mckinley_e9_bundles = .;
+ }
+
+ /* Global data */
+ _data = .;
+
+#if defined(CONFIG_IA64_GENERIC)
+ /* Machine Vector */
+ . = ALIGN(16);
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
+ {
+ machvec_start = .;
+ *(.machvec)
+ machvec_end = .;
+ }
+#endif
+
+ /* Unwind info & table: */
+ . = ALIGN(8);
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
+ { *(.IA_64.unwind_info*) }
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
+ {
+ __start_unwind = .;
+ *(.IA_64.unwind*)
+ __end_unwind = .;
+ }
+
+ RODATA
+
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET)
+ { *(.opd) }
+
+ /* Initialization code and data: */
+
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
+ {
+ _sinittext = .;
+ *(.init.text)
+ _einittext = .;
+ }
+
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
+ { *(.init.data) }
+
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
+ {
+ __initramfs_start = .;
+ *(.init.ramfs)
+ __initramfs_end = .;
+ }
+
+ . = ALIGN(16);
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
+ {
+ __setup_start = .;
+ *(.init.setup)
+ __setup_end = .;
+ }
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
+ {
+ __initcall_start = .;
+ *(.initcall1.init)
+ *(.initcall2.init)
+ *(.initcall3.init)
+ *(.initcall4.init)
+ *(.initcall5.init)
+ *(.initcall6.init)
+ *(.initcall7.init)
+ __initcall_end = .;
+ }
+ __con_initcall_start = .;
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
+ { *(.con_initcall.init) }
+ __con_initcall_end = .;
+ __security_initcall_start = .;
+ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
+ { *(.security_initcall.init) }
+ __security_initcall_end = .;
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
+
+ /* The initial task and kernel stack */
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
+ { *(.data.init_task) }
+
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
+ { *(__special_page_section)
+ __start_gate_section = .;
+ *(.data.gate)
+ __stop_gate_section = .;
+ }
+ . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */
+
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
+ { *(.data.cacheline_aligned) }
+
+ /* Per-cpu data: */
+ percpu : { } :percpu
+ . = ALIGN(PERCPU_PAGE_SIZE);
+ __phys_per_cpu_start = .;
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
+ {
+ __per_cpu_start = .;
+ *(.data.percpu)
+ __per_cpu_end = .;
+ }
+ . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
+
+ data : { } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
+ { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
+
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
+ { *(.got.plt) *(.got) }
+ __gp = ADDR(.got) + 0x200000;
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
+ { *(.sdata) *(.sdata1) *(.srdata) }
+ _edata = .;
+ _bss = .;
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
+ { *(.sbss) *(.scommon) }
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET)
+ { *(.bss) *(COMMON) }
+
+ _end = .;
+
+ code : { } :code
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+ /* Discard them for now since Intel SoftSDV cannot handle them.
+ .comment 0 : { *(.comment) }
+ .note 0 : { *(.note) }
+ */
+ /DISCARD/ : { *(.comment) }
+ /DISCARD/ : { *(.note) }
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenasm.S
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenasm.S Thu Sep 1 18:46:28 2005
@@ -0,0 +1,501 @@
+/*
+ * Assembly support routines for Xen/ia64
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/vhpt.h>
+
+#if 0
+// FIXME: there's gotta be a better way...
+// ski and spaski are different... moved to xenmisc.c
+#define RunningOnHpSki(rx,ry,pn) \
+ addl rx = 2, r0; \
+ addl ry = 3, r0; \
+ ;; \
+ mov rx = cpuid[rx]; \
+ mov ry = cpuid[ry]; \
+ ;; \
+ cmp.eq pn,p0 = 0, rx; \
+ ;; \
+ (pn) movl rx = 0x7000004 ; \
+ ;; \
+ (pn) cmp.ge pn,p0 = ry, rx; \
+ ;;
+
+//int platform_is_hp_ski(void)
+GLOBAL_ENTRY(platform_is_hp_ski)
+ mov r8 = 0
+ RunningOnHpSki(r3,r9,p8)
+(p8) mov r8 = 1
+ br.ret.sptk.many b0
+END(platform_is_hp_ski)
+#endif
+
+// Change rr7 to the passed value while ensuring
+// Xen is mapped into the new region.
+// in0: new rr7 value
+// in1: Xen virtual address of shared info (to be pinned)
+#define PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+// FIXME? Note that this turns off the DB bit (debug)
+#define PSR_BITS_TO_SET IA64_PSR_BN
+
+//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info);
+GLOBAL_ENTRY(ia64_new_rr7)
+ // not sure this unwind statement is correct...
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
+ alloc loc1 = ar.pfs, 3, 8, 0, 0
+1: {
+ mov r28 = in0 // copy procedure index
+ mov r8 = ip // save ip to compute branch
+ mov loc0 = rp // save rp
+ };;
+ .body
+ movl loc2=PERCPU_ADDR
+ ;;
+ tpa loc2=loc2 // grab this BEFORE changing rr7
+ ;;
+#if VHPT_ENABLED
+ movl loc6=VHPT_ADDR
+ ;;
+ tpa loc6=loc6 // grab this BEFORE changing rr7
+ ;;
+#endif
+ mov loc5=in1
+ ;;
+ tpa loc5=loc5 // grab this BEFORE changing rr7
+ ;;
+ mov loc7=in2 // arch_vcpu_info_t
+ ;;
+ tpa loc7=loc7 // grab this BEFORE changing rr7
+ ;;
+ mov loc3 = psr // save psr
+ adds r8 = 1f-1b,r8 // calculate return address for call
+ ;;
+ tpa r8=r8 // convert rp to physical
+ ;;
+ mov loc4=ar.rsc // save RSE configuration
+ ;;
+ mov ar.rsc=0 // put RSE in enforced lazy, LE mode
+ movl r16=PSR_BITS_TO_CLEAR
+ movl r17=PSR_BITS_TO_SET
+ ;;
+ or loc3=loc3,r17 // add in psr the bits to set
+ ;;
+ andcm r16=loc3,r16 // removes bits to clear from psr
+ br.call.sptk.many rp=ia64_switch_mode_phys
+1:
+ // now in physical mode with psr.i/ic off so do rr7 switch
+ dep r16=-1,r0,61,3
+ ;;
+ mov rr[r16]=in0
+ srlz.d
+ ;;
+
+ // re-pin mappings for kernel text and data
+ mov r18=KERNEL_TR_PAGE_SHIFT<<2
+ movl r17=KERNEL_START
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ ;;
+ ptr.i r17,r18
+ ptr.d r17,r18
+ ;;
+ mov cr.itir=r18
+ mov cr.ifa=r17
+ mov r16=IA64_TR_KERNEL
+ //mov r3=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ or r18=r2,r18
+ ;;
+ srlz.i
+ ;;
+ itr.i itr[r16]=r18
+ ;;
+ itr.d dtr[r16]=r18
+ ;;
+
+ // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
+
+ // unless overlaps with KERNEL_TR
+ dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ cmp.eq p7,p0=r17,r18
+(p7) br.cond.sptk .stack_overlaps
+ ;;
+ movl r25=PAGE_KERNEL
+ dep r21=0,r13,60,4 // physical address of "current"
+ ;;
+ or r23=r25,r21 // construct PA | page properties
+ mov r25=IA64_GRANULE_SHIFT<<2
+ ;;
+ ptr.d r13,r25
+ ;;
+ mov cr.itir=r25
+ mov cr.ifa=r13 // VA of next task...
+ ;;
+ mov r25=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ ;;
+.stack_overlaps:
+
+ movl r22=PERCPU_ADDR
+ ;;
+ movl r25=PAGE_KERNEL
+ ;;
+ mov r21=loc2 // saved percpu physical address
+ ;;
+ or r23=r25,r21 // construct PA | page properties
+ mov r24=PERCPU_PAGE_SHIFT<<2
+ ;;
+ ptr.d r22,r24
+ ;;
+ mov cr.itir=r24
+ mov cr.ifa=r22
+ ;;
+ mov r25=IA64_TR_PERCPU_DATA
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ ;;
+
+#if VHPT_ENABLED
+ movl r22=VHPT_ADDR
+ ;;
+ movl r25=PAGE_KERNEL
+ ;;
+ mov r21=loc6 // saved vhpt physical address
+ ;;
+ or r23=r25,r21 // construct PA | page properties
+ mov r24=VHPT_PAGE_SHIFT<<2
+ ;;
+ ptr.d r22,r24
+ ;;
+ mov cr.itir=r24
+ mov cr.ifa=r22
+ ;;
+ mov r25=IA64_TR_VHPT
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ ;;
+#endif
+
+ movl r22=SHAREDINFO_ADDR
+ ;;
+ movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+ ;;
+ mov r21=loc5 // saved sharedinfo physical address
+ ;;
+ or r23=r25,r21 // construct PA | page properties
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ ptr.d r22,r24
+ ;;
+ mov cr.itir=r24
+ mov cr.ifa=r22
+ ;;
+ mov r25=IA64_TR_SHARED_INFO
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ ;;
+ // Map for arch_vcpu_info_t
+ movl r22=SHARED_ARCHINFO_ADDR
+ ;;
+ movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
+ ;;
+ mov r21=loc7 // saved sharedinfo physical address
+ ;;
+ or r23=r25,r21 // construct PA | page properties
+ mov r24=PAGE_SHIFT<<2
+ ;;
+ ptr.d r22,r24
+ ;;
+ mov cr.itir=r24
+ mov cr.ifa=r22
+ ;;
+ mov r25=IA64_TR_ARCH_INFO
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ ;;
+
+ // done, switch back to virtual and return
+ mov r16=loc3 // r16= original psr
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+ mov psr.l = loc3 // restore init PSR
+
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+ mov ar.rsc=loc4 // restore RSE configuration
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.many rp
+END(ia64_new_rr7)
+
+#include "minstate.h"
+
+GLOBAL_ENTRY(ia64_prepare_handle_privop)
+ .prologue
+ /*
+ * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+ */
+ mov r16=r0
+ DO_SAVE_SWITCH_STACK
+ br.call.sptk.many rp=ia64_handle_privop // stack frame setup in ivt
+.ret22: .body
+ DO_LOAD_SWITCH_STACK
+ br.cond.sptk.many rp // goes to ia64_leave_kernel
+END(ia64_prepare_handle_privop)
+
+GLOBAL_ENTRY(ia64_prepare_handle_break)
+ .prologue
+ /*
+ * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+ */
+ mov r16=r0
+ DO_SAVE_SWITCH_STACK
+ br.call.sptk.many rp=ia64_handle_break // stack frame setup in ivt
+.ret23: .body
+ DO_LOAD_SWITCH_STACK
+ br.cond.sptk.many rp // goes to ia64_leave_kernel
+END(ia64_prepare_handle_break)
+
+GLOBAL_ENTRY(ia64_prepare_handle_reflection)
+ .prologue
+ /*
+ * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+ */
+ mov r16=r0
+ DO_SAVE_SWITCH_STACK
+ br.call.sptk.many rp=ia64_handle_reflection // stack frame setup in ivt
+.ret24: .body
+ DO_LOAD_SWITCH_STACK
+ br.cond.sptk.many rp // goes to ia64_leave_kernel
+END(ia64_prepare_handle_reflection)
+
+GLOBAL_ENTRY(__get_domain_bundle)
+ EX(.failure_in_get_bundle,ld8 r8=[r32],8)
+ ;;
+ EX(.failure_in_get_bundle,ld8 r9=[r32])
+ ;;
+ br.ret.sptk.many rp
+ ;;
+.failure_in_get_bundle:
+ mov r8=0
+ ;;
+ mov r9=0
+ ;;
+ br.ret.sptk.many rp
+ ;;
+END(__get_domain_bundle)
+
+GLOBAL_ENTRY(dorfirfi)
+ movl r16 = XSI_IIP
+ movl r17 = XSI_IPSR
+ movl r18 = XSI_IFS
+ ;;
+ ld8 r16 = [r16]
+ ld8 r17 = [r17]
+ ld8 r18 = [r18]
+ ;;
+ mov cr.iip=r16
+ mov cr.ipsr=r17
+ mov cr.ifs=r18
+ ;;
+ // fall through
+END(dorfirfi)
+
+GLOBAL_ENTRY(dorfi)
+ rfi
+ ;;
+END(dorfi)
+
+//
+// Long's Peak UART Offsets
+//
+#define COM_TOP 0xff5e0000
+#define COM_BOT 0xff5e2000
+
+// UART offsets
+#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */
+#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */
+#define UART_INT_ID 2 /* Interrupt ID register */
+#define UART_LINE_CTL 3 /* Line control register */
+#define UART_MODEM_CTL 4 /* Modem Control Register */
+#define UART_LSR 5 /* In: Line Status Register */
+#define UART_MSR 6 /* Modem status register */
+#define UART_DLATCH_LOW UART_TX
+#define UART_DLATCH_HIGH UART_INT_ENB
+#define COM1 0x3f8
+#define COM2 0x2F8
+#define COM3 0x3E8
+
+/* interrupt enable bits (offset 1) */
+#define DATA_AVAIL_INT 1
+#define XMIT_HOLD_EMPTY_INT 2
+#define LINE_STAT_INT 4
+#define MODEM_STAT_INT 8
+
+/* line status bits (offset 5) */
+#define REC_DATA_READY 1
+#define OVERRUN 2
+#define PARITY_ERROR 4
+#define FRAMING_ERROR 8
+#define BREAK_INTERRUPT 0x10
+#define XMIT_HOLD_EMPTY 0x20
+#define XMIT_SHIFT_EMPTY 0x40
+
+// Write a single character
+// input: r32 = character to be written
+// output: none
+GLOBAL_ENTRY(longs_peak_putc)
+ rsm psr.dt
+ movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
+ ;;
+ srlz.i
+ ;;
+
+.Chk_THRE_p:
+ ld1.acq r18=[r16]
+ ;;
+
+ and r18 = XMIT_HOLD_EMPTY, r18
+ ;;
+ cmp4.eq p6,p0=0,r18
+ ;;
+
+(p6) br .Chk_THRE_p
+ ;;
+ movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
+ ;;
+ st1.rel [r16]=r32
+ ;;
+ ssm psr.dt
+ ;;
+ srlz.i
+ ;;
+ br.ret.sptk.many b0
+END(longs_peak_putc)
+
+/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
+GLOBAL_ENTRY(pal_emulator_static)
+ mov r8=-1
+ mov r9=256
+ ;;
+ cmp.gtu p7,p8=r9,r32 /* r32 <= 255? */
+(p7) br.cond.sptk.few static
+ ;;
+ mov r9=512
+ ;;
+ cmp.gtu p7,p8=r9,r32
+(p7) br.cond.sptk.few stacked
+ ;;
+static: cmp.eq p7,p8=6,r32 /* PAL_PTCE_INFO */
+(p8) br.cond.sptk.few 1f
+ ;;
+ mov r8=0 /* status = 0 */
+ movl r9=0x100000000 /* tc.base */
+ movl r10=0x0000000200000003 /* count[0], count[1] */
+ movl r11=0x1000000000002000 /* stride[0], stride[1] */
+ br.ret.sptk.few rp
+1: cmp.eq p7,p8=14,r32 /* PAL_FREQ_RATIOS */
+(p8) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x900000002 /* proc_ratio (1/100) */
+ movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
+ movl r11=0x900000002 /* itc_ratio<<32 (1/100) */
+ ;;
+1: cmp.eq p7,p8=19,r32 /* PAL_RSE_INFO */
+(p8) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ mov r9=96 /* num phys stacked */
+ mov r10=0 /* hints */
+ mov r11=0
+ br.ret.sptk.few rp
+1: cmp.eq p7,p8=1,r32 /* PAL_CACHE_FLUSH */
+(p8) br.cond.sptk.few 1f
+#if 0
+ mov r9=ar.lc
+ movl r8=524288 /* flush 512k million cache lines (16MB) */
+ ;;
+ mov ar.lc=r8
+ movl r8=0xe000000000000000
+ ;;
+.loop: fc r8
+ add r8=32,r8
+ br.cloop.sptk.few .loop
+ sync.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.lc=r9
+ mov r8=r0
+ ;;
+1: cmp.eq p7,p8=15,r32 /* PAL_PERF_MON_INFO */
+(p8) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */
+ mov r10=0 /* reserved */
+ mov r11=0 /* reserved */
+ mov r16=0xffff /* implemented PMC */
+ mov r17=0x3ffff /* implemented PMD */
+ add r18=8,r29 /* second index */
+ ;;
+ st8 [r29]=r16,16 /* store implemented PMC */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r17,16 /* store implemented PMD */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ mov r16=0xf0 /* cycles count capable PMC */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ mov r17=0xf0 /* retired bundles capable PMC */
+ ;;
+ st8 [r29]=r16,16 /* store cycles capable */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r17,16 /* store retired bundle capable */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+ st8 [r29]=r0,16 /* clear remaining bits */
+ st8 [r18]=r0,16 /* clear remaining bits */
+ ;;
+1: br.cond.sptk.few rp
+#else
+1:
+#endif
+stacked:
+ br.ret.sptk.few rp
+END(pal_emulator_static)
+
+GLOBAL_ENTRY(vhpt_insert)
+// alloc loc0 = ar.pfs, 3, 1, 0, 0
+ mov r16=r32
+ mov r26=r33
+ mov r27=r34
+ ;;
+ VHPT_INSERT()
+// VHPT_INSERT1() ... add collision chains later
+// mov ar.pfs = loc0
+ br.ret.sptk.few rp
+ ;;
+END(vhpt_insert)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenirq.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenirq.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,78 @@
+/*
+ * Xen irq routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
+#include <asm/delay.h>
+
+
+void
+xen_debug_irq(ia64_vector vector, struct pt_regs *regs)
+{
+//FIXME: For debug only, can be removed
+ static char firstirq = 1;
+ static char firsttime[256];
+ static char firstpend[256];
+ if (firstirq) {
+ int i;
+ for (i=0;i<256;i++) firsttime[i] = 1;
+ for (i=0;i<256;i++) firstpend[i] = 1;
+ firstirq = 0;
+ }
+ if (firsttime[vector]) {
+ printf("**** (entry) First received int on vector=%d,itc=%lx\n",
+ (unsigned long) vector, ia64_get_itc());
+ firsttime[vector] = 0;
+ }
+}
+
+
+int
+xen_do_IRQ(ia64_vector vector)
+{
+ if (vector != 0xef) {
+ extern void vcpu_pend_interrupt(void *, int);
+#if 0
+ if (firsttime[vector]) {
+ printf("**** (iterate) First received int on
vector=%d,itc=%lx\n",
+ (unsigned long) vector, ia64_get_itc());
+ firsttime[vector] = 0;
+ }
+ if (firstpend[vector]) {
+ printf("**** First pended int on vector=%d,itc=%lx\n",
+ (unsigned long) vector,ia64_get_itc());
+ firstpend[vector] = 0;
+ }
+#endif
+ //FIXME: TEMPORARY HACK!!!!
+ vcpu_pend_interrupt(dom0->vcpu[0],vector);
+ vcpu_wake(dom0->vcpu[0]);
+ return(1);
+ }
+ return(0);
+}
+
+/* From linux/kernel/softirq.c */
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+# define invoke_softirq() __do_softirq()
+#else
+# define invoke_softirq() do_softirq()
+#endif
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+ //account_system_vtime(current);
+ //sub_preempt_count(IRQ_EXIT_OFFSET);
+ if (!in_interrupt() && local_softirq_pending())
+ invoke_softirq();
+ //preempt_enable_no_resched();
+}
+/* end from linux/kernel/softirq.c */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmem.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenmem.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,86 @@
+/*
+ * Xen memory allocator routines
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ * Copyright (C) 2005 Intel Corp.
+ *
+ * Routines used by ia64 machines with contiguous (or virtually contiguous)
+ * memory.
+ */
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+#include <xen/mm.h>
+
+extern struct page *zero_page_memmap_ptr;
+struct pfn_info *frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct page *mem_map;
+#define MAX_DMA_ADDRESS ~0UL // FIXME???
+
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+static unsigned long num_dma_physpages;
+#endif
+
+/*
+ * Set up the page tables.
+ */
+#ifdef CONFIG_VTI
+unsigned long *mpt_table;
+unsigned long mpt_table_size;
+#endif // CONFIG_VTI
+
+void
+paging_init (void)
+{
+ struct pfn_info *pg;
+
+#ifdef CONFIG_VTI
+ unsigned int mpt_order;
+ /* Create machine to physical mapping table
+ * NOTE: similar to frame table, later we may need virtually
+ * mapped mpt table if large hole exists. Also MAX_ORDER needs
+ * to be changed in common code, which only support 16M by far
+ */
+ mpt_table_size = max_page * sizeof(unsigned long);
+ mpt_order = get_order(mpt_table_size);
+ ASSERT(mpt_order <= MAX_ORDER);
+ if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
+ panic("Not enough memory to bootstrap Xen.\n");
+
+ printk("machine to physical table: 0x%lx\n", (u64)mpt_table);
+ memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size);
+#endif // CONFIG_VTI
+
+ /* Other mapping setup */
+
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
+}
+
+/* FIXME: postpone support for machines with big holes between physical memories.
+ * The current hack only allows efi memdescs up to the 4G boundary. (See efi.c)
+ */
+#ifndef CONFIG_VIRTUAL_MEM_MAP
+#define FT_ALIGN_SIZE (16UL << 20)
+void __init init_frametable(void)
+{
+ unsigned long i, pfn;
+ frame_table_size = max_page * sizeof(struct pfn_info);
+ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+
+ /* Request a contiguous chunk from the boot allocator, since the HV
+ * address space is identity mapped */
+ pfn = alloc_boot_pages(
+ frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT);
+ if (pfn == 0)
+ panic("Not enough memory for frame table.\n");
+
+ frame_table = __va(pfn << PAGE_SHIFT);
+ memset(frame_table, 0, frame_table_size);
+ printk("size of frame_table: %lukB\n",
+ frame_table_size >> 10);
+}
+#endif
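
The frame_table_size rounding above uses the usual page-align-up idiom, (size + PAGE_SIZE - 1) & PAGE_MASK, with PAGE_MASK == ~(PAGE_SIZE - 1). A minimal stand-alone sketch of the arithmetic (not part of the patch; the 16KB page size is an assumption):

#include <assert.h>

int main(void)
{
    unsigned long page_size = 1UL << 14;              /* assumed 16KB pages */
    unsigned long page_mask = ~(page_size - 1);       /* like PAGE_MASK */
    unsigned long size = 100000;                      /* arbitrary table size */
    unsigned long rounded = (size + page_size - 1) & page_mask;

    /* rounded is the smallest page multiple >= size */
    assert(rounded % page_size == 0);
    assert(rounded >= size && rounded - size < page_size);
    return 0;
}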
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmisc.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xenmisc.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,391 @@
+/*
+ * Xen misc
+ *
+ * Functions/decls that are/may be needed to link with Xen because
+ * of x86 dependencies
+ *
+ * Copyright (C) 2004 Hewlett-Packard Co.
+ * Dan Magenheimer (dan.magenheimer@xxxxxx)
+ *
+ */
+
+#include <linux/config.h>
+#include <xen/sched.h>
+#include <linux/efi.h>
+#include <asm/processor.h>
+#include <xen/serial.h>
+#include <asm/io.h>
+#include <xen/softirq.h>
+
+efi_memory_desc_t ia64_efi_io_md;
+EXPORT_SYMBOL(ia64_efi_io_md);
+unsigned long wait_init_idle;
+int phys_proc_id[NR_CPUS];
+unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c
+
+void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n");
}
+void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check
abort handling)\n"); }
+void ia64_mca_cpu_init(void *x) { }
+void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
+void ia64_patch_vtop(unsigned long a, unsigned long b) { }
+void hpsim_setup(char **x)
+{
+#ifdef CONFIG_SMP
+ init_smp_config();
+#endif
+}
+
+// called from mem_init... don't think s/w I/O tlb is needed in Xen
+//void swiotlb_init(void) { } ...looks like it IS needed
+
+long
+is_platform_hp_ski(void)
+{
+ int i;
+ long cpuid[6];
+
+ for (i = 0; i < 5; ++i)
+ cpuid[i] = ia64_get_cpuid(i);
+ if ((cpuid[0] & 0xff) != 'H') return 0;
+ if ((cpuid[3] & 0xff) != 0x4) return 0;
+ if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0;
+ if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0;
+ if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0;
+ return 1;
+}
+
+long
+platform_is_hp_ski(void)
+{
+ extern long running_on_sim;
+ return running_on_sim;
+}
+
+/* calls in xen/common code that are unused on ia64 */
+
+void sync_lazy_execstate_cpu(unsigned int cpu) {}
+
+#ifdef CONFIG_VTI
+int grant_table_create(struct domain *d) { return 0; }
+void grant_table_destroy(struct domain *d) { return; }
+#endif
+
+struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); }
+
+void raise_actimer_softirq(void)
+{
+ raise_softirq(AC_TIMER_SOFTIRQ);
+}
+
+#ifndef CONFIG_VTI
+unsigned long
+__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+{
+ if (d == dom0)
+ return(gpfn);
+ else {
+ unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
+ if (!pte) {
+printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n");
+while(1);
+ return 0;
+ }
+ return ((pte & _PFN_MASK) >> PAGE_SHIFT);
+ }
+}
+
+u32
+__mfn_to_gpfn(struct domain *d, unsigned long frame)
+{
+ // FIXME: is this right?
+if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) {
+printk("__mfn_to_gpfn: bad frame. spinning...\n");
+while(1);
+}
+ return frame;
+}
+#endif
+
+#ifndef CONFIG_VTI
+unsigned long __hypercall_create_continuation(
+ unsigned int op, unsigned int nr_args, ...)
+{
+ printf("__hypercall_create_continuation: not implemented!!!\n");
+}
+#endif
+
+///////////////////////////////
+
+///////////////////////////////
+// from arch/x86/apic.c
+///////////////////////////////
+
+extern unsigned long domain0_ready;
+
+int reprogram_ac_timer(s_time_t timeout)
+{
+ struct vcpu *v = current;
+
+#ifdef CONFIG_VTI
+// if(VMX_DOMAIN(v))
+ return 1;
+#endif // CONFIG_VTI
+ if (!domain0_ready) return 1;
+ local_cpu_data->itm_next = timeout;
+ if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout);
+ else vcpu_set_next_timer(current);
+ return 1;
+}
+
+///////////////////////////////
+// from arch/ia64/page_alloc.c
+///////////////////////////////
+DEFINE_PER_CPU(struct page_state, page_states) = {0};
+unsigned long totalram_pages;
+
+void __mod_page_state(unsigned long offset, unsigned long delta)
+{
+ unsigned long flags;
+ void* ptr;
+
+ local_irq_save(flags);
+ ptr = &__get_cpu_var(page_states);
+ *(unsigned long*)(ptr + offset) += delta;
+ local_irq_restore(flags);
+}
+
+///////////////////////////////
+// from arch/x86/flushtlb.c
+///////////////////////////////
+
+u32 tlbflush_clock;
+u32 tlbflush_time[NR_CPUS];
+
+///////////////////////////////
+// from arch/x86/memory.c
+///////////////////////////////
+
+void init_percpu_info(void)
+{
+ dummy();
+ //memset(percpu_info, 0, sizeof(percpu_info));
+}
+
+void free_page_type(struct pfn_info *page, unsigned int type)
+{
+ dummy();
+}
+
+///////////////////////////////
+//// misc memory stuff
+///////////////////////////////
+
+unsigned long __get_free_pages(unsigned int mask, unsigned int order)
+{
+ void *p = alloc_xenheap_pages(order);
+
+ memset(p,0,PAGE_SIZE<<order);
+ return (unsigned long)p;
+}
+
+void __free_pages(struct page *page, unsigned int order)
+{
+ if (order) BUG();
+ free_xenheap_page(page);
+}
+
+void *pgtable_quicklist_alloc(void)
+{
+ return alloc_xenheap_pages(0);
+}
+
+void pgtable_quicklist_free(void *pgtable_entry)
+{
+ free_xenheap_page(pgtable_entry);
+}
+
+///////////////////////////////
+// from arch/ia64/traps.c
+///////////////////////////////
+
+void show_registers(struct pt_regs *regs)
+{
+ printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
+}
+
+int is_kernel_text(unsigned long addr)
+{
+ extern char _stext[], _etext[];
+ if (addr >= (unsigned long) _stext &&
+ addr <= (unsigned long) _etext)
+ return 1;
+
+ return 0;
+}
+
+unsigned long kernel_text_end(void)
+{
+ extern char _etext[];
+ return (unsigned long) _etext;
+}
+
+///////////////////////////////
+// from common/keyhandler.c
+///////////////////////////////
+void dump_pageframe_info(struct domain *d)
+{
+ printk("dump_pageframe_info not implemented\n");
+}
+
+///////////////////////////////
+// called from arch/ia64/head.S
+///////////////////////////////
+
+void console_print(char *msg)
+{
+ printk("console_print called, how did start_kernel return???\n");
+}
+
+void kernel_thread_helper(void)
+{
+ printk("kernel_thread_helper not implemented\n");
+ dummy();
+}
+
+void sys_exit(void)
+{
+ printk("sys_exit not implemented\n");
+ dummy();
+}
+
+////////////////////////////////////
+// called from unaligned.c
+////////////////////////////////////
+
+void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */
+{
+ printk("die_if_kernel: called, not implemented\n");
+}
+
+long
+ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
+ unsigned long user_rbs_end, unsigned long addr, long *val)
+{
+ printk("ia64_peek: called, not implemented\n");
+}
+
+long
+ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
+ unsigned long user_rbs_end, unsigned long addr, long val)
+{
+ printk("ia64_poke: called, not implemented\n");
+}
+
+void
+ia64_sync_fph (struct task_struct *task)
+{
+ printk("ia64_sync_fph: called, not implemented\n");
+}
+
+void
+ia64_flush_fph (struct task_struct *task)
+{
+ printk("ia64_flush_fph: called, not implemented\n");
+}
+
+////////////////////////////////////
+// called from irq_ia64.c:init_IRQ()
+// (because CONFIG_IA64_HP_SIM is specified)
+////////////////////////////////////
+void hpsim_irq_init(void) { }
+
+
+// accommodate linux extable.c
+//const struct exception_table_entry *
+void *search_module_extables(unsigned long addr) { return NULL; }
+void *__module_text_address(unsigned long addr) { return NULL; }
+void *module_text_address(unsigned long addr) { return NULL; }
+
+void cs10foo(void) {}
+void cs01foo(void) {}
+
+unsigned long context_switch_count = 0;
+
+void context_switch(struct vcpu *prev, struct vcpu *next)
+{
+//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
+//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n",
+//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff);
+//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo();
+//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
+//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
+#ifdef CONFIG_VTI
+ vtm_domain_out(prev);
+#endif
+ context_switch_count++;
+ switch_to(prev,next,prev);
+#ifdef CONFIG_VTI
+ vtm_domain_in(current);
+#endif
+
+// leave this debug for now: it acts as a heartbeat when more than
+// one domain is active
+{
+static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
+static int i = 100;
+int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
+if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
+if (!i--) { printk("+",id); i = 1000000; }
+}
+
+#ifdef CONFIG_VTI
+ if (VMX_DOMAIN(current))
+ vmx_load_all_rr(current);
+#else
+ if (!is_idle_task(current->domain)) {
+ load_region_regs(current);
+ if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+ }
+ if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ /* nothing to do */
+}
+
+void continue_running(struct vcpu *same)
+{
+ /* nothing to do */
+}
+
+void panic_domain(struct pt_regs *regs, const char *fmt, ...)
+{
+ va_list args;
+ char buf[128];
+ struct vcpu *v = current;
+ static volatile int test = 1; // so can continue easily in debug
+ extern spinlock_t console_lock;
+ unsigned long flags;
+
+loop:
+ printf("$$$$$ PANIC in domain %d (k6=%p): ",
+ v->domain->domain_id,
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]);
+ va_start(args, fmt);
+ (void)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ printf(buf);
+ if (regs) show_registers(regs);
+ domain_pause_by_systemcontroller(current->domain);
+ v->domain->shutdown_code = SHUTDOWN_crash;
+ set_bit(_DOMF_shutdown, v->domain->domain_flags);
+ if (v->domain->domain_id == 0) {
+ int i = 1000000000L;
+ // if domain0 crashes, just periodically print out panic
+ // message to make post-mortem easier
+ while(i--);
+ goto loop;
+ }
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xensetup.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xensetup.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,389 @@
+/******************************************************************************
+ * xensetup.c
+ * Copyright (c) 2004-2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+//#include <xen/spinlock.h>
+#include <xen/multiboot.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+//#include <xen/delay.h>
+#include <xen/compile.h>
+//#include <xen/console.h>
+#include <xen/serial.h>
+#include <xen/trace.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <xen/string.h>
+
+unsigned long xenheap_phys_end;
+
+char saved_command_line[COMMAND_LINE_SIZE];
+
+struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
+
+cpumask_t cpu_present_map;
+
+#ifdef CLONE_DOMAIN0
+struct domain *clones[CLONE_DOMAIN0];
+#endif
+extern unsigned long domain0_ready;
+
+int find_max_pfn (unsigned long, unsigned long, void *);
+void start_of_day(void);
+
+/* opt_nosmp: If true, secondary processors are ignored. */
+static int opt_nosmp = 0;
+boolean_param("nosmp", opt_nosmp);
+
+/* maxcpus: maximum number of CPUs to activate. */
+static unsigned int max_cpus = NR_CPUS;
+integer_param("maxcpus", max_cpus);
+
+/*
+ * opt_xenheap_megabytes: Size of Xen heap in megabytes, including:
+ * xen image
+ * bootmap bits
+ * xen heap
+ * Note: To make the xenheap size configurable, elilo must first be
+ * configured to allow relocation by default. Since elilo uses a 256M
+ * alignment when relocating, the alignment issue on IPF can then be
+ * addressed.
+ */
+unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
+unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
+extern long running_on_sim;
+unsigned long xen_pstart;
+
+static int
+xen_count_pages(u64 start, u64 end, void *arg)
+{
+ unsigned long *count = arg;
+
+ /* FIXME: do we need to consider the difference between DMA-usable memory
+ * and normal memory? It seems the HV has no requirement to do DMA itself,
+ * since DMA is owned by Dom0? */
+ *count += (end - start) >> PAGE_SHIFT;
+ return 0;
+}
+
+/* Find the first hole after the memory chunk holding the xen image */
+static int
+xen_find_first_hole(u64 start, u64 end, void *arg)
+{
+ unsigned long *first_hole = arg;
+
+ if ((*first_hole) == 0) {
+ if ((start <= KERNEL_START) && (KERNEL_START < end))
+ *first_hole = __pa(end);
+ }
+
+ return 0;
+}
+
+static void __init do_initcalls(void)
+{
+ initcall_t *call;
+ for ( call = &__initcall_start; call < &__initcall_end; call++ )
+ (*call)();
+}
+
+/*
+ * The IPF loader currently supports only one command line, shared by
+ * xen and the guest kernel. This function pre-parses the mixed command
+ * line and splits it into two parts.
+ *
+ * The user should separate the parameters with "--"; everything after
+ * the separator belongs to the guest kernel. A missing "--" means the
+ * whole line belongs to the guest. Example:
+ * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty
+ * root=/dev/sda3 ro"
+ */
+static char null[4] = { 0 };
+
+void early_cmdline_parse(char **cmdline_p)
+{
+ char *guest_cmd;
+ char *split = "--";
+
+ if (*cmdline_p == NULL) {
+ *cmdline_p = &null[0];
+ saved_command_line[0] = '\0';
+ return;
+ }
+
+ guest_cmd = strstr(*cmdline_p, split);
+ /* If there is no separator, the whole line is for the guest */
+ if (guest_cmd == NULL) {
+ guest_cmd = *cmdline_p;
+ *cmdline_p = &null[0];
+ } else {
+ *guest_cmd = '\0'; /* Split boot parameters for xen and guest */
+ guest_cmd += strlen(split);
+ while (*guest_cmd == ' ') guest_cmd++;
+ }
+
+ strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE);
+ return;
+}
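
A stand-alone illustration of the "--" split that early_cmdline_parse() performs (a hypothetical test harness, not part of the patch): everything before the separator stays on the Xen command line, everything after it becomes the guest command line.

#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[] = "com2=57600,8n1 console=com2 -- console=ttyS1 root=/dev/sda3 ro";
    char *xen = line;
    char *guest = strstr(line, "--");

    if (guest == NULL) {            /* no separator: whole line is for the guest */
        guest = line;
        xen = "";
    } else {
        *guest = '\0';              /* terminate the Xen part */
        guest += strlen("--");
        while (*guest == ' ')       /* skip spaces after the separator */
            guest++;
    }
    printf("xen:   \"%s\"\nguest: \"%s\"\n", xen, guest);
    return 0;
}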
+
+struct ns16550_defaults ns16550_com1 = {
+ .baud = BAUD_AUTO,
+ .data_bits = 8,
+ .parity = 'n',
+ .stop_bits = 1
+};
+
+struct ns16550_defaults ns16550_com2 = {
+ .baud = BAUD_AUTO,
+ .data_bits = 8,
+ .parity = 'n',
+ .stop_bits = 1
+};
+
+void start_kernel(void)
+{
+ unsigned char *cmdline;
+ void *heap_start;
+ int i;
+ unsigned long max_mem, nr_pages, firsthole_start;
+ unsigned long dom0_memory_start, dom0_memory_end;
+ unsigned long initial_images_start, initial_images_end;
+
+ running_on_sim = is_platform_hp_ski();
+ /* Kernel may be relocated by EFI loader */
+ xen_pstart = ia64_tpa(KERNEL_START);
+
+ /* Must do this early -- e.g., spinlocks rely on get_current(). */
+ //set_current(&idle0_vcpu);
+ ia64_r13 = (void *)&idle0_vcpu;
+ idle0_vcpu.domain = &idle0_domain;
+
+ early_setup_arch(&cmdline);
+
+ /* We initialise the serial devices very early so we can get debugging. */
+ if (running_on_sim) hpsim_serial_init();
+ else {
+ ns16550_init(0, &ns16550_com1);
+ /* Also init com2 for Tiger4. */
+ ns16550_com2.io_base = 0x2f8;
+ ns16550_com2.irq = 3;
+ ns16550_init(1, &ns16550_com2);
+ }
+ serial_init_preirq();
+
+ init_console();
+ set_printk_prefix("(XEN) ");
+
+ /* xenheap should be in the same TR-covered range as the xen image */
+ xenheap_phys_end = xen_pstart + xenheap_size;
+ printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
+ xen_pstart, xenheap_phys_end);
+
+ /* Find next hole */
+ firsthole_start = 0;
+ efi_memmap_walk(xen_find_first_hole, &firsthole_start);
+
+ initial_images_start = xenheap_phys_end;
+ initial_images_end = initial_images_start + ia64_boot_param->initrd_size;
+
+ /* We may later find another memory chunk, even away from the xen image... */
+ if (initial_images_end > firsthole_start) {
+ printk("Not enough memory to stash the DOM0 kernel image.\n");
+ printk("First hole:0x%lx, relocation end: 0x%lx\n",
+ firsthole_start, initial_images_end);
+ for ( ; ; );
+ }
+
+ /* This copy is time consuming, but elilo may load Dom0 image
+ * within xenheap range */
+ printk("ready to move Dom0 to 0x%lx...", initial_images_start);
+ memmove(__va(initial_images_start),
+ __va(ia64_boot_param->initrd_start),
+ ia64_boot_param->initrd_size);
+ ia64_boot_param->initrd_start = initial_images_start;
+ printk("Done\n");
+
+ /* first find highest page frame number */
+ max_page = 0;
+ efi_memmap_walk(find_max_pfn, &max_page);
+ printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
+
+ heap_start = memguard_init(ia64_imva(&_end));
+ printf("Before heap_start: 0x%lx\n", heap_start);
+ heap_start = __va(init_boot_allocator(__pa(heap_start)));
+ printf("After heap_start: 0x%lx\n", heap_start);
+
+ reserve_memory();
+
+ efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
+ efi_memmap_walk(xen_count_pages, &nr_pages);
+
+ printk("System RAM: %luMB (%lukB)\n",
+ nr_pages >> (20 - PAGE_SHIFT),
+ nr_pages << (PAGE_SHIFT - 10));
+
+ init_frametable();
+
+ ia64_fph_enable();
+ __ia64_init_fpu();
+
+ alloc_dom0();
+#ifdef DOMU_BUILD_STAGING
+ alloc_domU_staging();
+#endif
+
+ end_boot_allocator();
+
+ init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
+ printk("Xen heap: %luMB (%lukB)\n",
+ (xenheap_phys_end-__pa(heap_start)) >> 20,
+ (xenheap_phys_end-__pa(heap_start)) >> 10);
+
+ late_setup_arch(&cmdline);
+ setup_per_cpu_areas();
+ mem_init();
+
+printk("About to call scheduler_init()\n");
+ scheduler_init();
+ local_irq_disable();
+printk("About to call xen_time_init()\n");
+ xen_time_init();
+#ifdef CONFIG_VTI
+ init_xen_time(); /* initialise the time */
+#endif // CONFIG_VTI
+printk("About to call ac_timer_init()\n");
+ ac_timer_init();
+// init_xen_time(); ???
+
+#ifdef CONFIG_SMP
+ if ( opt_nosmp )
+ {
+ max_cpus = 0;
+ smp_num_siblings = 1;
+ //boot_cpu_data.x86_num_cores = 1;
+ }
+
+ smp_prepare_cpus(max_cpus);
+
+ /* We aren't hotplug-capable yet. */
+ //BUG_ON(!cpus_empty(cpu_present_map));
+ for_each_cpu ( i )
+ cpu_set(i, cpu_present_map);
+
+ //BUG_ON(!local_irq_is_enabled());
+
+printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus);
+ for_each_present_cpu ( i )
+ {
+ if ( num_online_cpus() >= max_cpus )
+ break;
+ if ( !cpu_online(i) ) {
+printk("About to call __cpu_up(%d)\n",i);
+ __cpu_up(i);
+ }
+ }
+
+ printk("Brought up %ld CPUs\n", (long)num_online_cpus());
+ smp_cpus_done(max_cpus);
+#endif
+
+
+ // FIXME: Should the following be swapped and moved later?
+ schedulers_start();
+ do_initcalls();
+printk("About to call sort_main_extable()\n");
+ sort_main_extable();
+
+ /* surrender usage of kernel registers to domain, use percpu area instead */
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE);
+
+ /* Create initial domain 0. */
+printk("About to call do_createdomain()\n");
+ dom0 = do_createdomain(0, 0);
+ init_task.domain = &idle0_domain;
+ init_task.processor = 0;
+// init_task.mm = &init_mm;
+ init_task.domain->arch.mm = &init_mm;
+// init_task.thread = INIT_THREAD;
+ //arch_do_createdomain(current);
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ for (i = 0; i < CLONE_DOMAIN0; i++) {
+ clones[i] = do_createdomain(i+1, 0);
+ if ( clones[i] == NULL )
+ panic("Error creating domain0 clone %d\n",i);
+ }
+ }
+#endif
+ if ( dom0 == NULL )
+ panic("Error creating domain 0\n");
+
+ set_bit(_DOMF_privileged, &dom0->domain_flags);
+
+ /*
+ * We're going to setup domain0 using the module(s) that we stashed safely
+ * above our heap. The second module, if present, is an initrd ramdisk.
+ */
+printk("About to call construct_dom0()\n");
+ dom0_memory_start = __va(ia64_boot_param->initrd_start);
+ dom0_memory_end = ia64_boot_param->initrd_size;
+ if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
+ 0,
+ 0,
+ 0) != 0)
+ panic("Could not set up DOM0 guest OS\n");
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ dom0_memory_start = __va(ia64_boot_param->initrd_start);
+ dom0_memory_end = ia64_boot_param->initrd_size;
+ for (i = 0; i < CLONE_DOMAIN0; i++) {
+printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1);
+ if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end,
+ 0,
+ 0,
+ 0) != 0)
+ panic("Could not set up DOM0 clone %d\n",i);
+ }
+ }
+#endif
+
+ /* The stash space for the initial kernel image can now be freed up. */
+ init_domheap_pages(ia64_boot_param->initrd_start,
+ ia64_boot_param->initrd_start + ia64_boot_param->initrd_size);
+ if (!running_on_sim) // slow on ski and pages are pre-initialized to zero
+ scrub_heap_pages();
+
+printk("About to call init_trace_bufs()\n");
+ init_trace_bufs();
+
+ /* Give up the VGA console if DOM0 is configured to grab it. */
+#ifndef IA64
+ console_endboot(cmdline && strstr(cmdline, "tty0"));
+#endif
+
+#ifdef CLONE_DOMAIN0
+ {
+ int i;
+ for (i = 0; i < CLONE_DOMAIN0; i++)
+ domain_unpause_by_systemcontroller(clones[i]);
+ }
+#endif
+ domain_unpause_by_systemcontroller(dom0);
+ domain0_ready = 1;
+ local_irq_enable();
+printk("About to call startup_cpu_idle_loop()\n");
+ startup_cpu_idle_loop();
+}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xentime.c
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/arch/ia64/xen/xentime.c Thu Sep 1 18:46:28 2005
@@ -0,0 +1,382 @@
+/*
+ * xen/arch/ia64/xen/xentime.c
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@xxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <linux/efi.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+
+#include <asm/machvec.h>
+#include <asm/delay.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#ifdef XEN
+#include <asm/vcpu.h>
+#include <linux/jiffies.h> // not included by xen/sched.h
+#endif
+#include <xen/softirq.h>
+
+#ifdef XEN
+seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+#endif
+
+#define TIME_KEEPER_ID 0
+extern unsigned long wall_jiffies;
+
+static s_time_t stime_irq; /* System time at last 'time update' */
+
+unsigned long domain0_ready = 0;
+
+#ifndef CONFIG_VTI
+static inline u64 get_time_delta(void)
+{
+ return ia64_get_itc();
+}
+#else // CONFIG_VTI
+static s_time_t stime_irq = 0x0; /* System time at last 'time update' */
+unsigned long itc_scale;
+unsigned long itc_at_irq;
+static unsigned long wc_sec, wc_nsec; /* UTC time at last 'time update'. */
+//static rwlock_t time_lock = RW_LOCK_UNLOCKED;
+static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs);
+
+static inline u64 get_time_delta(void)
+{
+ s64 delta_itc;
+ u64 delta, cur_itc;
+
+ cur_itc = ia64_get_itc();
+
+ delta_itc = (s64)(cur_itc - itc_at_irq);
+ if ( unlikely(delta_itc < 0) ) delta_itc = 0;
+ delta = ((u64)delta_itc) * itc_scale;
+ delta = delta >> 32;
+
+ return delta;
+}
+
+u64 tick_to_ns(u64 tick)
+{
+ return (tick * itc_scale) >> 32;
+}
+#endif // CONFIG_VTI
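
Under CONFIG_VTI the cycle-to-nanosecond conversion above is 32.32 fixed point: init_xen_time() later sets itc_scale = (10^9 << 32) / itc_freq, so (ticks * itc_scale) >> 32 yields nanoseconds. A stand-alone sketch of the arithmetic (the 400 MHz ITC frequency is an assumption, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t itc_freq  = 400000000ULL;                      /* assumed 400 MHz ITC */
    uint64_t itc_scale = (1000000000ULL << 32) / itc_freq;  /* ns per cycle, 32.32 fixed point */
    uint64_t ticks     = 400;                               /* 400 cycles at 400 MHz */
    uint64_t ns        = (ticks * itc_scale) >> 32;         /* as in tick_to_ns() */

    printf("%llu cycles -> %llu ns\n",
           (unsigned long long)ticks, (unsigned long long)ns); /* prints 1000 ns */
    return 0;
}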
+
+s_time_t get_s_time(void)
+{
+ s_time_t now;
+ unsigned long flags;
+
+ read_lock_irqsave(&xtime_lock, flags);
+
+ now = stime_irq + get_time_delta();
+
+ /* Ensure that the returned system time is monotonically increasing. */
+ {
+ static s_time_t prev_now = 0;
+ if ( unlikely(now < prev_now) )
+ now = prev_now;
+ prev_now = now;
+ }
+
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ return now;
+}
+
+void update_dom_time(struct vcpu *v)
+{
+// FIXME: implement this?
+// printf("update_dom_time: called, not implemented, skipping\n");
+ return;
+}
+
+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
+void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
+{
+#ifdef CONFIG_VTI
+ u64 _nsecs;
+
+ write_lock_irq(&xtime_lock);
+
+ _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base);
+ while ( _nsecs >= 1000000000 )
+ {
+ _nsecs -= 1000000000;
+ secs++;
+ }
+
+ wc_sec = secs;
+ wc_nsec = (unsigned long)_nsecs;
+
+ write_unlock_irq(&xtime_lock);
+
+ update_dom_time(current->domain);
+#else
+// FIXME: Should this be do_settimeofday (from linux)???
+ printf("do_settime: called, not implemented, stopping\n");
+ dummy();
+#endif
+}
+
+irqreturn_t
+xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long new_itm;
+
+#define HEARTBEAT_FREQ 16 // period in seconds
+#ifdef HEARTBEAT_FREQ
+ static long count = 0;
+ if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) {
+ printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n",
+ regs->cr_iip,
+ VCPU(current,interrupt_delivery_enabled),
+ VCPU(current,pending_interruption));
+ count = 0;
+ }
+#endif
+#ifndef XEN
+ if (unlikely(cpu_is_offline(smp_processor_id()))) {
+ return IRQ_HANDLED;
+ }
+#endif
+#ifdef XEN
+ if (current->domain == dom0) {
+ // FIXME: there's gotta be a better way of doing this...
+ // We have to ensure that domain0 is launched before we
+ // call vcpu_timer_expired on it
+ //domain0_ready = 1; // moved to xensetup.c
+ VCPU(current,pending_interruption) = 1;
+ }
+ if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
+ vcpu_pend_timer(dom0->vcpu[0]);
+ //vcpu_set_next_timer(dom0->vcpu[0]);
+ vcpu_wake(dom0->vcpu[0]);
+ }
+ if (!is_idle_task(current->domain) && current->domain != dom0) {
+ if (vcpu_timer_expired(current)) {
+ vcpu_pend_timer(current);
+ // ensure another timer interrupt happens even if domain doesn't
+ vcpu_set_next_timer(current);
+ vcpu_wake(current);
+ }
+ }
+ raise_actimer_softirq();
+#endif
+
+#ifndef XEN
+ platform_timer_interrupt(irq, dev_id, regs);
+#endif
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
+#ifdef XEN
+ return;
+#else
+ printk(KERN_ERR "Oops: timer tick before it's due
(itc=%lx,itm=%lx)\n",
+ ia64_get_itc(), new_itm);
+#endif
+
+#ifdef XEN
+// printf("GOT TO HERE!!!!!!!!!!!\n");
+ //while(1);
+#else
+ profile_tick(CPU_PROFILING, regs);
+#endif
+
+ while (1) {
+#ifndef XEN
+ update_process_times(user_mode(regs));
+#endif
+
+ new_itm += local_cpu_data->itm_delta;
+
+ if (smp_processor_id() == TIME_KEEPER_ID) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_seqlock(&xtime_lock);
+#endif
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ do_timer(regs);
+#endif
+ local_cpu_data->itm_next = new_itm;
+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
+ write_sequnlock(&xtime_lock);
+#endif
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+ if (time_after(new_itm, ia64_get_itc()))
+ break;
+ }
+
+ do {
+ /*
+ * If we're too close to the next clock tick for
+ * comfort, we increase the safety margin by
+ * intentionally dropping the next tick(s). We do NOT
+ * update itm.next because that would force us to call
+ * do_timer() which in turn would let our clock run
+ * too fast (with the potentially devastating effect
+ * of losing monotony of time).
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
+//#ifdef XEN
+// vcpu_set_next_timer(current);
+//#else
+//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
+ ia64_set_itm(new_itm);
+//#endif
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ return IRQ_HANDLED;
+}
+
+static struct irqaction xen_timer_irqaction = {
+#ifdef CONFIG_VTI
+ .handler = vmx_timer_interrupt,
+#else // CONFIG_VTI
+ .handler = xen_timer_interrupt,
+#endif // CONFIG_VTI
+#ifndef XEN
+ .flags = SA_INTERRUPT,
+#endif
+ .name = "timer"
+};
+
+void __init
+xen_time_init (void)
+{
+ register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction);
+ ia64_init_itm();
+}
+
+
+#ifdef CONFIG_VTI
+
+/* Late init function (after all CPUs are booted). */
+int __init init_xen_time()
+{
+ struct timespec tm;
+
+ itc_scale = 1000000000UL << 32 ;
+ itc_scale /= local_cpu_data->itc_freq;
+
+ /* System time ticks from zero. */
+ stime_irq = (s_time_t)0;
+ itc_at_irq = ia64_get_itc();
+
+ /* Wallclock time starts as the initial RTC time. */
+ efi_gettimeofday(&tm);
+ wc_sec = tm.tv_sec;
+ wc_nsec = tm.tv_nsec;
+
+
+ printk("Time init:\n");
+ printk(".... System Time: %ldns\n", NOW());
+ printk(".... scale: %16lX\n", itc_scale);
+ printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_nsec/1000);
+
+ return 0;
+}
+
+static irqreturn_t
+vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long new_itm;
+ struct vcpu *v = current;
+
+
+ new_itm = local_cpu_data->itm_next;
+
+ if (!time_after(ia64_get_itc(), new_itm))
+ return;
+
+ while (1) {
+#ifdef CONFIG_SMP
+ /*
+ * For UP, this is done in do_timer(). Weird, but
+ * fixing that would require updates to all
+ * platforms.
+ */
+ update_process_times(user_mode(v, regs));
+#endif
+ new_itm += local_cpu_data->itm_delta;
+
+ if (smp_processor_id() == TIME_KEEPER_ID) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+ * xtime_lock.
+ */
+ local_cpu_data->itm_next = new_itm;
+
+ write_lock_irq(&xtime_lock);
+ /* Update jiffies counter. */
+ (*(unsigned long *)&jiffies_64)++;
+
+ /* Update wall time. */
+ wc_nsec += 1000000000/HZ;
+ if ( wc_nsec >= 1000000000 )
+ {
+ wc_nsec -= 1000000000;
+ wc_sec++;
+ }
+
+ /* Updates system time (nanoseconds since boot). */
+ stime_irq += MILLISECS(1000/HZ);
+ itc_at_irq = ia64_get_itc();
+
+ write_unlock_irq(&xtime_lock);
+
+ } else
+ local_cpu_data->itm_next = new_itm;
+
+ if (time_after(new_itm, ia64_get_itc()))
+ break;
+ }
+
+ do {
+ /*
+ * If we're too close to the next clock tick for
+ * comfort, we increase the safety margin by
+ * intentionally dropping the next tick(s). We do NOT
+ * update itm.next because that would force us to call
+ * do_timer() which in turn would let our clock run
+ * too fast (with the potentially devastating effect
+ * of losing monotony of time).
+ */
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
+ new_itm += local_cpu_data->itm_delta;
+ ia64_set_itm(new_itm);
+ /* double check, in case we got hit by a (slow) PMI: */
+ } while (time_after_eq(ia64_get_itc(), new_itm));
+ raise_softirq(AC_TIMER_SOFTIRQ);
+
+ return IRQ_HANDLED;
+}
+#endif // CONFIG_VTI
+
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux-xen/asm/pgtable.h
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Sep 1 18:46:28 2005
@@ -0,0 +1,577 @@
+#ifndef _ASM_IA64_PGTABLE_H
+#define _ASM_IA64_PGTABLE_H
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the IA-64 page table tree.
+ *
+ * This hopefully works with any (fixed) IA-64 page-size, as defined
+ * in <asm/page.h>.
+ *
+ * Copyright (C) 1998-2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@xxxxxxxxxx>
+ */
+
+#include <linux/config.h>
+
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/types.h>
+#ifdef XEN
+#ifndef __ASSEMBLY__
+#include <xen/sched.h> /* needed for mm_struct (via asm/domain.h) */
+#endif
+#endif
+
+#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */
+
+/*
+ * First, define the various bits in a PTE. Note that the PTE format
+ * matches the VHPT short format, the first doubleword of the VHPD long
+ * format, and the first doubleword of the TLB insertion format.
+ */
+#define _PAGE_P_BIT 0
+#define _PAGE_A_BIT 5
+#define _PAGE_D_BIT 6
+
+#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */
+#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */
+#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */
+#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */
+#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */
+#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */
+#define _PAGE_MA_MASK (0x7 << 2)
+#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */
+#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */
+#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */
+#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */
+#define _PAGE_PL_MASK (3 << 7)
+#define _PAGE_AR_R (0 << 9) /* read only */
+#define _PAGE_AR_RX (1 << 9) /* read & execute */
+#define _PAGE_AR_RW (2 << 9) /* read & write */
+#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */
+#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */
+#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */
+#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */
+#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */
+#define _PAGE_AR_MASK (7 << 9)
+#define _PAGE_AR_SHIFT 9
+#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */
+#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */
+#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
+#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */
+#define _PAGE_PROTNONE (__IA64_UL(1) << 63)
+
+/* Valid only for a PTE with the present bit cleared: */
+#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */
+
+#define _PFN_MASK _PAGE_PPN_MASK
+/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
+#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
+
+#define _PAGE_SIZE_4K 12
+#define _PAGE_SIZE_8K 13
+#define _PAGE_SIZE_16K 14
+#define _PAGE_SIZE_64K 16
+#define _PAGE_SIZE_256K 18
+#define _PAGE_SIZE_1M 20
+#define _PAGE_SIZE_4M 22
+#define _PAGE_SIZE_16M 24
+#define _PAGE_SIZE_64M 26
+#define _PAGE_SIZE_256M 28
+#define _PAGE_SIZE_1G 30
+#define _PAGE_SIZE_4G 32
+
+#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB
+#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB
+#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED
+
+/*
+ * Definitions for first level:
+ *
+ * PGDIR_SHIFT determines what a first-level page table entry can map.
+ */
+#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3))
+#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */
+#define FIRST_USER_ADDRESS 0
+
+/*
+ * Definitions for second level:
+ *
+ * PMD_SHIFT determines the size of the area a second-level page table
+ * can map.
+ */
+#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3))
+
+/*
+ * Definitions for third level:
+ */
+#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3))
+
+/*
+ * All the normal masks have the "page accessed" bits on, as any time
+ * they are used, the page is accessed. They are cleared only by the
+ * page-out routines.
+ */
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A)
+#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
+#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
+#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
+#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
+#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
+
+# ifndef __ASSEMBLY__
+
+#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/processor.h>
+
+/*
+ * Next come the mappings that determine how mmap() protection bits
+ * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The
+ * _P version gets used for a private shared memory segment, the _S
+ * version gets used for a shared memory segment with MAP_SHARED on.
+ * In a private shared memory segment, we do a copy-on-write if a task
+ * attempts to write to the page.
+ */
+ /* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_READONLY /* write to priv pg -> copy & make writable */
+#define __P011 PAGE_READONLY /* ditto */
+#define __P100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __P101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED /* we don't have (and don't need) write-only */
+#define __S011 PAGE_SHARED
+#define __S100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
+#define __S101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
+#define __S110 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
+
+#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
+#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
+
+
+/*
+ * Some definitions to translate between mem_map, PTEs, and page addresses:
+ */
+
+
+/* Quick test to see if ADDR is a (potentially) valid physical address. */
+static inline long
+ia64_phys_addr_valid (unsigned long addr)
+{
+ return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
+}
+
+/*
+ * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+ * memory. For the return value to be meaningful, ADDR must be >=
+ * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
+ * require a hash-, or multi-level tree-lookup or something of that
+ * sort) but it guarantees to return TRUE only if accessing the page
+ * at that address does not cause an error. Note that there may be
+ * addresses for which kern_addr_valid() returns FALSE even though an
+ * access would not cause an error (e.g., this is typically true for
+ * memory mapped I/O regions).
+ *
+ * XXX Need to implement this for IA-64.
+ */
+#define kern_addr_valid(addr) (1)
+
+
+/*
+ * Now come the defines and routines to manage and access the three-level
+ * page table.
+ */
+
+/*
+ * On some architectures, special things need to be done when setting
+ * the PTE in a page table. Nothing special needs to be done on IA-64.
+ */
+#define set_pte(ptep, pteval) (*(ptep) = (pteval))
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+#define RGN_SIZE (1UL << 61)
+#define RGN_KERNEL 7
+
+#define VMALLOC_START 0xa000000200000000UL
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+# define VMALLOC_END_INIT (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9)))
+# define VMALLOC_END vmalloc_end
+ extern unsigned long vmalloc_end;
+#else
+# define VMALLOC_END (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9)))
+#endif
+
+/* fs/proc/kcore.c */
+#define kc_vaddr_to_offset(v) ((v) - 0xa000000000000000UL)
+#define kc_offset_to_vaddr(o) ((o) + 0xa000000000000000UL)
+
+/*
+ * Conversion functions: convert page frame number (pfn) and a protection value to a page
+ * table entry (pte).
+ */
+#define pfn_pte(pfn, pgprot) \
+({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; })
+
+/* Extract pfn from pte. */
+#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT)
+
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot) \
+({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
+
+#define pte_modify(_pte, newprot) \
+ (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK)))
+
+#define page_pte_prot(page,prot) mk_pte(page, prot)
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pte_none(pte) (!pte_val(pte))
+#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
+#define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL)
+/* pte_page() returns the "struct page *" corresponding to the PTE: */
+#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
+#define pmd_present(pmd) (pmd_val(pmd) != 0UL)
+#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
+#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
+#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
+
+#define pud_none(pud) (!pud_val(pud))
+#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
+#define pud_present(pud) (pud_val(pud) != 0UL)
+#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
+
+#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
+
+/*
+ * The following have defined behavior only if pte_present() is true.
+ */
+#define pte_user(pte) ((pte_val(pte) & _PAGE_PL_MASK) == _PAGE_PL_3)
+#define pte_read(pte) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) < 6)
+#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4)
+#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0)
+#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0)
+#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0)
+#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0)
+/*
+ * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the
+ * access rights:
+ */
+#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW))
+#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
+#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX))
+#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A))
+#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
+#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
+#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D))
+#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P))
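
A tiny stand-alone check (not part of the patch) of the access-rights encoding used above: clearing the second AR bit turns RWX into RX and RW into R, which is what pte_wrprotect() relies on, and pte_write()'s "(unsigned)(ar - 2) <= 4" test accepts exactly the AR values 2..6.

#include <assert.h>

int main(void)
{
    unsigned long ar;

    /* clearing bit 1 of the AR field: RWX(3) -> RX(1), RW(2) -> R(0) */
    assert((3UL & ~2UL) == 1UL);
    assert((2UL & ~2UL) == 0UL);

    /* pte_write() accepts AR values 2..6 (RW, RWX, R_RW, RX_RWX, RWX_RW) */
    for (ar = 0; ar < 8; ar++) {
        int writable = (unsigned)(ar - 2) <= 4;
        assert(writable == (ar >= 2 && ar <= 6));
    }
    return 0;
}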
+
+/*
+ * Macro to a page protection value as "uncacheable". Note that "protection"
is really a
+ * misnomer here as the protection value contains the memory attribute bits,
dirty bits,
+ * and various other bits as well.
+ */
+#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) &
~_PAGE_MA_MASK) | _PAGE_MA_UC)
+
+/*
+ * Macro to make mark a page protection value as "write-combining".
+ * Note that "protection" is really a misnomer here as the protection
+ * value contains the memory attribute bits, dirty bits, and various
+ * other bits as well. Accesses through a write-combining translation
+ * works bypasses the caches, but does allow for consecutive writes to
+ * be combined into single (but larger) write transactions.
+ */
+#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) &
~_PAGE_MA_MASK) | _PAGE_MA_WC)
+
+static inline unsigned long
+pgd_index (unsigned long address)
+{
+ unsigned long region = address >> 61;
+ unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
+
+ return (region << (PAGE_SHIFT - 6)) | l1index;
+}
+
+/* The offset in the 1-level directory is given by the 3 region bits
+ (61..63) and the level-1 bits. */
+static inline pgd_t*
+pgd_offset (struct mm_struct *mm, unsigned long address)
+{
+ return mm->pgd + pgd_index(address);
+}
+
+/* In the kernel's mapped region we completely ignore the region number
+ (since we know it's in region number 5). */
+#define pgd_offset_k(addr) \
+ (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
+
+/* Look up a pgd entry in the gate area. On IA-64, the gate-area
+ resides in the kernel-mapped segment, hence we use pgd_offset_k()
+ here. */
+#define pgd_offset_gate(mm, addr) pgd_offset_k(addr)
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir,addr) \
+ ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+/*
+ * Find an entry in the third-level page table. This looks more complicated than it
+ * should be because some platforms place page tables in high memory.
+ */
+#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr)
+#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+/* atomic versions of the some PTE manipulations: */
+
+static inline int
+ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+ if (!pte_young(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_A_BIT, ptep);
+#else
+ pte_t pte = *ptep;
+ if (!pte_young(pte))
+ return 0;
+ set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
+ return 1;
+#endif
+}
+
+static inline int
+ptep_test_and_clear_dirty (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+ if (!pte_dirty(*ptep))
+ return 0;
+ return test_and_clear_bit(_PAGE_D_BIT, ptep);
+#else
+ pte_t pte = *ptep;
+ if (!pte_dirty(pte))
+ return 0;
+ set_pte_at(vma->vm_mm, addr, ptep, pte_mkclean(pte));
+ return 1;
+#endif
+}
+
+static inline pte_t
+ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+ return __pte(xchg((long *) ptep, 0));
+#else
+ pte_t pte = *ptep;
+ pte_clear(mm, addr, ptep);
+ return pte;
+#endif
+}
+
+static inline void
+ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+ unsigned long new, old;
+
+ do {
+ old = pte_val(*ptep);
+ new = pte_val(pte_wrprotect(__pte (old)));
+ } while (cmpxchg((unsigned long *) ptep, old, new) != old);
+#else
+ pte_t old_pte = *ptep;
+ set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+#endif
+}
+
+static inline int
+pte_same (pte_t a, pte_t b)
+{
+ return pte_val(a) == pte_val(b);
+}
+
+#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern void paging_init (void);
+
+/*
+ * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of
+ * bits in the swap-type field of the swap pte. It would be nice to
+ * enforce that, but we can't easily include <linux/swap.h> here.
+ * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...).
+ *
+ * Format of swap pte:
+ * bit 0 : present bit (must be zero)
+ * bit 1 : _PAGE_FILE (must be zero)
+ * bits 2- 8: swap-type
+ * bits 9-62: swap offset
+ * bit 63 : _PAGE_PROTNONE bit
+ *
+ * Format of file pte:
+ * bit 0 : present bit (must be zero)
+ * bit 1 : _PAGE_FILE (must be one)
+ * bits 2-62: file_offset/PAGE_SIZE
+ * bit 63 : _PAGE_PROTNONE bit
+ */
+#define __swp_type(entry) (((entry).val >> 2) & 0x7f)
+#define __swp_offset(entry) (((entry).val << 1) >> 10)
+#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS 61
+#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3)
+#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE })
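
The swap-pte layout documented above can be sanity-checked in isolation; the following hypothetical round-trip test (not part of the patch) mirrors __swp_entry(), __swp_type() and __swp_offset():

#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long type = 0x15, offset = 0x12345;

    unsigned long val = (type << 2) | (offset << 9);   /* __swp_entry() */
    unsigned long t   = (val >> 2) & 0x7f;             /* __swp_type()  */
    unsigned long o   = (val << 1) >> 10;              /* __swp_offset(): shift out bit 63 first */

    assert(t == type && o == offset);
    printf("swap entry 0x%lx -> type 0x%lx, offset 0x%lx\n", val, t, o);
    return 0;
}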
+
+/* XXX is this right? */
+#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
+ remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+#define MK_IOSPACE_PFN(space, pfn) (pfn)
+#define GET_IOSPACE(pfn) 0
+#define GET_PFN(pfn) (pfn)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
+
+/* We provide our own get_unmapped_area to cope with VA holes for userland */
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
+#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1))
+struct mmu_gather;
+void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+ unsigned long end, unsigned long floor, unsigned long ceiling);
+#endif
+
+/*
+ * IA-64 doesn't have any external MMU info: the page tables contain all the necessary
+ * information. However, we use this routine to take care of any (delayed) i-cache
+ * flushing that may be necessary.
+ */
+extern void lazy_mmu_prot_update (pte_t pte);
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+/*
+ * Update PTEP with ENTRY, which is guaranteed to be a less
+ * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and
+ * WRITABLE bits turned on, when the value at PTEP did not. The
+ * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE.
+ *
+ * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without
+ * having to worry about races. On SMP machines, there are only two
+ * cases where this is true:
+ *
+ * (1) *PTEP has the PRESENT bit turned OFF
+ * (2) ENTRY has the DIRTY bit turned ON
+ *
+ * On ia64, we could implement this routine with a cmpxchg()-loop
+ * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY.
+ * However, like on x86, we can get a more streamlined version by
+ * observing that it is OK to drop ACCESSED bit updates when
+ * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is
+ * result in an extra Access-bit fault, which would then turn on the
+ * ACCESSED bit in the low-level fault handler (iaccess_bit or
+ * daccess_bit in ivt.S).
+ */
+#ifdef CONFIG_SMP
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+do { \
+ if (__safely_writable) { \
+ set_pte(__ptep, __entry); \
+ flush_tlb_page(__vma, __addr); \
+ } \
+} while (0)
+#else
+# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
+ ptep_establish(__vma, __addr, __ptep, __entry)
+#endif
+
+# ifdef CONFIG_VIRTUAL_MEM_MAP
+ /* arch mem_map init routine is needed due to holes in a virtual mem_map */
+# define __HAVE_ARCH_MEMMAP_INIT
+ extern void memmap_init (unsigned long size, int nid, unsigned long zone,
+ unsigned long start_pfn);
+# endif /* CONFIG_VIRTUAL_MEM_MAP */
+# endif /* !__ASSEMBLY__ */
+
+/*
+ * Identity-mapped regions use a large page size. We'll call such large pages
+ * "granules". If you can think of a better name that's unambiguous, let me
+ * know...
+ */
+#if defined(CONFIG_IA64_GRANULE_64MB)
+# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M
+#elif defined(CONFIG_IA64_GRANULE_16MB)
+# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M
+#endif
+#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT)
+/*
+ * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL):
+ */
+#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
+#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init() do { } while (0)
+
+/* These tell get_user_pages() that the first gate page is accessible from user-level. */
+#define FIXADDR_USER_START GATE_ADDR
+#ifdef HAVE_BUGGY_SEGREL
+# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE)
+#else
+# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE)
+#endif
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTE_SAME
+#define __HAVE_ARCH_PGD_OFFSET_GATE
+#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+
+#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable.h>
+
+#endif /* _ASM_IA64_PGTABLE_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/unaligned.h
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm-generic/unaligned.h Thu Sep 1 18:46:28 2005
@@ -0,0 +1,122 @@
+#ifndef _ASM_GENERIC_UNALIGNED_H_
+#define _ASM_GENERIC_UNALIGNED_H_
+
+/*
+ * For the benefit of those who are trying to port Linux to another
+ * architecture, here are some C-language equivalents.
+ *
+ * This is based almost entirely upon Richard Henderson's
+ * asm-alpha/unaligned.h implementation. Some comments were
+ * taken from David Mosberger's asm-ia64/unaligned.h header.
+ */
+
+#include <linux/types.h>
+
+/*
+ * The main single-value unaligned transfer routines.
+ */
+#define get_unaligned(ptr) \
+ ((__typeof__(*(ptr)))__get_unaligned((ptr), sizeof(*(ptr))))
+#define put_unaligned(x,ptr) \
+ __put_unaligned((unsigned long)(x), (ptr), sizeof(*(ptr)))
+
+/*
+ * This function doesn't actually exist. The idea is that when
+ * someone uses the macros below with an unsupported size (datatype),
+ * the linker will alert us to the problem via an unresolved reference
+ * error.
+ */
+extern void bad_unaligned_access_length(void) __attribute__((noreturn));
+
+struct __una_u64 { __u64 x __attribute__((packed)); };
+struct __una_u32 { __u32 x __attribute__((packed)); };
+struct __una_u16 { __u16 x __attribute__((packed)); };
+
+/*
+ * Elemental unaligned loads
+ */
+
+static inline unsigned long __uldq(const __u64 *addr)
+{
+ const struct __una_u64 *ptr = (const struct __una_u64 *) addr;
+ return ptr->x;
+}
+
+static inline unsigned long __uldl(const __u32 *addr)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *) addr;
+ return ptr->x;
+}
+
+static inline unsigned long __uldw(const __u16 *addr)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *) addr;
+ return ptr->x;
+}
+
+/*
+ * Elemental unaligned stores
+ */
+
+static inline void __ustq(__u64 val, __u64 *addr)
+{
+ struct __una_u64 *ptr = (struct __una_u64 *) addr;
+ ptr->x = val;
+}
+
+static inline void __ustl(__u32 val, __u32 *addr)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *) addr;
+ ptr->x = val;
+}
+
+static inline void __ustw(__u16 val, __u16 *addr)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *) addr;
+ ptr->x = val;
+}
+
+#define __get_unaligned(ptr, size) ({ \
+ const void *__gu_p = ptr; \
+ unsigned long val; \
+ switch (size) { \
+ case 1: \
+ val = *(const __u8 *)__gu_p; \
+ break; \
+ case 2: \
+ val = __uldw(__gu_p); \
+ break; \
+ case 4: \
+ val = __uldl(__gu_p); \
+ break; \
+ case 8: \
+ val = __uldq(__gu_p); \
+ break; \
+ default: \
+ bad_unaligned_access_length(); \
+ }; \
+ val; \
+})
+
+#define __put_unaligned(val, ptr, size) \
+do { \
+ void *__gu_p = ptr; \
+ switch (size) { \
+ case 1: \
+ *(__u8 *)__gu_p = val; \
+ break; \
+ case 2: \
+ __ustw(val, __gu_p); \
+ break; \
+ case 4: \
+ __ustl(val, __gu_p); \
+ break; \
+ case 8: \
+ __ustq(val, __gu_p); \
+ break; \
+ default: \
+ bad_unaligned_access_length(); \
+ }; \
+} while(0)
+
+#endif /* _ASM_GENERIC_UNALIGNED_H */
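
The header above relies on GCC's packed-attribute trick: reading or writing a member declared __attribute__((packed)) makes the compiler emit code that tolerates any alignment. A stand-alone sketch of the same idea (assumes GCC; not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct una_u32 { uint32_t x __attribute__((packed)); };

static uint32_t load_unaligned_u32(const void *p)
{
    return ((const struct una_u32 *)p)->x;    /* mirrors __uldl() above */
}

int main(void)
{
    unsigned char buf[8] = {0};
    uint32_t v = 0x11223344;

    memcpy(buf + 1, &v, sizeof(v));           /* place the value at an odd address */
    printf("0x%08x\n", load_unaligned_u32(buf + 1));
    return 0;
}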
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/numnodes.h
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/asm/numnodes.h Thu Sep 1 18:46:28 2005
@@ -0,0 +1,15 @@
+#ifndef _ASM_MAX_NUMNODES_H
+#define _ASM_MAX_NUMNODES_H
+
+#ifdef CONFIG_IA64_DIG
+/* Max 8 Nodes */
+#define NODES_SHIFT 3
+#elif defined(CONFIG_IA64_HP_ZX1) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
+/* Max 32 Nodes */
+#define NODES_SHIFT 5
+#elif defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
+/* Max 256 Nodes */
+#define NODES_SHIFT 8
+#endif
+
+#endif /* _ASM_MAX_NUMNODES_H */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/time.h
--- /dev/null Thu Sep 1 17:09:27 2005
+++ b/xen/include/asm-ia64/linux/time.h Thu Sep 1 18:46:28 2005
@@ -0,0 +1,181 @@
+#ifndef _LINUX_TIME_H
+#define _LINUX_TIME_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+#include <linux/seqlock.h>
+#endif
+
+#ifndef _STRUCT_TIMESPEC
+#define _STRUCT_TIMESPEC
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif /* _STRUCT_TIMESPEC */
+
+struct timeval {
+ time_t tv_sec; /* seconds */
+ suseconds_t tv_usec; /* microseconds */
+};
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+#ifdef __KERNEL__
+
+/* Parameters used to convert the timespec values */
+#ifndef USEC_PER_SEC
+#define USEC_PER_SEC (1000000L)
+#endif
+
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC (1000000000L)
+#endif
+
+#ifndef NSEC_PER_USEC
+#define NSEC_PER_USEC (1000L)
+#endif
+
+static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
+{
+ return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
+}
+
+/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+
+ return (((
+ (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) +
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
+
+extern struct timespec xtime;
+extern struct timespec wall_to_monotonic;
+extern seqlock_t xtime_lock;
+
+static inline unsigned long get_seconds(void)
+{
+ return xtime.tv_sec;
+}
+
+struct timespec current_kernel_time(void);
+
+#define CURRENT_TIME (current_kernel_time())
+#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 })
+
+extern void do_gettimeofday(struct timeval *tv);
+extern int do_settimeofday(struct timespec *tv);
+extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
+extern void clock_was_set(void); // call when ever the clock is set
+extern int do_posix_clock_monotonic_gettime(struct timespec *tp);
+extern long do_nanosleep(struct timespec *t);
+extern long do_utimes(char __user * filename, struct timeval * times);
+struct itimerval;
+extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue);
+extern int do_getitimer(int which, struct itimerval *value);
+extern void getnstimeofday (struct timespec *tv);
+
+extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
+
+static inline void
+set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
+{
+ while (nsec > NSEC_PER_SEC) {
+ nsec -= NSEC_PER_SEC;
+ ++sec;
+ }
+ while (nsec < 0) {
+ nsec += NSEC_PER_SEC;
+ --sec;
+ }
+ ts->tv_sec = sec;
+ ts->tv_nsec = nsec;
+}
+
+#endif /* __KERNEL__ */
+
+#define NFDBITS __NFDBITS
+
+#define FD_SETSIZE __FD_SETSIZE
+#define FD_SET(fd,fdsetp) __FD_SET(fd,fdsetp)
+#define FD_CLR(fd,fdsetp) __FD_CLR(fd,fdsetp)
+#define FD_ISSET(fd,fdsetp) __FD_ISSET(fd,fdsetp)
+#define FD_ZERO(fdsetp) __FD_ZERO(fdsetp)
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerspec {
+ struct timespec it_interval; /* timer period */
+ struct timespec it_value; /* timer expiration */
+};
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+
+
+/*
+ * The IDs of the various system clocks (for POSIX.1b interval timers).
+ */
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_REALTIME_HR 4
+#define CLOCK_MONOTONIC_HR 5
+
+/*
+ * The IDs of various hardware clocks
+ */
+
+
+#define CLOCK_SGI_CYCLE 10
+#define MAX_CLOCKS 16
+#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \
+ CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR)
+#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR)
+
+/*
+ * The various flags for setting POSIX.1b interval timers.
+ */
+
+#define TIMER_ABSTIME 0x01
+
+
+#endif
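As a quick sanity check on the Gauss-style mktime() formula above, a hypothetical self-test (illustrative only; assumes BUG_ON() is usable in this context):

    static inline void mktime_selftest(void)
    {
        /* the Unix epoch itself maps to 0 seconds */
        BUG_ON(mktime(1970, 1, 1, 0, 0, 0) != 0);
        /* 2000-03-01 00:00:00 UTC is 11017 days = 951868800 seconds later */
        BUG_ON(mktime(2000, 3, 1, 0, 0, 0) != 951868800UL);
    }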
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/acpi.c
--- a/xen/arch/ia64/acpi.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,678 +0,0 @@
-/*
- * acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999,2000 Walt Drummond <drummond@xxxxxxxxxxx>
- * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- * Copyright (C) 2000 Intel Corp.
- * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@xxxxxxxxx>
- * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
- * Copyright (C) 2001 Jenna Hall <jenna.s.hall@xxxxxxxxx>
- * Copyright (C) 2001 Takayoshi Kochi <t-kochi@xxxxxxxxxxxxx>
- * Copyright (C) 2002 Erich Focht <efocht@xxxxxxxxxx>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/irq.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/mmzone.h>
-#include <asm/io.h>
-//#include <asm/iosapic.h>
-#include <asm/machvec.h>
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/numa.h>
-#include <asm/sal.h>
-//#include <asm/cyclone.h>
-
-#define BAD_MADT_ENTRY(entry, end) ( \
- (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
- ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
-
-#define PREFIX "ACPI: "
-
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-
-unsigned char acpi_kbd_controller_present = 1;
-unsigned char acpi_legacy_devices;
-
-const char *
-acpi_get_sysname (void)
-{
-/* #ifdef CONFIG_IA64_GENERIC */
- unsigned long rsdp_phys;
- struct acpi20_table_rsdp *rsdp;
- struct acpi_table_xsdt *xsdt;
- struct acpi_table_header *hdr;
-
- rsdp_phys = acpi_find_rsdp();
- if (!rsdp_phys) {
- printk(KERN_ERR "ACPI 2.0 RSDP not found, default to
\"dig\"\n");
- return "dig";
- }
-
- rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
- if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
- printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to
\"dig\"\n");
- return "dig";
- }
-
- xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
- hdr = &xsdt->header;
- if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
- printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to
\"dig\"\n");
- return "dig";
- }
-
- if (!strcmp(hdr->oem_id, "HP")) {
- return "hpzx1";
- }
- else if (!strcmp(hdr->oem_id, "SGI")) {
- return "sn2";
- }
-
- return "dig";
-/*
-#else
-# if defined (CONFIG_IA64_HP_SIM)
- return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
- return "hpzx1";
-# elif defined (CONFIG_IA64_SGI_SN2)
- return "sn2";
-# elif defined (CONFIG_IA64_DIG)
- return "dig";
-# else
-# error Unknown platform. Fix acpi.c.
-# endif
-#endif
-*/
-}
-
-#ifdef CONFIG_ACPI_BOOT
-
-#define ACPI_MAX_PLATFORM_INTERRUPTS 256
-
-#if 0
-/* Array to record platform interrupt vectors for generic interrupt routing. */
-int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
- [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
-};
-
-enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
-
-/*
- * Interrupt routing API for device drivers. Provides interrupt vector for
- * a generic platform event. Currently only CPEI is implemented.
- */
-int
-acpi_request_vector (u32 int_type)
-{
- int vector = -1;
-
- if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
- /* corrected platform error interrupt */
- vector = platform_intr_list[int_type];
- } else
- printk(KERN_ERR "acpi_request_vector(): invalid interrupt
type\n");
- return vector;
-}
-#endif
-char *
-__acpi_map_table (unsigned long phys_addr, unsigned long size)
-{
- return __va(phys_addr);
-}
-
-/* --------------------------------------------------------------------------
- Boot-time Table Parsing
- -------------------------------------------------------------------------- */
-
-static int total_cpus __initdata;
-static int available_cpus __initdata;
-struct acpi_table_madt * acpi_madt __initdata;
-static u8 has_8259;
-
-#if 0
-static int __init
-acpi_parse_lapic_addr_ovr (
- acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_lapic_addr_ovr *lapic;
-
- lapic = (struct acpi_table_lapic_addr_ovr *) header;
-
- if (BAD_MADT_ENTRY(lapic, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- if (lapic->address) {
- iounmap((void *) ipi_base_addr);
- ipi_base_addr = (unsigned long) ioremap(lapic->address, 0);
- }
- return 0;
-}
-
-
-static int __init
-acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_lsapic *lsapic;
-
- lsapic = (struct acpi_table_lsapic *) header;
-
- if (BAD_MADT_ENTRY(lsapic, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) |
lsapic->eid);
-
- if (!lsapic->flags.enabled)
- printk(" disabled");
- else {
- printk(" enabled");
-#ifdef CONFIG_SMP
- smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
- if (hard_smp_processor_id()
- == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus])
- printk(" (BSP)");
-#endif
- ++available_cpus;
- }
-
- printk("\n");
-
- total_cpus++;
- return 0;
-}
-
-
-static int __init
-acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_lapic_nmi *lacpi_nmi;
-
- lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
-
- if (BAD_MADT_ENTRY(lacpi_nmi, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /* TBD: Support lapic_nmi entries */
- return 0;
-}
-
-
-static int __init
-acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_iosapic *iosapic;
-
- iosapic = (struct acpi_table_iosapic *) header;
-
- if (BAD_MADT_ENTRY(iosapic, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- iosapic_init(iosapic->address, iosapic->global_irq_base);
-
- return 0;
-}
-
-
-static int __init
-acpi_parse_plat_int_src (
- acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_plat_int_src *plintsrc;
- int vector;
-
- plintsrc = (struct acpi_table_plat_int_src *) header;
-
- if (BAD_MADT_ENTRY(plintsrc, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /*
- * Get vector assignment for this interrupt, set attributes,
- * and program the IOSAPIC routing table.
- */
- vector = iosapic_register_platform_intr(plintsrc->type,
- plintsrc->global_irq,
- plintsrc->iosapic_vector,
- plintsrc->eid,
- plintsrc->id,
- (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
- (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
-
- platform_intr_list[plintsrc->type] = vector;
- return 0;
-}
-
-
-static int __init
-acpi_parse_int_src_ovr (
- acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_int_src_ovr *p;
-
- p = (struct acpi_table_int_src_ovr *) header;
-
- if (BAD_MADT_ENTRY(p, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- iosapic_override_isa_irq(p->bus_irq, p->global_irq,
- (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
- (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
- return 0;
-}
-
-
-static int __init
-acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
-{
- struct acpi_table_nmi_src *nmi_src;
-
- nmi_src = (struct acpi_table_nmi_src*) header;
-
- if (BAD_MADT_ENTRY(nmi_src, end))
- return -EINVAL;
-
- acpi_table_print_madt_entry(header);
-
- /* TBD: Support nimsrc entries */
- return 0;
-}
-/* Hook from generic ACPI tables.c */
-void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (!strncmp(oem_id, "IBM", 3) &&
- (!strncmp(oem_table_id, "SERMOW", 6))){
-
- /* Unfortunately ITC_DRIFT is not yet part of the
- * official SAL spec, so the ITC_DRIFT bit is not
- * set by the BIOS on this hardware.
- */
- sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
-
- /*Start cyclone clock*/
- cyclone_setup(0);
- }
-}
-
-static int __init
-acpi_parse_madt (unsigned long phys_addr, unsigned long size)
-{
- if (!phys_addr || !size)
- return -EINVAL;
-
- acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
-
- /* remember the value for reference after free_initmem() */
-#ifdef CONFIG_ITANIUM
- has_8259 = 1; /* Firmware on old Itanium systems is broken */
-#else
- has_8259 = acpi_madt->flags.pcat_compat;
-#endif
- iosapic_system_init(has_8259);
-
- /* Get base address of IPI Message Block */
-
- if (acpi_madt->lapic_address)
- ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0);
-
- printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr);
-
- acpi_madt_oem_check(acpi_madt->header.oem_id,
- acpi_madt->header.oem_table_id);
-
- return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
-
-#undef SLIT_DEBUG
-
-#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
-
-static int __initdata srat_num_cpus; /* number of cpus */
-static u32 __initdata pxm_flag[PXM_FLAG_LEN];
-#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
-#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
-/* maps to convert between proximity domain and logical node ID */
-int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS];
-int __initdata nid_to_pxm_map[MAX_NUMNODES];
-static struct acpi_table_slit __initdata *slit_table;
-
-/*
- * ACPI 2.0 SLIT (System Locality Information Table)
- * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
- */
-void __init
-acpi_numa_slit_init (struct acpi_table_slit *slit)
-{
- u32 len;
-
- len = sizeof(struct acpi_table_header) + 8
- + slit->localities * slit->localities;
- if (slit->header.length != len) {
- printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d
actual\n",
- len, slit->header.length);
- memset(numa_slit, 10, sizeof(numa_slit));
- return;
- }
- slit_table = slit;
-}
-
-void __init
-acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
-{
- /* record this node in proximity bitmap */
- pxm_bit_set(pa->proximity_domain);
-
- node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
- /* nid should be overridden as logical node id later */
- node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
- srat_num_cpus++;
-}
-
-void __init
-acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
-{
- unsigned long paddr, size;
- u8 pxm;
- struct node_memblk_s *p, *q, *pend;
-
- pxm = ma->proximity_domain;
-
- /* fill node memory chunk structure */
- paddr = ma->base_addr_hi;
- paddr = (paddr << 32) | ma->base_addr_lo;
- size = ma->length_hi;
- size = (size << 32) | ma->length_lo;
-
- /* Ignore disabled entries */
- if (!ma->flags.enabled)
- return;
-
- /* record this node in proximity bitmap */
- pxm_bit_set(pxm);
-
- /* Insertion sort based on base address */
- pend = &node_memblk[num_node_memblks];
- for (p = &node_memblk[0]; p < pend; p++) {
- if (paddr < p->start_paddr)
- break;
- }
- if (p < pend) {
- for (q = pend - 1; q >= p; q--)
- *(q + 1) = *q;
- }
- p->start_paddr = paddr;
- p->size = size;
- p->nid = pxm;
- num_node_memblks++;
-}
-
-void __init
-acpi_numa_arch_fixup (void)
-{
- int i, j, node_from, node_to;
-
- /* If there's no SRAT, fix the phys_id */
- if (srat_num_cpus == 0) {
- node_cpuid[0].phys_id = hard_smp_processor_id();
- return;
- }
-
- /* calculate total number of nodes in system from PXM bitmap */
- numnodes = 0; /* init total nodes in system */
-
- memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
- memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
- for (i = 0; i < MAX_PXM_DOMAINS; i++) {
- if (pxm_bit_test(i)) {
- pxm_to_nid_map[i] = numnodes;
- node_set_online(numnodes);
- nid_to_pxm_map[numnodes++] = i;
- }
- }
-
- /* set logical node id in memory chunk structure */
- for (i = 0; i < num_node_memblks; i++)
- node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
-
- /* assign memory bank numbers for each chunk on each node */
- for (i = 0; i < numnodes; i++) {
- int bank;
-
- bank = 0;
- for (j = 0; j < num_node_memblks; j++)
- if (node_memblk[j].nid == i)
- node_memblk[j].bank = bank++;
- }
-
- /* set logical node id in cpu structure */
- for (i = 0; i < srat_num_cpus; i++)
- node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
-
- printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes);
- printk(KERN_INFO "Number of memory chunks in system = %d\n",
num_node_memblks);
-
- if (!slit_table) return;
- memset(numa_slit, -1, sizeof(numa_slit));
- for (i=0; i<slit_table->localities; i++) {
- if (!pxm_bit_test(i))
- continue;
- node_from = pxm_to_nid_map[i];
- for (j=0; j<slit_table->localities; j++) {
- if (!pxm_bit_test(j))
- continue;
- node_to = pxm_to_nid_map[j];
- node_distance(node_from, node_to) =
- slit_table->entry[i*slit_table->localities + j];
- }
- }
-
-#ifdef SLIT_DEBUG
- printk("ACPI 2.0 SLIT locality table:\n");
- for (i = 0; i < numnodes; i++) {
- for (j = 0; j < numnodes; j++)
- printk("%03d ", node_distance(i,j));
- printk("\n");
- }
-#endif
-}
-#endif /* CONFIG_ACPI_NUMA */
-
-#if 0
-unsigned int
-acpi_register_gsi (u32 gsi, int polarity, int trigger)
-{
- return acpi_register_irq(gsi, polarity, trigger);
-}
-EXPORT_SYMBOL(acpi_register_gsi);
-static int __init
-acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
-{
- struct acpi_table_header *fadt_header;
- struct fadt_descriptor_rev2 *fadt;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- fadt_header = (struct acpi_table_header *) __va(phys_addr);
- if (fadt_header->revision != 3)
- return -ENODEV; /* Only deal with ACPI 2.0 FADT */
-
- fadt = (struct fadt_descriptor_rev2 *) fadt_header;
-
- if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
- acpi_kbd_controller_present = 0;
-
- if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
- acpi_legacy_devices = 1;
-
- acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE);
- return 0;
-}
-#endif
-
-unsigned long __init
-acpi_find_rsdp (void)
-{
- unsigned long rsdp_phys = 0;
-
- if (efi.acpi20)
- rsdp_phys = __pa(efi.acpi20);
- else if (efi.acpi)
- printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer
supported\n");
- return rsdp_phys;
-}
-
-#if 0
-int __init
-acpi_boot_init (void)
-{
-
- /*
- * MADT
- * ----
- * Parse the Multiple APIC Description Table (MADT), if exists.
- * Note that this table provides platform SMP configuration
- * information -- the successor to MPS tables.
- */
-
- if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
- printk(KERN_ERR PREFIX "Can't find MADT\n");
- goto skip_madt;
- }
-
- /* Local APIC */
-
- if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
- printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
- printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
- printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
-
- /* I/O APIC */
-
- if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
- printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
-
- /* System-Level Interrupt Routing */
-
- if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
- printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
- printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
-
- if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
- printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
- skip_madt:
-
- /*
- * FADT says whether a legacy keyboard controller is present.
- * The FADT also contains an SCI_INT line, by which the system
- * gets interrupts such as power and sleep buttons. If it's not
- * on a Legacy interrupt, it needs to be setup.
- */
- if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
- printk(KERN_ERR PREFIX "Can't find FADT\n");
-
-#ifdef CONFIG_SMP
- if (available_cpus == 0) {
- printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
- printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
- smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
- available_cpus = 1; /* We've got at least one of these, no? */
- }
- smp_boot_data.cpu_count = available_cpus;
-
- smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
- if (srat_num_cpus == 0) {
- int cpu, i = 1;
- for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
- if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
- node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
- }
- build_cpu_to_node_map();
-# endif
-#endif
- /* Make boot-up look pretty */
- printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
total_cpus);
- return 0;
-}
-int
-acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
-{
- int vector;
-
- if (has_8259 && gsi < 16)
- *irq = isa_irq_to_vector(gsi);
- else {
- vector = gsi_to_vector(gsi);
- if (vector == -1)
- return -1;
-
- *irq = vector;
- }
- return 0;
-}
-
-int
-acpi_register_irq (u32 gsi, u32 polarity, u32 trigger)
-{
- if (has_8259 && gsi < 16)
- return isa_irq_to_vector(gsi);
-
- return iosapic_register_intr(gsi,
- (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
- (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
-}
-EXPORT_SYMBOL(acpi_register_irq);
-#endif
-#endif /* CONFIG_ACPI_BOOT */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/dom0_ops.c
--- a/xen/arch/ia64/dom0_ops.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,237 +0,0 @@
-/******************************************************************************
- * Arch-specific dom0_ops.c
- *
- * Process command requests from domain-0 guest OS.
- *
- * Copyright (c) 2002, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/lib.h>
-#include <xen/mm.h>
-#include <public/dom0_ops.h>
-#include <xen/sched.h>
-#include <xen/event.h>
-#include <asm/pdb.h>
-#include <xen/trace.h>
-#include <xen/console.h>
-#include <public/sched_ctl.h>
-
-long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op)
-{
- long ret = 0;
-
- if ( !IS_PRIV(current->domain) )
- return -EPERM;
-
- switch ( op->cmd )
- {
- case DOM0_GETPAGEFRAMEINFO:
- {
- struct pfn_info *page;
- unsigned long pfn = op->u.getpageframeinfo.pfn;
- domid_t dom = op->u.getpageframeinfo.domain;
- struct domain *d;
-
- ret = -EINVAL;
-
- if ( unlikely(pfn >= max_page) ||
- unlikely((d = find_domain_by_id(dom)) == NULL) )
- break;
-
- page = &frame_table[pfn];
-
- if ( likely(get_page(page, d)) )
- {
- ret = 0;
-
- op->u.getpageframeinfo.type = NOTAB;
-
- if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
- {
- switch ( page->u.inuse.type_info & PGT_type_mask )
- {
- default:
- panic("No such page type\n");
- break;
- }
- }
-
- put_page(page);
- }
-
- put_domain(d);
-
- copy_to_user(u_dom0_op, op, sizeof(*op));
- }
- break;
-
- case DOM0_GETPAGEFRAMEINFO2:
- {
-#define GPF2_BATCH 128
- int n,j;
- int num = op->u.getpageframeinfo2.num;
- domid_t dom = op->u.getpageframeinfo2.domain;
- unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array;
- struct domain *d;
- unsigned long *l_arr;
- ret = -ESRCH;
-
- if ( unlikely((d = find_domain_by_id(dom)) == NULL) )
- break;
-
- if ( unlikely(num > 1024) )
- {
- ret = -E2BIG;
- break;
- }
-
- l_arr = (unsigned long *)alloc_xenheap_page();
-
- ret = 0;
- for( n = 0; n < num; )
- {
- int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n);
-
- if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) )
- {
- ret = -EINVAL;
- break;
- }
-
- for( j = 0; j < k; j++ )
- {
- struct pfn_info *page;
- unsigned long mfn = l_arr[j];
-
- if ( unlikely(mfn >= max_page) )
- goto e2_err;
-
- page = &frame_table[mfn];
-
- if ( likely(get_page(page, d)) )
- {
- unsigned long type = 0;
-
- switch( page->u.inuse.type_info & PGT_type_mask )
- {
- default:
- panic("No such page type\n");
- break;
- }
-
- if ( page->u.inuse.type_info & PGT_pinned )
- type |= LPINTAB;
- l_arr[j] |= type;
- put_page(page);
- }
- else
- {
- e2_err:
- l_arr[j] |= XTAB;
- }
-
- }
-
- if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) )
- {
- ret = -EINVAL;
- break;
- }
-
- n += j;
- }
-
- free_xenheap_page((unsigned long)l_arr);
-
- put_domain(d);
- }
- break;
-#ifndef CONFIG_VTI
- /*
- * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 -
- * it actually allocates and maps pages.
- */
- case DOM0_GETMEMLIST:
- {
- unsigned long i;
- struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
- unsigned long start_page = op->u.getmemlist.max_pfns >> 32;
- unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff;
- unsigned long pfn;
- unsigned long *buffer = op->u.getmemlist.buffer;
- struct page *page;
-
- ret = -EINVAL;
- if ( d != NULL )
- {
- ret = 0;
-
- for ( i = start_page; i < (start_page + nr_pages); i++ )
- {
- page = map_new_domain_page(d, i << PAGE_SHIFT);
- if ( page == NULL )
- {
- ret = -ENOMEM;
- break;
- }
- pfn = page_to_pfn(page);
- if ( put_user(pfn, buffer) )
- {
- ret = -EFAULT;
- break;
- }
- buffer++;
- }
-
- op->u.getmemlist.num_pfns = i - start_page;
- copy_to_user(u_dom0_op, op, sizeof(*op));
-
- put_domain(d);
- }
- }
- break;
-#else
- case DOM0_GETMEMLIST:
- {
- int i;
- struct domain *d = find_domain_by_id(op->u.getmemlist.domain);
- unsigned long max_pfns = op->u.getmemlist.max_pfns;
- unsigned long pfn;
- unsigned long *buffer = op->u.getmemlist.buffer;
- struct list_head *list_ent;
-
- ret = -EINVAL;
- if (!d) {
- ret = 0;
-
- spin_lock(&d->page_alloc_lock);
- list_ent = d->page_list.next;
- for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) {
- pfn = list_entry(list_ent, struct pfn_info, list) -
- frame_table;
- if (put_user(pfn, buffer)) {
- ret = -EFAULT;
- break;
- }
- buffer++;
- list_ent = frame_table[pfn].list.next;
- }
- spin_unlock(&d->page_alloc_lock);
-
- op->u.getmemlist.num_pfns = i;
- copy_to_user(u_dom0_op, op, sizeof(*op));
-
- put_domain(d);
- }
- }
- break;
-#endif // CONFIG_VTI
- default:
- ret = -ENOSYS;
-
- }
-
- return ret;
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/dom_fw.c
--- a/xen/arch/ia64/dom_fw.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,688 +0,0 @@
-/*
- * Xen domain firmware emulation support
- * Copyright (C) 2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <xen/config.h>
-#include <asm/system.h>
-#include <asm/pgalloc.h>
-
-#include <linux/efi.h>
-#include <asm/io.h>
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <xen/acpi.h>
-
-#include <asm/dom_fw.h>
-
-struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int);
-extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr);
-extern struct domain *dom0;
-extern unsigned long dom0_start;
-
-extern unsigned long running_on_sim;
-
-
-unsigned long dom_fw_base_mpa = -1;
-unsigned long imva_fw_base = -1;
-
-// return domain (meta)physical address for a given imva
-// this function is a call-back from dom_fw_init
-unsigned long dom_pa(unsigned long imva)
-{
- if (dom_fw_base_mpa == -1 || imva_fw_base == -1) {
- printf("dom_pa: uninitialized! (spinning...)\n");
- while(1);
- }
- if (imva - imva_fw_base > PAGE_SIZE) {
- printf("dom_pa: bad offset! imva=%p, imva_fw_base=%p
(spinning...)\n",imva,imva_fw_base);
- while(1);
- }
- return dom_fw_base_mpa + (imva - imva_fw_base);
-}
-
-// builds a hypercall bundle at domain physical address
-void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall)
-{
- unsigned long imva;
-
- if (d == dom0) paddr += dom0_start;
- imva = domain_mpa_to_imva(d,paddr);
- build_hypercall_bundle(imva,d->arch.breakimm,hypercall,1);
-}
-
-
-// builds a hypercall bundle at domain physical address
-void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall,unsigned long ret)
-{
- unsigned long imva;
-
- if (d == dom0) paddr += dom0_start;
- imva = domain_mpa_to_imva(d,paddr);
- build_hypercall_bundle(imva,d->arch.breakimm,hypercall,ret);
-}
-
-
-// FIXME: This is really a hack: Forcing the boot parameter block
-// at domain mpaddr 0 page, then grabbing only the low bits of the
-// Xen imva, which is the offset into the page
-unsigned long dom_fw_setup(struct domain *d, char *args, int arglen)
-{
- struct ia64_boot_param *bp;
-
- dom_fw_base_mpa = 0;
- if (d == dom0) dom_fw_base_mpa += dom0_start;
- imva_fw_base = domain_mpa_to_imva(d,dom_fw_base_mpa);
- bp = dom_fw_init(d,args,arglen,imva_fw_base,PAGE_SIZE);
- return dom_pa((unsigned long)bp);
-}
-
-
-/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */
-
-#define MB (1024*1024UL)
-
-#define NUM_EFI_SYS_TABLES 6
-#define PASS_THRU_IOPORT_SPACE
-#ifdef PASS_THRU_IOPORT_SPACE
-# define NUM_MEM_DESCS 4
-#else
-# define NUM_MEM_DESCS 3
-#endif
-
-
-#define SECS_PER_HOUR (60 * 60)
-#define SECS_PER_DAY (SECS_PER_HOUR * 24)
-
-/* Compute the `struct tm' representation of *T,
- offset OFFSET seconds east of UTC,
- and store year, yday, mon, mday, wday, hour, min, sec into *TP.
- Return nonzero if successful. */
-int
-offtime (unsigned long t, efi_time_t *tp)
-{
- const unsigned short int __mon_yday[2][13] =
- {
- /* Normal years. */
- { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
- /* Leap years. */
- { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
- };
- long int days, rem, y;
- const unsigned short int *ip;
-
- days = t / SECS_PER_DAY;
- rem = t % SECS_PER_DAY;
- while (rem < 0) {
- rem += SECS_PER_DAY;
- --days;
- }
- while (rem >= SECS_PER_DAY) {
- rem -= SECS_PER_DAY;
- ++days;
- }
- tp->hour = rem / SECS_PER_HOUR;
- rem %= SECS_PER_HOUR;
- tp->minute = rem / 60;
- tp->second = rem % 60;
- /* January 1, 1970 was a Thursday. */
- y = 1970;
-
-# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-# define __isleap(year) \
- ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-
- while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
- /* Guess a corrected year, assuming 365 days per year. */
- long int yg = y + days / 365 - (days % 365 < 0);
-
- /* Adjust DAYS and Y to match the guessed year. */
- days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
- - LEAPS_THRU_END_OF (y - 1));
- y = yg;
- }
- tp->year = y;
- ip = __mon_yday[__isleap(y)];
- for (y = 11; days < (long int) ip[y]; --y)
- continue;
- days -= ip[y];
- tp->month = y + 1;
- tp->day = days + 1;
- return 1;
-}
-
-extern struct ia64_pal_retval pal_emulator_static (unsigned long);
-
-/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
-
-#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
-
-#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
-#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
-#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
-
-#ifndef XEN
-static efi_status_t
-fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
-{
-#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
- struct {
- int tv_sec; /* must be 32bits to work */
- int tv_usec;
- } tv32bits;
-
- ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
-
- memset(tm, 0, sizeof(*tm));
- offtime(tv32bits.tv_sec, tm);
-
- if (tc)
- memset(tc, 0, sizeof(*tc));
-#else
-# error Not implemented yet...
-#endif
- return EFI_SUCCESS;
-}
-
-static void
-efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
-{
-#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
- ssc(status, 0, 0, 0, SSC_EXIT);
-#else
-# error Not implemented yet...
-#endif
-}
-
-static efi_status_t
-efi_unimplemented (void)
-{
- return EFI_UNSUPPORTED;
-}
-#endif /* !XEN */
-
-struct sal_ret_values
-sal_emulator (long index, unsigned long in1, unsigned long in2,
- unsigned long in3, unsigned long in4, unsigned long in5,
- unsigned long in6, unsigned long in7)
-{
- long r9 = 0;
- long r10 = 0;
- long r11 = 0;
- long status;
-
- /*
- * Don't do a "switch" here since that gives us code that
- * isn't self-relocatable.
- */
- status = 0;
- if (index == SAL_FREQ_BASE) {
- if (!running_on_sim)
- status = ia64_sal_freq_base(in1,&r9,&r10);
- else switch (in1) {
- case SAL_FREQ_BASE_PLATFORM:
- r9 = 200000000;
- break;
-
- case SAL_FREQ_BASE_INTERVAL_TIMER:
- r9 = 700000000;
- break;
-
- case SAL_FREQ_BASE_REALTIME_CLOCK:
- r9 = 1;
- break;
-
- default:
- status = -1;
- break;
- }
- } else if (index == SAL_PCI_CONFIG_READ) {
- if (current->domain == dom0) {
- u64 value;
- // note that args 2&3 are swapped!!
- status = ia64_sal_pci_config_read(in1,in3,in2,&value);
- r9 = value;
- }
- else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n");
- } else if (index == SAL_PCI_CONFIG_WRITE) {
- if (current->domain == dom0) {
- if (((in1 & ~0xffffffffUL) && (in4 == 0)) ||
- (in4 > 1) ||
- (in2 > 8) || (in2 & (in2-1)))
- printf("***
SAL_PCI_CONF_WRITE?!?(adr=%p,typ=%p,sz=%p,val=%p)\n",in1,in4,in2,in3);
- // note that args are in a different order!!
- status = ia64_sal_pci_config_write(in1,in4,in2,in3);
- }
- else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n");
- } else if (index == SAL_SET_VECTORS) {
- printf("*** CALLED SAL_SET_VECTORS. IGNORED...\n");
- } else if (index == SAL_GET_STATE_INFO) {
- printf("*** CALLED SAL_GET_STATE_INFO. IGNORED...\n");
- } else if (index == SAL_GET_STATE_INFO_SIZE) {
- printf("*** CALLED SAL_GET_STATE_INFO_SIZE. IGNORED...\n");
- } else if (index == SAL_CLEAR_STATE_INFO) {
- printf("*** CALLED SAL_CLEAR_STATE_INFO. IGNORED...\n");
- } else if (index == SAL_MC_RENDEZ) {
- printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n");
- } else if (index == SAL_MC_SET_PARAMS) {
- printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n");
- } else if (index == SAL_CACHE_FLUSH) {
- printf("*** CALLED SAL_CACHE_FLUSH. IGNORED...\n");
- } else if (index == SAL_CACHE_INIT) {
- printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n");
- } else if (index == SAL_UPDATE_PAL) {
- printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n");
- } else {
- printf("*** CALLED SAL_ WITH UNKNOWN INDEX. IGNORED...\n");
- status = -1;
- }
- return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-struct ia64_pal_retval
-xen_pal_emulator(unsigned long index, unsigned long in1,
- unsigned long in2, unsigned long in3)
-{
- long r9 = 0;
- long r10 = 0;
- long r11 = 0;
- long status = -1;
-
-#define USE_PAL_EMULATOR
-#ifdef USE_PAL_EMULATOR
- return pal_emulator_static(index);
-#endif
- if (running_on_sim) return pal_emulator_static(index);
- if (index >= PAL_COPY_PAL) {
- printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
- index);
- }
- else switch (index) {
- case PAL_MEM_ATTRIB:
- status = ia64_pal_mem_attrib(&r9);
- break;
- case PAL_FREQ_BASE:
- status = ia64_pal_freq_base(&r9);
- break;
- case PAL_PROC_GET_FEATURES:
- status = ia64_pal_proc_get_features(&r9,&r10,&r11);
- break;
- case PAL_BUS_GET_FEATURES:
- status = ia64_pal_bus_get_features(&r9,&r10,&r11);
- break;
- case PAL_FREQ_RATIOS:
- status = ia64_pal_freq_ratios(&r9,&r10,&r11);
- break;
- case PAL_PTCE_INFO:
- {
- // return hard-coded xen-specific values because ptc.e
- // is emulated on xen to always flush everything
- // these values result in only one ptc.e instruction
- status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0;
- }
- break;
- case PAL_VERSION:
- status = ia64_pal_version(&r9,&r10);
- break;
- case PAL_VM_PAGE_SIZE:
- status = ia64_pal_vm_page_size(&r9,&r10);
- break;
- case PAL_DEBUG_INFO:
- status = ia64_pal_debug_info(&r9,&r10);
- break;
- case PAL_CACHE_SUMMARY:
- status = ia64_pal_cache_summary(&r9,&r10);
- break;
- case PAL_VM_SUMMARY:
- // FIXME: what should xen return for these, figure out later
- // For now, linux does the right thing if pal call fails
- // In particular, rid_size must be set properly!
- //status = ia64_pal_vm_summary(&r9,&r10);
- break;
- case PAL_RSE_INFO:
- status = ia64_pal_rse_info(&r9,&r10);
- break;
- case PAL_VM_INFO:
- status = ia64_pal_vm_info(in1,in2,&r9,&r10);
- break;
- case PAL_REGISTER_INFO:
- status = ia64_pal_register_info(in1,&r9,&r10);
- break;
- case PAL_CACHE_FLUSH:
- /* FIXME */
- printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n");
- BUG();
- break;
- case PAL_PERF_MON_INFO:
- {
- unsigned long pm_buffer[16];
- int i;
- status = ia64_pal_perf_mon_info(pm_buffer,&r9);
- if (status != 0) {
- while(1)
- printk("PAL_PERF_MON_INFO fails
ret=%d\n",status);
- break;
- }
- if (copy_to_user((void __user *)in1,pm_buffer,128)) {
- while(1)
- printk("xen_pal_emulator: PAL_PERF_MON_INFO "
- "can't copy to user!!!!\n");
- status = -1;
- break;
- }
- }
- break;
- case PAL_CACHE_INFO:
- {
- pal_cache_config_info_t ci;
- status = ia64_pal_cache_config_info(in1,in2,&ci);
- if (status != 0) break;
- r9 = ci.pcci_info_1.pcci1_data;
- r10 = ci.pcci_info_2.pcci2_data;
- }
- break;
- case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */
- printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n");
- break;
- case PAL_HALT_INFO: /* inappropriate info for guest? */
- printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n");
- break;
- default:
- printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n",
- index);
- break;
- }
- return ((struct ia64_pal_retval) {status, r9, r10, r11});
-}
-
-#define NFUNCPTRS 20
-
-void print_md(efi_memory_desc_t *md)
-{
-#if 1
- printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx)
(%luMB)\n",
- md->type, md->attribute, md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- md->num_pages >> (20 - EFI_PAGE_SHIFT));
-#endif
-}
-
-#define LSAPIC_NUM 16 // TEMP
-static u32 lsapic_flag=1;
-
-/* Provide only one LP to guest */
-static int
-acpi_update_lsapic (acpi_table_entry_header *header)
-{
- struct acpi_table_lsapic *lsapic;
-
- lsapic = (struct acpi_table_lsapic *) header;
- if (!lsapic)
- return -EINVAL;
-
- if (lsapic->flags.enabled && lsapic_flag) {
- printk("enable lsapic entry: 0x%lx\n", (u64)lsapic);
- lsapic_flag = 0; /* disable all the following processors */
- } else if (lsapic->flags.enabled) {
- printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic);
- lsapic->flags.enabled = 0;
- } else
- printk("lsapic entry is already disabled: 0x%lx\n",
(u64)lsapic);
-
- return 0;
-}
-
-static int
-acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size)
-{
- u8 checksum=0;
- u8* ptr;
- int len;
- struct acpi_table_madt* acpi_madt;
-
- if (!phys_addr || !size)
- return -EINVAL;
-
- acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
- acpi_madt->header.checksum=0;
-
- /* re-calculate MADT checksum */
- ptr = (u8*)acpi_madt;
- len = acpi_madt->header.length;
- while (len>0){
- checksum = (u8)( checksum + (*ptr++) );
- len--;
- }
- acpi_madt->header.checksum = 0x0 - checksum;
-
- return 0;
-}
-
-/* base is physical address of acpi table */
-void touch_acpi_table(void)
-{
- u64 count = 0;
- count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS);
- if ( count < 1)
- printk("Error parsing MADT - no LAPIC entires\n");
- printk("Total %d lsapic entry\n", count);
- acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
-
- return;
-}
-
-
-struct ia64_boot_param *
-dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size)
-{
- efi_system_table_t *efi_systab;
- efi_runtime_services_t *efi_runtime;
- efi_config_table_t *efi_tables;
- struct ia64_sal_systab *sal_systab;
- efi_memory_desc_t *efi_memmap, *md;
- unsigned long *pal_desc, *sal_desc;
- struct ia64_sal_desc_entry_point *sal_ed;
- struct ia64_boot_param *bp;
- unsigned long *pfn;
- unsigned char checksum = 0;
- char *cp, *cmd_line, *fw_vendor;
- int i = 0;
- unsigned long maxmem = d->max_pages * PAGE_SIZE;
- unsigned long start_mpaddr = ((d==dom0)?dom0_start:0);
-
-# define MAKE_MD(typ, attr, start, end, abs) \
- do { \
- md = efi_memmap + i++; \
- md->type = typ; \
- md->pad = 0; \
- md->phys_addr = abs ? start : start_mpaddr + start; \
- md->virt_addr = 0; \
- md->num_pages = (end - start) >> 12; \
- md->attribute = attr; \
- print_md(md); \
- } while (0)
-
-/* FIXME: should check size but for now we have a whole MB to play with.
- And if stealing code from fw-emu.c, watch out for new fw_vendor on the end!
- if (fw_mem_size < sizeof(fw_mem_proto)) {
- printf("sys_fw_init: insufficient space for fw_mem\n");
- return 0;
- }
-*/
- memset(fw_mem, 0, fw_mem_size);
-
-#ifdef XEN
-#else
- pal_desc = (unsigned long *) &pal_emulator_static;
- sal_desc = (unsigned long *) &sal_emulator;
-#endif
-
- cp = fw_mem;
- efi_systab = (void *) cp; cp += sizeof(*efi_systab);
- efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
- efi_tables = (void *) cp; cp += NUM_EFI_SYS_TABLES * sizeof(*efi_tables);
- sal_systab = (void *) cp; cp += sizeof(*sal_systab);
- sal_ed = (void *) cp; cp += sizeof(*sal_ed);
- efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
- bp = (void *) cp; cp += sizeof(*bp);
- pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn);
- cmd_line = (void *) cp;
-
- if (args) {
- if (arglen >= 1024)
- arglen = 1023;
- memcpy(cmd_line, args, arglen);
- } else {
- arglen = 0;
- }
- cmd_line[arglen] = '\0';
-
- memset(efi_systab, 0, sizeof(efi_systab));
- efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
- efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
- efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
- cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary
-#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
- cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary
-
- memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR));
- efi_systab->fw_vendor = dom_pa(fw_vendor);
-
- efi_systab->fw_revision = 1;
- efi_systab->runtime = (void *) dom_pa(efi_runtime);
- efi_systab->nr_tables = NUM_EFI_SYS_TABLES;
- efi_systab->tables = dom_pa(efi_tables);
-
- efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
- efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
- efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
-#define EFI_HYPERCALL_PATCH(tgt,call) do { \
- dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \
- tgt = dom_pa(pfn); \
- *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
- *pfn++ = 0; \
- } while (0)
-
- EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME);
- EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME);
- EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME);
- EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME);
- EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP);
- EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE);
- EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE);
- EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE);
- EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT);
- EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM);
-
- efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID;
- efi_tables[0].table = dom_pa(sal_systab);
- for (i = 1; i < NUM_EFI_SYS_TABLES; i++) {
- efi_tables[i].guid = NULL_GUID;
- efi_tables[i].table = 0;
- }
- if (d == dom0) {
- printf("Domain0 EFI passthrough:");
- i = 1;
- if (efi.mps) {
- efi_tables[i].guid = MPS_TABLE_GUID;
- efi_tables[i].table = __pa(efi.mps);
- printf(" MPS=%0xlx",efi_tables[i].table);
- i++;
- }
-
- touch_acpi_table();
-
- if (efi.acpi20) {
- efi_tables[i].guid = ACPI_20_TABLE_GUID;
- efi_tables[i].table = __pa(efi.acpi20);
- printf(" ACPI 2.0=%0xlx",efi_tables[i].table);
- i++;
- }
- if (efi.acpi) {
- efi_tables[i].guid = ACPI_TABLE_GUID;
- efi_tables[i].table = __pa(efi.acpi);
- printf(" ACPI=%0xlx",efi_tables[i].table);
- i++;
- }
- if (efi.smbios) {
- efi_tables[i].guid = SMBIOS_TABLE_GUID;
- efi_tables[i].table = __pa(efi.smbios);
- printf(" SMBIOS=%0xlx",efi_tables[i].table);
- i++;
- }
- if (efi.hcdp) {
- efi_tables[i].guid = HCDP_TABLE_GUID;
- efi_tables[i].table = __pa(efi.hcdp);
- printf(" HCDP=%0xlx",efi_tables[i].table);
- i++;
- }
- printf("\n");
- }
-
- /* fill in the SAL system table: */
- memcpy(sal_systab->signature, "SST_", 4);
- sal_systab->size = sizeof(*sal_systab);
- sal_systab->sal_rev_minor = 1;
- sal_systab->sal_rev_major = 0;
- sal_systab->entry_count = 1;
-
- strcpy(sal_systab->oem_id, "Xen/ia64");
- strcpy(sal_systab->product_id, "Xen/ia64");
-
- /* fill in an entry point: */
- sal_ed->type = SAL_DESC_ENTRY_POINT;
-#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \
- dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \
- tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \
- } while (0)
- FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0);
- FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1);
- sal_ed->gp = 0; // will be ignored
-
- for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
- checksum += *cp;
-
- sal_systab->checksum = -checksum;
-
- /* simulate 1MB free memory at physical address zero */
- i = 0;
- MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);
- /* hypercall patches live here, masquerade as reserved PAL memory */
- MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0);
- MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0);
-#ifdef PASS_THRU_IOPORT_SPACE
- if (d == dom0 && !running_on_sim) {
- /* pass through the I/O port space */
- efi_memory_desc_t *efi_get_io_md(void);
- efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md();
- u32 type;
- u64 iostart, ioend, ioattr;
-
- type = ia64_efi_io_md->type;
- iostart = ia64_efi_io_md->phys_addr;
- ioend = ia64_efi_io_md->phys_addr +
- (ia64_efi_io_md->num_pages << 12);
- ioattr = ia64_efi_io_md->attribute;
- MAKE_MD(type,ioattr,iostart,ioend, 1);
- }
- else
- MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0);
-#endif
-
- bp->efi_systab = dom_pa(fw_mem);
- bp->efi_memmap = dom_pa(efi_memmap);
- bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
- bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
- bp->efi_memdesc_version = 1;
- bp->command_line = dom_pa(cmd_line);
- bp->console_info.num_cols = 80;
- bp->console_info.num_rows = 25;
- bp->console_info.orig_x = 0;
- bp->console_info.orig_y = 24;
- bp->fpswa = 0;
-
- return bp;
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/domain.c
--- a/xen/arch/ia64/domain.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1103 +0,0 @@
-/*
- * Copyright (C) 1995 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000
- *
- * Copyright (C) 2005 Intel Co
- * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
- *
- * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> Add CONFIG_VTI domain support
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <xen/softirq.h>
-#include <xen/mm.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-//#include <asm/mpspec.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-//#include <xen/shadow.h>
-#include <xen/console.h>
-
-#include <xen/elf.h>
-//#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/dma.h> /* for MAX_DMA_ADDRESS */
-
-#include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */
-
-#include <asm/vcpu.h> /* for function declarations */
-#include <public/arch-ia64.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/vmx_vpd.h>
-#include <asm/pal.h>
-#include <public/io/ioreq.h>
-
-#define CONFIG_DOMAIN0_CONTIGUOUS
-unsigned long dom0_start = -1L;
-unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
-//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
-unsigned long dom0_align = 256*1024*1024;
-#ifdef DOMU_BUILD_STAGING
-unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
-unsigned long domU_staging_start;
-unsigned long domU_staging_align = 64*1024;
-unsigned long *domU_staging_area;
-#endif
-
-// initialized by arch/ia64/setup.c:find_initrd()
-unsigned long initrd_start = 0, initrd_end = 0;
-
-#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend))
-
-//extern int loadelfimage(char *);
-extern int readelfimage_base_and_size(char *, unsigned long,
- unsigned long *, unsigned long *, unsigned long *);
-
-unsigned long map_domain_page0(struct domain *);
-extern unsigned long dom_fw_setup(struct domain *, char *, int);
-
-/* this belongs in include/asm, but there doesn't seem to be a suitable place */
-void free_perdomain_pt(struct domain *d)
-{
- printf("free_perdomain_pt: not implemented\n");
- //free_page((unsigned long)d->mm.perdomain_pt);
-}
-
-int hlt_counter;
-
-void disable_hlt(void)
-{
- hlt_counter++;
-}
-
-void enable_hlt(void)
-{
- hlt_counter--;
-}
-
-static void default_idle(void)
-{
- if ( hlt_counter == 0 )
- {
- local_irq_disable();
- if ( !softirq_pending(smp_processor_id()) )
- safe_halt();
- //else
- local_irq_enable();
- }
-}
-
-void continue_cpu_idle_loop(void)
-{
- int cpu = smp_processor_id();
- for ( ; ; )
- {
-#ifdef IA64
-// __IRQ_STAT(cpu, idle_timestamp) = jiffies
-#else
- irq_stat[cpu].idle_timestamp = jiffies;
-#endif
- while ( !softirq_pending(cpu) )
- default_idle();
- raise_softirq(SCHEDULE_SOFTIRQ);
- do_softirq();
- }
-}
-
-void startup_cpu_idle_loop(void)
-{
- /* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(current->domain == IDLE_DOMAIN_ID);
- raise_softirq(SCHEDULE_SOFTIRQ);
- do_softirq();
-
- /*
- * Declares CPU setup done to the boot processor.
- * Therefore memory barrier to ensure state is visible.
- */
- smp_mb();
-#if 0
-//do we have to ensure the idle task has a shared page so that, for example,
-//region registers can be loaded from it. Apparently not...
- idle0_task.shared_info = (void *)alloc_xenheap_page();
- memset(idle0_task.shared_info, 0, PAGE_SIZE);
- /* pin mapping */
- // FIXME: Does this belong here? Or do only at domain switch time?
- {
- /* WARNING: following must be inlined to avoid nested fault */
- unsigned long psr = ia64_clear_ic();
- ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR,
- pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)),
- PAGE_SHIFT);
- ia64_set_psr(psr);
- ia64_srlz_i();
- }
-#endif
-
- continue_cpu_idle_loop();
-}
-
-struct vcpu *arch_alloc_vcpu_struct(void)
-{
- /* Per-vp stack is used here. So we need keep vcpu
- * same page as per-vp stack */
- return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER);
-}
-
-void arch_free_vcpu_struct(struct vcpu *v)
-{
- free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER);
-}
-
-static void init_switch_stack(struct vcpu *v)
-{
- struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
- struct switch_stack *sw = (struct switch_stack *) regs - 1;
- extern void ia64_ret_from_clone;
-
- memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs));
- sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET;
- sw->b0 = (unsigned long) &ia64_ret_from_clone;
- sw->ar_fpsr = FPSR_DEFAULT;
- v->arch._thread.ksp = (unsigned long) sw - 16;
- // stay on kernel stack because may get interrupts!
- // ia64_ret_from_clone (which b0 gets in new_thread) switches
- // to user stack
- v->arch._thread.on_ustack = 0;
- memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
-}
-
-void arch_do_createdomain(struct vcpu *v)
-{
- struct domain *d = v->domain;
- struct thread_info *ti = alloc_thread_info(v);
-
- /* Clear thread_info to clear some important fields, like preempt_count */
- memset(ti, 0, sizeof(struct thread_info));
- init_switch_stack(v);
-
- d->shared_info = (void *)alloc_xenheap_page();
- if (!d->shared_info) {
- printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
- while (1);
- }
- memset(d->shared_info, 0, PAGE_SIZE);
- d->shared_info->vcpu_data[0].arch.privregs =
- alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
- printf("arch_vcpu_info=%p\n",
d->shared_info->vcpu_data[0].arch.privregs);
- memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE);
- v->vcpu_info = &(d->shared_info->vcpu_data[0]);
-
- d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
-
-#ifdef CONFIG_VTI
- /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick
- * to this solution. Maybe it can be deferred until we know created
- * one as vmx domain */
- v->arch.vtlb = init_domain_tlb(v);
-#endif
-
- /* We may also need emulation rid for region4, though it's unlikely
- * to see guest issue uncacheable access in metaphysical mode. But
- * keep such info here may be more sane.
- */
- if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL)
- || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL))
- BUG();
- VCPU(v, metaphysical_mode) = 1;
- v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
- v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
- v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
- v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
-#define DOMAIN_RID_BITS_DEFAULT 18
- if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
- BUG();
- v->arch.starting_rid = d->arch.starting_rid;
- v->arch.ending_rid = d->arch.ending_rid;
- // the following will eventually need to be negotiated dynamically
- d->xen_vastart = XEN_START_ADDR;
- d->xen_vaend = XEN_END_ADDR;
- d->shared_info_va = SHAREDINFO_ADDR;
- d->arch.breakimm = 0x1000;
- v->arch.breakimm = d->arch.breakimm;
-
- d->arch.mm = xmalloc(struct mm_struct);
- if (unlikely(!d->arch.mm)) {
- printk("Can't allocate mm_struct for domain %d\n",d->domain_id);
- return -ENOMEM;
- }
- memset(d->arch.mm, 0, sizeof(*d->arch.mm));
- d->arch.mm->pgd = pgd_alloc(d->arch.mm);
- if (unlikely(!d->arch.mm->pgd)) {
- printk("Can't allocate pgd for domain %d\n",d->domain_id);
- return -ENOMEM;
- }
-}
-
-void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
-{
- struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
-
- printf("arch_getdomaininfo_ctxt\n");
- c->regs = *regs;
- c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector;
-#if 0
- if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs,
- v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) {
- printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs);
- return -EFAULT;
- }
-#endif
-
- c->shared = v->domain->shared_info->arch;
-}
-
-int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
-{
- struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
- struct domain *d = v->domain;
- int i, rc, ret;
- unsigned long progress = 0;
-
- printf("arch_set_info_guest\n");
- if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
- return 0;
-
- if (c->flags & VGCF_VMX_GUEST) {
- if (!vmx_enabled) {
- printk("No VMX hardware feature for vmx domain.\n");
- return -EINVAL;
- }
-
- vmx_setup_platform(v, c);
- }
-
- *regs = c->regs;
- new_thread(v, regs->cr_iip, 0, 0);
-
- v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
- if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs,
- c->vcpu.privregs, sizeof(mapped_regs_t))) {
- printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n",
c->vcpu.privregs);
- return -EFAULT;
- }
-
- v->arch.domain_itm_last = -1L;
- d->shared_info->arch = c->shared;
-
- /* Don't redo final setup */
- set_bit(_VCPUF_initialised, &v->vcpu_flags);
- return 0;
-}
-
-void arch_do_boot_vcpu(struct vcpu *v)
-{
- struct domain *d = v->domain;
- printf("arch_do_boot_vcpu: not implemented\n");
-
- d->shared_info->vcpu_data[v->vcpu_id].arch.privregs =
- alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
- printf("arch_vcpu_info=%p\n",
d->shared_info->vcpu_data[v->vcpu_id].arch.privregs);
- memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE);
- return;
-}
-
-void domain_relinquish_resources(struct domain *d)
-{
- /* FIXME */
- printf("domain_relinquish_resources: not implemented\n");
-}
-
-// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
-// and linux/arch/ia64/kernel/process.c:kernel_thread()
-void new_thread(struct vcpu *v,
- unsigned long start_pc,
- unsigned long start_stack,
- unsigned long start_info)
-{
- struct domain *d = v->domain;
- struct pt_regs *regs;
- struct ia64_boot_param *bp;
- extern char saved_command_line[];
-
-
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (d == dom0) start_pc += dom0_start;
-#endif
-
- regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
- if (VMX_DOMAIN(v)) {
- /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
- regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
- } else {
- regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
- | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
- & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
- regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
- }
- regs->cr_iip = start_pc;
- regs->cr_ifs = 1UL << 63; /* or clear? */
- regs->ar_fpsr = FPSR_DEFAULT;
-
- if (VMX_DOMAIN(v)) {
-#ifdef CONFIG_VTI
- vmx_init_all_rr(v);
- if (d == dom0)
- VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);
- /* Virtual processor context setup */
- VMX_VPD(v, vpsr) = IA64_PSR_BN;
- VPD_CR(v, dcr) = 0;
-#endif
- } else {
- init_all_rr(v);
- if (d == dom0)
- regs->r28 = dom_fw_setup(d,saved_command_line,256L);
- else {
- regs->ar_rsc |= (2 << 2); /* force PL2/3 */
- regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0
console=tty0 root=/dev/hda1",256L); //FIXME
- }
- VCPU(v, banknum) = 1;
- VCPU(v, metaphysical_mode) = 1;
- d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
- }
-}
-
-static struct page * map_new_domain0_page(unsigned long mpaddr)
-{
- if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
- printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr);
-printk("map_new_domain0_page:
start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
- while(1);
- }
- return pfn_to_page((mpaddr >> PAGE_SHIFT));
-}
-
-/* allocate new page for domain and map it to the specified metaphysical addr */
-struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr)
-{
- struct mm_struct *mm = d->arch.mm;
- struct page *p = (struct page *)0;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-extern unsigned long vhpt_paddr, vhpt_pend;
-
- if (!mm->pgd) {
- printk("map_new_domain_page: domain pgd must exist!\n");
- return(p);
- }
- pgd = pgd_offset(mm,mpaddr);
- if (pgd_none(*pgd))
- pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
-
- pud = pud_offset(pgd, mpaddr);
- if (pud_none(*pud))
- pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
-
- pmd = pmd_offset(pud, mpaddr);
- if (pmd_none(*pmd))
- pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
-// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
-
- pte = pte_offset_map(pmd, mpaddr);
- if (pte_none(*pte)) {
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (d == dom0) p = map_new_domain0_page(mpaddr);
- else
-#endif
- {
- p = alloc_domheap_page(d);
- // zero out pages for security reasons
- memset(__va(page_to_phys(p)),0,PAGE_SIZE);
- }
- if (unlikely(!p)) {
-printf("map_new_domain_page: Can't alloc!!!! Aaaargh!\n");
- return(p);
- }
-if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) {
- printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p));
-}
- set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT,
- __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
- }
- else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr);
- return p;
-}
-
-/* map a physical address to the specified metaphysical addr */
-void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr)
-{
- struct mm_struct *mm = d->arch.mm;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- if (!mm->pgd) {
- printk("map_domain_page: domain pgd must exist!\n");
- return;
- }
- pgd = pgd_offset(mm,mpaddr);
- if (pgd_none(*pgd))
- pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
-
- pud = pud_offset(pgd, mpaddr);
- if (pud_none(*pud))
- pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
-
- pmd = pmd_offset(pud, mpaddr);
- if (pmd_none(*pmd))
- pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr));
-// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr));
-
- pte = pte_offset_map(pmd, mpaddr);
- if (pte_none(*pte)) {
- set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT,
- __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
- }
- else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr);
-}
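
[Editor's sketch] map_new_domain_page() and map_domain_page() above share the same populate-on-demand walk: each level of the four-level tree (pgd -> pud -> pmd -> pte) is allocated only when the corresponding entry is empty, and the final pte is then written with the Xen page attributes. The following is a minimal standalone sketch of that pattern, not part of the patch; the types, level sizes, and helper names are hypothetical stand-ins for the real Linux/Xen page-table API.

    /* Sketch of a populate-on-demand multi-level table, analogous to the
     * pgd->pud->pmd->pte walk above.  Types and sizes are hypothetical. */
    #include <stdlib.h>
    #include <string.h>

    #define LEVEL_BITS   9
    #define LEVEL_SIZE   (1UL << LEVEL_BITS)
    #define LEVEL_MASK   (LEVEL_SIZE - 1)

    typedef struct node { void *slot[LEVEL_SIZE]; } node_t;

    static void *alloc_node(void)
    {
        node_t *n = malloc(sizeof(*n));
        if (n) memset(n, 0, sizeof(*n));
        return n;
    }

    /* Walk three intermediate levels, allocating any missing node, and
     * return a pointer to the leaf slot for 'addr' (or NULL on OOM). */
    static void **walk_and_populate(node_t *root, unsigned long addr)
    {
        node_t *n = root;
        int level;

        for (level = 3; level >= 1; level--) {
            unsigned long idx = (addr >> (level * LEVEL_BITS)) & LEVEL_MASK;
            if (n->slot[idx] == NULL) {
                n->slot[idx] = alloc_node();      /* populate-on-none */
                if (n->slot[idx] == NULL)
                    return NULL;
            }
            n = n->slot[idx];
        }
        return &n->slot[addr & LEVEL_MASK];       /* leaf "pte" slot */
    }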
-
-void mpafoo(unsigned long mpaddr)
-{
- extern unsigned long privop_trace;
- if (mpaddr == 0x3800)
- privop_trace = 1;
-}
-
-unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
-{
- struct mm_struct *mm = d->arch.mm;
- pgd_t *pgd = pgd_offset(mm, mpaddr);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (d == dom0) {
- if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
- //printk("lookup_domain_mpa: bad dom0 mpaddr
%p!\n",mpaddr);
-//printk("lookup_domain_mpa:
start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size);
- mpafoo(mpaddr);
- }
- pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
- __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
- pte = &pteval;
- return *(unsigned long *)pte;
- }
-#endif
-tryagain:
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd,mpaddr);
- if (pud_present(*pud)) {
- pmd = pmd_offset(pud,mpaddr);
- if (pmd_present(*pmd)) {
- pte = pte_offset_map(pmd,mpaddr);
- if (pte_present(*pte)) {
-//printk("lookup_domain_page: found mapping for %lx,
pte=%lx\n",mpaddr,pte_val(*pte));
- return *(unsigned long *)pte;
- }
- }
- }
- }
- /* if lookup fails and mpaddr is "legal", "create" the page */
- if ((mpaddr >> PAGE_SHIFT) < d->max_pages) {
- if (map_new_domain_page(d,mpaddr)) goto tryagain;
- }
- printk("lookup_domain_mpa: bad mpa %p (> %p\n",
- mpaddr,d->max_pages<<PAGE_SHIFT);
- mpafoo(mpaddr);
- return 0;
-}
-
-// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
-#ifndef CONFIG_VTI
-unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
-{
- unsigned long pte = lookup_domain_mpa(d,mpaddr);
- unsigned long imva;
-
- pte &= _PAGE_PPN_MASK;
- imva = __va(pte);
- imva |= mpaddr & ~PAGE_MASK;
- return(imva);
-}
-#else // CONFIG_VTI
-unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
-{
- unsigned long imva = __gpa_to_mpa(d, mpaddr);
-
- return __va(imva);
-}
-#endif // CONFIG_VTI
-
-// remove following line if not privifying in memory
-//#define HAVE_PRIVIFY_MEMORY
-#ifndef HAVE_PRIVIFY_MEMORY
-#define privify_memory(x,y) do {} while(0)
-#endif
-
-// see arch/x86/xxx/domain_build.c
-int elf_sanity_check(Elf_Ehdr *ehdr)
-{
- return (IS_ELF(*ehdr));
-}
-
-static void copy_memory(void *dst, void *src, int size)
-{
- int remain;
-
- if (IS_XEN_ADDRESS(dom0,src)) {
- memcpy(dst,src,size);
- }
- else {
- printf("About to call __copy_from_user(%p,%p,%d)\n",
- dst,src,size);
- while (remain = __copy_from_user(dst,src,size)) {
- printf("incomplete user copy, %d remain of %d\n",
- remain,size);
- dst += size - remain; src += size - remain;
- size -= remain;
- }
- }
-}
-
-void loaddomainelfimage(struct domain *d, unsigned long image_start)
-{
- char *elfbase = image_start;
- //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
- Elf_Ehdr ehdr;
- Elf_Phdr phdr;
- int h, filesz, memsz, paddr;
- unsigned long elfaddr, dom_mpaddr, dom_imva;
- struct page *p;
- unsigned long pteval;
-
- copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
- for ( h = 0; h < ehdr.e_phnum; h++ ) {
- copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
- sizeof(Elf_Phdr));
- //if ( !is_loadable_phdr(phdr) )
- if ((phdr.p_type != PT_LOAD)) {
- continue;
- }
- filesz = phdr.p_filesz; memsz = phdr.p_memsz;
- elfaddr = elfbase + phdr.p_offset;
- dom_mpaddr = phdr.p_paddr;
-//printf("p_offset: %x, size=%x\n",elfaddr,filesz);
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (d == dom0) {
- if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) {
- printf("Domain0 doesn't fit in allocated space!\n");
- while(1);
- }
- dom_imva = __va(dom_mpaddr + dom0_start);
- copy_memory(dom_imva,elfaddr,filesz);
- if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
-//FIXME: This test for code seems to find a lot more than objdump -x does
- if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
- }
- else
-#endif
- while (memsz > 0) {
-#ifdef DOMU_AUTO_RESTART
- pteval = lookup_domain_mpa(d,dom_mpaddr);
- if (pteval) dom_imva = __va(pteval & _PFN_MASK);
- else { printf("loaddomainelfimage: BAD!\n"); while(1); }
-#else
- p = map_new_domain_page(d,dom_mpaddr);
- if (unlikely(!p)) BUG();
- dom_imva = __va(page_to_phys(p));
-#endif
- if (filesz > 0) {
- if (filesz >= PAGE_SIZE)
- copy_memory(dom_imva,elfaddr,PAGE_SIZE);
- else { // copy partial page, zero the rest of page
- copy_memory(dom_imva,elfaddr,filesz);
- memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
- }
-//FIXME: This test for code seems to find a lot more than objdump -x does
- if (phdr.p_flags & PF_X)
- privify_memory(dom_imva,PAGE_SIZE);
- }
- else if (memsz > 0) // always zero out entire page
- memset(dom_imva,0,PAGE_SIZE);
- memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
- elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
- }
- }
-}
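
[Editor's sketch] The loop in loaddomainelfimage() above applies the standard PT_LOAD rule: p_filesz bytes of each loadable segment come from the ELF image, and the remaining p_memsz - p_filesz bytes are zero-filled (typically the BSS). Here is a compressed sketch of just that rule, separate from the page-by-page mapping done above; the struct and function names are hypothetical.

    /* Sketch: load one PT_LOAD segment into already-mapped memory.
     * 'dest' is assumed to be a directly writable mapping of p_memsz bytes. */
    #include <string.h>

    struct load_seg {
        const char   *file_base;   /* start of the ELF image in memory */
        unsigned long p_offset;    /* segment offset within the image  */
        unsigned long p_filesz;    /* bytes present in the file        */
        unsigned long p_memsz;     /* bytes occupied in memory         */
    };

    static void load_segment(void *dest, const struct load_seg *seg)
    {
        /* Copy the file-backed part... */
        memcpy(dest, seg->file_base + seg->p_offset, seg->p_filesz);
        /* ...and zero the remainder (the BSS), since memsz >= filesz. */
        if (seg->p_memsz > seg->p_filesz)
            memset((char *)dest + seg->p_filesz, 0,
                   seg->p_memsz - seg->p_filesz);
    }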
-
-int
-parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
-{
- Elf_Ehdr ehdr;
-
- copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
-
- if ( !elf_sanity_check(&ehdr) ) {
- printk("ELF sanity check failed.\n");
- return -EINVAL;
- }
-
- if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize )
- {
- printk("ELF program headers extend beyond end of image.\n");
- return -EINVAL;
- }
-
- if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize )
- {
- printk("ELF section headers extend beyond end of image.\n");
- return -EINVAL;
- }
-
-#if 0
- /* Find the section-header strings table. */
- if ( ehdr.e_shstrndx == SHN_UNDEF )
- {
- printk("ELF image has no section-header strings table
(shstrtab).\n");
- return -EINVAL;
- }
-#endif
-
- *entry = ehdr.e_entry;
-printf("parsedomainelfimage: entry point = %p\n",*entry);
-
- return 0;
-}
-
-
-void alloc_dom0(void)
-{
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (platform_is_hp_ski()) {
- dom0_size = 128*1024*1024; //FIXME: Should be configurable
- }
- printf("alloc_dom0: starting (initializing %d
MB...)\n",dom0_size/(1024*1024));
-
- /* FIXME: The first trunk (say 256M) should always be assigned to
- * Dom0, since Dom0's physical == machine address for DMA purpose.
- * Some old version linux, like 2.4, assumes physical memory existing
- * in 2nd 64M space.
- */
- dom0_start = alloc_boot_pages(
- dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT);
- dom0_start <<= PAGE_SHIFT;
- if (!dom0_start) {
- printf("construct_dom0: can't allocate contiguous memory size=%p\n",
- dom0_size);
- while(1);
- }
- printf("alloc_dom0: dom0_start=%p\n",dom0_start);
-#else
- dom0_start = 0;
-#endif
-
-}
-
-#ifdef DOMU_BUILD_STAGING
-void alloc_domU_staging(void)
-{
- domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
- printf("alloc_domU_staging: starting (initializing %d
MB...)\n",domU_staging_size/(1024*1024));
- domU_staging_start = alloc_boot_pages(
- domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT);
- domU_staging_start <<= PAGE_SHIFT;
- if (!domU_staging_size) {
- printf("alloc_domU_staging: can't allocate, spinning...\n");
- while(1);
- }
- else domU_staging_area = (unsigned long *)__va(domU_staging_start);
- printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area);
-
-}
-
-unsigned long
-domU_staging_read_8(unsigned long at)
-{
- // no way to return errors so just do it
- return domU_staging_area[at>>3];
-
-}
-
-unsigned long
-domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
- unsigned long c, unsigned long d)
-{
- if (at + 32 > domU_staging_size) return -1;
- if (at & 0x1f) return -1;
- at >>= 3;
- domU_staging_area[at++] = a;
- domU_staging_area[at++] = b;
- domU_staging_area[at++] = c;
- domU_staging_area[at] = d;
- return 0;
-
-}
-#endif
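
[Editor's sketch] domU_staging_write_32() above accepts a byte offset 'at' that must be 32-byte aligned and within the staging area, plus four 8-byte words, returning 0 on success and -1 otherwise. A caller-side sketch of pushing a flat image into the staging area 32 bytes at a time follows; the stage_image() helper and its loop bounds are hypothetical, and the final partial chunk and size check are omitted for brevity.

    /* Sketch: copy a flat image into the domU staging area 32 bytes at a
     * time using the interface above.  Error handling is simplified. */
    extern unsigned long domU_staging_write_32(unsigned long at,
                                               unsigned long a, unsigned long b,
                                               unsigned long c, unsigned long d);

    static int stage_image(const unsigned long *words, unsigned long nwords)
    {
        unsigned long off;

        for (off = 0; off + 4 <= nwords; off += 4) {
            /* 'at' is a byte offset and must be 32-byte aligned. */
            if (domU_staging_write_32(off * sizeof(unsigned long),
                                      words[off], words[off + 1],
                                      words[off + 2], words[off + 3]) != 0)
                return -1;
        }
        return 0;
    }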
-
-/*
- * Domain 0 has direct access to all devices absolutely. However
- * the major point of this stub here, is to allow alloc_dom_mem
- * handled with order > 0 request. Dom0 requires that bit set to
- * allocate memory for other domains.
- */
-void physdev_init_dom0(struct domain *d)
-{
- set_bit(_DOMF_physdev_access, &d->domain_flags);
-}
-
-extern unsigned long running_on_sim;
-unsigned int vmx_dom0 = 0;
-int construct_dom0(struct domain *d,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pt_pages;
- unsigned long count;
- unsigned long alloc_start, alloc_end;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct vcpu *v = d->vcpu[0];
-
- struct domain_setup_info dsi;
- unsigned long p_start;
- unsigned long pkern_start;
- unsigned long pkern_entry;
- unsigned long pkern_end;
- unsigned long ret, progress = 0;
-
-//printf("construct_dom0: starting\n");
- /* Sanity! */
-#ifndef CLONE_DOMAIN0
- if ( d != dom0 )
- BUG();
- if ( test_bit(_DOMF_constructed, &d->domain_flags) )
- BUG();
-#endif
-
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- alloc_start = dom0_start;
- alloc_end = dom0_start + dom0_size;
- d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE;
- image_start = __va(ia64_boot_param->initrd_start);
- image_len = ia64_boot_param->initrd_size;
-//printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
-//printk("First word of image: %lx\n",*(unsigned long *)image_start);
-
-//printf("construct_dom0: about to call parseelfimage\n");
- dsi.image_addr = (unsigned long)image_start;
- dsi.image_len = image_len;
- rc = parseelfimage(&dsi);
- if ( rc != 0 )
- return rc;
-
-#ifdef CONFIG_VTI
- /* Temp workaround */
- if (running_on_sim)
- dsi.xen_section_string = (char *)1;
-
- /* Check whether dom0 is vti domain */
- if ((!vmx_enabled) && !dsi.xen_section_string) {
- printk("Lack of hardware support for unmodified vmx dom0\n");
- panic("");
- }
-
- if (vmx_enabled && !dsi.xen_section_string) {
- printk("Dom0 is vmx domain!\n");
- vmx_dom0 = 1;
- }
-#endif
-
- p_start = dsi.v_start;
- pkern_start = dsi.v_kernstart;
- pkern_end = dsi.v_kernend;
- pkern_entry = dsi.v_kernentry;
-
-//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx,
pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
-
- if ( (p_start & (PAGE_SIZE-1)) != 0 )
- {
- printk("Initial guest OS must load to a page boundary.\n");
- return -EINVAL;
- }
-
- printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %lx->%lx\n"
- " Entry address: %lx\n"
- " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
- pkern_start, pkern_end, pkern_entry);
-
- if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
-
- // if high 3 bits of pkern start are non-zero, error
-
- // if pkern end is after end of metaphysical memory, error
- // (we should be able to deal with this... later)
-
-
- //
-
-#if 0
- strcpy(d->name,"Domain0");
-#endif
-
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-
-#ifdef CONFIG_VTI
- /* Construct a frame-allocation list for the initial domain, since these
- * pages are allocated by boot allocator and pfns are not set properly
- */
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page_set_owner(page, d);
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &d->page_list);
-
- /* Construct 1:1 mapping */
- machine_to_phys_mapping[mfn] = mfn;
- }
-
- /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
- * for dom0
- */
- d->arch.pmt = NULL;
-#endif
-
- /* Copy the OS image. */
- loaddomainelfimage(d,image_start);
-
- /* Copy the initial ramdisk. */
- //if ( initrd_len != 0 )
- // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
- /* Sync d/i cache conservatively */
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if (ret != PAL_STATUS_SUCCESS)
- panic("PAL CACHE FLUSH failed for dom0.\n");
- printk("Sync i/d cache for dom0 image SUCC\n");
-
-#if 0
- /* Set up start info area. */
- //si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = d->tot_pages;
- si->shared_info = virt_to_phys(d->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- //si->pt_base = vpt_start;
- //si->nr_pt_frames = nr_pt_pages;
- //si->mfn_list = vphysmap_start;
-
- if ( initrd_len != 0 )
- {
- //si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%08lx\n",
- si->mod_len, si->mod_start);
- }
-
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
- }
- }
- *dst = '\0';
-
- zap_low_mappings(); /* Do the same for the idle page tables. */
-#endif
-
- /* Give up the VGA console if DOM0 is configured to grab it. */
- if (cmdline != NULL)
- console_endboot(strstr(cmdline, "tty0") != NULL);
-
- /* VMX specific construction for Dom0, if hardware supports VMX
- * and Dom0 is unmodified image
- */
- printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
- if (vmx_dom0)
- vmx_final_setup_domain(dom0);
-
- set_bit(_DOMF_constructed, &d->domain_flags);
-
- new_thread(v, pkern_entry, 0, 0);
- physdev_init_dom0(d);
-
- // FIXME: Hack for keyboard input
-#ifdef CLONE_DOMAIN0
-if (d == dom0)
-#endif
- serial_input_init();
- if (d == dom0) {
- VCPU(v, delivery_mask[0]) = -1L;
- VCPU(v, delivery_mask[1]) = -1L;
- VCPU(v, delivery_mask[2]) = -1L;
- VCPU(v, delivery_mask[3]) = -1L;
- }
- else __set_bit(0x30, VCPU(v, delivery_mask));
-
- return 0;
-}
-
-// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
-int construct_domU(struct domain *d,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- int i, rc;
- struct vcpu *v = d->vcpu[0];
- unsigned long pkern_entry;
-
-#ifndef DOMU_AUTO_RESTART
- if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG();
-#endif
-
- printk("*** LOADING DOMAIN %d ***\n",d->domain_id);
-
- d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size
- // FIXME: use domain0 command line
- rc = parsedomainelfimage(image_start, image_len, &pkern_entry);
- printk("parsedomainelfimage returns %d\n",rc);
- if ( rc != 0 ) return rc;
-
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-
- /* Copy the OS image. */
- printk("calling loaddomainelfimage(%p,%p)\n",d,image_start);
- loaddomainelfimage(d,image_start);
- printk("loaddomainelfimage returns\n");
-
- set_bit(_DOMF_constructed, &d->domain_flags);
-
- printk("calling new_thread, entry=%p\n",pkern_entry);
-#ifdef DOMU_AUTO_RESTART
- v->domain->arch.image_start = image_start;
- v->domain->arch.image_len = image_len;
- v->domain->arch.entry = pkern_entry;
-#endif
- new_thread(v, pkern_entry, 0, 0);
- printk("new_thread returns\n");
- __set_bit(0x30, VCPU(v, delivery_mask));
-
- return 0;
-}
-
-#ifdef DOMU_AUTO_RESTART
-void reconstruct_domU(struct vcpu *v)
-{
- /* re-copy the OS image to reset data values to original */
- printk("reconstruct_domU: restarting domain %d...\n",
- v->domain->domain_id);
- loaddomainelfimage(v->domain,v->domain->arch.image_start);
- new_thread(v, v->domain->arch.entry, 0, 0);
-}
-#endif
-
-// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
-int launch_domainU(unsigned long size)
-{
-#ifdef CLONE_DOMAIN0
- static int next = CLONE_DOMAIN0+1;
-#else
- static int next = 1;
-#endif
-
- struct domain *d = do_createdomain(next,0);
- if (!d) {
- printf("launch_domainU: couldn't create\n");
- return 1;
- }
- else next++;
- if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) {
- printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n",
- d->domain_id,domU_staging_area,size);
- return 2;
- }
- domain_unpause_by_systemcontroller(d);
-}
-
-void machine_restart(char * __unused)
-{
- if (platform_is_hp_ski()) dummy();
- printf("machine_restart called: spinning....\n");
- while(1);
-}
-
-void machine_halt(void)
-{
- if (platform_is_hp_ski()) dummy();
- printf("machine_halt called: spinning....\n");
- while(1);
-}
-
-void dummy_called(char *function)
-{
- if (platform_is_hp_ski()) asm("break 0;;");
- printf("dummy called in %s: spinning....\n", function);
- while(1);
-}
-
-
-#if 0
-void switch_to(struct vcpu *prev, struct vcpu *next)
-{
- struct vcpu *last;
-
- __switch_to(prev,next,last);
- //set_current(next);
-}
-#endif
-
-void domain_pend_keyboard_interrupt(int irq)
-{
- vcpu_pend_interrupt(dom0->vcpu[0],irq);
-}
-
-void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
-{
- if ( v->processor == newcpu )
- return;
-
- set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
- v->processor = newcpu;
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/grant_table.c
--- a/xen/arch/ia64/grant_table.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1288 +0,0 @@
-#ifndef CONFIG_VTI
-// temporarily in arch/ia64 until can merge into common/grant_table.c
-/******************************************************************************
- * common/grant_table.c
- *
- * Mechanism for granting foreign access to page frames, and receiving
- * page-ownership transfers.
- *
- * Copyright (c) 2005 Christopher Clark
- * Copyright (c) 2004 K A Fraser
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#define GRANT_DEBUG 0
-#define GRANT_DEBUG_VERBOSE 0
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/shadow.h>
-#include <xen/mm.h>
-#ifdef __ia64__
-#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok??
-// FIXME-ia64: need to implement real cmpxchg_user on ia64
-//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 0))
-// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different
-#undef ORDER_GRANT_FRAMES
-//#undef NUM_GRANT_FRAMES
-#define ORDER_GRANT_FRAMES 0
-//#define NUM_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES)
-#endif
-
-#define PIN_FAIL(_lbl, _rc, _f, _a...) \
- do { \
- DPRINTK( _f, ## _a ); \
- rc = (_rc); \
- goto _lbl; \
- } while ( 0 )
-
-static inline int
-get_maptrack_handle(
- grant_table_t *t)
-{
- unsigned int h;
- if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) )
- return -1;
- t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT;
- t->map_count++;
- return h;
-}
-
-static inline void
-put_maptrack_handle(
- grant_table_t *t, int handle)
-{
- t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT;
- t->maptrack_head = handle;
- t->map_count--;
-}
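
[Editor's sketch] get_maptrack_handle() and put_maptrack_handle() above keep the unused maptrack entries on an index-linked free list: a free entry stores the index of the next free entry in the ref bits of ref_and_flags (shifted by MAPTRACK_REF_SHIFT), and maptrack_head names the first free slot. The following self-contained sketch shows the same free-list scheme; the array size, shift, and names are hypothetical.

    /* Sketch of an index-linked free list over a fixed array, as used by
     * the maptrack table above.  Constants and names are hypothetical. */
    #define NEXT_SHIFT  16
    #define NENTRIES    256

    struct entry { unsigned int ref_and_flags; };

    static struct entry table[NENTRIES];
    static unsigned int head;            /* index of first free entry */

    static void freelist_init(void)
    {
        unsigned int i;
        for (i = 0; i < NENTRIES; i++)   /* entry i points at entry i+1 */
            table[i].ref_and_flags = (i + 1) << NEXT_SHIFT;
        head = 0;
    }

    static int alloc_handle(void)
    {
        unsigned int h = head;
        if (h == NENTRIES)               /* list exhausted */
            return -1;
        head = table[h].ref_and_flags >> NEXT_SHIFT;
        return h;
    }

    static void free_handle(int h)
    {
        table[h].ref_and_flags = head << NEXT_SHIFT;
        head = h;
    }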
-
-static int
-__gnttab_activate_grant_ref(
- struct domain *mapping_d, /* IN */
- struct vcpu *mapping_ed,
- struct domain *granting_d,
- grant_ref_t ref,
- u16 dev_hst_ro_flags,
- unsigned long host_virt_addr,
- unsigned long *pframe ) /* OUT */
-{
- domid_t sdom;
- u16 sflags;
- active_grant_entry_t *act;
- grant_entry_t *sha;
- s16 rc = 1;
- unsigned long frame = 0;
- int retries = 0;
-
- /*
- * Objectives of this function:
- * . Make the record ( granting_d, ref ) active, if not already.
- * . Update shared grant entry of owner, indicating frame is mapped.
- * . Increment the owner act->pin reference counts.
- * . get_page on shared frame if new mapping.
- * . get_page_type if this is first RW mapping of frame.
- * . Add PTE to virtual address space of mapping_d, if necessary.
- * Returns:
- * . -ve: error
- * . 1: ok
- * . 0: ok and TLB invalidate of host_virt_addr needed.
- *
- * On success, *pframe contains mfn.
- */
-
- /*
- * We bound the number of times we retry CMPXCHG on memory locations that
- * we share with a guest OS. The reason is that the guest can modify that
- * location at a higher rate than we can read-modify-CMPXCHG, so the guest
- * could cause us to livelock. There are a few cases where it is valid for
- * the guest to race our updates (e.g., to change the GTF_readonly flag),
- * so we allow a few retries before failing.
- */
-
- act = &granting_d->grant_table->active[ref];
- sha = &granting_d->grant_table->shared[ref];
-
- spin_lock(&granting_d->grant_table->lock);
-
- if ( act->pin == 0 )
- {
- /* CASE 1: Activating a previously inactive entry. */
-
- sflags = sha->flags;
- sdom = sha->domid;
-
- for ( ; ; )
- {
- u32 scombo, prev_scombo, new_scombo;
-
- if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
- unlikely(sdom != mapping_d->domain_id) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
- sflags, sdom, mapping_d->domain_id);
-
- /* Merge two 16-bit values into a 32-bit combined update. */
- /* NB. Endianness! */
- prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
-
- new_scombo = scombo | GTF_reading;
- if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
- {
- new_scombo |= GTF_writing;
- if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Attempt to write-pin a r/o grant entry.\n");
- }
-
- /* NB. prev_scombo is updated in place to seen value. */
- if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
- prev_scombo,
- new_scombo)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Fault while modifying shared flags and domid.\n");
-
- /* Did the combined update work (did we see what we expected?). */
- if ( likely(prev_scombo == scombo) )
- break;
-
- if ( retries++ == 4 )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Shared grant entry is unstable.\n");
-
- /* Didn't see what we expected. Split out the seen flags & dom. */
- /* NB. Endianness! */
- sflags = (u16)prev_scombo;
- sdom = (u16)(prev_scombo >> 16);
- }
-
- /* rmb(); */ /* not on x86 */
-
- frame = __gpfn_to_mfn_foreign(granting_d, sha->frame);
-
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here?
-#else
- if ( unlikely(!pfn_valid(frame)) ||
- unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
- get_page(&frame_table[frame], granting_d) :
- get_page_and_type(&frame_table[frame], granting_d,
- PGT_writable_page))) )
- {
- clear_bit(_GTF_writing, &sha->flags);
- clear_bit(_GTF_reading, &sha->flags);
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Could not pin the granted frame (%lx)!\n", frame);
- }
-#endif
-
- if ( dev_hst_ro_flags & GNTMAP_device_map )
- act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( dev_hst_ro_flags & GNTMAP_host_map )
- act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
- act->domid = sdom;
- act->frame = frame;
- }
- else
- {
- /* CASE 2: Active modifications to an already active entry. */
-
- /*
- * A cheesy check for possible pin-count overflow.
- * A more accurate check cannot be done with a single comparison.
- */
- if ( (act->pin & 0x80808080U) != 0 )
- PIN_FAIL(unlock_out, ENOSPC,
- "Risk of counter overflow %08x\n", act->pin);
-
- frame = act->frame;
-
- if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
- !((sflags = sha->flags) & GTF_writing) )
- {
- for ( ; ; )
- {
- u16 prev_sflags;
-
- if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Attempt to write-pin a r/o grant entry.\n");
-
- prev_sflags = sflags;
-
- /* NB. prev_sflags is updated in place to seen value. */
- if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
- prev_sflags | GTF_writing)) )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Fault while modifying shared flags.\n");
-
- if ( likely(prev_sflags == sflags) )
- break;
-
- if ( retries++ == 4 )
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Shared grant entry is unstable.\n");
-
- sflags = prev_sflags;
- }
-
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here?
-#else
- if ( unlikely(!get_page_type(&frame_table[frame],
- PGT_writable_page)) )
- {
- clear_bit(_GTF_writing, &sha->flags);
- PIN_FAIL(unlock_out, GNTST_general_error,
- "Attempt to write-pin a unwritable page.\n");
- }
-#endif
- }
-
- if ( dev_hst_ro_flags & GNTMAP_device_map )
- act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
-
- if ( dev_hst_ro_flags & GNTMAP_host_map )
- act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
- }
-
- /*
- * At this point:
- * act->pin updated to reflect mapping.
- * sha->flags updated to indicate to granting domain mapping done.
- * frame contains the mfn.
- */
-
- spin_unlock(&granting_d->grant_table->lock);
-
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here?
-#else
- if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
- {
- /* Write update into the pagetable. */
- l1_pgentry_t pte;
- pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
- if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
- l1e_add_flags(pte,_PAGE_RW);
- rc = update_grant_va_mapping( host_virt_addr, pte,
- mapping_d, mapping_ed );
-
- /*
- * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
- * This is done in the outer gnttab_map_grant_ref.
- */
-
- if ( rc < 0 )
- {
- /* Failure: undo and abort. */
-
- spin_lock(&granting_d->grant_table->lock);
-
- if ( dev_hst_ro_flags & GNTMAP_readonly )
- {
- act->pin -= GNTPIN_hstr_inc;
- }
- else
- {
- act->pin -= GNTPIN_hstw_inc;
- if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
- {
- clear_bit(_GTF_writing, &sha->flags);
- put_page_type(&frame_table[frame]);
- }
- }
-
- if ( act->pin == 0 )
- {
- clear_bit(_GTF_reading, &sha->flags);
- put_page(&frame_table[frame]);
- }
-
- spin_unlock(&granting_d->grant_table->lock);
- }
-
- }
-#endif
-
- *pframe = frame;
- return rc;
-
- unlock_out:
- spin_unlock(&granting_d->grant_table->lock);
- return rc;
-}
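
[Editor's sketch] The retry loops above pack the 16-bit domid and 16-bit flags into one 32-bit word so both can be checked and updated with a single compare-and-swap, and they give up after a handful of failed attempts so a guest rewriting the shared entry cannot livelock the hypervisor. Below is a minimal sketch of that bounded combined-update pattern; it uses the GCC __sync_val_compare_and_swap builtin purely as a stand-in for cmpxchg_user, which the FIXME at the top of this file notes is not yet implemented for ia64, and it omits the flag-validity checks the real code performs.

    /* Sketch: bounded retry of a combined 16+16-bit compare-and-swap.
     * 'shared' points at memory the other side may change concurrently. */
    #include <stdint.h>

    #define MAX_RETRIES 4

    static int set_flag_bounded(volatile uint32_t *shared,
                                uint16_t expect_flags, uint16_t expect_dom,
                                uint16_t flag_bit)
    {
        int retries = 0;

        for (;;) {
            /* Merge the two 16-bit values into one 32-bit comparand. */
            uint32_t old = ((uint32_t)expect_dom << 16) | expect_flags;
            uint32_t new = old | flag_bit;
            uint32_t seen = __sync_val_compare_and_swap(shared, old, new);

            if (seen == old)
                return 0;                 /* combined update succeeded */
            if (++retries == MAX_RETRIES)
                return -1;                /* entry is unstable; give up */

            /* Split the seen value back out and retry against it. */
            expect_flags = (uint16_t)seen;
            expect_dom   = (uint16_t)(seen >> 16);
        }
    }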
-
-/*
- * Returns 0 if TLB flush / invalidate required by caller.
- * va will indicate the address to be invalidated.
- */
-static int
-__gnttab_map_grant_ref(
- gnttab_map_grant_ref_t *uop,
- unsigned long *va)
-{
- domid_t dom;
- grant_ref_t ref;
- struct domain *ld, *rd;
- struct vcpu *led;
- u16 dev_hst_ro_flags;
- int handle;
- unsigned long frame = 0, host_virt_addr;
- int rc;
-
- led = current;
- ld = led->domain;
-
- /* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(dom, &uop->dom) |
- __get_user(ref, &uop->ref) |
- __get_user(host_virt_addr, &uop->host_addr) |
- __get_user(dev_hst_ro_flags, &uop->flags)) )
- {
- DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
- return -EFAULT; /* don't set status */
- }
-
-
- if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) &&
- unlikely(!__addr_ok(host_virt_addr)))
- {
- DPRINTK("Bad virtual address (%lx) or flags (%x).\n",
- host_virt_addr, dev_hst_ro_flags);
- (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
- return GNTST_bad_gntref;
- }
-
- if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
- unlikely((dev_hst_ro_flags &
- (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
- {
- DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
- (void)__put_user(GNTST_bad_gntref, &uop->handle);
- return GNTST_bad_gntref;
- }
-
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
- unlikely(ld == rd) )
- {
- if ( rd != NULL )
- put_domain(rd);
- DPRINTK("Could not find domain %d\n", dom);
- (void)__put_user(GNTST_bad_domain, &uop->handle);
- return GNTST_bad_domain;
- }
-
- /* Get a maptrack handle. */
- if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
- {
- int i;
- grant_mapping_t *new_mt;
- grant_table_t *lgt = ld->grant_table;
-
- /* Grow the maptrack table. */
- new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1);
- if ( new_mt == NULL )
- {
- put_domain(rd);
- DPRINTK("No more map handles available\n");
- (void)__put_user(GNTST_no_device_space, &uop->handle);
- return GNTST_no_device_space;
- }
-
- memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order);
- for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ )
- new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
-
- free_xenheap_pages(lgt->maptrack, lgt->maptrack_order);
- lgt->maptrack = new_mt;
- lgt->maptrack_order += 1;
- lgt->maptrack_limit <<= 1;
-
- printk("Doubled maptrack size\n");
- handle = get_maptrack_handle(ld->grant_table);
- }
-
-#if GRANT_DEBUG_VERBOSE
- DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
- ref, dom, dev_hst_ro_flags);
-#endif
-
- if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
- dev_hst_ro_flags,
- host_virt_addr, &frame)))
- {
- /*
- * Only make the maptrack live _after_ writing the pte, in case we
- * overwrite the same frame number, causing a maptrack walk to find it
- */
- ld->grant_table->maptrack[handle].domid = dom;
-
- ld->grant_table->maptrack[handle].ref_and_flags
- = (ref << MAPTRACK_REF_SHIFT) |
- (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
-
- (void)__put_user(frame, &uop->dev_bus_addr);
-
- if ( dev_hst_ro_flags & GNTMAP_host_map )
- *va = host_virt_addr;
-
- (void)__put_user(handle, &uop->handle);
- }
- else
- {
- (void)__put_user(rc, &uop->handle);
- put_maptrack_handle(ld->grant_table, handle);
- }
-
- put_domain(rd);
- return rc;
-}
-
-static long
-gnttab_map_grant_ref(
- gnttab_map_grant_ref_t *uop, unsigned int count)
-{
- int i, flush = 0;
- unsigned long va = 0;
-
- for ( i = 0; i < count; i++ )
- if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
- flush++;
-
-#ifdef __ia64__
-// FIXME-ia64: probably need to do something here to avoid stale mappings?
-#else
- if ( flush == 1 )
- flush_tlb_one_mask(current->domain->cpumask, va);
- else if ( flush != 0 )
- flush_tlb_mask(current->domain->cpumask);
-#endif
-
- return 0;
-}
-
-static int
-__gnttab_unmap_grant_ref(
- gnttab_unmap_grant_ref_t *uop,
- unsigned long *va)
-{
- domid_t dom;
- grant_ref_t ref;
- u16 handle;
- struct domain *ld, *rd;
-
- active_grant_entry_t *act;
- grant_entry_t *sha;
- grant_mapping_t *map;
- u16 flags;
- s16 rc = 1;
- unsigned long frame, virt;
-
- ld = current->domain;
-
- /* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(virt, &uop->host_addr) |
- __get_user(frame, &uop->dev_bus_addr) |
- __get_user(handle, &uop->handle)) )
- {
- DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
- return -EFAULT; /* don't set status */
- }
-
- map = &ld->grant_table->maptrack[handle];
-
- if ( unlikely(handle >= ld->grant_table->maptrack_limit) ||
- unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) )
- {
- DPRINTK("Bad handle (%d).\n", handle);
- (void)__put_user(GNTST_bad_handle, &uop->status);
- return GNTST_bad_handle;
- }
-
- dom = map->domid;
- ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
- flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
-
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
- unlikely(ld == rd) )
- {
- if ( rd != NULL )
- put_domain(rd);
- DPRINTK("Could not find domain %d\n", dom);
- (void)__put_user(GNTST_bad_domain, &uop->status);
- return GNTST_bad_domain;
- }
-
-#if GRANT_DEBUG_VERBOSE
- DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
- ref, dom, handle);
-#endif
-
- act = &rd->grant_table->active[ref];
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
-
- if ( frame == 0 )
- {
- frame = act->frame;
- }
- else
- {
- if ( unlikely(frame != act->frame) )
- PIN_FAIL(unmap_out, GNTST_general_error,
- "Bad frame number doesn't match gntref.\n");
- if ( flags & GNTMAP_device_map )
- act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
- : GNTPIN_devw_inc;
-
- map->ref_and_flags &= ~GNTMAP_device_map;
- (void)__put_user(0, &uop->dev_bus_addr);
-
- /* Frame is now unmapped for device access. */
- }
-
- if ( (virt != 0) &&
- (flags & GNTMAP_host_map) &&
- ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
- {
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here?
-#else
- l1_pgentry_t *pl1e;
- unsigned long _ol1e;
-
- pl1e = &linear_pg_table[l1_linear_offset(virt)];
-
- if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
- {
- DPRINTK("Could not find PTE entry for address %lx\n", virt);
- rc = -EINVAL;
- goto unmap_out;
- }
-
- /*
- * Check that the virtual address supplied is actually mapped to
- * act->frame.
- */
- if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
- {
- DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n",
- _ol1e, virt, frame);
- rc = -EINVAL;
- goto unmap_out;
- }
-
- /* Delete pagetable entry. */
- if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
- {
- DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n",
- pl1e, virt);
- rc = -EINVAL;
- goto unmap_out;
- }
-#endif
-
- map->ref_and_flags &= ~GNTMAP_host_map;
-
- act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
- : GNTPIN_hstw_inc;
-
- rc = 0;
- *va = virt;
- }
-
- if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
- {
- map->ref_and_flags = 0;
- put_maptrack_handle(ld->grant_table, handle);
- }
-
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here? I think not and then
-// this can probably be macro-ized into nothingness
-#else
- /* If just unmapped a writable mapping, mark as dirtied */
- if ( unlikely(shadow_mode_log_dirty(rd)) &&
- !( flags & GNTMAP_readonly ) )
- mark_dirty(rd, frame);
-#endif
-
- /* If the last writable mapping has been removed, put_page_type */
- if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
- ( !( flags & GNTMAP_readonly ) ) )
- {
- clear_bit(_GTF_writing, &sha->flags);
- put_page_type(&frame_table[frame]);
- }
-
- if ( act->pin == 0 )
- {
- clear_bit(_GTF_reading, &sha->flags);
- put_page(&frame_table[frame]);
- }
-
- unmap_out:
- (void)__put_user(rc, &uop->status);
- spin_unlock(&rd->grant_table->lock);
- put_domain(rd);
- return rc;
-}
-
-static long
-gnttab_unmap_grant_ref(
- gnttab_unmap_grant_ref_t *uop, unsigned int count)
-{
- int i, flush = 0;
- unsigned long va = 0;
-
- for ( i = 0; i < count; i++ )
- if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
- flush++;
-
-#ifdef __ia64__
-// FIXME-ia64: probably need to do something here to avoid stale mappings?
-#else
- if ( flush == 1 )
- flush_tlb_one_mask(current->domain->cpumask, va);
- else if ( flush != 0 )
- flush_tlb_mask(current->domain->cpumask);
-#endif
-
- return 0;
-}
-
-static long
-gnttab_setup_table(
- gnttab_setup_table_t *uop, unsigned int count)
-{
- gnttab_setup_table_t op;
- struct domain *d;
- int i;
- unsigned long addr;
-
- if ( count != 1 )
- return -EINVAL;
-
- if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
- {
- DPRINTK("Fault while reading gnttab_setup_table_t.\n");
- return -EFAULT;
- }
-
- if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
- {
- DPRINTK("Xen only supports up to %d grant-table frames per domain.\n",
- NR_GRANT_FRAMES);
- (void)put_user(GNTST_general_error, &uop->status);
- return 0;
- }
-
- if ( op.dom == DOMID_SELF )
- {
- op.dom = current->domain->domain_id;
- }
- else if ( unlikely(!IS_PRIV(current->domain)) )
- {
- (void)put_user(GNTST_permission_denied, &uop->status);
- return 0;
- }
-
- if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
- {
- DPRINTK("Bad domid %d.\n", op.dom);
- (void)put_user(GNTST_bad_domain, &uop->status);
- return 0;
- }
-
- if ( op.nr_frames <= NR_GRANT_FRAMES )
- {
- ASSERT(d->grant_table != NULL);
- (void)put_user(GNTST_okay, &uop->status);
-#ifdef __ia64__
- if (d == dom0) {
- for ( i = 0; i < op.nr_frames; i++ )
- (void)put_user(
- (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
- &uop->frame_list[i]);
- } else {
- /* IA64 hack - need to map it somewhere */
- addr = (1UL << 40);
- map_domain_page(d, addr, virt_to_phys(d->grant_table->shared));
- (void)put_user(addr >> PAGE_SHIFT, &uop->frame_list[0]);
- }
-#else
- for ( i = 0; i < op.nr_frames; i++ )
- (void)put_user(
- (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i,
- &uop->frame_list[i]);
-#endif
- }
-
- put_domain(d);
- return 0;
-}
-
-#if GRANT_DEBUG
-static int
-gnttab_dump_table(gnttab_dump_table_t *uop)
-{
- grant_table_t *gt;
- gnttab_dump_table_t op;
- struct domain *d;
- u32 shared_mfn;
- active_grant_entry_t *act;
- grant_entry_t sha_copy;
- grant_mapping_t *maptrack;
- int i;
-
-
- if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
- {
- DPRINTK("Fault while reading gnttab_dump_table_t.\n");
- return -EFAULT;
- }
-
- if ( op.dom == DOMID_SELF )
- {
- op.dom = current->domain->domain_id;
- }
-
- if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
- {
- DPRINTK("Bad domid %d.\n", op.dom);
- (void)put_user(GNTST_bad_domain, &uop->status);
- return 0;
- }
-
- ASSERT(d->grant_table != NULL);
- gt = d->grant_table;
- (void)put_user(GNTST_okay, &uop->status);
-
- shared_mfn = virt_to_phys(d->grant_table->shared);
-
- DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
- op.dom, shared_mfn);
-
- ASSERT(d->grant_table->active != NULL);
- ASSERT(d->grant_table->shared != NULL);
- ASSERT(d->grant_table->maptrack != NULL);
-
- for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- {
- sha_copy = gt->shared[i];
-
- if ( sha_copy.flags )
- {
- DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) "
- "dom:(%hu) frame:(%lx)\n",
- op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
- }
- }
-
- spin_lock(>->lock);
-
- for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
- {
- act = >->active[i];
-
- if ( act->pin )
- {
- DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) "
- "dom:(%hu) frame:(%lx)\n",
- op.dom, i, act->pin, act->domid, act->frame);
- }
- }
-
- for ( i = 0; i < gt->maptrack_limit; i++ )
- {
- maptrack = >->maptrack[i];
-
- if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
- {
- DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) "
- "dom:(%hu)\n",
- op.dom, i,
- maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
- maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
- maptrack->domid);
- }
- }
-
- spin_unlock(>->lock);
-
- put_domain(d);
- return 0;
-}
-#endif
-
-long
-do_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- long rc;
-
- if ( count > 512 )
- return -EINVAL;
-
- LOCK_BIGLOCK(current->domain);
-
- rc = -EFAULT;
- switch ( cmd )
- {
- case GNTTABOP_map_grant_ref:
- if ( unlikely(!array_access_ok(
- uop, count, sizeof(gnttab_map_grant_ref_t))) )
- goto out;
- rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count);
- break;
- case GNTTABOP_unmap_grant_ref:
- if ( unlikely(!array_access_ok(
- uop, count, sizeof(gnttab_unmap_grant_ref_t))) )
- goto out;
- rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count);
- break;
- case GNTTABOP_setup_table:
- rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
- break;
-#if GRANT_DEBUG
- case GNTTABOP_dump_table:
- rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
- break;
-#endif
- default:
- rc = -ENOSYS;
- break;
- }
-
-out:
- UNLOCK_BIGLOCK(current->domain);
-
- return rc;
-}
-
-int
-gnttab_check_unmap(
- struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
-{
- /* Called when put_page is invoked on a page belonging to a foreign domain.
- * Instead of decrementing the frame table ref count, locate the grant
- * table entry, if any, and if found, decrement that count.
- * Called a _lot_ at domain creation because pages mapped by priv domains
- * also traverse this.
- */
-
- /* Note: If the same frame is mapped multiple times, and then one of
- * the ptes is overwritten, which maptrack handle gets invalidated?
- * Advice: Don't do it. Explicitly unmap.
- */
-
- unsigned int handle, ref, refcount;
- grant_table_t *lgt, *rgt;
- active_grant_entry_t *act;
- grant_mapping_t *map;
- int found = 0;
-
- lgt = ld->grant_table;
-
-#if GRANT_DEBUG_VERBOSE
- if ( ld->domain_id != 0 )
- {
- DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
- rd->domain_id, ld->domain_id, frame, readonly);
- }
-#endif
-
- /* Fast exit if we're not mapping anything using grant tables */
- if ( lgt->map_count == 0 )
- return 0;
-
- if ( get_domain(rd) == 0 )
- {
- DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n",
- rd->domain_id);
- return 0;
- }
-
- rgt = rd->grant_table;
-
- for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
- {
- map = &lgt->maptrack[handle];
-
- if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) &&
- ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly))))
- {
- ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT);
- act = &rgt->active[ref];
-
- spin_lock(&rgt->lock);
-
- if ( act->frame != frame )
- {
- spin_unlock(&rgt->lock);
- continue;
- }
-
- refcount = act->pin & ( readonly ? GNTPIN_hstr_mask
- : GNTPIN_hstw_mask );
- if ( refcount == 0 )
- {
- spin_unlock(&rgt->lock);
- continue;
- }
-
- /* gotcha */
- DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n",
- rd->domain_id, ld->domain_id, frame, readonly);
-
- if ( readonly )
- act->pin -= GNTPIN_hstr_inc;
- else
- {
- act->pin -= GNTPIN_hstw_inc;
-
- /* any more granted writable mappings? */
- if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
- {
- clear_bit(_GTF_writing, &rgt->shared[ref].flags);
- put_page_type(&frame_table[frame]);
- }
- }
-
- if ( act->pin == 0 )
- {
- clear_bit(_GTF_reading, &rgt->shared[ref].flags);
- put_page(&frame_table[frame]);
- }
- spin_unlock(&rgt->lock);
-
- clear_bit(GNTMAP_host_map, &map->ref_and_flags);
-
- if ( !(map->ref_and_flags & GNTMAP_device_map) )
- put_maptrack_handle(lgt, handle);
-
- found = 1;
- break;
- }
- }
- put_domain(rd);
-
- return found;
-}
-
-int
-gnttab_prepare_for_transfer(
- struct domain *rd, struct domain *ld, grant_ref_t ref)
-{
- grant_table_t *rgt;
- grant_entry_t *sha;
- domid_t sdom;
- u16 sflags;
- u32 scombo, prev_scombo;
- int retries = 0;
- unsigned long target_pfn;
-
- DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
- rd->domain_id, ld->domain_id, ref);
-
- if ( unlikely((rgt = rd->grant_table) == NULL) ||
- unlikely(ref >= NR_GRANT_ENTRIES) )
- {
- DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n",
- rd->domain_id, ref);
- return 0;
- }
-
- spin_lock(&rgt->lock);
-
- sha = &rgt->shared[ref];
-
- sflags = sha->flags;
- sdom = sha->domid;
-
- for ( ; ; )
- {
- target_pfn = sha->frame;
-
- if ( unlikely(target_pfn >= max_page ) )
- {
- DPRINTK("Bad pfn (%lx)\n", target_pfn);
- goto fail;
- }
-
- if ( unlikely(sflags != GTF_accept_transfer) ||
- unlikely(sdom != ld->domain_id) )
- {
- DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
- sflags, sdom, ld->domain_id);
- goto fail;
- }
-
- /* Merge two 16-bit values into a 32-bit combined update. */
- /* NB. Endianness! */
- prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
-
- /* NB. prev_scombo is updated in place to seen value. */
- if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
- prev_scombo | GTF_transfer_committed)) )
- {
- DPRINTK("Fault while modifying shared flags and domid.\n");
- goto fail;
- }
-
- /* Did the combined update work (did we see what we expected?). */
- if ( likely(prev_scombo == scombo) )
- break;
-
- if ( retries++ == 4 )
- {
- DPRINTK("Shared grant entry is unstable.\n");
- goto fail;
- }
-
- /* Didn't see what we expected. Split out the seen flags & dom. */
- /* NB. Endianness! */
- sflags = (u16)prev_scombo;
- sdom = (u16)(prev_scombo >> 16);
- }
-
- spin_unlock(&rgt->lock);
- return 1;
-
- fail:
- spin_unlock(&rgt->lock);
- return 0;
-}
-
-void
-gnttab_notify_transfer(
- struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
-{
- grant_entry_t *sha;
- unsigned long pfn;
-
- DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
- rd->domain_id, ld->domain_id, ref);
-
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
-
-#ifdef __ia64__
-// FIXME-ia64: any error checking need to be done here?
-#else
- pfn = sha->frame;
-
- if ( unlikely(pfn >= max_page ) )
- DPRINTK("Bad pfn (%lx)\n", pfn);
- else
- {
- machine_to_phys_mapping[frame] = pfn;
-
- if ( unlikely(shadow_mode_log_dirty(ld)))
- mark_dirty(ld, frame);
-
- if (shadow_mode_translate(ld))
- __phys_to_machine_mapping[pfn] = frame;
- }
-#endif
- sha->frame = __mfn_to_gpfn(rd, frame);
- sha->domid = rd->domain_id;
- wmb();
- sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
-
- spin_unlock(&rd->grant_table->lock);
-
- return;
-}
-
-int
-grant_table_create(
- struct domain *d)
-{
- grant_table_t *t;
- int i;
-
- if ( (t = xmalloc(grant_table_t)) == NULL )
- goto no_mem;
-
- /* Simple stuff. */
- memset(t, 0, sizeof(*t));
- spin_lock_init(&t->lock);
-
- /* Active grant table. */
- if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES))
- == NULL )
- goto no_mem;
- memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
-
- /* Tracking of mapped foreign frames table */
- if ( (t->maptrack = alloc_xenheap_page()) == NULL )
- goto no_mem;
- t->maptrack_order = 0;
- t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t);
- memset(t->maptrack, 0, PAGE_SIZE);
- for ( i = 0; i < t->maptrack_limit; i++ )
- t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
-
- /* Shared grant table. */
- t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES);
- if ( t->shared == NULL )
- goto no_mem;
- memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
-
-#ifdef __ia64__
-// I don't think there's anything to do here on ia64?...
-#else
- for ( i = 0; i < NR_GRANT_FRAMES; i++ )
- {
- SHARE_PFN_WITH_DOMAIN(
- virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
- machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] =
- INVALID_M2P_ENTRY;
- }
-#endif
-
- /* Okay, install the structure. */
- wmb(); /* avoid races with lock-free access to d->grant_table */
- d->grant_table = t;
- return 0;
-
- no_mem:
- if ( t != NULL )
- {
- xfree(t->active);
- if ( t->maptrack != NULL )
- free_xenheap_page(t->maptrack);
- xfree(t);
- }
- return -ENOMEM;
-}
-
-void
-gnttab_release_dev_mappings(grant_table_t *gt)
-{
- grant_mapping_t *map;
- domid_t dom;
- grant_ref_t ref;
- u16 handle;
- struct domain *ld, *rd;
- unsigned long frame;
- active_grant_entry_t *act;
- grant_entry_t *sha;
-
- ld = current->domain;
-
- for ( handle = 0; handle < gt->maptrack_limit; handle++ )
- {
- map = >->maptrack[handle];
-
- if ( map->ref_and_flags & GNTMAP_device_map )
- {
- dom = map->domid;
- ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
-
- DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
- handle, ref,
- map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
-
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
- unlikely(ld == rd) )
- {
- if ( rd != NULL )
- put_domain(rd);
-
- printk(KERN_WARNING "Grant release: No dom%d\n", dom);
- continue;
- }
-
- act = &rd->grant_table->active[ref];
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
-
- if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) )
- {
- frame = act->frame;
-
- if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) &&
- ( (act->pin & GNTPIN_devw_mask) > 0 ) )
- {
- clear_bit(_GTF_writing, &sha->flags);
- put_page_type(&frame_table[frame]);
- }
-
- act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask);
-
- if ( act->pin == 0 )
- {
- clear_bit(_GTF_reading, &sha->flags);
- map->ref_and_flags = 0;
- put_page(&frame_table[frame]);
- }
- else
- map->ref_and_flags &= ~GNTMAP_device_map;
- }
-
- spin_unlock(&rd->grant_table->lock);
-
- put_domain(rd);
- }
- }
-}
-
-
-void
-grant_table_destroy(
- struct domain *d)
-{
- grant_table_t *t;
-
- if ( (t = d->grant_table) != NULL )
- {
- /* Free memory relating to this grant table. */
- d->grant_table = NULL;
- free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES);
- free_xenheap_page(t->maptrack);
- xfree(t->active);
- xfree(t);
- }
-}
-
-void
-grant_table_init(
- void)
-{
- /* Nothing. */
-}
-#endif
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hpsimserial.c
--- a/xen/arch/ia64/hpsimserial.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,23 +0,0 @@
-/*
- * HP Ski simulator serial I/O
- *
- * Copyright (C) 2004 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-
-#include <linux/config.h>
-#include <xen/sched.h>
-#include <xen/serial.h>
-#include "hpsim_ssc.h"
-
-static void hp_ski_putc(struct serial_port *port, char c)
-{
- ia64_ssc(c,0,0,0,SSC_PUTCHAR);
-}
-
-static struct uart_driver hp_ski = { .putc = hp_ski_putc };
-
-void hpsim_serial_init(void)
-{
- serial_register_uart(0, &hp_ski, 0);
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hypercall.c
--- a/xen/arch/ia64/hypercall.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,182 +0,0 @@
-/*
- * Hypercall implementations
- *
- * Copyright (C) 2005 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
-#include <asm/sal.h> /* FOR struct ia64_sal_retval */
-
-#include <asm/vcpu.h>
-#include <asm/dom_fw.h>
-
-extern unsigned long translate_domain_mpaddr(unsigned long);
-extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64);
-extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
-
-unsigned long idle_when_pending = 0;
-unsigned long pal_halt_light_count = 0;
-
-int
-ia64_hypercall (struct pt_regs *regs)
-{
- struct vcpu *v = (struct domain *) current;
- struct ia64_sal_retval x;
- struct ia64_pal_retval y;
- unsigned long *tv, *tc;
- int pi;
-
- switch (regs->r2) {
- case FW_HYPERCALL_PAL_CALL:
- //printf("*** PAL hypercall: index=%d\n",regs->r28);
- //FIXME: This should call a C routine
-#if 0
- // This is very conservative, but avoids a possible
- // (and deadly) freeze in paravirtualized domains due
- // to a yet-to-be-found bug where pending_interruption
- // is zero when it shouldn't be. Since PAL is called
- // in the idle loop, this should resolve it
- VCPU(v,pending_interruption) = 1;
-#endif
- if (regs->r28 == PAL_HALT_LIGHT) {
-#define SPURIOUS_VECTOR 15
- pi = vcpu_check_pending_interrupts(v);
- if (pi != SPURIOUS_VECTOR) {
- if (!VCPU(v,pending_interruption))
- idle_when_pending++;
- vcpu_pend_unspecified_interrupt(v);
-//printf("idle w/int#%d pending!\n",pi);
-//this shouldn't happen, but it apparently does quite a bit! so don't
-//allow it to happen... i.e. if a domain has an interrupt pending and
-//it tries to halt itself because it thinks it is idle, just return here
-//as deliver_pending_interrupt is called on the way out and will deliver it
- }
- else {
- pal_halt_light_count++;
- do_sched_op(SCHEDOP_yield);
- }
- //break;
- }
- else if (regs->r28 >= PAL_COPY_PAL) { /* FIXME */
- printf("stacked PAL hypercalls not supported\n");
- regs->r8 = -1;
- break;
- }
- else y = xen_pal_emulator(regs->r28,regs->r29,
- regs->r30,regs->r31);
- regs->r8 = y.status; regs->r9 = y.v0;
- regs->r10 = y.v1; regs->r11 = y.v2;
- break;
- case FW_HYPERCALL_SAL_CALL:
- x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33),
- vcpu_get_gr(v,34),vcpu_get_gr(v,35),
- vcpu_get_gr(v,36),vcpu_get_gr(v,37),
- vcpu_get_gr(v,38),vcpu_get_gr(v,39));
- regs->r8 = x.status; regs->r9 = x.v0;
- regs->r10 = x.v1; regs->r11 = x.v2;
- break;
- case FW_HYPERCALL_EFI_RESET_SYSTEM:
- printf("efi.reset_system called ");
- if (current->domain == dom0) {
- printf("(by dom0)\n ");
- (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
- }
-#ifdef DOMU_AUTO_RESTART
- else {
- reconstruct_domU(current);
- return 0; // don't increment ip!
- }
-#else
- printf("(not supported for non-0 domain)\n");
- regs->r8 = EFI_UNSUPPORTED;
-#endif
- break;
- case FW_HYPERCALL_EFI_GET_TIME:
- tv = vcpu_get_gr(v,32);
- tc = vcpu_get_gr(v,33);
- //printf("efi_get_time(%p,%p) called...",tv,tc);
- tv = __va(translate_domain_mpaddr(tv));
- if (tc) tc = __va(translate_domain_mpaddr(tc));
- regs->r8 = (*efi.get_time)(tv,tc);
- //printf("and returns %lx\n",regs->r8);
- break;
- case FW_HYPERCALL_EFI_SET_TIME:
- case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
- case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
- // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
- // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
- // POINTER ARGUMENTS WILL BE VIRTUAL!!
- case FW_HYPERCALL_EFI_GET_VARIABLE:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
- case FW_HYPERCALL_EFI_SET_VARIABLE:
- case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
- // FIXME: need fixes in efi.h from 2.6.9
- regs->r8 = EFI_UNSUPPORTED;
- break;
- case 0xffff: // test dummy hypercall
- regs->r8 = dump_privop_counts_to_user(
- vcpu_get_gr(v,32),
- vcpu_get_gr(v,33));
- break;
- case 0xfffe: // test dummy hypercall
- regs->r8 = zero_privop_counts_to_user(
- vcpu_get_gr(v,32),
- vcpu_get_gr(v,33));
- break;
- case 0xfffd: // test dummy hypercall
- regs->r8 = launch_domainU(
- vcpu_get_gr(v,32));
- break;
- case 0xfffc: // test dummy hypercall
- regs->r8 = domU_staging_write_32(
- vcpu_get_gr(v,32),
- vcpu_get_gr(v,33),
- vcpu_get_gr(v,34),
- vcpu_get_gr(v,35),
- vcpu_get_gr(v,36));
- break;
- case 0xfffb: // test dummy hypercall
- regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32));
- break;
-
- case __HYPERVISOR_dom0_op:
- regs->r8 = do_dom0_op(regs->r14);
- break;
-
- case __HYPERVISOR_dom_mem_op:
-#ifdef CONFIG_VTI
-		regs->r8 = do_dom_mem_op(regs->r14, regs->r15, regs->r16, regs->r17, regs->r18);
-#else
- /* we don't handle reservations; just return success */
- regs->r8 = regs->r16;
-#endif
- break;
-
- case __HYPERVISOR_event_channel_op:
- regs->r8 = do_event_channel_op(regs->r14);
- break;
-
-#ifndef CONFIG_VTI
- case __HYPERVISOR_grant_table_op:
- regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16);
- break;
-#endif
-
- case __HYPERVISOR_console_io:
- regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16);
- break;
-
- default:
- printf("unknown hypercall %x\n", regs->r2);
- regs->r8 = (unsigned long)-1;
- }
- return 1;
-}
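(For orientation while reading the removed dispatcher above: EFI_GET_TIME is the one
case that consumes guest pointers.  The guest passes metaphysical addresses, which are
translated to Xen-visible virtual addresses before the real firmware call is made.  A
rough, typed restatement follows -- illustrative only, not text from this patch; the
efi_time_t/efi_time_cap_t types are the linux/efi.h ones assumed here.)

    unsigned long tv_gpa = vcpu_get_gr(v, 32);   /* guest metaphysical address   */
    unsigned long tc_gpa = vcpu_get_gr(v, 33);   /* optional, may be zero        */
    efi_time_t     *tv = __va(translate_domain_mpaddr(tv_gpa));
    efi_time_cap_t *tc = tc_gpa ? __va(translate_domain_mpaddr(tc_gpa)) : NULL;
    regs->r8 = (*efi.get_time)(tv, tc);          /* EFI status handed back in r8 */
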
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1592 +0,0 @@
-/*
- * arch/ia64/kernel/hyperprivop.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <asm/asmmacro.h>
-#include <asm/kregs.h>
-#include <asm/offsets.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <public/arch-ia64.h>
-
-#if 1 // change to 0 to turn off all fast paths
-#define FAST_HYPERPRIVOPS
-#define FAST_HYPERPRIVOP_CNT
-#define FAST_REFLECT_CNT
-//#define FAST_TICK
-#define FAST_BREAK
-#define FAST_ACCESS_REFLECT
-#define FAST_RFI
-#define FAST_SSM_I
-#define FAST_PTC_GA
-#undef RFI_TO_INTERRUPT // not working yet
-#endif
-
-#ifdef CONFIG_SMP
-#warning "FIXME: ptc.ga instruction requires spinlock for SMP"
-#undef FAST_PTC_GA
-#endif
-
-// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon!
-//#define HANDLE_AR_UNAT
-
-// FIXME: This is defined in include/asm-ia64/hw_irq.h but this
-// doesn't appear to be include'able from assembly?
-#define IA64_TIMER_VECTOR 0xef
-
-// Should be included from common header file (also in process.c)
-// NO PSR_CLR IS DIFFERENT! (CPL)
-#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
-#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT)
-// note IA64_PSR_PK removed from following, why is this necessary?
-#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
- IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
- IA64_PSR_IT | IA64_PSR_BN)
-
-#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
- IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
- IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
- IA64_PSR_MC | IA64_PSR_IS | \
- IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
- IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
-
-// Note: not hand-scheduled for now
-// Registers at entry
-// r16 == cr.isr
-// r17 == cr.iim
-// r18 == XSI_PSR_IC_OFS
-// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
-// r31 == pr
-GLOBAL_ENTRY(fast_hyperprivop)
-#ifndef FAST_HYPERPRIVOPS // see beginning of file
- br.sptk.many dispatch_break_fault ;;
-#endif
- // HYPERPRIVOP_SSM_I?
- // assumes domain interrupts pending, so just do it
- cmp.eq p7,p6=XEN_HYPER_SSM_I,r17
-(p7) br.sptk.many hyper_ssm_i;;
-
- // FIXME. This algorithm gives up (goes to the slow path) if there
- // are ANY interrupts pending, even if they are currently
- // undeliverable. This should be improved later...
- adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r20=[r20] ;;
- cmp.eq p7,p0=r0,r20
-(p7) br.cond.sptk.many 1f
- movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r20=[r20];;
- adds r21=IA64_VCPU_IRR0_OFFSET,r20;
- adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
- ld8 r23=[r21],16; ld8 r24=[r22],16;;
- ld8 r21=[r21]; ld8 r22=[r22];;
- or r23=r23,r24; or r21=r21,r22;;
- or r20=r23,r21;;
-1: // when we get to here r20=~=interrupts pending
-
- // HYPERPRIVOP_RFI?
- cmp.eq p7,p6=XEN_HYPER_RFI,r17
-(p7) br.sptk.many hyper_rfi;;
-
- // HYPERPRIVOP_GET_IVR?
- cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17
-(p7) br.sptk.many hyper_get_ivr;;
-
- cmp.ne p7,p0=r20,r0
-(p7) br.spnt.many dispatch_break_fault ;;
-
- // HYPERPRIVOP_COVER?
- cmp.eq p7,p6=XEN_HYPER_COVER,r17
-(p7) br.sptk.many hyper_cover;;
-
- // HYPERPRIVOP_SSM_DT?
- cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17
-(p7) br.sptk.many hyper_ssm_dt;;
-
- // HYPERPRIVOP_RSM_DT?
- cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17
-(p7) br.sptk.many hyper_rsm_dt;;
-
- // HYPERPRIVOP_GET_TPR?
- cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17
-(p7) br.sptk.many hyper_get_tpr;;
-
- // HYPERPRIVOP_SET_TPR?
- cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17
-(p7) br.sptk.many hyper_set_tpr;;
-
- // HYPERPRIVOP_EOI?
- cmp.eq p7,p6=XEN_HYPER_EOI,r17
-(p7) br.sptk.many hyper_eoi;;
-
- // HYPERPRIVOP_SET_ITM?
- cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17
-(p7) br.sptk.many hyper_set_itm;;
-
- // HYPERPRIVOP_SET_RR?
- cmp.eq p7,p6=XEN_HYPER_SET_RR,r17
-(p7) br.sptk.many hyper_set_rr;;
-
- // HYPERPRIVOP_GET_RR?
- cmp.eq p7,p6=XEN_HYPER_GET_RR,r17
-(p7) br.sptk.many hyper_get_rr;;
-
- // HYPERPRIVOP_PTC_GA?
- cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17
-(p7) br.sptk.many hyper_ptc_ga;;
-
- // HYPERPRIVOP_ITC_D?
- cmp.eq p7,p6=XEN_HYPER_ITC_D,r17
-(p7) br.sptk.many hyper_itc_d;;
-
- // HYPERPRIVOP_ITC_I?
- cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
-(p7) br.sptk.many hyper_itc_i;;
-
- // HYPERPRIVOP_THASH?
- cmp.eq p7,p6=XEN_HYPER_THASH,r17
-(p7) br.sptk.many hyper_thash;;
-
- // if not one of the above, give up for now and do it the slow way
- br.sptk.many dispatch_break_fault ;;
-
-
-// give up for now if: ipsr.be==1, ipsr.pp==1
-// from reflect_interruption, don't need to:
-// - printf first extint (debug only)
-// - check for interrupt collection enabled (routine will force on)
-// - set ifa (not valid for extint)
-// - set iha (not valid for extint)
-// - set itir (not valid for extint)
-// DO need to
-// - increment the HYPER_SSM_I fast_hyperprivop counter
-// - set shared_mem iip to instruction after HYPER_SSM_I
-// - set cr.iip to guest iva+0x3000
-// - set shared_mem ipsr to [vcpu_get_ipsr_int_state]
-// be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0;
-// i = shared_mem interrupt_delivery_enabled
-// ic = shared_mem interrupt_collection_enabled
-// ri = instruction after HYPER_SSM_I
-// all other bits unchanged from real cr.ipsr
-// - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!)
-// - set shared_mem isr: isr.ei to instr following HYPER_SSM_I
-// and isr.ri to cr.isr.ri (all other bits zero)
-// - cover and set shared_mem precover_ifs to cr.ifs
-// ^^^ MISSED THIS FOR fast_break??
-// - set shared_mem ifs and incomplete_regframe to 0
-// - set shared_mem interrupt_delivery_enabled to 0
-// - set shared_mem interrupt_collection_enabled to 0
-// - set r31 to SHAREDINFO_ADDR
-// - virtual bank switch 0
-// maybe implement later
-// - verify that there really IS a deliverable interrupt pending
-// - set shared_mem iva
-// needs to be done but not implemented (in reflect_interruption)
-// - set shared_mem iipa
-// don't know for sure
-// - set shared_mem unat
-// r16 == cr.isr
-// r17 == cr.iim
-// r18 == XSI_PSR_IC
-// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
-// r31 == pr
-ENTRY(hyper_ssm_i)
-#ifndef FAST_SSM_I
- br.spnt.few dispatch_break_fault ;;
-#endif
- // give up for now if: ipsr.be==1, ipsr.pp==1
- mov r30=cr.ipsr;;
- mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.sptk.many dispatch_break_fault ;;
- extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.sptk.many dispatch_break_fault ;;
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- // set shared_mem iip to instruction after HYPER_SSM_I
- extr.u r20=r30,41,2 ;;
- cmp.eq p6,p7=2,r20 ;;
-(p6) mov r20=0
-(p6) adds r29=16,r29
-(p7) adds r20=1,r20 ;;
- dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet
- adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r29 ;;
- // set shared_mem isr
- extr.u r16=r16,38,1;; // grab cr.isr.ir bit
- dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
- dep r16=r20,r16,41,2 ;; // deposit cr.isr.ri
- adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r16 ;;
- // set cr.ipsr
- mov r29=r30 ;;
- movl r28=DELIVER_PSR_SET;;
- movl r27=~DELIVER_PSR_CLR;;
- or r29=r29,r28;;
- and r29=r29,r27;;
- mov cr.ipsr=r29;;
- // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
- extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
- cmp.eq p6,p7=3,r29;;
-(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
-(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
- ;;
- // FOR SSM_I ONLY, also turn on psr.i and psr.ic
- movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);;
- movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
- or r30=r30,r28;;
- and r30=r30,r27;;
- adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r30 ;;
- // set shared_mem interrupt_delivery_enabled to 0
- // set shared_mem interrupt_collection_enabled to 0
- st8 [r18]=r0;;
- // cover and set shared_mem precover_ifs to cr.ifs
- // set shared_mem ifs and incomplete_regframe to 0
- cover ;;
- mov r20=cr.ifs;;
- adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r21]=r0 ;;
- adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r0 ;;
- adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r20 ;;
- // leave cr.ifs alone for later rfi
- // set iip to go to domain IVA break instruction vector
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22=[r22];;
- adds r22=IA64_VCPU_IVA_OFFSET,r22;;
- ld8 r23=[r22];;
- movl r24=0x3000;;
- add r24=r24,r23;;
- mov cr.iip=r24;;
- // OK, now all set to go except for switch to virtual bank0
- mov r30=r2; mov r29=r3;;
- adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
- adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
- bsw.1;;
- // FIXME?: ar.unat is not really handled correctly,
- // but may not matter if the OS is NaT-clean
- .mem.offset 0,0; st8.spill [r2]=r16,16;
- .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r18,16;
- .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r20,16;
- .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r22,16;
- .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r24,16;
- .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r26,16;
- .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r28,16;
- .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r30,16;
- .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
- movl r31=XSI_IPSR;;
- bsw.0 ;;
- mov r2=r30; mov r3=r29;;
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r0 ;;
- mov pr=r31,-1 ;;
- rfi
- ;;
-
-// reflect domain clock interrupt
-// r31 == pr
-// r30 == cr.ivr
-// r29 == rp
-GLOBAL_ENTRY(fast_tick_reflect)
-#ifndef FAST_TICK // see beginning of file
- br.cond.sptk.many rp;;
-#endif
- mov r28=IA64_TIMER_VECTOR;;
- cmp.ne p6,p0=r28,r30
-(p6) br.cond.spnt.few rp;;
- movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
- ld8 r26=[r20];;
- mov r27=ar.itc;;
- adds r27=200,r27;; // safety margin
- cmp.ltu p6,p0=r26,r27
-(p6) br.cond.spnt.few rp;;
- mov r17=cr.ipsr;;
- // slow path if: ipsr.be==1, ipsr.pp==1
- extr.u r21=r17,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p6,p0=r21,r0
-(p6) br.cond.spnt.few rp;;
- extr.u r21=r17,IA64_PSR_PP_BIT,1 ;;
- cmp.ne p6,p0=r21,r0
-(p6) br.cond.spnt.few rp;;
- // definitely have a domain tick
- mov cr.eoi=r0;;
- mov rp=r29;;
- mov cr.itm=r26;; // ensure next tick
-#ifdef FAST_REFLECT_CNT
- movl r20=fast_reflect_count+((0x3000>>8)*8);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- // vcpu_pend_timer(current)
- movl r18=XSI_PSR_IC;;
- adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r20];;
- cmp.eq p6,p0=r20,r0 // if cr.itv==0 done
-(p6) br.cond.spnt.few fast_tick_reflect_done;;
- tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit
-(p6) br.cond.spnt.few fast_tick_reflect_done;;
- extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
- extr.u r26=r20,6,2;; // r26 has irr index of itv.vector
- movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r19=[r19];;
- adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
- adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
- ld8 r24=[r22];;
- ld8 r23=[r23];;
- cmp.eq p6,p0=r23,r24 // skip if this tick already delivered
-(p6) br.cond.spnt.few fast_tick_reflect_done;;
- // set irr bit
- adds r21=IA64_VCPU_IRR0_OFFSET,r19;
- shl r26=r26,3;;
- add r21=r21,r26;;
- mov r25=1;;
- shl r22=r25,r27;;
- ld8 r23=[r21];;
- or r22=r22,r23;;
- st8 [r21]=r22;;
- // set PSCB(pending_interruption)!
- adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r25;;
-
- // if interrupted at pl0, we're done
- extr.u r16=r17,IA64_PSR_CPL0_BIT,2;;
- cmp.eq p6,p0=r16,r0;;
-(p6) br.cond.spnt.few fast_tick_reflect_done;;
- // if guest vpsr.i is off, we're done
- adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r21=[r21];;
- cmp.eq p6,p0=r21,r0
-(p6) br.cond.spnt.few fast_tick_reflect_done;;
-
- // OK, we have a clock tick to deliver to the active domain!
- // so deliver to iva+0x3000
- // r17 == cr.ipsr
- // r18 == XSI_PSR_IC
- // r19 == IA64_KR(CURRENT)
- // r31 == pr
- mov r16=cr.isr;;
- mov r29=cr.iip;;
- adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r29 ;;
- // set shared_mem isr
- extr.u r16=r16,38,1;; // grab cr.isr.ir bit
- dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
- extr.u r20=r17,41,2 ;; // get ipsr.ri
- dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
- adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r16 ;;
- // set cr.ipsr (make sure cpl==2!)
- mov r29=r17 ;;
- movl r28=DELIVER_PSR_SET;;
- movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
- or r29=r29,r28;;
- and r29=r29,r27;;
- mov cr.ipsr=r29;;
- // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set)
- extr.u r29=r17,IA64_PSR_CPL0_BIT,2;;
- cmp.eq p6,p7=3,r29;;
-(p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2
-(p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2
- ;;
- movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
- movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);;
- dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
- or r17=r17,r28;;
- and r17=r17,r27;;
- ld4 r16=[r18],4;;
- cmp.ne p6,p0=r16,r0;;
-(p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;;
- ld4 r16=[r18],-4;;
- cmp.ne p6,p0=r16,r0;;
-(p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;;
- adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r17 ;;
- // set shared_mem interrupt_delivery_enabled to 0
- // set shared_mem interrupt_collection_enabled to 0
- st8 [r18]=r0;;
- // cover and set shared_mem precover_ifs to cr.ifs
- // set shared_mem ifs and incomplete_regframe to 0
- cover ;;
- mov r20=cr.ifs;;
- adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r21]=r0 ;;
- adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r0 ;;
- adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r20 ;;
- // leave cr.ifs alone for later rfi
- // set iip to go to domain IVA break instruction vector
- adds r22=IA64_VCPU_IVA_OFFSET,r19;;
- ld8 r23=[r22];;
- movl r24=0x3000;;
- add r24=r24,r23;;
- mov cr.iip=r24;;
- // OK, now all set to go except for switch to virtual bank0
- mov r30=r2; mov r29=r3;;
-#ifdef HANDLE_AR_UNAT
- mov r28=ar.unat;
-#endif
- adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
- adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
- bsw.1;;
- .mem.offset 0,0; st8.spill [r2]=r16,16;
- .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r18,16;
- .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r20,16;
- .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r22,16;
- .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r24,16;
- .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r26,16;
- .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r28,16;
- .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r30,16;
- .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
-#ifdef HANDLE_AR_UNAT
- // bank0 regs have no NaT bit, so ensure they are NaT clean
- mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
- mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
- mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
- mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
-#endif
- bsw.0 ;;
- mov r2=r30; mov r3=r29;;
-#ifdef HANDLE_AR_UNAT
- mov ar.unat=r28;
-#endif
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r0 ;;
-fast_tick_reflect_done:
- mov pr=r31,-1 ;;
- rfi
-END(fast_tick_reflect)
-
-// reflect domain breaks directly to domain
-// r16 == cr.isr
-// r17 == cr.iim
-// r18 == XSI_PSR_IC
-// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits)
-// r31 == pr
-GLOBAL_ENTRY(fast_break_reflect)
-#ifndef FAST_BREAK // see beginning of file
- br.sptk.many dispatch_break_fault ;;
-#endif
- mov r30=cr.ipsr;;
- mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0 ;;
-(p7) br.spnt.few dispatch_break_fault ;;
- extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
- cmp.ne p7,p0=r21,r0 ;;
-(p7) br.spnt.few dispatch_break_fault ;;
-#if 1 /* special handling in case running on simulator */
- movl r20=first_break;;
- ld4 r23=[r20];;
- movl r21=0x80001;
- movl r22=0x80002;;
- cmp.ne p7,p0=r23,r0;;
-(p7) br.spnt.few dispatch_break_fault ;;
- cmp.eq p7,p0=r21,r17;
-(p7) br.spnt.few dispatch_break_fault ;;
- cmp.eq p7,p0=r22,r17;
-(p7) br.spnt.few dispatch_break_fault ;;
-#endif
- movl r20=0x2c00;
- // save iim in shared_info
- adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r17;;
- // fall through
-
-
-// reflect to domain ivt+r20
-// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
-// r16 == cr.isr
-// r18 == XSI_PSR_IC
-// r20 == offset into ivt
-// r29 == iip
-// r30 == ipsr
-// r31 == pr
-ENTRY(fast_reflect)
-#ifdef FAST_REFLECT_CNT
- movl r22=fast_reflect_count;
- shr r23=r20,5;;
- add r22=r22,r23;;
- ld8 r21=[r22];;
- adds r21=1,r21;;
- st8 [r22]=r21;;
-#endif
- // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
- adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r29;;
- // set shared_mem isr
- adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r16 ;;
- // set cr.ipsr
- mov r29=r30 ;;
- movl r28=DELIVER_PSR_SET;;
- movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
- or r29=r29,r28;;
- and r29=r29,r27;;
- mov cr.ipsr=r29;;
- // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
- extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
- cmp.eq p6,p7=3,r29;;
-(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2
-(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2
- ;;
- movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);;
- movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);;
- or r30=r30,r28;;
- and r30=r30,r27;;
- // also set shared_mem ipsr.i and ipsr.ic appropriately
- ld8 r24=[r18];;
- extr.u r22=r24,32,32
- cmp4.eq p6,p7=r24,r0;;
-(p6) dep r30=0,r30,IA64_PSR_IC_BIT,1
-(p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
- cmp4.eq p6,p7=r22,r0;;
-(p6) dep r30=0,r30,IA64_PSR_I_BIT,1
-(p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
- adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r30 ;;
- // set shared_mem interrupt_delivery_enabled to 0
- // set shared_mem interrupt_collection_enabled to 0
- st8 [r18]=r0;;
- // cover and set shared_mem precover_ifs to cr.ifs
- // set shared_mem ifs and incomplete_regframe to 0
- cover ;;
- mov r24=cr.ifs;;
- adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r21]=r0 ;;
- adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r0 ;;
- adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r24 ;;
- // vpsr.i = vpsr.ic = 0 on delivery of interruption
- st8 [r18]=r0;;
- // FIXME: need to save iipa and isr to be arch-compliant
- // set iip to go to domain IVA break instruction vector
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22=[r22];;
- adds r22=IA64_VCPU_IVA_OFFSET,r22;;
- ld8 r23=[r22];;
- add r20=r20,r23;;
- mov cr.iip=r20;;
- // OK, now all set to go except for switch to virtual bank0
- mov r30=r2; mov r29=r3;;
-#ifdef HANDLE_AR_UNAT
- mov r28=ar.unat;
-#endif
- adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
- adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
- bsw.1;;
- .mem.offset 0,0; st8.spill [r2]=r16,16;
- .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r18,16;
- .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r20,16;
- .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r22,16;
- .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r24,16;
- .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r26,16;
- .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r28,16;
- .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r30,16;
- .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
-#ifdef HANDLE_AR_UNAT
- // bank0 regs have no NaT bit, so ensure they are NaT clean
- mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0;
- mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0;
- mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0;
- mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;;
-#endif
- movl r31=XSI_IPSR;;
- bsw.0 ;;
- mov r2=r30; mov r3=r29;;
-#ifdef HANDLE_AR_UNAT
- mov ar.unat=r28;
-#endif
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r0 ;;
- mov pr=r31,-1 ;;
- rfi
- ;;
-
-// reflect access faults (0x2400,0x2800,0x5300) directly to domain
-// r16 == isr
-// r17 == ifa
-// r19 == reflect number (only pass-thru to dispatch_reflection)
-// r20 == offset into ivt
-// r31 == pr
-GLOBAL_ENTRY(fast_access_reflect)
-#ifndef FAST_ACCESS_REFLECT // see beginning of file
- br.spnt.few dispatch_reflection ;;
-#endif
- mov r30=cr.ipsr;;
- mov r29=cr.iip;;
- extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.spnt.few dispatch_reflection ;;
- extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
- cmp.ne p7,p0=r21,r0
-(p7) br.spnt.few dispatch_reflection ;;
- extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
- cmp.eq p7,p0=r21,r0
-(p7) br.spnt.few dispatch_reflection ;;
- movl r18=XSI_PSR_IC;;
- ld8 r21=[r18];;
- cmp.eq p7,p0=r0,r21
-(p7) br.spnt.few dispatch_reflection ;;
- // set shared_mem ifa, FIXME: should we validate it?
- mov r17=cr.ifa;;
- adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r21]=r17 ;;
- // get rr[ifa] and save to itir in shared memory (extra bits ignored)
- shr.u r22=r17,61
- adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
- adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
- shladd r22=r22,3,r21;;
- ld8 r22=[r22];;
- st8 [r23]=r22;;
- br.cond.sptk.many fast_reflect;;
-
-
-// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
-ENTRY(hyper_rfi)
-#ifndef FAST_RFI
- br.spnt.few dispatch_break_fault ;;
-#endif
- // if no interrupts pending, proceed
- mov r30=r0
- cmp.eq p7,p0=r20,r0
-(p7) br.sptk.many 1f
- ;;
- adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r21=[r20];; // r21 = vcr.ipsr
- extr.u r22=r21,IA64_PSR_I_BIT,1 ;;
- mov r30=r22
- // r30 determines whether we might deliver an immediate extint
-1:
- adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r21=[r20];; // r21 = vcr.ipsr
- extr.u r22=r21,IA64_PSR_BE_BIT,1 ;;
- // if turning on psr.be, give up for now and do it the slow way
- cmp.ne p7,p0=r22,r0
-(p7) br.spnt.few dispatch_break_fault ;;
- // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
- movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
- and r22=r20,r21
- ;;
- cmp.ne p7,p0=r22,r20
-(p7) br.spnt.few dispatch_break_fault ;;
- // if was in metaphys mode, do it the slow way (FIXME later?)
- adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r20=[r20];;
- cmp.ne p7,p0=r20,r0
-(p7) br.spnt.few dispatch_break_fault ;;
- // if domain hasn't already done virtual bank switch
- // do it the slow way (FIXME later?)
-#if 0
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r20=[r20];;
- cmp.eq p7,p0=r20,r0
-(p7) br.spnt.few dispatch_break_fault ;;
-#endif
- // validate vcr.iip, if in Xen range, do it the slow way
- adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r22=[r20];;
- movl r23=XEN_VIRT_SPACE_LOW
- movl r24=XEN_VIRT_SPACE_HIGH ;;
- cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) &&
-(p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high)
-(p7) br.spnt.few dispatch_break_fault ;;
-#ifndef RFI_TO_INTERRUPT // see beginning of file
- cmp.ne p6,p0=r30,r0
-(p6) br.cond.spnt.few dispatch_break_fault ;;
-#endif
-
-1: // OK now, let's do an rfi.
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);;
- ld8 r23=[r20];;
- adds r23=1,r23;;
- st8 [r20]=r23;;
-#endif
-#ifdef RFI_TO_INTERRUPT
- // maybe do an immediate interrupt delivery?
- cmp.ne p6,p0=r30,r0
-(p6) br.cond.spnt.few rfi_check_extint;;
-#endif
-
-just_do_rfi:
- // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
- mov cr.iip=r22;;
- adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r0 ;;
- adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r20];;
- dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set
- mov cr.ifs=r20 ;;
- // ipsr.cpl == (vcr.ipsr.cpl == 0) 2 : 3;
- dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;;
- // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
- mov r19=r0 ;;
- extr.u r23=r21,IA64_PSR_I_BIT,1 ;;
- cmp.ne p7,p6=r23,r0 ;;
- // not done yet
-(p7) dep r19=-1,r19,32,1
- extr.u r23=r21,IA64_PSR_IC_BIT,1 ;;
- cmp.ne p7,p6=r23,r0 ;;
-(p7) dep r19=-1,r19,0,1 ;;
- st8 [r18]=r19 ;;
- // force on psr.ic, i, dt, rt, it, bn
-	movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN)
- ;;
- or r21=r21,r20
- ;;
- mov cr.ipsr=r21
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r21=[r20];;
- cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?"
-(p7) br.cond.spnt.few 1f;
- // OK, now all set to go except for switch to virtual bank1
- mov r22=1;; st4 [r20]=r22;
- mov r30=r2; mov r29=r3;;
- adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
- adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
- bsw.1;;
- // FIXME?: ar.unat is not really handled correctly,
- // but may not matter if the OS is NaT-clean
- .mem.offset 0,0; ld8.fill r16=[r2],16 ;
- .mem.offset 8,0; ld8.fill r17=[r3],16 ;;
- .mem.offset 0,0; ld8.fill r18=[r2],16 ;
- .mem.offset 0,0; ld8.fill r19=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r20=[r2],16 ;
- .mem.offset 8,0; ld8.fill r21=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r22=[r2],16 ;
- .mem.offset 8,0; ld8.fill r23=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r24=[r2],16 ;
- .mem.offset 8,0; ld8.fill r25=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r26=[r2],16 ;
- .mem.offset 8,0; ld8.fill r27=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r28=[r2],16 ;
- .mem.offset 8,0; ld8.fill r29=[r3],16 ;;
- .mem.offset 8,0; ld8.fill r30=[r2],16 ;
- .mem.offset 8,0; ld8.fill r31=[r3],16 ;;
- bsw.0 ;;
- mov r2=r30; mov r3=r29;;
-1: mov pr=r31,-1
- ;;
- rfi
- ;;
-
-#ifdef RFI_TO_INTERRUPT
-GLOBAL_ENTRY(rfi_check_extint)
- //br.sptk.many dispatch_break_fault ;;
-
- // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
- // make sure none of these get trashed in case going to just_do_rfi
- movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r30=[r30];;
- adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
- mov r25=192
- adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
- ld8 r23=[r16];;
- cmp.eq p6,p0=r23,r0;;
-(p6) adds r16=-8,r16;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r16];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r16=-8,r16;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r16];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r16=-8,r16;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r16];;
-(p6) cmp.eq p6,p0=r23,r0;;
- cmp.eq p6,p0=r23,r0
-(p6) br.cond.spnt.few just_do_rfi; // this is actually an error
- // r16 points to non-zero element of irr, r23 has value
- // r24 points to corr element of insvc, r25 has elt*64
- ld8 r26=[r24];;
- cmp.geu p6,p0=r26,r23
-(p6) br.cond.spnt.many just_do_rfi;
-
- // not masked by insvc, get vector number
- shr.u r26=r23,1;;
- or r26=r23,r26;;
- shr.u r27=r26,2;;
- or r26=r26,r27;;
- shr.u r27=r26,4;;
- or r26=r26,r27;;
- shr.u r27=r26,8;;
- or r26=r26,r27;;
- shr.u r27=r26,16;;
- or r26=r26,r27;;
- shr.u r27=r26,32;;
- or r26=r26,r27;;
- andcm r26=0xffffffffffffffff,r26;;
- popcnt r26=r26;;
- sub r26=63,r26;;
- // r26 now contains the bit index (mod 64)
- mov r27=1;;
- shl r27=r27,r26;;
- // r27 now contains the (within the proper word) bit mask
- add r26=r25,r26
- // r26 now contains the vector [0..255]
- adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r20] ;;
- extr.u r28=r20,16,1
- extr.u r29=r20,4,4 ;;
- cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi
-(p6) br.cond.spnt.few just_do_rfi;;
- shl r29=r29,4;;
- adds r29=15,r29;;
- cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi
-(p6) br.cond.spnt.few just_do_rfi;;
-
-// this doesn't work yet (dies early after getting to user mode)
-// but happens relatively infrequently, so fix it later.
-// NOTE that these will be counted incorrectly for now (for privcnt output)
-GLOBAL_ENTRY(rfi_with_interrupt)
-#if 1
- br.sptk.many dispatch_break_fault ;;
-#endif
-
- // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000
- // r18 == XSI_PSR_IC
- // r21 == vipsr (ipsr in shared_mem)
- // r30 == IA64_KR(CURRENT)
- // r31 == pr
- mov r17=cr.ipsr;;
- mov r16=cr.isr;;
- // set shared_mem isr
- extr.u r16=r16,38,1;; // grab cr.isr.ir bit
- dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero)
- extr.u r20=r21,41,2 ;; // get v(!)psr.ri
- dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei
- adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r22]=r16 ;;
- // set cr.ipsr (make sure cpl==2!)
- mov r29=r17 ;;
- movl r28=DELIVER_PSR_SET;;
- movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
- or r29=r29,r28;;
- and r29=r29,r27;;
- mov cr.ipsr=r29;;
- // v.ipsr and v.iip are already set (and v.iip validated) as rfi target
- // set shared_mem interrupt_delivery_enabled to 0
- // set shared_mem interrupt_collection_enabled to 0
- st8 [r18]=r0;;
- // cover and set shared_mem precover_ifs to cr.ifs
- // set shared_mem ifs and incomplete_regframe to 0
-#if 0
- cover ;;
- mov r20=cr.ifs;;
- adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r22]=r0 ;;
- adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r22]=r0 ;;
- adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r22]=r20 ;;
- // leave cr.ifs alone for later rfi
-#else
- adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r22]=r0 ;;
- adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r22];;
- st8 [r22]=r0 ;;
- adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
- st8 [r22]=r20 ;;
-#endif
- // set iip to go to domain IVA break instruction vector
- adds r22=IA64_VCPU_IVA_OFFSET,r30;;
- ld8 r23=[r22];;
- movl r24=0x3000;;
- add r24=r24,r23;;
- mov cr.iip=r24;;
-#if 0
- // OK, now all set to go except for switch to virtual bank0
- mov r30=r2; mov r29=r3;;
- adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
- adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
- bsw.1;;
- // FIXME: need to handle ar.unat!
- .mem.offset 0,0; st8.spill [r2]=r16,16;
- .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r18,16;
- .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r20,16;
- .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r22,16;
- .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r24,16;
- .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r26,16;
- .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r28,16;
- .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
- .mem.offset 0,0; st8.spill [r2]=r30,16;
- .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
- movl r31=XSI_IPSR;;
- bsw.0 ;;
- mov r2=r30; mov r3=r29;;
-#else
- bsw.1;;
- movl r31=XSI_IPSR;;
- bsw.0 ;;
-#endif
- adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
- st4 [r20]=r0 ;;
- mov pr=r31,-1 ;;
- rfi
-#endif // RFI_TO_INTERRUPT
-
-ENTRY(hyper_cover)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- // skip test for vpsr.ic.. it's a prerequisite for hyperprivops
- cover ;;
- adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
- mov r30=cr.ifs;;
- adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18
- ld4 r21=[r20] ;;
- cmp.eq p6,p7=r21,r0 ;;
-(p6) st8 [r22]=r30;;
-(p7) st4 [r20]=r0;;
- mov cr.ifs=r0;;
- // adjust return address to skip over break instruction
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-
-// return from metaphysical mode (meta=1) to virtual mode (meta=0)
-ENTRY(hyper_ssm_dt)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r21=[r20];;
- cmp.eq p7,p0=r21,r0 // meta==0?
-(p7) br.spnt.many 1f ;; // already in virtual mode
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22=[r22];;
- adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
- ld4 r23=[r22];;
- mov rr[r0]=r23;;
- srlz.i;;
- st4 [r20]=r0 ;;
- // adjust return address to skip over break instruction
-1: extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-
-// go to metaphysical mode (meta=1) from virtual mode (meta=0)
-ENTRY(hyper_rsm_dt)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld4 r21=[r20];;
- cmp.ne p7,p0=r21,r0 // meta==0?
-(p7) br.spnt.many 1f ;; // already in metaphysical mode
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22=[r22];;
- adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
- ld4 r23=[r22];;
- mov rr[r0]=r23;;
- srlz.i;;
- adds r21=1,r0 ;;
- st4 [r20]=r21 ;;
- // adjust return address to skip over break instruction
-1: extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-
-ENTRY(hyper_get_tpr)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r8=[r20];;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_get_tpr)
-
-// if we get to here, there are no interrupts pending so we
-// can change virtual tpr to any value without fear of provoking
-// (or accidentally missing) delivering an interrupt
-ENTRY(hyper_set_tpr)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- movl r27=0xff00;;
- adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
- andcm r8=r8,r27;;
- st8 [r20]=r8;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_set_tpr)
-
-ENTRY(hyper_get_ivr)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);;
- ld8 r21=[r22];;
- adds r21=1,r21;;
- st8 [r22]=r21;;
-#endif
- mov r8=15;;
- // when we get to here r20=~=interrupts pending
- cmp.eq p7,p0=r20,r0;;
-(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
-(p7) st4 [r20]=r0;;
-(p7) br.spnt.many 1f ;;
- movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r30=[r30];;
- adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
- mov r25=192
- adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
- ld8 r23=[r22];;
- cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) adds r24=-8,r24;;
-(p6) adds r25=-64,r25;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
- cmp.eq p6,p0=r23,r0
-(p6) br.cond.spnt.few 1f; // this is actually an error
- // r22 points to non-zero element of irr, r23 has value
- // r24 points to corr element of insvc, r25 has elt*64
- ld8 r26=[r24];;
- cmp.geu p6,p0=r26,r23
-(p6) br.cond.spnt.many 1f;
- // not masked by insvc, get vector number
- shr.u r26=r23,1;;
- or r26=r23,r26;;
- shr.u r27=r26,2;;
- or r26=r26,r27;;
- shr.u r27=r26,4;;
- or r26=r26,r27;;
- shr.u r27=r26,8;;
- or r26=r26,r27;;
- shr.u r27=r26,16;;
- or r26=r26,r27;;
- shr.u r27=r26,32;;
- or r26=r26,r27;;
- andcm r26=0xffffffffffffffff,r26;;
- popcnt r26=r26;;
- sub r26=63,r26;;
- // r26 now contains the bit index (mod 64)
- mov r27=1;;
- shl r27=r27,r26;;
- // r27 now contains the (within the proper word) bit mask
- add r26=r25,r26
- // r26 now contains the vector [0..255]
- adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r20] ;;
- extr.u r28=r20,16,1
- extr.u r29=r20,4,4 ;;
- cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS
-(p6) br.cond.spnt.few 1f;
- shl r29=r29,4;;
- adds r29=15,r29;;
- cmp.ge p6,p0=r29,r26
-(p6) br.cond.spnt.few 1f;
- // OK, have an unmasked vector to process/return
- ld8 r25=[r24];;
- or r25=r25,r27;;
- st8 [r24]=r25;;
- ld8 r25=[r22];;
- andcm r25=r25,r27;;
- st8 [r22]=r25;;
- mov r8=r26;;
- // if its a clock tick, remember itm to avoid delivering it twice
- adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;;
- ld8 r20=[r20];;
- extr.u r20=r20,0,8;;
- cmp.eq p6,p0=r20,r8
- adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30
- adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;;
- ld8 r23=[r23];;
-(p6) st8 [r22]=r23;;
- // all done
-1: mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_get_ivr)
-
-ENTRY(hyper_eoi)
- // when we get to here r20=~=interrupts pending
- cmp.ne p7,p0=r20,r0
-(p7) br.spnt.many dispatch_break_fault ;;
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22=[r22];;
- adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
- ld8 r23=[r22];;
- cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
-(p6) adds r22=-8,r22;;
-(p6) ld8 r23=[r22];;
-(p6) cmp.eq p6,p0=r23,r0;;
- cmp.eq p6,p0=r23,r0
-(p6) br.cond.spnt.few 1f; // this is actually an error
- // r22 points to non-zero element of insvc, r23 has value
- shr.u r24=r23,1;;
- or r24=r23,r24;;
- shr.u r25=r24,2;;
- or r24=r24,r25;;
- shr.u r25=r24,4;;
- or r24=r24,r25;;
- shr.u r25=r24,8;;
- or r24=r24,r25;;
- shr.u r25=r24,16;;
- or r24=r24,r25;;
- shr.u r25=r24,32;;
- or r24=r24,r25;;
- andcm r24=0xffffffffffffffff,r24;;
- popcnt r24=r24;;
- sub r24=63,r24;;
- // r24 now contains the bit index
- mov r25=1;;
- shl r25=r25,r24;;
- andcm r23=r23,r25;;
- st8 [r22]=r23;;
-1: mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_eoi)
-
-ENTRY(hyper_set_itm)
- // when we get to here r20=~=interrupts pending
- cmp.ne p7,p0=r20,r0
-(p7) br.spnt.many dispatch_break_fault ;;
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
- ld8 r21=[r20];;
- movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r20=[r20];;
- adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
- st8 [r20]=r8;;
- cmp.geu p6,p0=r21,r8;;
-(p6) mov r21=r8;;
- // now "safe set" cr.itm=r21
- mov r23=100;;
-2: mov cr.itm=r21;;
- srlz.d;;
- mov r22=ar.itc ;;
- cmp.leu p6,p0=r21,r22;;
- add r21=r21,r23;;
- shl r23=r23,1;;
-(p6) br.cond.spnt.few 2b;;
-1: mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_set_itm)
-
-ENTRY(hyper_get_rr)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- extr.u r25=r8,61,3;;
- adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
- shl r25=r25,3;;
- add r20=r20,r25;;
- ld8 r8=[r20];;
-1: mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_get_rr)
-
-ENTRY(hyper_set_rr)
- extr.u r25=r8,61,3;;
- cmp.leu p7,p0=7,r25 // punt on setting rr7
-(p7) br.spnt.many dispatch_break_fault ;;
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- extr.u r26=r9,8,24 // r26 = r9.rid
- movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r20=[r20];;
- adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
- ld4 r22=[r21];;
- adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
- ld4 r23=[r21];;
- adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;;
- add r22=r26,r22;;
- cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid
-(p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return
- // r21=starting_rid
- adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
- shl r25=r25,3;;
- add r20=r20,r25;;
- st8 [r20]=r9;; // store away exactly what was passed
- // but adjust value actually placed in rr[r8]
- // r22 contains adjusted rid, "mangle" it (see regionreg.c)
- // and set ps to PAGE_SHIFT and ve to 1
- extr.u r27=r22,0,8
- extr.u r28=r22,8,8
- extr.u r29=r22,16,8;;
- dep.z r23=PAGE_SHIFT,2,6;;
- dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3
- dep r23=r27,r23,24,8;;
- dep r23=r28,r23,16,8;;
- dep r23=r29,r23,8,8
- cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical
-(p6) st4 [r24]=r23
- mov rr[r8]=r23;;
- // done, mosey on back
-1: mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_set_rr)
-
-// this routine was derived from optimized assembly output from
-// vcpu_thash so it is dense and difficult to read but it works
-// On entry:
-// r18 == XSI_PSR_IC
-// r31 == pr
-GLOBAL_ENTRY(hyper_thash)
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- shr.u r20 = r8, 61
- addl r25 = 1, r0
- movl r17 = 0xe000000000000000
- ;;
- and r21 = r17, r8 // VHPT_Addr1
- ;;
- shladd r28 = r20, 3, r18
- adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
- ;;
- adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
- addl r28 = 32767, r0
- ld8 r24 = [r19] // pta
- ;;
- ld8 r23 = [r27] // rrs[vadr>>61]
- extr.u r26 = r24, 2, 6
- ;;
- extr.u r22 = r23, 2, 6
- shl r30 = r25, r26
- ;;
- shr.u r19 = r8, r22
- shr.u r29 = r24, 15
- ;;
- adds r17 = -1, r30
- ;;
- shladd r27 = r19, 3, r0
- extr.u r26 = r17, 15, 46
- ;;
- andcm r24 = r29, r26
- and r19 = r28, r27
- shr.u r25 = r27, 15
- ;;
- and r23 = r26, r25
- ;;
- or r22 = r24, r23
- ;;
- dep.z r20 = r22, 15, 46
- ;;
- or r16 = r20, r21
- ;;
- or r8 = r19, r16
- // done, update iip/ipsr to next instruction
- mov r24=cr.ipsr
- mov r25=cr.iip;;
- extr.u r26=r24,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r25=16,r25
-(p7) adds r26=1,r26
- ;;
- dep r24=r26,r24,41,2
- ;;
- mov cr.ipsr=r24
- mov cr.iip=r25
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_thash)
-
-ENTRY(hyper_ptc_ga)
-#ifndef FAST_PTC_GA
- br.spnt.few dispatch_break_fault ;;
-#endif
- // FIXME: validate not flushing Xen addresses
-#ifdef FAST_HYPERPRIVOP_CNT
- movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
- ld8 r21=[r20];;
- adds r21=1,r21;;
- st8 [r20]=r21;;
-#endif
- mov r28=r8
- extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2)
- mov r20=1
- shr.u r24=r8,61
- addl r27=56,r0 // PAGE_SHIFT<<2 (for ptc.ga)
- movl r26=0x8000000000000000 // INVALID_TI_TAG
- mov r30=ar.lc
- ;;
- shl r19=r20,r19
- cmp.eq p7,p0=7,r24
-(p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7
- ;;
- cmp.le p7,p0=r19,r0 // skip flush if size<=0
-(p7) br.cond.dpnt 2f ;;
- extr.u r24=r19,0,PAGE_SHIFT
- shr.u r23=r19,PAGE_SHIFT ;; // repeat loop for n pages
- cmp.ne p7,p0=r24,r0 ;;
-(p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? extra iter
- mov ar.lc=r23
- movl r29=PAGE_SIZE;;
-1:
- thash r25=r28 ;;
- adds r25=16,r25 ;;
- ld8 r24=[r25] ;;
- // FIXME: should check if tag matches, not just blow it away
- or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1
- st8 [r25]=r24
- ptc.ga r28,r27 ;;
- srlz.i ;;
- add r28=r29,r28
- br.cloop.sptk.few 1b
- ;;
-2:
- mov ar.lc=r30 ;;
- mov r29=cr.ipsr
- mov r30=cr.iip;;
- movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r27=[r27];;
- adds r25=IA64_VCPU_DTLB_OFFSET,r27
- adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
- ld8 r24=[r25]
- ld8 r27=[r26] ;;
- and r24=-2,r24
- and r27=-2,r27 ;;
- st8 [r25]=r24 // set 1-entry i/dtlb as not present
- st8 [r26]=r27 ;;
- // increment to point to next instruction
- extr.u r26=r29,41,2 ;;
- cmp.eq p6,p7=2,r26 ;;
-(p6) mov r26=0
-(p6) adds r30=16,r30
-(p7) adds r26=1,r26
- ;;
- dep r29=r26,r29,41,2
- ;;
- mov cr.ipsr=r29
- mov cr.iip=r30
- mov pr=r31,-1 ;;
- rfi
- ;;
-END(hyper_ptc_ga)
-
-ENTRY(hyper_itc_d)
- br.spnt.many dispatch_break_fault ;;
-END(hyper_itc_d)
-
-ENTRY(hyper_itc_i)
- br.spnt.many dispatch_break_fault ;;
-END(hyper_itc_i)
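(A note on an idiom that recurs in the removed hyper_get_ivr, hyper_eoi and
rfi_check_extint paths above: the highest pending bit of an irr/insvc word is found by
smearing the top set bit rightward with a shift/or cascade, inverting, and popcounting.
A rough C model follows -- illustrative only, not code from this patch; the helper name
is made up, a 64-bit unsigned long is assumed as on ia64, and, like the assembly, the
input is assumed non-zero since the zero case is branched around earlier.)

    #include <stdio.h>

    static int highest_set_bit(unsigned long x)      /* requires x != 0 */
    {
        x |= x >> 1;  x |= x >> 2;  x |= x >> 4;     /* smear the MSB rightward */
        x |= x >> 8;  x |= x >> 16; x |= x >> 32;
        return 63 - __builtin_popcountl(~x);         /* count the zeros above the MSB */
    }

    int main(void)
    {
        printf("%d\n", highest_set_bit(1UL << 42));  /* prints 42 */
        return 0;
    }
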
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/idle0_task.c
--- a/xen/arch/ia64/idle0_task.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,58 +0,0 @@
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/desc.h>
-
-#define INIT_MM(name) \
-{ \
- .pgd = swapper_pg_dir, \
- .mm_users = ATOMIC_INIT(2), \
- .mm_count = ATOMIC_INIT(1), \
- .page_table_lock = SPIN_LOCK_UNLOCKED, \
- .mmlist = LIST_HEAD_INIT(name.mmlist), \
-}
-
-#define IDLE0_EXEC_DOMAIN(_ed,_d) \
-{ \
- processor: 0, \
- mm: 0, \
- thread: INIT_THREAD, \
- domain: (_d) \
-}
-
-#define IDLE0_DOMAIN(_t) \
-{ \
- domain_id: IDLE_DOMAIN_ID, \
- domain_flags:DOMF_idle_domain, \
- refcnt: ATOMIC_INIT(1) \
-}
-
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
-struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain);
-#if 0
-struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu,
- &idle0_domain);
-#endif
-
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is properly aligned due to the way process stacks are
- * handled. This is done by having a special ".data.init_task" section...
- */
-union {
- struct {
- struct domain task;
- } s;
- unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task")));
-// = {{
- ;
-//.task = IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain),
-//};
-//};
-
-EXPORT_SYMBOL(init_task);
-
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/irq.c
--- a/xen/arch/ia64/irq.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1503 +0,0 @@
-/*
- * linux/arch/ia64/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the code used by various IRQ handling routines:
- * asking for different IRQ's should be done through these routines
- * instead of just grabbing them. Thus setups with different IRQ numbers
- * shouldn't result in any weird surprises, and installing new handlers
- * should be easier.
- *
- * Copyright (C) Ashok Raj<ashok.raj@xxxxxxxxx>, Intel Corporation 2004
- *
- * 4/14/2004: Added code to handle cpu migration and do safe irq
- * migration without lossing interrupts for iosapic
- * architecture.
- */
-
-/*
- * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
- *
- * IRQs are in fact implemented a bit like signal handlers for the kernel.
- * Naturally it's not a 1:1 relation, but there are similarities.
- */
-
-#include <linux/config.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#ifndef XEN
-#include <linux/signal.h>
-#endif
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#ifndef XEN
-#include <linux/random.h>
-#include <linux/cpu.h>
-#endif
-#include <linux/ctype.h>
-#ifndef XEN
-#include <linux/smp_lock.h>
-#endif
-#include <linux/init.h>
-#ifndef XEN
-#include <linux/kernel_stat.h>
-#endif
-#include <linux/irq.h>
-#ifndef XEN
-#include <linux/proc_fs.h>
-#endif
-#include <linux/seq_file.h>
-#ifndef XEN
-#include <linux/kallsyms.h>
-#include <linux/notifier.h>
-#endif
-
-#include <asm/atomic.h>
-#ifndef XEN
-#include <asm/cpu.h>
-#endif
-#include <asm/io.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/bitops.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#ifndef XEN
-#include <asm/tlbflush.h>
-#endif
-#include <asm/delay.h>
-#include <asm/irq.h>
-
-#ifdef XEN
-#include <xen/event.h>
-#define _irq_desc irq_desc
-#define irq_descp(irq) &irq_desc[irq]
-#define apicid_to_phys_cpu_present(x) 1
-#endif
-
-
-/*
- * Linux has a controller-independent x86 interrupt architecture.
- * every controller has a 'controller-template', that is used
- * by the main code to do the right thing. Each driver-visible
- * interrupt source is transparently wired to the appropriate
- * controller. Thus drivers need not be aware of the
- * interrupt-controller.
- *
- * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
- * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
- * (IO-APICs assumed to be messaging to Pentium local-APICs)
- *
- * the code is designed to be easily extended with new/different
- * interrupt controllers, without having to do assembly magic.
- */
-
-/*
- * Controller mappings for all interrupt sources:
- */
-irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = {
- [0 ... NR_IRQS-1] = {
- .status = IRQ_DISABLED,
- .handler = &no_irq_type,
- .lock = SPIN_LOCK_UNLOCKED
- }
-};
-
-/*
- * This is updated when the user sets irq affinity via /proc
- */
-cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
-
-#ifdef CONFIG_IA64_GENERIC
-irq_desc_t * __ia64_irq_desc (unsigned int irq)
-{
- return _irq_desc + irq;
-}
-
-ia64_vector __ia64_irq_to_vector (unsigned int irq)
-{
- return (ia64_vector) irq;
-}
-
-unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
-{
- return (unsigned int) vec;
-}
-#endif
-
-static void register_irq_proc (unsigned int irq);
-
-/*
- * Special irq handlers.
- */
-
-#ifdef XEN
-void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
-#else
-irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
-{ return IRQ_NONE; }
-#endif
-
-/*
- * Generic no controller code
- */
-
-static void enable_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-static void disable_none(unsigned int irq) { }
-static void ack_none(unsigned int irq)
-{
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves, it doesn't deserve
- * a generic callback i think.
- */
-#ifdef CONFIG_X86
- printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Currently unexpected vectors happen only on SMP and APIC.
- * We _must_ ack these because every local APIC has only N
- * irq slots per priority level, and a 'hanging, unacked' IRQ
- * holds up an irq slot - in excessive cases (when multiple
- * unexpected vectors occur) that might lock up the APIC
- * completely.
- */
- ack_APIC_irq();
-#endif
-#endif
-#ifdef CONFIG_IA64
-	printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
-#endif
-}
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define shutdown_none disable_none
-#define end_none enable_none
-
-struct hw_interrupt_type no_irq_type = {
- "none",
- startup_none,
- shutdown_none,
- enable_none,
- disable_none,
- ack_none,
- end_none
-};
-
-atomic_t irq_err_count;
-#ifdef CONFIG_X86_IO_APIC
-#ifdef APIC_MISMATCH_DEBUG
-atomic_t irq_mis_count;
-#endif
-#endif
-
-/*
- * Generic, controller-independent functions:
- */
-
-#ifndef XEN
-int show_interrupts(struct seq_file *p, void *v)
-{
- int j, i = *(loff_t *) v;
- struct irqaction * action;
- irq_desc_t *idesc;
- unsigned long flags;
-
- if (i == 0) {
- seq_puts(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- idesc = irq_descp(i);
- spin_lock_irqsave(&idesc->lock, flags);
- action = idesc->action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %14s", idesc->handler->typename);
- seq_printf(p, " %s", action->name);
-
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
-
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&idesc->lock, flags);
- } else if (i == NR_IRQS) {
- seq_puts(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
- seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_puts(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
-				seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
- seq_putc(p, '\n');
-#endif
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#ifdef CONFIG_X86_IO_APIC
-#ifdef APIC_MISMATCH_DEBUG
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-#endif
- }
- return 0;
-}
-#endif
-
-#ifdef CONFIG_SMP
-inline void synchronize_irq(unsigned int irq)
-{
-#ifndef XEN
- struct irq_desc *desc = irq_desc + irq;
-
- while (desc->status & IRQ_INPROGRESS)
- cpu_relax();
-#endif
-}
-EXPORT_SYMBOL(synchronize_irq);
-#endif
-
-/*
- * This should really return information about whether
- * we should do bottom half handling etc. Right now we
- * end up _always_ checking the bottom half, which is a
- * waste of time and is not what some drivers would
- * prefer.
- */
-int handle_IRQ_event(unsigned int irq,
- struct pt_regs *regs, struct irqaction *action)
-{
- int status = 1; /* Force the "do bottom halves" bit */
- int retval = 0;
-
-#ifndef XEN
- if (!(action->flags & SA_INTERRUPT))
-#endif
- local_irq_enable();
-
-#ifdef XEN
- action->handler(irq, action->dev_id, regs);
-#else
- do {
- status |= action->flags;
- retval |= action->handler(irq, action->dev_id, regs);
- action = action->next;
- } while (action);
- if (status & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
-#endif
- local_irq_disable();
- return retval;
-}
-
-#ifndef XEN
-static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
- struct irqaction *action;
-
- if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
- printk(KERN_ERR "irq event %d: bogus return value %x\n",
- irq, action_ret);
- } else {
- printk(KERN_ERR "irq %d: nobody cared!\n", irq);
- }
- dump_stack();
- printk(KERN_ERR "handlers:\n");
- action = desc->action;
- do {
- printk(KERN_ERR "[<%p>]", action->handler);
- print_symbol(" (%s)",
- (unsigned long)action->handler);
- printk("\n");
- action = action->next;
- } while (action);
-}
-
-static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
- static int count = 100;
-
- if (count) {
- count--;
- __report_bad_irq(irq, desc, action_ret);
- }
-}
-#endif
-
-static int noirqdebug;
-
-static int __init noirqdebug_setup(char *str)
-{
- noirqdebug = 1;
- printk("IRQ lockup detection disabled\n");
- return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
-/*
- * If 99,900 of the previous 100,000 interrupts have not been handled then
- * assume that the IRQ is stuck in some manner. Drop a diagnostic and try to
- * turn the IRQ off.
- *
- * (The other 100-of-100,000 interrupts may have been a correctly-functioning
- * device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
- */
-#ifndef XEN
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
- if (action_ret != IRQ_HANDLED) {
- desc->irqs_unhandled++;
- if (action_ret != IRQ_NONE)
- report_bad_irq(irq, desc, action_ret);
- }
-
- desc->irq_count++;
- if (desc->irq_count < 100000)
- return;
-
- desc->irq_count = 0;
- if (desc->irqs_unhandled > 99900) {
- /*
- * The interrupt is stuck
- */
- __report_bad_irq(irq, desc, action_ret);
- /*
- * Now kill the IRQ
- */
- printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
- desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
- }
- desc->irqs_unhandled = 0;
-}
-#endif
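
For reference, a quick user-space model of the lockup heuristic described in the comment above note_interrupt(): the 100,000-interrupt window and the 99,900 threshold come from the code, while the simulated "only 50 of 100,000 handled" rate is invented for the example.

#include <stdio.h>

int main(void)
{
	unsigned irq_count = 0, irqs_unhandled = 0, i;
	int disabled = 0;

	for (i = 0; i < 100000; i++) {
		int handled = (i % 2000) == 0;   /* only 50 of 100000 handled: a stuck line */

		if (!handled)
			irqs_unhandled++;
		if (++irq_count < 100000)
			continue;
		irq_count = 0;
		if (irqs_unhandled > 99900)      /* the interrupt is stuck */
			disabled = 1;            /* "Disabling IRQ #n" */
		irqs_unhandled = 0;
	}
	printf("line would be disabled: %s\n", disabled ? "yes" : "no");
	return 0;
}
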
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
- */
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Disables and Enables are
- * nested.
- * Unlike disable_irq(), this function does not ensure existing
- * instances of the IRQ handler have completed before returning.
- *
- * This function may be called from IRQ context.
- */
-
-inline void disable_irq_nosync(unsigned int irq)
-{
- irq_desc_t *desc = irq_descp(irq);
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- if (!desc->depth++) {
- desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
- }
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and Disables are
- * nested.
- * This function waits for any pending IRQ handlers for this interrupt
- * to complete before returning. If you use this function while
- * holding a resource the IRQ handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
-{
- irq_desc_t *desc = irq_descp(irq);
-
- disable_irq_nosync(irq);
- if (desc->action)
- synchronize_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-/**
- * enable_irq - enable handling of an irq
- * @irq: Interrupt to enable
- *
- * Undoes the effect of one call to disable_irq(). If this
- * matches the last disable, processing of interrupts on this
- * IRQ line is re-enabled.
- *
- * This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
-{
- irq_desc_t *desc = irq_descp(irq);
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- switch (desc->depth) {
- case 1: {
- unsigned int status = desc->status & ~IRQ_DISABLED;
- desc->status = status;
-#ifndef XEN
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = status | IRQ_REPLAY;
- hw_resend_irq(desc->handler,irq);
- }
-#endif
- desc->handler->enable(irq);
- /* fall-through */
- }
- default:
- desc->depth--;
- break;
- case 0:
- printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n",
- irq, (void *) __builtin_return_address(0));
- }
- spin_unlock_irqrestore(&desc->lock, flags);
-}
-EXPORT_SYMBOL(enable_irq);
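
For reference, a minimal user-space sketch of the depth-based nesting that disable_irq_nosync()/enable_irq() implement above. The fake_disable()/fake_enable() names and the hw_enabled flag are hypothetical; the point is that the hardware is only touched on the 0->1 and 1->0 depth transitions.

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-ins for desc->depth and the PIC line state. */
static int depth;
static int hw_enabled = 1;

static void fake_disable(void)
{
	if (!depth++)              /* only the first disable touches the hardware */
		hw_enabled = 0;
}

static void fake_enable(void)
{
	if (depth == 0) {          /* unbalanced enable, like the printk above */
		fprintf(stderr, "unbalanced enable\n");
		return;
	}
	if (--depth == 0)          /* only the matching outermost enable re-enables */
		hw_enabled = 1;
}

int main(void)
{
	fake_disable();
	fake_disable();            /* nested */
	fake_enable();
	assert(!hw_enabled);       /* one disable still outstanding */
	fake_enable();
	assert(hw_enabled);        /* balanced again */
	return 0;
}
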
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
-{
- irq_desc_t *desc = irq_desc + irq;
- struct irqaction * action;
- unsigned int status;
-
-#ifndef XEN
- kstat_this_cpu.irqs[irq]++;
-#endif
- if (desc->status & IRQ_PER_CPU) {
- irqreturn_t action_ret;
-
- /*
- * No locking required for CPU-local interrupts:
- */
- desc->handler->ack(irq);
- action_ret = handle_IRQ_event(irq, regs, desc->action);
-#ifndef XEN
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
-#endif
- desc->handler->end(irq);
- return 1;
- }
-
- spin_lock(&desc->lock);
- desc->handler->ack(irq);
- /*
- * REPLAY is when Linux resends an IRQ that was dropped earlier
- * WAITING is used by probe to mark irqs that are being tested
- */
-#ifdef XEN
- status = desc->status & ~IRQ_REPLAY;
-#else
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-#endif
- status |= IRQ_PENDING; /* we _want_ to handle it */
-
- /*
- * If the IRQ is disabled for whatever reason, we cannot
- * use the action we have.
- */
- action = NULL;
- if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
- action = desc->action;
- status &= ~IRQ_PENDING; /* we commit to handling */
- status |= IRQ_INPROGRESS; /* we are handling it */
- }
- desc->status = status;
-
- /*
- * If there is no IRQ handler or it was disabled, exit early.
- * Since we set PENDING, if another processor is handling
- * a different instance of this same irq, the other processor
- * will take care of it.
- */
- if (unlikely(!action))
- goto out;
-
- /*
- * Edge triggered interrupts need to remember
- * pending events.
- * This applies to any hw interrupts that allow a second
- * instance of the same irq to arrive while we are in do_IRQ
- * or in the handler. But the code here only handles the _second_
- * instance of the irq, not the third or fourth. So it is mostly
- * useful for irq hardware that does not mask cleanly in an
- * SMP environment.
- */
- for (;;) {
- irqreturn_t action_ret;
-
- spin_unlock(&desc->lock);
-
- action_ret = handle_IRQ_event(irq, regs, action);
-
- spin_lock(&desc->lock);
-#ifndef XEN
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
-#endif
- if (likely(!(desc->status & IRQ_PENDING)))
- break;
- desc->status &= ~IRQ_PENDING;
- }
- desc->status &= ~IRQ_INPROGRESS;
-
-out:
- /*
- * The ->end() handler has to deal with interrupts which got
- * disabled while the handler was running.
- */
- desc->handler->end(irq);
- spin_unlock(&desc->lock);
-
- return 1;
-}
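
For reference, a single-threaded model of the IRQ_PENDING/IRQ_INPROGRESS handshake in __do_IRQ() above. The second_edge flag stands in for another CPU re-raising the same edge-triggered interrupt while the first CPU is still in the handler; the names and the two-edge script are invented for the example.

#include <assert.h>
#include <stdio.h>

#define IRQ_INPROGRESS 0x1
#define IRQ_PENDING    0x2

static unsigned status;
static int handled;
static int second_edge = 1;

static void deliver(void);

static void handler(void)
{
	handled++;
	if (second_edge) {          /* a second edge arrives mid-handler */
		second_edge = 0;
		deliver();          /* only marks IRQ_PENDING and returns */
	}
}

static void deliver(void)
{
	status |= IRQ_PENDING;                  /* we _want_ to handle it */
	if (status & IRQ_INPROGRESS)
		return;                         /* the owning CPU's loop picks it up */
	status &= ~IRQ_PENDING;
	status |= IRQ_INPROGRESS;
	for (;;) {
		handler();
		if (!(status & IRQ_PENDING))    /* nothing re-raised: done */
			break;
		status &= ~IRQ_PENDING;         /* pick up the extra edge */
	}
	status &= ~IRQ_INPROGRESS;
}

int main(void)
{
	deliver();
	assert(handled == 2);   /* the extra edge was not lost */
	printf("handler ran %d times, status=%#x\n", handled, status);
	return 0;
}
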
-
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * SA_SHIRQ Interrupt is shared
- *
- * SA_INTERRUPT Disable local interrupts while processing
- *
- * SA_SAMPLE_RANDOM The interrupt can be used for entropy
- *
- */
-
-int request_irq(unsigned int irq,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irqflags,
- const char * devname,
- void *dev_id)
-{
- int retval;
- struct irqaction * action;
-
-#if 1
- /*
- * Sanity-check: shared interrupts should REALLY pass in
- * a real dev-ID, otherwise we'll have trouble later trying
- * to figure out which interrupt is which (messes up the
- * interrupt freeing logic etc).
- */
- if (irqflags & SA_SHIRQ) {
- if (!dev_id)
-			printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname);
- }
-#endif
-
- if (irq >= NR_IRQS)
- return -EINVAL;
- if (!handler)
- return -EINVAL;
-
- action = xmalloc(struct irqaction);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
-#ifndef XEN
- action->flags = irqflags;
- action->mask = 0;
-#endif
- action->name = devname;
-#ifndef XEN
- action->next = NULL;
-#endif
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
- if (retval)
- xfree(action);
- return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
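
For reference, a sketch of how a driver of this era would call the interface documented above, following the non-Xen variant (SA_SHIRQ sharing and the two-argument free_irq()). MY_DEV_IRQ, my_dev and my_handler are hypothetical, and the fragment assumes the usual 2.6 kernel headers rather than being standalone.

#include <linux/interrupt.h>

/* Hypothetical device cookie and handler, following the rules in the
 * comment above (non-NULL dev_id because the line is shared). */
#define MY_DEV_IRQ 10                     /* made-up line number */

static struct my_dev { int users; } my_dev;

static irqreturn_t my_handler(int irq, void *dev_id, struct pt_regs *regs)
{
	struct my_dev *dev = dev_id;      /* the cookie passed below */

	(void)irq; (void)dev; (void)regs;
	/* ...acknowledge/serve the device here... */
	return IRQ_HANDLED;
}

static int my_dev_init(void)
{
	return request_irq(MY_DEV_IRQ, my_handler, SA_SHIRQ, "my_dev", &my_dev);
}

static void my_dev_exit(void)
{
	free_irq(MY_DEV_IRQ, &my_dev);    /* two-argument (non-Xen) variant */
}
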
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function. The function
- * does not return until any executing interrupts for this IRQ
- * have completed.
- *
- * This function must not be called from interrupt context.
- */
-
-#ifdef XEN
-void free_irq(unsigned int irq)
-#else
-void free_irq(unsigned int irq, void *dev_id)
-#endif
-{
- irq_desc_t *desc;
- struct irqaction **p;
- unsigned long flags;
-
- if (irq >= NR_IRQS)
- return;
-
- desc = irq_descp(irq);
- spin_lock_irqsave(&desc->lock,flags);
-#ifdef XEN
- if (desc->action) {
- struct irqaction * action = desc->action;
- desc->action = NULL;
-#else
- p = &desc->action;
- for (;;) {
- struct irqaction * action = *p;
- if (action) {
- struct irqaction **pp = p;
- p = &action->next;
- if (action->dev_id != dev_id)
- continue;
-
- /* Found it - now remove it from the list of entries */
- *pp = action->next;
- if (!desc->action) {
-#endif
- desc->status |= IRQ_DISABLED;
- desc->handler->shutdown(irq);
-#ifndef XEN
- }
-#endif
- spin_unlock_irqrestore(&desc->lock,flags);
-
-		/* Wait to make sure it's not being used on another CPU */
- synchronize_irq(irq);
- xfree(action);
- return;
- }
- printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
- spin_unlock_irqrestore(&desc->lock,flags);
-#ifndef XEN
- return;
- }
-#endif
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * IRQ autodetection code..
- *
- * This depends on the fact that any interrupt that
- * comes in on to an unassigned handler will get stuck
- * with "IRQ_WAITING" cleared and the interrupt
- * disabled.
- */
-
-static DECLARE_MUTEX(probe_sem);
-
-/**
- * probe_irq_on - begin an interrupt autodetect
- *
- * Commence probing for an interrupt. The interrupts are scanned
- * and a mask of potential interrupt lines is returned.
- *
- */
-
-#ifndef XEN
-unsigned long probe_irq_on(void)
-{
- unsigned int i;
- irq_desc_t *desc;
- unsigned long val;
- unsigned long delay;
-
- down(&probe_sem);
- /*
- * something may have generated an irq long ago and we want to
- * flush such a longstanding irq before considering it as spurious.
- */
- for (i = NR_IRQS-1; i > 0; i--) {
- desc = irq_descp(i);
-
- spin_lock_irq(&desc->lock);
- if (!desc->action)
- desc->handler->startup(i);
- spin_unlock_irq(&desc->lock);
- }
-
- /* Wait for longstanding interrupts to trigger. */
- for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
- /* about 20ms delay */ barrier();
-
- /*
- * enable any unassigned irqs
- * (we must startup again here because if a longstanding irq
- * happened in the previous stage, it may have masked itself)
- */
- for (i = NR_IRQS-1; i > 0; i--) {
- desc = irq_descp(i);
-
- spin_lock_irq(&desc->lock);
- if (!desc->action) {
- desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
- if (desc->handler->startup(i))
- desc->status |= IRQ_PENDING;
- }
- spin_unlock_irq(&desc->lock);
- }
-
- /*
- * Wait for spurious interrupts to trigger
- */
- for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
- /* about 100ms delay */ barrier();
-
- /*
- * Now filter out any obviously spurious interrupts
- */
- val = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_descp(i);
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- /* It triggered already - consider it spurious. */
- if (!(status & IRQ_WAITING)) {
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- } else
- if (i < 32)
- val |= 1 << i;
- }
- spin_unlock_irq(&desc->lock);
- }
-
- return val;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-/**
- * probe_irq_mask - scan a bitmap of interrupt lines
- * @val: mask of interrupts to consider
- *
- * Scan the ISA bus interrupt lines and return a bitmap of
- * active interrupts. The interrupt probe logic state is then
- * returned to its previous value.
- *
- * Note: we need to scan all the irq's even though we will
- * only return ISA irq numbers - just so that we reset them
- * all to a known state.
- */
-unsigned int probe_irq_mask(unsigned long val)
-{
- int i;
- unsigned int mask;
-
- mask = 0;
- for (i = 0; i < 16; i++) {
- irq_desc_t *desc = irq_descp(i);
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (!(status & IRQ_WAITING))
- mask |= 1 << i;
-
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- }
- spin_unlock_irq(&desc->lock);
- }
- up(&probe_sem);
-
- return mask & val;
-}
-EXPORT_SYMBOL(probe_irq_mask);
-
-/**
- * probe_irq_off - end an interrupt autodetect
- * @val: mask of potential interrupts (unused)
- *
- * Scans the unused interrupt lines and returns the line which
- * appears to have triggered the interrupt. If no interrupt was
- * found then zero is returned. If more than one interrupt is
- * found then minus the first candidate is returned to indicate
- * there is doubt.
- *
- * The interrupt probe logic state is returned to its previous
- * value.
- *
- * BUGS: When used in a module (which arguably shouldn't happen)
- * nothing prevents two IRQ probe callers from overlapping. The
- * results of this are non-optimal.
- */
-
-int probe_irq_off(unsigned long val)
-{
- int i, irq_found, nr_irqs;
-
- nr_irqs = 0;
- irq_found = 0;
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_descp(i);
- unsigned int status;
-
- spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (!(status & IRQ_WAITING)) {
- if (!nr_irqs)
- irq_found = i;
- nr_irqs++;
- }
- desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
- }
- spin_unlock_irq(&desc->lock);
- }
- up(&probe_sem);
-
- if (nr_irqs > 1)
- irq_found = -irq_found;
- return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-#endif
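
For reference, the probing sequence the autodetect helpers above are designed for (non-Xen builds only, since the whole block is under #ifndef XEN). The helper name is hypothetical and the device poke is left as a comment; the fragment assumes kernel headers rather than being standalone.

#include <linux/interrupt.h>

/* Hypothetical probe helper showing the intended call sequence. */
static int my_probe_irq(void)
{
	unsigned long mask;
	int irq;

	mask = probe_irq_on();      /* arm every currently unassigned line */

	/* ...poke the device here so it raises its interrupt exactly once... */

	irq = probe_irq_off(mask);  /* >0: the line; 0: none seen; <0: ambiguous */
	return irq;
}
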
-
-int setup_irq(unsigned int irq, struct irqaction * new)
-{
- int shared = 0;
- unsigned long flags;
- struct irqaction *old, **p;
- irq_desc_t *desc = irq_descp(irq);
-
-#ifndef XEN
- if (desc->handler == &no_irq_type)
- return -ENOSYS;
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & SA_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
-
- if (new->flags & SA_PERCPU_IRQ) {
- desc->status |= IRQ_PER_CPU;
- desc->handler = &irq_type_ia64_lsapic;
- }
-#endif
-
- /*
- * The following block of code has to be executed atomically
- */
- spin_lock_irqsave(&desc->lock,flags);
- p = &desc->action;
- if ((old = *p) != NULL) {
-#ifdef XEN
- if (1) {
- /* Can't share interrupts unless both agree to */
-#else
- if (!(old->flags & new->flags & SA_SHIRQ)) {
-#endif
- spin_unlock_irqrestore(&desc->lock,flags);
- return -EBUSY;
- }
-
-#ifndef XEN
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
-#endif
- }
-
- *p = new;
-
-#ifndef XEN
- if (!shared) {
-#else
- {
-#endif
- desc->depth = 0;
-#ifdef XEN
- desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
-#else
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-#endif
- desc->handler->startup(irq);
- }
- spin_unlock_irqrestore(&desc->lock,flags);
-
-#ifndef XEN
- register_irq_proc(irq);
-#endif
- return 0;
-}
-
-#ifndef XEN
-
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-
-#ifdef CONFIG_SMP
-
-static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
-
-static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-
-static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
-
-void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
-{
- cpumask_t mask = CPU_MASK_NONE;
-
- cpu_set(cpu_logical_id(hwid), mask);
-
- if (irq < NR_IRQS) {
- irq_affinity[irq] = mask;
- irq_redir[irq] = (char) (redir & 0xff);
- }
-}
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = sprintf(page, "%s", irq_redir[(long)data] ? "r " : "");
-
- len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]);
- if (count - len < 2)
- return -EINVAL;
- len += sprintf(page + len, "\n");
- return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- unsigned int irq = (unsigned long) data;
- int full_count = count, err;
- cpumask_t new_value, tmp;
-# define R_PREFIX_LEN 16
- char rbuf[R_PREFIX_LEN];
- int rlen;
- int prelen;
- irq_desc_t *desc = irq_descp(irq);
- unsigned long flags;
-
- if (!desc->handler->set_affinity)
- return -EIO;
-
- /*
- * If string being written starts with a prefix of 'r' or 'R'
- * and some limited number of spaces, set IA64_IRQ_REDIRECTED.
- * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't
- * all be trimmed as part of prelen, the untrimmed spaces will
- * cause the hex parsing to fail, and this write() syscall will
- * fail with EINVAL.
- */
-
- if (!count)
- return -EINVAL;
- rlen = min(sizeof(rbuf)-1, count);
- if (copy_from_user(rbuf, buffer, rlen))
- return -EFAULT;
- rbuf[rlen] = 0;
- prelen = 0;
- if (tolower(*rbuf) == 'r') {
- prelen = strspn(rbuf, "Rr ");
- irq |= IA64_IRQ_REDIRECTED;
- }
-
- err = cpumask_parse(buffer+prelen, count-prelen, new_value);
- if (err)
- return err;
-
- /*
- * Do not allow disabling IRQs completely - it's a too easy
- * way to make the system unusable accidentally :-) At least
- * one online CPU still has to be targeted.
- */
- cpus_and(tmp, new_value, cpu_online_map);
- if (cpus_empty(tmp))
- return -EINVAL;
-
- spin_lock_irqsave(&desc->lock, flags);
- pending_irq_cpumask[irq] = new_value;
- spin_unlock_irqrestore(&desc->lock, flags);
-
- return full_count;
-}
-
-void move_irq(int irq)
-{
- /* note - we hold desc->lock */
- cpumask_t tmp;
- irq_desc_t *desc = irq_descp(irq);
-
- if (!cpus_empty(pending_irq_cpumask[irq])) {
- cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
- if (unlikely(!cpus_empty(tmp))) {
-			desc->handler->set_affinity(irq, pending_irq_cpumask[irq]);
- }
- cpus_clear(pending_irq_cpumask[irq]);
- }
-}
-
-
-#endif /* CONFIG_SMP */
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-unsigned int vectors_in_migration[NR_IRQS];
-
-/*
- * Since cpu_online_map is already updated, we just need to check for
- * affinity that has zeros
- */
-static void migrate_irqs(void)
-{
- cpumask_t mask;
- irq_desc_t *desc;
- int irq, new_cpu;
-
- for (irq=0; irq < NR_IRQS; irq++) {
- desc = irq_descp(irq);
-
- /*
- * No handling for now.
- * TBD: Implement a disable function so we can now
- * tell CPU not to respond to these local intr sources.
- * such as ITV,CPEI,MCA etc.
- */
- if (desc->status == IRQ_PER_CPU)
- continue;
-
- cpus_and(mask, irq_affinity[irq], cpu_online_map);
- if (any_online_cpu(mask) == NR_CPUS) {
- /*
- * Save it for phase 2 processing
- */
- vectors_in_migration[irq] = irq;
-
- new_cpu = any_online_cpu(cpu_online_map);
- mask = cpumask_of_cpu(new_cpu);
-
- /*
-		 * All three are essential, currently WARN_ON.. maybe panic?
- */
- if (desc->handler && desc->handler->disable &&
-		    desc->handler->enable && desc->handler->set_affinity) {
- desc->handler->disable(irq);
- desc->handler->set_affinity(irq, mask);
- desc->handler->enable(irq);
- } else {
-			WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
-				!(desc->handler->enable) || !(desc->handler->set_affinity)));
- }
- }
- }
-}
-
-void fixup_irqs(void)
-{
- unsigned int irq;
- extern void ia64_process_pending_intr(void);
-
- ia64_set_itv(1<<16);
- /*
- * Phase 1: Locate irq's bound to this cpu and
- * relocate them for cpu removal.
- */
- migrate_irqs();
-
- /*
- * Phase 2: Perform interrupt processing for all entries reported in
- * local APIC.
- */
- ia64_process_pending_intr();
-
- /*
- * Phase 3: Now handle any interrupts not captured in local APIC.
-	 * This is to account for cases that device interrupted during the time the
- * rte was being disabled and re-programmed.
- */
- for (irq=0; irq < NR_IRQS; irq++) {
- if (vectors_in_migration[irq]) {
- vectors_in_migration[irq]=0;
- do_IRQ(irq, NULL);
- }
- }
-
- /*
- * Now let processor die. We do irq disable and max_xtp() to
- * ensure there is no more interrupts routed to this processor.
- * But the local timer interrupt can have 1 pending which we
- * take care in timer_interrupt().
- */
- max_xtp();
- local_irq_disable();
-}
-#endif
-
-#ifndef XEN
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
- if (count - len < 2)
- return -EINVAL;
- len += sprintf(page + len, "\n");
- return len;
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- cpumask_t *mask = (cpumask_t *)data;
- unsigned long full_count = count, err;
- cpumask_t new_value;
-
- err = cpumask_parse(buffer, count, new_value);
- if (err)
- return err;
-
- *mask = new_value;
- return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
- char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || irq_dir[irq])
- return;
-
- memset(name, 0, MAX_NAMELEN);
- sprintf(name, "%d", irq);
-
- /* create /proc/irq/1234 */
- irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-#ifdef CONFIG_SMP
- {
- struct proc_dir_entry *entry;
-
- /* create /proc/irq/1234/smp_affinity */
- entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
- if (entry) {
- entry->nlink = 1;
- entry->data = (void *)(long)irq;
- entry->read_proc = irq_affinity_read_proc;
- entry->write_proc = irq_affinity_write_proc;
- }
-
- smp_affinity_entry[irq] = entry;
- }
-#endif
-}
-
-cpumask_t prof_cpu_mask = CPU_MASK_ALL;
-
-void init_irq_proc (void)
-{
- struct proc_dir_entry *entry;
- int i;
-
- /* create /proc/irq */
- root_irq_dir = proc_mkdir("irq", 0);
-
- /* create /proc/irq/prof_cpu_mask */
- entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-
- if (!entry)
- return;
-
- entry->nlink = 1;
- entry->data = (void *)&prof_cpu_mask;
- entry->read_proc = prof_cpu_mask_read_proc;
- entry->write_proc = prof_cpu_mask_write_proc;
-
- /*
- * Create entries for all existing IRQs.
- */
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_descp(i)->handler == &no_irq_type)
- continue;
- register_irq_proc(i);
- }
-}
-#endif
-
-
-#ifdef XEN
-/*
- * HANDLING OF GUEST-BOUND PHYSICAL IRQS
- */
-
-#define IRQ_MAX_GUESTS 7
-typedef struct {
- u8 nr_guests;
- u8 in_flight;
- u8 shareable;
- struct domain *guest[IRQ_MAX_GUESTS];
-} irq_guest_action_t;
-
-static void __do_IRQ_guest(int irq)
-{
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
- struct domain *d;
- int i;
-
- for ( i = 0; i < action->nr_guests; i++ )
- {
- d = action->guest[i];
- if ( !test_and_set_bit(irq, &d->pirq_mask) )
- action->in_flight++;
- send_guest_pirq(d, irq);
- }
-}
-
-int pirq_guest_unmask(struct domain *d)
-{
- irq_desc_t *desc;
- int i, j, pirq;
- u32 m;
- shared_info_t *s = d->shared_info;
-
- for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ )
- {
- m = d->pirq_mask[i];
- while ( (j = ffs(m)) != 0 )
- {
- m &= ~(1 << --j);
- pirq = (i << 5) + j;
- desc = &irq_desc[pirq];
- spin_lock_irq(&desc->lock);
- if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
- test_and_clear_bit(pirq, &d->pirq_mask) &&
- (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
- desc->handler->end(pirq);
- spin_unlock_irq(&desc->lock);
- }
- }
-
- return 0;
-}
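
For reference, a user-space sketch of how pirq_guest_unmask() above recovers pending pirq numbers from the per-domain 32-bit pirq_mask words: ffs() yields a 1-based bit index, the bit is cleared, and the pirq is reassembled as (word << 5) + bit. The array size and bit pattern are invented for the example.

#include <assert.h>
#include <stdio.h>
#include <strings.h>   /* ffs() */

int main(void)
{
	/* made-up mask: pirqs 0, 2 and 48 pending */
	unsigned int pirq_mask[2] = { 0x00000005, 0x00010000 };
	int i, j, found = 0;

	for (i = 0; i < 2; i++) {
		unsigned int m = pirq_mask[i];

		while ((j = ffs(m)) != 0) {
			m &= ~(1u << --j);               /* clear the bit just found */
			printf("pending pirq %d\n", (i << 5) + j);
			found++;
		}
	}
	assert(found == 3);
	return 0;
}
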
-
-int pirq_guest_bind(struct vcpu *d, int irq, int will_share)
-{
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action;
- unsigned long flags;
- int rc = 0;
-
- if ( !IS_CAPABLE_PHYSDEV(d->domain) )
- return -EPERM;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- action = (irq_guest_action_t *)desc->action;
-
- if ( !(desc->status & IRQ_GUEST) )
- {
- if ( desc->action != NULL )
- {
- DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
- irq, desc->action->name);
- rc = -EBUSY;
- goto out;
- }
-
- action = xmalloc(irq_guest_action_t);
- if ( (desc->action = (struct irqaction *)action) == NULL )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
- rc = -ENOMEM;
- goto out;
- }
-
- action->nr_guests = 0;
- action->in_flight = 0;
- action->shareable = will_share;
-
- desc->depth = 0;
- desc->status |= IRQ_GUEST;
- desc->status &= ~IRQ_DISABLED;
- desc->handler->startup(irq);
-
- /* Attempt to bind the interrupt target to the correct CPU. */
-#if 0 /* FIXME CONFIG_SMP ??? */
- if ( desc->handler->set_affinity != NULL )
- desc->handler->set_affinity(
- irq, apicid_to_phys_cpu_present(d->processor));
-#endif
- }
- else if ( !will_share || !action->shareable )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n",
- irq);
- rc = -EBUSY;
- goto out;
- }
-
- if ( action->nr_guests == IRQ_MAX_GUESTS )
- {
- DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
- rc = -EBUSY;
- goto out;
- }
-
- action->guest[action->nr_guests++] = d;
-
- out:
- spin_unlock_irqrestore(&desc->lock, flags);
- return rc;
-}
-
-int pirq_guest_unbind(struct domain *d, int irq)
-{
- irq_desc_t *desc = &irq_desc[irq];
- irq_guest_action_t *action;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&desc->lock, flags);
-
- action = (irq_guest_action_t *)desc->action;
-
- if ( test_and_clear_bit(irq, &d->pirq_mask) &&
- (--action->in_flight == 0) )
- desc->handler->end(irq);
-
- if ( action->nr_guests == 1 )
- {
- desc->action = NULL;
- xfree(action);
- desc->depth = 1;
- desc->status |= IRQ_DISABLED;
- desc->status &= ~IRQ_GUEST;
- desc->handler->shutdown(irq);
- }
- else
- {
- i = 0;
- while ( action->guest[i] != d )
- i++;
- memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
- action->nr_guests--;
- }
-
- spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
-}
-
-#endif
-
-#ifdef XEN
-#ifdef IA64
-// this is a temporary hack until real console input is implemented
-irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs *regs)
-{
- domain_pend_keyboard_interrupt(irq);
-}
-
-void serial_input_init(void)
-{
- int retval;
- int irq = 0x30; // FIXME
-
-	retval = request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL);
- if (retval) {
- printk("serial_input_init: broken request_irq call\n");
- while(1);
- }
-}
-#endif
-#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1975 +0,0 @@
-
-#ifdef XEN
-//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled??
-// these are all hacked out for now as the entire IVT
-// will eventually be replaced... just want to use it
-// for startup code to handle TLB misses
-//#define ia64_leave_kernel 0
-//#define ia64_ret_from_syscall 0
-//#define ia64_handle_irq 0
-//#define ia64_fault 0
-#define ia64_illegal_op_fault 0
-#define ia64_prepare_handle_unaligned 0
-#define ia64_bad_break 0
-#define ia64_trace_syscall 0
-#define sys_call_table 0
-#define sys_ni_syscall 0
-#include <asm/vhpt.h>
-#endif
-/*
- * arch/ia64/kernel/ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- * Stephane Eranian <eranian@xxxxxxxxxx>
- * David Mosberger <davidm@xxxxxxxxxx>
- * Copyright (C) 2000, 2002-2003 Intel Co
- * Asit Mallick <asit.k.mallick@xxxxxxxxx>
- * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
- * Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
- * Fenghua Yu <fenghua.yu@xxxxxxxxx>
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
- * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT.
- */
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for critical
- * interruptions like TLB misses.
- *
- * For each entry, the comment is as follows:
- *
- * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- * entry offset ----/ / / / /
- * entry number ---------/ / / /
- * size of the entry -------------/ / /
- * vector name -------------------------------------/ /
- * interruptions triggering this vector ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
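
For reference, a quick arithmetic check of the layout described above, assuming the usual 16-byte IA-64 bundle: 20 long entries of 64 bundles plus 48 short entries of 16 bundles is exactly the 32KB the comment claims, and entry 7 indeed lands at offset 0x1c00.

#include <assert.h>

int main(void)
{
	const int bundle = 16;                     /* bytes per IA-64 bundle (assumed) */
	int long_part  = 20 * 64 * bundle;         /* 0x5000: 20 entries x 64 bundles */
	int short_part = 48 * 16 * bundle;         /* 0x3000: 48 entries x 16 bundles */

	assert(long_part + short_part == 32 * 1024);   /* the 32KB claimed above */
	assert(7 * 64 * bundle == 0x1c00);             /* matches "0x1c00 Entry 7" in the example */
	return 0;
}
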
-
-#include <linux/config.h>
-
-#include <asm/asmmacro.h>
-#include <asm/break.h>
-#include <asm/ia32.h>
-#include <asm/kregs.h>
-#include <asm/offsets.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/errno.h>
-
-#if 1
-# define PSR_DEFAULT_BITS psr.ac
-#else
-# define PSR_DEFAULT_BITS 0
-#endif
-
-#if 0
- /*
- * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
- * needed for something else before enabling this...
- */
-# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
-#else
-# define DBG_FAULT(i)
-#endif
-
-#define MINSTATE_VIRT /* needed by minstate.h */
-#include "minstate.h"
-
-#define FAULT(n)						\
-	mov r31=pr;						\
-	mov r19=n;;	/* prepare to save predicates */	\
-	br.sptk.many dispatch_to_fault_handler
-
-#ifdef XEN
-#define REFLECT(n)						\
-	mov r31=pr;						\
-	mov r19=n;;	/* prepare to save predicates */	\
-	br.sptk.many dispatch_reflection
-#endif
-
- .section .text.ivt,"ax"
-
- .align 32768 // align on 32KB boundary
- .global ia64_ivt
-ia64_ivt:
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(vhpt_miss)
- DBG_FAULT(0)
- /*
- * The VHPT vector is invoked when the TLB entry for the virtual page table
- * is missing. This happens only as a result of a previous
- * (the "original") TLB miss, which may either be caused by an instruction
- * fetch or a data access (or non-access).
- *
- * What we do here is normal TLB miss handling for the _original_ miss, followed
- * by inserting the TLB entry for the virtual page table page that the VHPT
- * walker was attempting to access. The latter gets inserted as long
- * as both L1 and L2 have valid mappings for the faulting address.
- * The TLB entry for the original miss gets inserted only if
- * the L3 entry indicates that the page is present.
- *
- * do_page_fault gets invoked in the following cases:
- * - the faulting virtual address uses unimplemented address bits
- * - the faulting virtual address has no L1, L2, or L3 mapping
- */
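
For reference, a simplified C rendering of the three-level walk that the vhpt_miss handler below performs in assembly, ignoring the region-5/swapper_pg_dir special case and CONFIG_HUGETLB_PAGE, and treating each upper-level entry as a plain pointer to the next table. The shift definitions and the pte_present() test are stand-ins for the real asm-ia64 ones.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

#define PAGE_SHIFT  14                        /* 16KB pages, as an example       */
#define PMD_SHIFT   (PAGE_SHIFT + (PAGE_SHIFT - 3))
#define PGDIR_SHIFT (PMD_SHIFT + (PAGE_SHIFT - 3))
#define PTRS_PER    (1UL << (PAGE_SHIFT - 3)) /* 8-byte entries per level        */

static int pte_present(uint64_t pte) { return pte & 1; }   /* stand-in for _PAGE_P */

/* Returns the L3 PTE for addr, or 0 if any level is missing (-> page_fault). */
static uint64_t walk(uint64_t *pgd, uint64_t addr)
{
	uint64_t *pmd, *pte, e;

	e = pgd[(addr >> PGDIR_SHIFT) & (PTRS_PER - 1)];   /* L1 */
	if (!e)
		return 0;
	pmd = (uint64_t *)(uintptr_t)e;
	e = pmd[(addr >> PMD_SHIFT) & (PTRS_PER - 1)];     /* L2 */
	if (!e)
		return 0;
	pte = (uint64_t *)(uintptr_t)e;
	e = pte[(addr >> PAGE_SHIFT) & (PTRS_PER - 1)];    /* L3 */
	return pte_present(e) ? e : 0;
}

int main(void)
{
	static uint64_t l3[PTRS_PER], l2[PTRS_PER], l1[PTRS_PER];
	uint64_t addr = 0x12345678;

	l3[(addr >> PAGE_SHIFT) & (PTRS_PER - 1)]  = 0xabc001;  /* present PTE */
	l2[(addr >> PMD_SHIFT) & (PTRS_PER - 1)]   = (uint64_t)(uintptr_t)l3;
	l1[(addr >> PGDIR_SHIFT) & (PTRS_PER - 1)] = (uint64_t)(uintptr_t)l2;

	assert(walk(l1, addr) == 0xabc001);
	return 0;
}
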
- mov r16=cr.ifa // get address that caused the
TLB miss
-#ifdef CONFIG_HUGETLB_PAGE
- movl r18=PAGE_SHIFT
- mov r25=cr.itir
-#endif
- ;;
- rsm psr.dt // use physical addressing for
data
- mov r31=pr // save the predicate registers
-#ifdef XEN
- movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
-#else
- mov r19=IA64_KR(PT_BASE) // get page table base address
-#endif
- shl r21=r16,3 // shift bit 60 into sign bit
- shr.u r17=r16,61 // get the region number into
r17
- ;;
- shr r22=r21,3
-#ifdef CONFIG_HUGETLB_PAGE
- extr.u r26=r25,2,6
- ;;
- cmp.ne p8,p0=r18,r26
- sub r27=r26,r18
- ;;
-(p8) dep r25=r18,r25,2,6
-(p8) shr r22=r22,r27
-#endif
- ;;
- cmp.eq p6,p7=5,r17 // is IFA pointing into to
region 5?
- shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the
faulting address
- ;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in
place
-
- srlz.d
- LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at
swapper_pg_dir
-
- .pred.rel "mutex", p6, p7
-(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) <<
7) | IFA(33,39))*8)
- cmp.eq p7,p6=0,r21 // unused address bits all
zeroes?
- shr.u r18=r22,PMD_SHIFT // shift L2 index into position
- ;;
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page
table entry
- ;;
-(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
- dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page
table entry
- ;;
-(p7) ld8 r18=[r21] // read the L3 PTE
- mov r19=cr.isr // cr.isr bit 0 tells us if
this is an insn miss
- ;;
-(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
- mov r22=cr.iha // get the VHPT address that
caused the TLB miss
- ;; // avoid RAW on p7
-(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB
miss?
- dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page
address
- ;;
-(p10) itc.i r18 // insert the instruction TLB
entry
-(p11) itc.d r18 // insert the data TLB entry
-(p6) br.cond.spnt.many page_fault // handle bad address/page not
present (page fault)
- mov cr.ifa=r22
-
-#ifdef CONFIG_HUGETLB_PAGE
-(p8) mov cr.itir=r25 // change to default page-size
for VHPT
-#endif
-
- /*
- * Now compute and insert the TLB entry for the virtual page table. We
never
- * execute in a page table page so there is no need to set the
exception deferral
- * bit.
- */
- adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
- ;;
-(p7) itc.d r24
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- /*
- * Re-check L2 and L3 pagetable. If they changed, we may have received
a ptc.g
- * between reading the pagetable and the "itc". If so, flush the entry
we
- * inserted and retry.
- */
- ld8 r25=[r21] // read L3 PTE again
- ld8 r26=[r17] // read L2 entry again
- ;;
- cmp.ne p6,p7=r26,r20 // did L2 entry change
- mov r27=PAGE_SHIFT<<2
- ;;
-(p6) ptc.l r22,r27 // purge PTE page translation
-(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
- ;;
-(p6) ptc.l r16,r27 // purge translation
-#endif
-
- mov pr=r31,-1 // restore predicate registers
- rfi
-END(vhpt_miss)
-
- .org ia64_ivt+0x400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(itlb_miss)
- DBG_FAULT(1)
-#ifdef XEN
- VHPT_CCHAIN_LOOKUP(itlb_miss,i)
-#ifdef VHPT_GLOBAL
- br.cond.sptk page_fault
- ;;
-#endif
-#endif
- /*
- * The ITLB handler accesses the L3 PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the L3 PTE read
- * and go on normally after that.
- */
- mov r16=cr.ifa // get virtual address
- mov r29=b0 // save b0
- mov r31=pr // save predicates
-.itlb_fault:
- mov r17=cr.iha // get virtual address of L3 PTE
- movl r30=1f // load nested fault
continuation point
- ;;
-1: ld8 r18=[r17] // read L3 PTE
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
- ;;
- itc.i r18
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r19=[r17] // read L3 PTE again and see if
same
- mov r20=PAGE_SHIFT<<2 // setup page size for purge
- ;;
- cmp.ne p7,p0=r18,r19
- ;;
-(p7) ptc.l r16,r20
-#endif
- mov pr=r31,-1
- rfi
-END(itlb_miss)
-
- .org ia64_ivt+0x0800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(dtlb_miss)
- DBG_FAULT(2)
-#ifdef XEN
- VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
-#ifdef VHPT_GLOBAL
- br.cond.sptk page_fault
- ;;
-#endif
-#endif
- /*
- * The DTLB handler accesses the L3 PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
- * mode, walk the page table, and then re-execute the L3 PTE read
- * and go on normally after that.
- */
- mov r16=cr.ifa // get virtual address
- mov r29=b0 // save b0
- mov r31=pr // save predicates
-dtlb_fault:
- mov r17=cr.iha // get virtual address of L3 PTE
- movl r30=1f // load nested fault
continuation point
- ;;
-1: ld8 r18=[r17] // read L3 PTE
- ;;
- mov b0=r29
- tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
-(p6) br.cond.spnt page_fault
- ;;
- itc.d r18
- ;;
-#ifdef CONFIG_SMP
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r19=[r17] // read L3 PTE again and see if
same
- mov r20=PAGE_SHIFT<<2 // setup page size for purge
- ;;
- cmp.ne p7,p0=r18,r19
- ;;
-(p7) ptc.l r16,r20
-#endif
- mov pr=r31,-1
- rfi
-END(dtlb_miss)
-
- .org ia64_ivt+0x0c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(alt_itlb_miss)
- DBG_FAULT(3)
-#ifdef XEN
-//#ifdef VHPT_GLOBAL
-// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
-// br.cond.sptk page_fault
-// ;;
-//#endif
-#endif
-#ifdef XEN
- mov r31=pr
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
-late_alt_itlb_miss:
- movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
-#else
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r31=pr
- ;;
-#endif
-#ifdef CONFIG_DISABLE_VHPT
- shr.u r22=r16,61 // get the region number into
r21
- ;;
- cmp.gt p8,p0=6,r22 // user mode
- ;;
-(p8) thash r17=r16
- ;;
-(p8) mov cr.iha=r17
-(p8) mov r29=b0 // save b0
-(p8) br.cond.dptk .itlb_fault
-#endif
- extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
-#ifdef XEN
- shr.u r18=r16,55 // move address bit 59 to bit 4
- ;;
- and r18=0x10,r18 // bit 4=address-bit(59)
-#else
- shr.u r18=r16,57 // move address bit 61 to bit 4
- ;;
- andcm r18=0x10,r18 // bit 4=~address-bit(61)
-#endif
- cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
- or r19=r17,r19 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to
region 6
-(p8) br.cond.spnt page_fault
- ;;
- itc.i r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
-END(alt_itlb_miss)
-
- .org ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(alt_dtlb_miss)
- DBG_FAULT(4)
-#ifdef XEN
-//#ifdef VHPT_GLOBAL
-// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
-// br.cond.sptk page_fault
-// ;;
-//#endif
-#endif
-#ifdef XEN
- mov r31=pr
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
-late_alt_dtlb_miss:
- movl r17=PAGE_KERNEL
- mov r20=cr.isr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r21=cr.ipsr
- ;;
-#else
-#endif
-#ifdef CONFIG_DISABLE_VHPT
- shr.u r22=r16,61 // get the region number into
r21
- ;;
- cmp.gt p8,p0=6,r22 // access to region 0-5
- ;;
-(p8) thash r17=r16
- ;;
-(p8) mov cr.iha=r17
-(p8) mov r29=b0 // save b0
-(p8) br.cond.dptk dtlb_fault
-#endif
- extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
- and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
- tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
-#ifdef XEN
- shr.u r18=r16,55 // move address bit 59 to bit 4
- and r19=r19,r16 // clear ed, reserved bits, and
PTE control bits
- tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
- ;;
- and r18=0x10,r18 // bit 4=address-bit(59)
-#else
- shr.u r18=r16,57 // move address bit 61 to bit 4
- and r19=r19,r16 // clear ed, reserved bits, and
PTE control bits
- tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
- ;;
- andcm r18=0x10,r18 // bit 4=~address-bit(61)
-#endif
- cmp.ne p8,p0=r0,r23
-(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
-(p8) br.cond.spnt page_fault
-#ifdef XEN
- ;;
- // Test for Xen address, if not handle via page_fault
- // note that 0xf000 (cached) and 0xe800 (uncached) addresses
- // should be OK.
- extr.u r22=r16,59,5;;
- cmp.eq p8,p0=0x1e,r22
-(p8) br.cond.spnt 1f;;
- cmp.ne p8,p0=0x1d,r22
-(p8) br.cond.sptk page_fault ;;
-1:
-#endif
-
- dep r21=-1,r21,IA64_PSR_ED_BIT,1
- or r19=r19,r17 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to
region 6
-(p6) mov cr.ipsr=r21
- ;;
-(p7) itc.d r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
-END(alt_dtlb_miss)
-
- .org ia64_ivt+0x1400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(nested_dtlb_miss)
- /*
- * In the absence of kernel bugs, we get here when the virtually mapped linear
- * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
- * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
- * table is missing, a nested TLB miss fault is triggered and control is
- * transferred to this point. When this happens, we lookup the pte for the
- * faulting address by walking the page table in physical mode and return to the
- * continuation point passed in register r30 (or call page_fault if the address is
- * not mapped).
- *
- * Input: r16: faulting address
- * r29: saved b0
- * r30: continuation address
- * r31: saved pr
- *
- * Output: r17: physical address of L3 PTE of faulting address
- * r29: saved b0
- * r30: continuation address
- * r31: saved pr
- *
- * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
- */
- rsm psr.dt // switch to using physical
data addressing
-#ifdef XEN
- movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
-#else
- mov r19=IA64_KR(PT_BASE) // get the page table base
address
-#endif
- shl r21=r16,3 // shift bit 60 into sign bit
- ;;
- shr.u r17=r16,61 // get the region number into
r17
- ;;
- cmp.eq p6,p7=5,r17 // is faulting address in
region 5?
- shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting
address
- ;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in
place
-
- srlz.d
- LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at
swapper_pg_dir
-
- .pred.rel "mutex", p6, p7
-(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) <<
7) | IFA(33,39))*8)
- cmp.eq p7,p6=0,r21 // unused address bits all
zeroes?
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position
- ;;
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page
table entry
- ;;
-(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page
table entry
-(p6) br.cond.spnt page_fault
- mov b0=r30
- br.sptk.many b0 // return to continuation point
-END(nested_dtlb_miss)
-
- .org ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(ikey_miss)
-#ifdef XEN
- REFLECT(6)
-#endif
- DBG_FAULT(6)
- FAULT(6)
-END(ikey_miss)
-
-
//-----------------------------------------------------------------------------------
-	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
- ssm psr.dt
- ;;
- srlz.i
- ;;
- SAVE_MIN_WITH_COVER
-#ifdef XEN
- alloc r15=ar.pfs,0,0,4,0
- mov out0=cr.ifa
- mov out1=cr.isr
- mov out3=cr.itir
-#else
- alloc r15=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=cr.isr
-#endif
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- movl r14=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.many b6=ia64_do_page_fault // ignore return address
-END(page_fault)
-
- .org ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(dkey_miss)
-#ifdef XEN
- REFLECT(7)
-#endif
- DBG_FAULT(7)
- FAULT(7)
-END(dkey_miss)
-
- .org ia64_ivt+0x2000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(dirty_bit)
-#ifdef XEN
- REFLECT(8)
-#endif
- DBG_FAULT(8)
- /*
- * What we do here is to simply turn on the dirty bit in the PTE. We need to
- * update both the page-table and the TLB entry. To efficiently access the PTE,
- * we address it through the virtual page table. Most likely, the TLB entry for
- * the relevant virtual page table page is still present in the TLB so we can
- * normally do this without additional TLB misses. In case the necessary virtual
- * page table TLB entry isn't present, we take a nested TLB miss hit where we look
- * up the physical address of the L3 PTE and then continue at label 1 below.
- */
- mov r16=cr.ifa // get the address that caused
the fault
- movl r30=1f // load continuation point in
case of nested fault
- ;;
- thash r17=r16 // compute virtual address of
L3 PTE
- mov r29=b0 // save b0 in case of nested
fault
- mov r31=pr // save pr
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed
bits
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
- mov r24=PAGE_SHIFT<<2
- ;;
- cmp.eq p6,p7=r26,r18
- ;;
-(p6) itc.d r25 // install updated PTE
- ;;
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
- ;;
-(p7) ptc.l r16,r24
- mov b0=r29 // restore b0
- mov ar.ccv=r28
-#else
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed
bits
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
-#endif
- mov pr=r31,-1 // restore pr
- rfi
-END(dirty_bit)
-
- .org ia64_ivt+0x2400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(iaccess_bit)
-#ifdef XEN
- mov r31=pr;
- mov r16=cr.isr
- mov r17=cr.ifa
- mov r19=9
- movl r20=0x2400
- br.sptk.many fast_access_reflect;;
-#endif
- DBG_FAULT(9)
- // Like Entry 8, except for instruction access
- mov r16=cr.ifa // get the address that caused
the fault
- movl r30=1f // load continuation point in
case of nested fault
- mov r31=pr // save predicates
-#ifdef CONFIG_ITANIUM
- /*
- * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
- */
- mov r17=cr.ipsr
- ;;
- mov r18=cr.iip
- tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
- ;;
-(p6) mov r16=r18 // if so, use cr.iip instead of
cr.ifa
-#endif /* CONFIG_ITANIUM */
- ;;
- thash r17=r16 // compute virtual address of
L3 PTE
- mov r29=b0 // save b0 in case of nested
fault)
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;;
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_A,r18 // set the accessed bit
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
- mov r24=PAGE_SHIFT<<2
- ;;
- cmp.eq p6,p7=r26,r18
- ;;
-(p6) itc.i r25 // install updated PTE
- ;;
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
-
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
- ;;
-(p7) ptc.l r16,r24
- mov b0=r29 // restore b0
- mov ar.ccv=r28
-#else /* !CONFIG_SMP */
- ;;
-1: ld8 r18=[r17]
- ;;
- or r18=_PAGE_A,r18 // set the accessed bit
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.i r18 // install updated PTE
-#endif /* !CONFIG_SMP */
- mov pr=r31,-1
- rfi
-END(iaccess_bit)
-
- .org ia64_ivt+0x2800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(daccess_bit)
-#ifdef XEN
- mov r31=pr;
- mov r16=cr.isr
- mov r17=cr.ifa
- mov r19=10
- movl r20=0x2800
- br.sptk.many fast_access_reflect;;
-#endif
- DBG_FAULT(10)
- // Like Entry 8, except for data access
- mov r16=cr.ifa // get the address that caused
the fault
- movl r30=1f // load continuation point in
case of nested fault
- ;;
- thash r17=r16 // compute virtual address of
L3 PTE
- mov r31=pr
- mov r29=b0 // save b0 in case of nested
fault)
-#ifdef CONFIG_SMP
- mov r28=ar.ccv // save ar.ccv
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_A,r18 // set the dirty bit
- ;;
- cmpxchg8.acq r26=[r17],r25,ar.ccv
- mov r24=PAGE_SHIFT<<2
- ;;
- cmp.eq p6,p7=r26,r18
- ;;
-(p6) itc.d r25 // install updated PTE
- /*
- * Tell the assemblers dependency-violation checker that the above
"itc" instructions
- * cannot possibly affect the following loads:
- */
- dv_serialize_data
- ;;
- ld8 r18=[r17] // read PTE again
- ;;
- cmp.eq p6,p7=r18,r25 // is it same as the newly
installed
- ;;
-(p7) ptc.l r16,r24
- mov ar.ccv=r28
-#else
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_A,r18 // set the accessed bit
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
-#endif
- mov b0=r29 // restore b0
- mov pr=r31,-1
- rfi
-END(daccess_bit)
-
- .org ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(break_fault)
- /*
- * The streamlined system call entry/exit paths only save/restore the initial part
- * of pt_regs. This implies that the callers of system-calls must adhere to the
- * normal procedure calling conventions.
- *
- *   Registers to be saved & restored:
- *	CR registers: cr.ipsr, cr.iip, cr.ifs
- *	AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
- *	others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
- *   Registers to be restored only:
- *	r8-r11: output value from the system call.
- *
- * During system call exit, scratch registers (including r15) are modified/cleared
- * to prevent leaking bits from kernel to user level.
- */
- DBG_FAULT(11)
-#ifdef XEN
- mov r16=cr.isr
- mov r17=cr.iim
- mov r31=pr
- ;;
- movl r18=XSI_PSR_IC
- ;;
- ld8 r19=[r18]
- ;;
-	cmp.eq p7,p0=r0,r17			// is this a pseudo-cover?
-(p7) br.spnt.many dispatch_privop_fault
- ;;
- // if vpsr.ic is off, we have a hyperprivop
- // A hyperprivop is hand-coded assembly with psr.ic off
- // which means no calls, no use of r1-r15 and no memory accesses
- // except to pinned addresses!
- cmp4.eq p7,p0=r0,r19
-(p7) br.sptk.many fast_hyperprivop
- ;;
- movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r22 = [r22]
- ;;
- adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;;
- ld4 r23=[r22];;
- cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm?
-(p6) br.spnt.many dispatch_break_fault
- ;;
- br.sptk.many fast_break_reflect
- ;;
-#endif
- movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
- ld8 r16=[r16]
- mov r17=cr.iim
- mov r18=__IA64_BREAK_SYSCALL
- mov r21=ar.fpsr
- mov r29=cr.ipsr
- mov r19=b6
- mov r25=ar.unat
- mov r27=ar.rsc
- mov r26=ar.pfs
- mov r28=cr.iip
-#ifndef XEN
- mov r31=pr // prepare to save predicates
-#endif
- mov r20=r1
- ;;
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
-(p7) br.cond.spnt non_syscall
- ;;
-	ld1 r17=[r16]				// load current->thread.on_ustack flag
-	st1 [r16]=r0				// clear current->thread.on_ustack flag
-	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
- ;;
- invala
-
- /* adjust return address so we skip over the break instruction: */
-
- extr.u r8=r29,41,2 // extract ei field from cr.ipsr
- ;;
- cmp.eq p6,p7=2,r8 // isr.ei==2?
-	mov r2=r1				// setup r2 for ia64_syscall_setup
-	;;
-(p6)	mov r8=0				// clear ei to 0
-(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
-(p7)	adds r8=1,r8				// increment ei to next slot
-	;;
-	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
- dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
- ;;
-
- // switch from user to kernel RBS:
- MINSTATE_START_SAVE_MIN_VIRT
- br.call.sptk.many b7=ia64_syscall_setup
- ;;
- MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- mov r3=NR_syscalls - 1
- ;;
-(p15) ssm psr.i // restore psr.i
- // p10==true means out registers are more than 8 or r15's Nat is true
-(p10) br.cond.spnt.many ia64_ret_from_syscall
- ;;
- movl r16=sys_call_table
-
- adds r15=-1024,r15 // r15 contains the syscall
number---subtract 1024
- movl r2=ia64_ret_from_syscall
- ;;
-	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
- mov rp=r2 // set the real return addr
- ;;
-(p6)	ld8 r20=[r20]				// load address of syscall entry point
-(p7) movl r20=sys_ni_syscall
-
- add r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-	ld4 r2=[r2]				// r2 = current_thread_info()->flags
- ;;
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
- ;;
- cmp.eq p8,p0=r2,r0
- mov b6=r20
- ;;
-(p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
- // NOT REACHED
-END(break_fault)
-
- .org ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(interrupt)
- DBG_FAULT(12)
- mov r31=pr // prepare to save predicates
- ;;
-#ifdef XEN
- mov r30=cr.ivr // pass cr.ivr as first arg
- // FIXME: this is a hack... use cpuinfo.ksoftirqd because its
- // not used anywhere else and we need a place to stash ivr and
- // there's no registers available unused by SAVE_MIN/REST
- movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
- st8 [r29]=r30;;
- movl r28=slow_interrupt;;
- mov r29=rp;;
- mov rp=r28;;
- br.cond.sptk.many fast_tick_reflect
- ;;
-slow_interrupt:
- mov rp=r29;;
-#endif
- SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
- SAVE_REST
- ;;
- alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-#ifdef XEN
- movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
- ld8 out0=[out0];;
-#else
- mov out0=cr.ivr // pass cr.ivr as first arg
-#endif
- add out1=16,sp // pass pointer to pt_regs as second arg
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.call.sptk.many b6=ia64_handle_irq
-END(interrupt)
-
- .org ia64_ivt+0x3400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
- DBG_FAULT(13)
- FAULT(13)
-
-#ifdef XEN
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply moved the following code to a more
- // suitable spot...
-
-GLOBAL_ENTRY(dispatch_break_fault)
- SAVE_MIN_WITH_COVER
- ;;
-dispatch_break_fault_post_save:
- alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- adds out1=16,sp
- mov out2=cr.isr // FIXME: pity to make this slow access twice
- mov out3=cr.iim // FIXME: pity to make this slow access twice
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.sptk.many ia64_prepare_handle_break
-END(dispatch_break_fault)
-#endif
-
- .org ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
- DBG_FAULT(14)
- FAULT(14)
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise.  If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- *
- * ia64_syscall_setup() is a separate subroutine so that it can
- * allocate stacked registers so it can safely demine any
- * potential NaT values from the input registers.
- *
- * On entry:
- * - executing on bank 0 or bank 1 register set (doesn't matter)
- * - r1: stack pointer
- * - r2: current task pointer
- * - r3: preserved
- * - r11: original contents (saved ar.pfs to be saved)
- * - r12: original contents (sp to be saved)
- * - r13: original contents (tp to be saved)
- * - r15: original contents (syscall # to be saved)
- * - r18: saved bsp (after switching to kernel stack)
- * - r19: saved b6
- * - r20: saved r1 (gp)
- * - r21: saved ar.fpsr
- * - r22: kernel's register backing store base (krbs_base)
- * - r23: saved ar.bspstore
- * - r24: saved ar.rnat
- * - r25: saved ar.unat
- * - r26: saved ar.pfs
- * - r27: saved ar.rsc
- * - r28: saved cr.iip
- * - r29: saved cr.ipsr
- * - r31: saved pr
- * - b0: original contents (to be saved)
- * On exit:
- * - executing on bank 1 registers
- * - psr.ic enabled, interrupts restored
- * - p10: TRUE if syscall is invoked with more than 8 out
- * registers or r15's Nat is true
- * - r1: kernel's gp
- * - r3: preserved (same as on entry)
- * - r8: -EINVAL if p10 is true
- * - r12: points to kernel stack
- * - r13: points to current task
- * - p15: TRUE if interrupts need to be re-enabled
- * - ar.fpsr: set to kernel settings
- */
-GLOBAL_ENTRY(ia64_syscall_setup)
-#ifndef XEN
-#if PT(B6) != 0
-# error This code assumes that b6 is the first field in pt_regs.
-#endif
-#endif
- st8 [r1]=r19 // save b6
- add r16=PT(CR_IPSR),r1 // initialize first base pointer
-	add r17=PT(R11),r1			// initialize second base pointer
- ;;
- alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
- st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
- tnat.nz p8,p0=in0
-
- st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
- tnat.nz p9,p0=in1
-(pKStk) mov r18=r0				// make sure r18 isn't NaT
- ;;
-
- st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
- st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
- mov r28=b0 // save b0 (2 cyc)
- ;;
-
- st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
- dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
-(p8) mov in0=-1
- ;;
-
- st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
- extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
- and r8=0x7f,r19 // A // get sof of ar.pfs
-
- st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
- tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
-(p9) mov in1=-1
- ;;
-
-(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
- tnat.nz p10,p0=in2
- add r11=8,r11
- ;;
-(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16	// skip over ar_rnat field
-(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
- tnat.nz p11,p0=in3
- ;;
-(p10) mov in2=-1
- tnat.nz p12,p0=in4 // [I0]
-(p11) mov in3=-1
- ;;
-(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
-(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
-	shl r18=r18,16				// compute ar.rsc to be used for "loadrs"
- ;;
- st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
- st8 [r17]=r28,PT(R1)-PT(B0) // save b0
- tnat.nz p13,p0=in5 // [I0]
- ;;
-	st8 [r16]=r18,PT(R12)-PT(LOADRS)	// save ar.rsc value for "loadrs"
- st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
-(p12) mov in4=-1
- ;;
-
-.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
-.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
-(p13) mov in5=-1
- ;;
- st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
- tnat.nz p14,p0=in6
- cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
- ;;
-	stf8 [r16]=f1				// ensure pt_regs.r8 != 0 (see handle_syscall_error)
-(p9) tnat.nz p10,p0=r15
-	adds r12=-16,r1				// switch to kernel memory stack (with 16 bytes of scratch)
-
- st8.spill [r17]=r15 // save r15
- tnat.nz p8,p0=in7
- nop.i 0
-
- mov r13=r2 // establish `current'
-	movl r1=__gp				// establish kernel global pointer
- ;;
-(p14) mov in6=-1
-(p8) mov in7=-1
- nop.i 0
-
- cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
- movl r17=FPSR_DEFAULT
- ;;
-	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
-(p10) mov r8=-EINVAL
- br.ret.sptk.many b7
-END(ia64_syscall_setup)
-
- .org ia64_ivt+0x3c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
- DBG_FAULT(15)
- FAULT(15)
-
- /*
- * Squatting in this space ...
- *
- * This special case dispatcher for illegal operation faults allows preserved
- * registers to be modified through a callback function (asm only) that is handed
- * back from the fault handler in r8. Up to three arguments can be passed to the
- * callback function by returning an aggregate with the callback as its first
- * element, followed by the arguments.
- */
-ENTRY(dispatch_illegal_op_fault)
- SAVE_MIN_WITH_COVER
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
- mov out0=ar.ec
- ;;
- SAVE_REST
- ;;
- br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0: ;;
- alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
- mov out0=r9
- mov out1=r10
- mov out2=r11
- movl r15=ia64_leave_kernel
- ;;
- mov rp=r15
- mov b6=r8
- ;;
- cmp.ne p6,p0=0,r8
-(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
- br.sptk.many ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
- .org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
- DBG_FAULT(16)
- FAULT(16)
-
-#ifdef XEN
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply moved the following code to a more
- // suitable spot...
-
-ENTRY(dispatch_privop_fault)
- SAVE_MIN_WITH_COVER
- ;;
-	alloc r14=ar.pfs,0,0,4,0		// now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- adds out1=16,sp
- mov out2=cr.isr // FIXME: pity to make this slow access twice
- mov out3=cr.itir
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.sptk.many ia64_prepare_handle_privop
-END(dispatch_privop_fault)
-#endif
-
-
- .org ia64_ivt+0x4400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
- DBG_FAULT(17)
- FAULT(17)
-
-ENTRY(non_syscall)
- SAVE_MIN_WITH_COVER
-
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply moved the following code to a more
- // suitable spot...
-
- alloc r14=ar.pfs,0,0,2,0
- mov out0=cr.iim
- add out1=16,sp
-	adds r3=8,r2				// set up second base pointer for SAVE_REST
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- movl r15=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r15
- ;;
-	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
- .org ia64_ivt+0x4800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
- DBG_FAULT(18)
- FAULT(18)
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise.  If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_unaligned_handler)
- SAVE_MIN_WITH_COVER
- ;;
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- adds out1=16,sp
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.sptk.many ia64_prepare_handle_unaligned
-END(dispatch_unaligned_handler)
-
- .org ia64_ivt+0x4c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
- DBG_FAULT(19)
- FAULT(19)
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise.  If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_to_fault_handler)
- /*
- * Input:
- * psr.ic: off
- * r19: fault vector number (e.g., 24 for General Exception)
- * r31: contains saved predicates (pr)
- */
- SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0
- mov out0=r15
- mov out1=cr.isr
- mov out2=cr.ifa
- mov out3=cr.iim
- mov out4=cr.itir
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
-	adds r3=8,r2				// set up second base pointer for SAVE_REST
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.call.sptk.many b6=ia64_fault
-END(dispatch_to_fault_handler)
-
-//
-// --- End of long entries, Beginning of short entries
-//
-
- .org ia64_ivt+0x5000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
-ENTRY(page_not_present)
-#ifdef XEN
- REFLECT(20)
-#endif
- DBG_FAULT(20)
- mov r16=cr.ifa
- rsm psr.dt
- /*
- * The Linux page fault handler doesn't expect non-present pages to be in
- * the TLB. Flush the existing entry now, so we meet that expectation.
- */
- mov r17=PAGE_SHIFT<<2
- ;;
- ptc.l r16,r17
- ;;
- mov r31=pr
- srlz.d
- br.sptk.many page_fault
-END(page_not_present)
-
- .org ia64_ivt+0x5100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
-ENTRY(key_permission)
-#ifdef XEN
- REFLECT(21)
-#endif
- DBG_FAULT(21)
- mov r16=cr.ifa
- rsm psr.dt
- mov r31=pr
- ;;
- srlz.d
- br.sptk.many page_fault
-END(key_permission)
-
- .org ia64_ivt+0x5200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(iaccess_rights)
-#ifdef XEN
- REFLECT(22)
-#endif
- DBG_FAULT(22)
- mov r16=cr.ifa
- rsm psr.dt
- mov r31=pr
- ;;
- srlz.d
- br.sptk.many page_fault
-END(iaccess_rights)
-
- .org ia64_ivt+0x5300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(daccess_rights)
-#ifdef XEN
- mov r31=pr;
- mov r16=cr.isr
- mov r17=cr.ifa
- mov r19=23
- movl r20=0x5300
- br.sptk.many fast_access_reflect;;
-#endif
- DBG_FAULT(23)
- mov r16=cr.ifa
- rsm psr.dt
- mov r31=pr
- ;;
- srlz.d
- br.sptk.many page_fault
-END(daccess_rights)
-
- .org ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(general_exception)
- DBG_FAULT(24)
- mov r16=cr.isr
- mov r31=pr
- ;;
-#ifdef XEN
- cmp4.ge p6,p0=0x20,r16
-(p6) br.sptk.many dispatch_privop_fault
-#else
- cmp4.eq p6,p0=0,r16
-(p6) br.sptk.many dispatch_illegal_op_fault
-#endif
- ;;
- mov r19=24 // fault number
- br.sptk.many dispatch_to_fault_handler
-END(general_exception)
-
- .org ia64_ivt+0x5500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(disabled_fp_reg)
-#ifdef XEN
- REFLECT(25)
-#endif
- DBG_FAULT(25)
- rsm psr.dfh // ensure we can access fph
- ;;
- srlz.d
- mov r31=pr
- mov r19=25
- br.sptk.many dispatch_to_fault_handler
-END(disabled_fp_reg)
-
- .org ia64_ivt+0x5600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(nat_consumption)
-#ifdef XEN
- REFLECT(26)
-#endif
- DBG_FAULT(26)
- FAULT(26)
-END(nat_consumption)
-
- .org ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(speculation_vector)
-#ifdef XEN
- // this probably need not reflect...
- REFLECT(27)
-#endif
- DBG_FAULT(27)
- /*
- * A [f]chk.[as] instruction needs to take the branch to the recovery code but
- * this part of the architecture is not implemented in hardware on some CPUs, such
- * as Itanium.  Thus, in general we need to emulate the behavior.  IIM contains
- * the relative target (not yet sign extended).  So after sign extending it we
- * simply add it to IIP.  We also need to reset the EI field of the IPSR to zero,
- * i.e., the slot to restart into.
- *
- * cr.imm contains zero_ext(imm21)
- */
- mov r18=cr.iim
- ;;
- mov r17=cr.iip
- shl r18=r18,43 // put sign bit in position (43=64-21)
- ;;
-
- mov r16=cr.ipsr
- shr r18=r18,39 // sign extend (39=43-4)
- ;;
-
- add r17=r17,r18 // now add the offset
- ;;
- mov cr.iip=r17
- dep r16=0,r16,41,2 // clear EI
- ;;
-
- mov cr.ipsr=r16
- ;;
-
- rfi // and go back
-END(speculation_vector)
-
- .org ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
- DBG_FAULT(28)
- FAULT(28)
-
- .org ia64_ivt+0x5900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(debug_vector)
-#ifdef XEN
- REFLECT(29)
-#endif
- DBG_FAULT(29)
- FAULT(29)
-END(debug_vector)
-
- .org ia64_ivt+0x5a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(unaligned_access)
-#ifdef XEN
- REFLECT(30)
-#endif
- DBG_FAULT(30)
- mov r16=cr.ipsr
- mov r31=pr // prepare to save predicates
- ;;
- br.sptk.many dispatch_unaligned_handler
-END(unaligned_access)
-
- .org ia64_ivt+0x5b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(unsupported_data_reference)
-#ifdef XEN
- REFLECT(31)
-#endif
- DBG_FAULT(31)
- FAULT(31)
-END(unsupported_data_reference)
-
- .org ia64_ivt+0x5c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
-ENTRY(floating_point_fault)
-#ifdef XEN
- REFLECT(32)
-#endif
- DBG_FAULT(32)
- FAULT(32)
-END(floating_point_fault)
-
- .org ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(floating_point_trap)
-#ifdef XEN
- REFLECT(33)
-#endif
- DBG_FAULT(33)
- FAULT(33)
-END(floating_point_trap)
-
- .org ia64_ivt+0x5e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(lower_privilege_trap)
-#ifdef XEN
- REFLECT(34)
-#endif
- DBG_FAULT(34)
- FAULT(34)
-END(lower_privilege_trap)
-
- .org ia64_ivt+0x5f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(taken_branch_trap)
-#ifdef XEN
- REFLECT(35)
-#endif
- DBG_FAULT(35)
- FAULT(35)
-END(taken_branch_trap)
-
- .org ia64_ivt+0x6000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(single_step_trap)
-#ifdef XEN
- REFLECT(36)
-#endif
- DBG_FAULT(36)
- FAULT(36)
-END(single_step_trap)
-
- .org ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Reserved
- DBG_FAULT(37)
- FAULT(37)
-
- .org ia64_ivt+0x6200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
- DBG_FAULT(38)
- FAULT(38)
-
- .org ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
- DBG_FAULT(39)
- FAULT(39)
-
- .org ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
- DBG_FAULT(40)
- FAULT(40)
-
- .org ia64_ivt+0x6500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
- DBG_FAULT(41)
- FAULT(41)
-
- .org ia64_ivt+0x6600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
- DBG_FAULT(42)
- FAULT(42)
-
- .org ia64_ivt+0x6700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
- DBG_FAULT(43)
- FAULT(43)
-
- .org ia64_ivt+0x6800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
- DBG_FAULT(44)
- FAULT(44)
-
- .org ia64_ivt+0x6900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(ia32_exception)
-#ifdef XEN
- REFLECT(45)
-#endif
- DBG_FAULT(45)
- FAULT(45)
-END(ia32_exception)
-
- .org ia64_ivt+0x6a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
-ENTRY(ia32_intercept)
-#ifdef XEN
- REFLECT(46)
-#endif
- DBG_FAULT(46)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
- mov r16=cr.isr
- ;;
- extr.u r17=r16,16,8 // get ISR.code
- mov r18=ar.eflag
- mov r19=cr.iim // old eflag value
- ;;
- cmp.ne p6,p0=2,r17
-(p6) br.cond.spnt 1f // not a system flag fault
- xor r16=r18,r19
- ;;
- extr.u r17=r16,18,1 // get the eflags.ac bit
- ;;
- cmp.eq p6,p0=0,r17
-(p6) br.cond.spnt 1f // eflags.ac bit didn't change
- ;;
- mov pr=r31,-1 // restore predicate registers
- rfi
-
-1:
-#endif // CONFIG_IA32_SUPPORT
- FAULT(46)
-END(ia32_intercept)
-
- .org ia64_ivt+0x6b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
-ENTRY(ia32_interrupt)
-#ifdef XEN
- REFLECT(47)
-#endif
- DBG_FAULT(47)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
- br.sptk.many dispatch_to_ia32_handler
-#else
- FAULT(47)
-#endif
-END(ia32_interrupt)
-
- .org ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
- DBG_FAULT(48)
- FAULT(48)
-
- .org ia64_ivt+0x6d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
- DBG_FAULT(49)
- FAULT(49)
-
- .org ia64_ivt+0x6e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
- DBG_FAULT(50)
- FAULT(50)
-
- .org ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
- DBG_FAULT(51)
- FAULT(51)
-
- .org ia64_ivt+0x7000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7000 Entry 52 (size 16 bundles) Reserved
- DBG_FAULT(52)
- FAULT(52)
-
- .org ia64_ivt+0x7100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
- DBG_FAULT(53)
- FAULT(53)
-
- .org ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
- DBG_FAULT(54)
- FAULT(54)
-
- .org ia64_ivt+0x7300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
- DBG_FAULT(55)
- FAULT(55)
-
- .org ia64_ivt+0x7400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
- DBG_FAULT(56)
- FAULT(56)
-
- .org ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
- DBG_FAULT(57)
- FAULT(57)
-
- .org ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
- DBG_FAULT(58)
- FAULT(58)
-
- .org ia64_ivt+0x7700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
- DBG_FAULT(59)
- FAULT(59)
-
- .org ia64_ivt+0x7800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
- DBG_FAULT(60)
- FAULT(60)
-
- .org ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
- DBG_FAULT(61)
- FAULT(61)
-
- .org ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
- DBG_FAULT(62)
- FAULT(62)
-
- .org ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
- DBG_FAULT(63)
- FAULT(63)
-
- .org ia64_ivt+0x7c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
- DBG_FAULT(64)
- FAULT(64)
-
- .org ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
- DBG_FAULT(65)
- FAULT(65)
-
- .org ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
- DBG_FAULT(66)
- FAULT(66)
-
- .org ia64_ivt+0x7f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
- DBG_FAULT(67)
- FAULT(67)
-
-#ifdef XEN
- .org ia64_ivt+0x8000
-GLOBAL_ENTRY(dispatch_reflection)
- /*
- * Input:
- * psr.ic: off
- * r19: intr type (offset into ivt, see ia64_int.h)
- * r31: contains saved predicates (pr)
- */
- SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0
- mov out4=r15
- mov out0=cr.ifa
- adds out1=16,sp
- mov out2=cr.isr
- mov out3=cr.iim
-// mov out3=cr.itir
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.sptk.many ia64_prepare_handle_reflection
-END(dispatch_reflection)
-
-#define SAVE_MIN_COVER_DONE DO_SAVE_MIN(,mov r30=cr.ifs,)
-
-// same as dispatch_break_fault except cover has already been done
-GLOBAL_ENTRY(dispatch_slow_hyperprivop)
- SAVE_MIN_COVER_DONE
- ;;
- br.sptk.many dispatch_break_fault_post_save
-END(dispatch_slow_hyperprivop)
-#endif
-
-#ifdef CONFIG_IA32_SUPPORT
-
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise.  If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
- // IA32 interrupt entry point
-
-ENTRY(dispatch_to_ia32_handler)
- SAVE_MIN
- ;;
- mov r14=cr.isr
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
-	srlz.i					// guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i
- adds r3=8,r2 // Base pointer for SAVE_REST
- ;;
- SAVE_REST
- ;;
- mov r15=0x80
- shr r14=r14,16 // Get interrupt number
- ;;
- cmp.ne p6,p0=r14,r15
-(p6) br.call.dpnt.many b6=non_ia32_syscall
-
-	adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp	// 16 byte hole per SW conventions
- adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
- ;;
- cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
- ld8 r8=[r14] // get r8
- ;;
-	st8 [r15]=r8		// save original EAX in r1 (IA32 procs don't use the GP)
- ;;
- alloc r15=ar.pfs,0,0,6,0 // must first in an insn group
- ;;
- ld4 r8=[r14],8 // r8 == eax (syscall number)
- mov r15=IA32_NR_syscalls
- ;;
- cmp.ltu.unc p6,p7=r8,r15
- ld4 out1=[r14],8 // r9 == ecx
- ;;
- ld4 out2=[r14],8 // r10 == edx
- ;;
- ld4 out0=[r14] // r11 == ebx
- adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
- ;;
- ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
- ;;
- ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- ld4 out4=[r14] // r15 == edi
- movl r16=ia32_syscall_table
- ;;
-(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
- ld4 r2=[r2] // r2 = current_thread_info()->flags
- ;;
- ld8 r16=[r16]
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
- ;;
- mov b6=r16
- movl r15=ia32_ret_from_syscall
- cmp.eq p8,p0=r2,r0
- ;;
- mov rp=r15
-(p8) br.call.sptk.many b6=b6
- br.cond.sptk ia32_trace_syscall
-
-non_ia32_syscall:
- alloc r15=ar.pfs,0,0,2,0
- mov out0=r14 // interrupt #
- add out1=16,sp // pointer to pt_regs
- ;; // avoid WAW on CFM
- br.call.sptk.many rp=ia32_bad_interrupt
-.ret1: movl r15=ia64_leave_kernel
- ;;
- mov rp=r15
- br.ret.sptk.many rp
-END(dispatch_to_ia32_handler)
-
-#endif /* CONFIG_IA32_SUPPORT */
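For readers following the removed break_fault path above, here is a minimal C sketch of the table lookup it performs once the break immediate is recognized as a system call: r15 arrives holding 1024 plus the syscall number, entries are 8 bytes wide, and out-of-range numbers fall back to sys_ni_syscall. The names below merely mirror the assembly symbols; this is an illustrative aside, not code from the patch.

#include <stdint.h>

typedef long (*syscall_fn_t)(void);

extern syscall_fn_t sys_call_table[];          /* 8-byte entries, as in the assembly */
extern long sys_ni_syscall(void);
#define NR_syscalls 1024                       /* placeholder value for the sketch */

static syscall_fn_t lookup_syscall(uint64_t r15)
{
        uint64_t nr = r15 - 1024;              /* adds r15=-1024,r15               */
        if (nr <= (uint64_t)NR_syscalls - 1)   /* cmp.leu p6,p7=r15,r3 (r3=NR-1)   */
                return sys_call_table[nr];     /* shladd r20=r15,3,r16 ; ld8       */
        return sys_ni_syscall;                 /* (p7) movl r20=sys_ni_syscall     */
}
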
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/bitop.c
--- a/xen/arch/ia64/linux/lib/bitop.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,88 +0,0 @@
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/intrinsics.h>
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/*
- * Find next zero bit in a bitmap reasonably efficiently..
- */
-
-int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
-{
- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
- unsigned long result = offset & ~63UL;
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset &= 63UL;
- if (offset) {
- tmp = *(p++);
- tmp |= ~0UL >> (64-offset);
- if (size < 64)
- goto found_first;
- if (~tmp)
- goto found_middle;
- size -= 64;
- result += 64;
- }
- while (size & ~63UL) {
- if (~(tmp = *(p++)))
- goto found_middle;
- result += 64;
- size -= 64;
- }
- if (!size)
- return result;
- tmp = *p;
-found_first:
- tmp |= ~0UL << size;
- if (tmp == ~0UL) /* any bits zero? */
- return result + size; /* nope */
-found_middle:
- return result + ffz(tmp);
-}
-EXPORT_SYMBOL(__find_next_zero_bit);
-
-/*
- * Find next bit in a bitmap reasonably efficiently..
- */
-int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
-{
- unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
- unsigned long result = offset & ~63UL;
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset &= 63UL;
- if (offset) {
- tmp = *(p++);
- tmp &= ~0UL << offset;
- if (size < 64)
- goto found_first;
- if (tmp)
- goto found_middle;
- size -= 64;
- result += 64;
- }
- while (size & ~63UL) {
- if ((tmp = *(p++)))
- goto found_middle;
- result += 64;
- size -= 64;
- }
- if (!size)
- return result;
- tmp = *p;
- found_first:
- tmp &= ~0UL >> (64-size);
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
- found_middle:
- return result + __ffs(tmp);
-}
-EXPORT_SYMBOL(__find_next_bit);
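The two bitmap scanners deleted above are plain C and easy to exercise on their own; a hypothetical caller might look like this (the prototype matches the removed definition, the rest is illustrative only):

#include <stdio.h>

int __find_next_zero_bit(const void *addr, unsigned long size,
                         unsigned long offset);

int main(void)
{
        unsigned long map[2] = { ~0UL, 0x0fUL };        /* bits 0..67 set */
        unsigned long bit = __find_next_zero_bit(map, 128, 0);
        printf("first zero bit: %lu\n", bit);           /* expected: 68   */
        return 0;
}
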
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/clear_page.S
--- a/xen/arch/ia64/linux/lib/clear_page.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 1999-2002 Hewlett-Packard Co
- * Stephane Eranian <eranian@xxxxxxxxxx>
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
- *
- * 1/06/01 davidm Tuned for Itanium.
- * 2/12/02 kchen Tuned for both Itanium and McKinley
- * 3/08/02 davidm Some more tweaking
- */
-#include <linux/config.h>
-
-#include <asm/asmmacro.h>
-#include <asm/page.h>
-
-#ifdef CONFIG_ITANIUM
-# define L3_LINE_SIZE 64 // Itanium L3 line size
-# define PREFETCH_LINES 9 // magic number
-#else
-# define L3_LINE_SIZE 128 // McKinley L3 line size
-# define PREFETCH_LINES 12 // magic number
-#endif
-
-#define saved_lc r2
-#define dst_fetch r3
-#define dst1 r8
-#define dst2 r9
-#define dst3 r10
-#define dst4 r11
-
-#define dst_last r31
-
-GLOBAL_ENTRY(clear_page)
- .prologue
- .regstk 1,0,0,0
-	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
- .save ar.lc, saved_lc
- mov saved_lc = ar.lc
-
- .body
- mov ar.lc = (PREFETCH_LINES - 1)
- mov dst_fetch = in0
- adds dst1 = 16, in0
- adds dst2 = 32, in0
- ;;
-.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
-	adds dst3 = 48, in0		// executing this multiple times is harmless
- br.cloop.sptk.few .fetch
- ;;
- addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
- mov ar.lc = r16 // one L3 line per iteration
- adds dst4 = 64, in0
- ;;
-#ifdef CONFIG_ITANIUM
- // Optimized for Itanium
-1: stf.spill.nta [dst1] = f0, 64
- stf.spill.nta [dst2] = f0, 64
- cmp.lt p8,p0=dst_fetch, dst_last
- ;;
-#else
- // Optimized for McKinley
-1: stf.spill.nta [dst1] = f0, 64
- stf.spill.nta [dst2] = f0, 64
- stf.spill.nta [dst3] = f0, 64
- stf.spill.nta [dst4] = f0, 128
- cmp.lt p8,p0=dst_fetch, dst_last
- ;;
- stf.spill.nta [dst1] = f0, 64
- stf.spill.nta [dst2] = f0, 64
-#endif
- stf.spill.nta [dst3] = f0, 64
-(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
- br.cloop.sptk.few 1b
- ;;
- mov ar.lc = saved_lc // restore lc
- br.ret.sptk.many rp
-END(clear_page)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/copy_page_mck.S
--- a/xen/arch/ia64/linux/lib/copy_page_mck.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,185 +0,0 @@
-/*
- * McKinley-optimized version of copy_page().
- *
- * Copyright (C) 2002 Hewlett-Packard Co
- * David Mosberger <davidm@xxxxxxxxxx>
- *
- * Inputs:
- * in0: address of target page
- * in1: address of source page
- * Output:
- * no return value
- *
- * General idea:
- *	- use regular loads and stores to prefetch data to avoid consuming M-slot just for
- * lfetches => good for in-cache performance
- *	- avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
- * cycle
- *
- * Principle of operation:
- *	First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
- *	To avoid secondary misses in L2, we prefetch both source and destination with a line-size
- *	of 128 bytes.  When both of these lines are in the L2 and the first half of the
- *	source line is in L1, we start copying the remaining words.  The second half of the
- *	source line is prefetched in an earlier iteration, so that by the time we start
- * accessing it, it's also present in the L1.
- *
- *	We use a software-pipelined loop to control the overall operation.  The pipeline
- *	has 2*PREFETCH_DIST+K stages.  The first PREFETCH_DIST stages are used for prefetching
- *	source cache-lines.  The second PREFETCH_DIST stages are used for prefetching destination
- *	cache-lines, the last K stages are used to copy the cache-line words not copied by
- *	the prefetches.  The four relevant points in the pipelined are called A, B, C, D:
- *	p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
- *	should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
- * into L1D and p[D] is TRUE if a cacheline needs to be copied.
- *
- *	This all sounds very complicated, but thanks to the modulo-scheduled loop support,
- *	the resulting code is very regular and quite easy to follow (once you get the idea).
- *
- *	As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
- *	as the separate .prefetch_loop.  Logically, this loop performs exactly like the
- *	main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
- * so that each loop iteration is faster (again, good for cached case).
- *
- * When reading the code, it helps to keep the following picture in mind:
- *
- * word 0 word 1
- * +------+------+---
- * | v[x] | t1 | ^
- * | t2 | t3 | |
- * | t4 | t5 | |
- * | t6 | t7 | | 128 bytes
- * | n[y] | t9 | | (L2 cache line)
- * | t10 | t11 | |
- * | t12 | t13 | |
- * | t14 | t15 | v
- * +------+------+---
- *
- * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C]
- *	to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
- * an order that avoids bank conflicts.
- */
-#include <asm/asmmacro.h>
-#include <asm/page.h>
-
-#define PREFETCH_DIST	8		// McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
-
-#define src0 r2
-#define src1 r3
-#define dst0 r9
-#define dst1 r10
-#define src_pre_mem r11
-#define dst_pre_mem r14
-#define src_pre_l2 r15
-#define dst_pre_l2 r16
-#define t1 r17
-#define t2 r18
-#define t3 r19
-#define t4 r20
-#define t5 t1 // alias!
-#define t6 t2 // alias!
-#define t7 t3 // alias!
-#define t9 t5 // alias!
-#define t10 t4 // alias!
-#define t11 t7 // alias!
-#define t12 t6 // alias!
-#define t14 t10 // alias!
-#define t13 r21
-#define t15 r22
-
-#define saved_lc r23
-#define saved_pr r24
-
-#define A 0
-#define B (PREFETCH_DIST)
-#define C (B + PREFETCH_DIST)
-#define D (C + 3)
-#define N (D + 1)
-#define Nrot ((N + 7) & ~7)
-
-GLOBAL_ENTRY(copy_page)
- .prologue
- alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
-
- .rotr v[2*PREFETCH_DIST], n[D-C+1]
- .rotp p[N]
-
- .save ar.lc, saved_lc
- mov saved_lc = ar.lc
- .save pr, saved_pr
- mov saved_pr = pr
- .body
-
- mov src_pre_mem = in1
- mov pr.rot = 0x10000
- mov ar.ec = 1 // special unrolled loop
-
- mov dst_pre_mem = in0
- mov ar.lc = 2*PREFETCH_DIST - 1
-
- add src_pre_l2 = 8*8, in1
- add dst_pre_l2 = 8*8, in0
- add src0 = 8, in1 // first t1 src
- add src1 = 3*8, in1 // first t3 src
- add dst0 = 8, in0 // first t1 dst
- add dst1 = 3*8, in0 // first t3 dst
- mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
- nop.m 0
- nop.i 0
- ;;
-	// same as .line_copy loop, but with all predicated-off instructions removed:
-.prefetch_loop:
-(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0
-(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2
- br.ctop.sptk .prefetch_loop
- ;;
-	cmp.eq p16, p0 = r0, r0		// reset p16 to 1 (br.ctop cleared it to zero)
-	mov ar.lc = t1			// with 64KB pages, t1 is too big to fit in 8 bits!
- mov ar.ec = N // # of stages in pipeline
- ;;
-.line_copy:
-(p[D]) ld8 t2 = [src0], 3*8 // M0
-(p[D]) ld8 t4 = [src1], 3*8 // M1
-(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
-(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
- ;;
-(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
-(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
-(p[D]) st8 [dst0] = t1, 8 // M2
-(p[D]) st8 [dst1] = t3, 8 // M3
- ;;
-(p[D]) ld8 t5 = [src0], 8
-(p[D]) ld8 t7 = [src1], 3*8
-(p[D]) st8 [dst0] = t2, 3*8
-(p[D]) st8 [dst1] = t4, 3*8
- ;;
-(p[D]) ld8 t6 = [src0], 3*8
-(p[D]) ld8 t10 = [src1], 8
-(p[D]) st8 [dst0] = t5, 8
-(p[D]) st8 [dst1] = t7, 3*8
- ;;
-(p[D]) ld8 t9 = [src0], 3*8
-(p[D]) ld8 t11 = [src1], 3*8
-(p[D]) st8 [dst0] = t6, 3*8
-(p[D]) st8 [dst1] = t10, 8
- ;;
-(p[D]) ld8 t12 = [src0], 8
-(p[D]) ld8 t14 = [src1], 8
-(p[D]) st8 [dst0] = t9, 3*8
-(p[D]) st8 [dst1] = t11, 3*8
- ;;
-(p[D]) ld8 t13 = [src0], 4*8
-(p[D]) ld8 t15 = [src1], 4*8
-(p[D]) st8 [dst0] = t12, 8
-(p[D]) st8 [dst1] = t14, 8
- ;;
-(p[D-1])ld8 t1 = [src0], 8
-(p[D-1])ld8 t3 = [src1], 8
-(p[D]) st8 [dst0] = t13, 4*8
-(p[D]) st8 [dst1] = t15, 4*8
- br.ctop.sptk .line_copy
- ;;
- mov ar.lc = saved_lc
- mov pr = saved_pr, -1
- br.ret.sptk.many rp
-END(copy_page)
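The long header comment above describes a software-pipelined copy in which PREFETCH_DIST cache lines are kept in flight ahead of the words being stored. Stripped of rotating registers and predication, the idea reduces to something like the following C sketch (the constants and __builtin_prefetch are stand-ins; this is not the removed routine):

#include <string.h>

#define PAGE_SIZE     16384
#define LINE          128              /* L2 line size assumed in the comment */
#define PREFETCH_DIST 8                /* lines kept in flight                */

static void copy_page_sketch(void *dst, const void *src)
{
        const char *s = src;
        char *d = dst;

        for (unsigned long i = 0; i < PAGE_SIZE / LINE; i++) {
                unsigned long pf = i + PREFETCH_DIST;
                if (pf < PAGE_SIZE / LINE) {
                        __builtin_prefetch(s + pf * LINE, 0);  /* src_pre_mem side */
                        __builtin_prefetch(d + pf * LINE, 1);  /* dst_pre_mem side */
                }
                memcpy(d + i * LINE, s + i * LINE, LINE);      /* the t1..t15 copies */
        }
}
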
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/flush.S
--- a/xen/arch/ia64/linux/lib/flush.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,61 +0,0 @@
-/*
- * Cache flushing routines.
- *
- * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- *
- * 05/28/05 Zoltan Menyhart Dynamic stride size
- */
-
-#include <asm/asmmacro.h>
-
-
- /*
- * flush_icache_range(start,end)
- *
- * Make i-cache(s) coherent with d-caches.
- *
- *	Must deal with range from start to end-1 but nothing else (need to
- * be careful not to touch addresses that may be unmapped).
- *
- * Note: "in0" and "in1" are preserved for debugging purposes.
- */
-GLOBAL_ENTRY(flush_icache_range)
-
- .prologue
- alloc r2=ar.pfs,2,0,0,0
- movl r3=ia64_i_cache_stride_shift
- mov r21=1
- ;;
- ld8 r20=[r3] // r20: stride shift
- sub r22=in1,r0,1 // last byte address
- ;;
- shr.u r23=in0,r20 // start / (stride size)
- shr.u r22=r22,r20 // (last byte address) / (stride size)
- shl r21=r21,r20 // r21: stride size of the i-cache(s)
- ;;
- sub r8=r22,r23 // number of strides - 1
- shl r24=r23,r20 // r24: addresses for "fc.i" =
-					//     "start" rounded down to stride boundary
- .save ar.lc,r3
- mov r3=ar.lc // save ar.lc
- ;;
-
- .body
- mov ar.lc=r8
- ;;
- /*
- * 32 byte aligned loop, even number of (actually 2) bundles
- */
-.Loop: fc.i r24 // issuable on M0 only
-	add	r24=r21,r24		// we flush "stride size" bytes per iteration
- nop.i 0
- br.cloop.sptk.few .Loop
- ;;
- sync.i
- ;;
- srlz.i
- ;;
- mov ar.lc=r3 // restore ar.lc
- br.ret.sptk.many rp
-END(flush_icache_range)
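The stride arithmetic in the removed flush_icache_range() is compact; as a rough C restatement (stride_shift stands in for the value loaded from ia64_i_cache_stride_shift, and the return value corresponds to the ar.lc setting plus one, i.e. the number of fc.i iterations):

#include <stdint.h>

static unsigned long fc_iterations(uint64_t start, uint64_t end,
                                   unsigned int stride_shift)
{
        uint64_t last       = end - 1;                 /* sub   r22=in1,r0,1 */
        uint64_t first_line = start >> stride_shift;   /* shr.u r23=in0,r20  */
        uint64_t last_line  = last  >> stride_shift;   /* shr.u r22=r22,r20  */

        return last_line - first_line + 1;             /* r8 = strides-1; loop runs r8+1 times */
}
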
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/idiv32.S
--- a/xen/arch/ia64/linux/lib/idiv32.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2000 Hewlett-Packard Co
- * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
- *
- * 32-bit integer division.
- *
- * This code is based on the application note entitled "Divide, Square Root
- * and Remainder Algorithms for the IA-64 Architecture". This document
- * is available as Intel document number 248725-002 or via the web at
- * http://developer.intel.com/software/opensource/numerics/
- *
- * For more details on the theory behind these algorithms, see "IA-64
- * and Elementary Functions" by Peter Markstein; HP Professional Books
- * (http://www.hp.com/go/retailbooks/)
- */
-
-#include <asm/asmmacro.h>
-
-#ifdef MODULO
-# define OP mod
-#else
-# define OP div
-#endif
-
-#ifdef UNSIGNED
-# define SGN u
-# define EXTEND zxt4
-# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
-# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
-#else
-# define SGN
-# define EXTEND sxt4
-# define INT_TO_FP(a,b) fcvt.xf a=b
-# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
-#endif
-
-#define PASTE1(a,b) a##b
-#define PASTE(a,b) PASTE1(a,b)
-#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3))
-
-GLOBAL_ENTRY(NAME)
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias)
- EXTEND in0 = in0 // in0 = a
- EXTEND in1 = in1 // in1 = b
- ;;
- setf.sig f8 = in0
- setf.sig f9 = in1
-#ifdef MODULO
- sub in1 = r0, in1 // in1 = -b
-#endif
- ;;
- // Convert the inputs to FP, to avoid FP software-assist faults.
- INT_TO_FP(f8, f8)
- INT_TO_FP(f9, f9)
- ;;
- setf.exp f7 = r2 // f7 = 2^-34
- frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b)
- ;;
-(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0
-(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1
- ;;
-#ifdef MODULO
- setf.sig f9 = in1 // f9 = -b
-#endif
-(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0
-(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34
- ;;
-#ifdef MODULO
- setf.sig f7 = in0
-#endif
-(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1
- ;;
- FP_TO_INT(f6, f6) // q = trunc(q2)
- ;;
-#ifdef MODULO
- xma.l f6 = f6, f9, f7 // r = q*(-b) + a
- ;;
-#endif
- getf.sig r8 = f6 // transfer result to result register
- br.ret.sptk.many rp
-END(NAME)
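The quotient refinement that idiv32.S performs with fused multiply-adds is easier to follow in scalar form; idiv64.S below applies the same approach with extra refinement steps for 64-bit precision. The sketch below uses plain doubles and a 1.0/b starting estimate in place of frcpa, so it only illustrates the sequence of steps, not a drop-in replacement:

static long div_sketch(long a, long b)
{
        double fa = (double)a, fb = (double)b;

        double y0 = 1.0 / fb;              /* frcpa.s1 f6,p6 = f8,f9 (approx 1/b) */
        double q0 = fa * y0;               /* q0 = a*y0                           */
        double e0 = 1.0 - fb * y0;         /* e0 = -b*y0 + 1                      */
        double q1 = e0 * q0 + q0;          /* q1 = e0*q0 + q0                     */
        double e1 = e0 * e0 + 0x1p-34;     /* e1 = e0*e0 + 2^-34                  */
        double q2 = e1 * q1 + q1;          /* q2 = e1*q1 + q1                     */

        return (long)q2;                   /* q = trunc(q2)                       */
}
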
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/idiv64.S
--- a/xen/arch/ia64/linux/lib/idiv64.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 1999-2000 Hewlett-Packard Co
- * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@xxxxxxxxxx>
- *
- * 64-bit integer division.
- *
- * This code is based on the application note entitled "Divide, Square Root
- * and Remainder Algorithms for the IA-64 Architecture". This document
- * is available as Intel document number 248725-002 or via the web at
- * http://developer.intel.com/software/opensource/numerics/
- *
- * For more details on the theory behind these algorithms, see "IA-64
- * and Elementary Functions" by Peter Markstein; HP Professional Books
- * (http://www.hp.com/go/retailbooks/)
- */
-
-#include <asm/asmmacro.h>
-
-#ifdef MODULO
-# define OP mod
-#else
-# define OP div
-#endif
-
-#ifdef UNSIGNED
-# define SGN u
-# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
-# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
-#else
-# define SGN
-# define INT_TO_FP(a,b) fcvt.xf a=b
-# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
-#endif
-
-#define PASTE1(a,b) a##b
-#define PASTE(a,b) PASTE1(a,b)
-#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3))
-
-GLOBAL_ENTRY(NAME)
- .regstk 2,0,0,0
- // Transfer inputs to FP registers.
- setf.sig f8 = in0
- setf.sig f9 = in1
- ;;
- // Convert the inputs to FP, to avoid FP software-assist faults.
- INT_TO_FP(f8, f8)
- INT_TO_FP(f9, f9)
- ;;
- frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b)
- ;;
-(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0
-(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1
- ;;
-(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0
-(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0
- ;;
-#ifdef MODULO
- sub in1 = r0, in1 // in1 = -b
-#endif
-(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1
-(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0
- ;;
-(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1
-(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a
- ;;
-#ifdef MODULO
- setf.sig f8 = in0 // f8 = a
- setf.sig f9 = in1 // f9 = -b
-#endif
-(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2
- ;;
- FP_TO_INT(f11, f11) // q = trunc(q3)
- ;;
-#ifdef MODULO
- xma.l f11 = f11, f9, f8 // r = q*(-b) + a
- ;;
-#endif
- getf.sig r8 = f11 // transfer result to result register
- br.ret.sptk.many rp
-END(NAME)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/memcpy_mck.S
--- a/xen/arch/ia64/linux/lib/memcpy_mck.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,661 +0,0 @@
-/*
- * Itanium 2-optimized version of memcpy and copy_user function
- *
- * Inputs:
- * in0: destination address
- * in1: source address
- * in2: number of bytes to copy
- * Output:
- * 0 if success, or number of byte NOT copied if error occurred.
- *
- * Copyright (C) 2002 Intel Corp.
- * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx>
- */
-#include <linux/config.h>
-#include <asm/asmmacro.h>
-#include <asm/page.h>
-
-#define EK(y...) EX(y)
-
-/* McKinley specific optimization */
-
-#define retval r8
-#define saved_pfs r31
-#define saved_lc r10
-#define saved_pr r11
-#define saved_in0 r14
-#define saved_in1 r15
-#define saved_in2 r16
-
-#define src0 r2
-#define src1 r3
-#define dst0 r17
-#define dst1 r18
-#define cnt r9
-
-/* r19-r30 are temp for each code section */
-#define PREFETCH_DIST 8
-#define src_pre_mem r19
-#define dst_pre_mem r20
-#define src_pre_l2 r21
-#define dst_pre_l2 r22
-#define t1 r23
-#define t2 r24
-#define t3 r25
-#define t4 r26
-#define t5 t1 // alias!
-#define t6 t2 // alias!
-#define t7 t3 // alias!
-#define n8 r27
-#define t9 t5 // alias!
-#define t10 t4 // alias!
-#define t11 t7 // alias!
-#define t12 t6 // alias!
-#define t14 t10 // alias!
-#define t13 r28
-#define t15 r29
-#define tmp r30
-
-/* defines for long_copy block */
-#define A 0
-#define B (PREFETCH_DIST)
-#define C (B + PREFETCH_DIST)
-#define D (C + 1)
-#define N (D + 1)
-#define Nrot ((N + 7) & ~7)
-
-/* alias */
-#define in0 r32
-#define in1 r33
-#define in2 r34
-
-GLOBAL_ENTRY(memcpy)
- and r28=0x7,in0
- and r29=0x7,in1
- mov f6=f0
- br.cond.sptk .common_code
- ;;
-END(memcpy)
-GLOBAL_ENTRY(__copy_user)
- .prologue
-// check dest alignment
- and r28=0x7,in0
- and r29=0x7,in1
- mov f6=f1
- mov saved_in0=in0 // save dest pointer
- mov saved_in1=in1 // save src pointer
- mov saved_in2=in2 // save len
- ;;
-.common_code:
- cmp.gt p15,p0=8,in2 // check for small size
- cmp.ne p13,p0=0,r28 // check dest alignment
- cmp.ne p14,p0=0,r29 // check src alignment
- add src0=0,in1
- sub r30=8,r28 // for .align_dest
- mov retval=r0 // initialize return value
- ;;
- add dst0=0,in0
- add dst1=1,in0 // dest odd index
- cmp.le p6,p0 = 1,r30 // for .align_dest
-(p15) br.cond.dpnt .memcpy_short
-(p13) br.cond.dpnt .align_dest
-(p14) br.cond.dpnt .unaligned_src
- ;;
-
-// both dest and src are aligned on 8-byte boundary
-.aligned_src:
- .save ar.pfs, saved_pfs
- alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
- .save pr, saved_pr
- mov saved_pr=pr
-
- shr.u cnt=in2,7 // this much cache line
- ;;
- cmp.lt p6,p0=2*PREFETCH_DIST,cnt
- cmp.lt p7,p8=1,cnt
- .save ar.lc, saved_lc
- mov saved_lc=ar.lc
- .body
- add cnt=-1,cnt
- add src_pre_mem=0,in1 // prefetch src pointer
- add dst_pre_mem=0,in0 // prefetch dest pointer
- ;;
-(p7) mov ar.lc=cnt // prefetch count
-(p8) mov ar.lc=r0
-(p6) br.cond.dpnt .long_copy
- ;;
-
-.prefetch:
- lfetch.fault [src_pre_mem], 128
- lfetch.fault.excl [dst_pre_mem], 128
- br.cloop.dptk.few .prefetch
- ;;
-
-.medium_copy:
- and tmp=31,in2 // copy length after iteration
- shr.u r29=in2,5 // number of 32-byte iteration
- add dst1=8,dst0 // 2nd dest pointer
- ;;
- add cnt=-1,r29 // ctop iteration adjustment
- cmp.eq p10,p0=r29,r0 // do we really need to loop?
- add src1=8,src0 // 2nd src pointer
- cmp.le p6,p0=8,tmp
- ;;
- cmp.le p7,p0=16,tmp
- mov ar.lc=cnt // loop setup
- cmp.eq p16,p17 = r0,r0
- mov ar.ec=2
-(p10) br.dpnt.few .aligned_src_tail
- ;;
- TEXT_ALIGN(32)
-1:
-EX(.ex_handler, (p16) ld8 r34=[src0],16)
-EK(.ex_handler, (p16) ld8 r38=[src1],16)
-EX(.ex_handler, (p17) st8 [dst0]=r33,16)
-EK(.ex_handler, (p17) st8 [dst1]=r37,16)
- ;;
-EX(.ex_handler, (p16) ld8 r32=[src0],16)
-EK(.ex_handler, (p16) ld8 r36=[src1],16)
-EX(.ex_handler, (p16) st8 [dst0]=r34,16)
-EK(.ex_handler, (p16) st8 [dst1]=r38,16)
- br.ctop.dptk.few 1b
- ;;
-
-.aligned_src_tail:
-EX(.ex_handler, (p6) ld8 t1=[src0])
- mov ar.lc=saved_lc
- mov ar.pfs=saved_pfs
-EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8)
- cmp.le p8,p0=24,tmp
- and r21=-8,tmp
- ;;
-EX(.ex_hndlr_s, (p8) ld8 t3=[src1])
-EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1
- and in2=7,tmp // remaining length
-EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2
- add src0=src0,r21 // setting up src pointer
- add dst0=dst0,r21 // setting up dest pointer
- ;;
-EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3
- mov pr=saved_pr,-1
- br.dptk.many .memcpy_short
- ;;
-
-/* code taken from copy_page_mck */
-.long_copy:
- .rotr v[2*PREFETCH_DIST]
- .rotp p[N]
-
- mov src_pre_mem = src0
- mov pr.rot = 0x10000
- mov ar.ec = 1 // special unrolled loop
-
- mov dst_pre_mem = dst0
-
- add src_pre_l2 = 8*8, src0
- add dst_pre_l2 = 8*8, dst0
- ;;
- add src0 = 8, src_pre_mem // first t1 src
- mov ar.lc = 2*PREFETCH_DIST - 1
- shr.u cnt=in2,7 // number of lines
- add src1 = 3*8, src_pre_mem // first t3 src
- add dst0 = 8, dst_pre_mem // first t1 dst
- add dst1 = 3*8, dst_pre_mem // first t3 dst
- ;;
-	and tmp=127,in2			// remaining bytes after this block
- add cnt = -(2*PREFETCH_DIST) - 1, cnt
-	// same as .line_copy loop, but with all predicated-off instructions removed:
-.prefetch_loop:
-EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0
-EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2
- br.ctop.sptk .prefetch_loop
- ;;
- cmp.eq p16, p0 = r0, r0 // reset p16 to 1
- mov ar.lc = cnt
- mov ar.ec = N // # of stages in pipeline
- ;;
-.line_copy:
-EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0
-EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1
-EX(.ex_handler_lcpy,	(p[B])	st8 [dst_pre_mem] = v[B], 128)	// M2 prefetch dst from memory
-EK(.ex_handler_lcpy,	(p[D])	st8 [dst_pre_l2] = n8, 128)	// M3 prefetch dst from L2
- ;;
-EX(.ex_handler_lcpy,	(p[A])	ld8 v[A] = [src_pre_mem], 128)	// M0 prefetch src from memory
-EK(.ex_handler_lcpy,	(p[C])	ld8 n8 = [src_pre_l2], 128)	// M1 prefetch src from L2
-EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2
-EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3
- ;;
-EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8)
-EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8)
- ;;
-EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8)
-EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8)
- ;;
-EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8)
-EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8)
- ;;
-EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8)
-EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8)
- ;;
-EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8)
-EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8)
- ;;
-EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8)
-EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8)
-EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8)
-EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8)
- br.ctop.sptk .line_copy
- ;;
-
- add dst0=-8,dst0
- add src0=-8,src0
- mov in2=tmp
- .restore sp
- br.sptk.many .medium_copy
- ;;
-
-#define BLOCK_SIZE 128*32
-#define blocksize r23
-#define curlen r24
-
-// dest is on 8-byte boundary, src is not. We need to do
-// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle.
-.unaligned_src:
- .prologue
- .save ar.pfs, saved_pfs
- alloc saved_pfs=ar.pfs,3,5,0,8
- .save ar.lc, saved_lc
- mov saved_lc=ar.lc
- .save pr, saved_pr
- mov saved_pr=pr
- .body
-.4k_block:
- mov saved_in0=dst0 // need to save all input arguments
- mov saved_in2=in2
- mov blocksize=BLOCK_SIZE
- ;;
- cmp.lt p6,p7=blocksize,in2
- mov saved_in1=src0
- ;;
-(p6) mov in2=blocksize
- ;;
- shr.u r21=in2,7 // this much cache line
- shr.u r22=in2,4 // number of 16-byte iteration
- and curlen=15,in2 // copy length after iteration
- and r30=7,src0 // source alignment
- ;;
- cmp.lt p7,p8=1,r21
- add cnt=-1,r21
- ;;
-
- add src_pre_mem=0,src0 // prefetch src pointer
- add dst_pre_mem=0,dst0 // prefetch dest pointer
- and src0=-8,src0 // 1st src pointer
-(p7) mov ar.lc = cnt
-(p8) mov ar.lc = r0
- ;;
- TEXT_ALIGN(32)
-1: lfetch.fault [src_pre_mem], 128
- lfetch.fault.excl [dst_pre_mem], 128
- br.cloop.dptk.few 1b
- ;;
-
- shladd dst1=r22,3,dst0 // 2nd dest pointer
- shladd src1=r22,3,src0 // 2nd src pointer
- cmp.eq p8,p9=r22,r0 // do we really need to loop?
- cmp.le p6,p7=8,curlen; // have at least 8 byte remaining?
- add cnt=-1,r22 // ctop iteration adjustment
- ;;
-EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer
-EK(.ex_handler, (p9) ld8 r37=[src1],8)
-(p8) br.dpnt.few .noloop
- ;;
-
-// The jump address is calculated based on src alignment. The COPYU
-// macro below need to confine its size to power of two, so an entry
-// can be caulated using shl instead of an expensive multiply. The
-// size is then hard coded by the following #define to match the
-// actual size. This make it somewhat tedious when COPYU macro gets
-// changed and this need to be adjusted to match.
-#define LOOP_SIZE 6
-1:
- mov r29=ip // jmp_table thread
- mov ar.lc=cnt
- ;;
- add r29=.jump_table - 1b - (.jmp1-.jump_table), r29
- shl r28=r30, LOOP_SIZE // jmp_table thread
- mov ar.ec=2 // loop setup
- ;;
- add r29=r29,r28 // jmp_table thread
- cmp.eq p16,p17=r0,r0
- ;;
- mov b6=r29 // jmp_table thread
- ;;
- br.cond.sptk.few b6
-
-// for 8-15 byte case
-// We will skip the loop, but need to replicate the side effect
-// that the loop produces.
-.noloop:
-EX(.ex_handler, (p6) ld8 r37=[src1],8)
- add src0=8,src0
-(p6) shl r25=r30,3
- ;;
-EX(.ex_handler, (p6) ld8 r27=[src1])
-(p6) shr.u r28=r37,r25
-(p6) sub r26=64,r25
- ;;
-(p6) shl r27=r27,r26
- ;;
-(p6) or r21=r28,r27
-
-.unaligned_src_tail:
-/* check if we have more than blocksize to copy, if so go back */
- cmp.gt p8,p0=saved_in2,blocksize
- ;;
-(p8) add dst0=saved_in0,blocksize
-(p8) add src0=saved_in1,blocksize
-(p8) sub in2=saved_in2,blocksize
-(p8) br.dpnt .4k_block
- ;;
-
-/* we have up to 15 byte to copy in the tail.
- * part of work is already done in the jump table code
- * we are at the following state.
- * src side:
- *
- * xxxxxx xx <----- r21 has xxxxxxxx already
- * -------- -------- --------
- * 0 8 16
- * ^
- * |
- * src1
- *
- * dst
- * -------- -------- --------
- * ^
- * |
- * dst1
- */
-EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy
-(p6) add curlen=-8,curlen // update length
- mov ar.pfs=saved_pfs
- ;;
- mov ar.lc=saved_lc
- mov pr=saved_pr,-1
- mov in2=curlen // remaining length
- mov dst0=dst1 // dest pointer
- add src0=src1,r30 // forward by src alignment
- ;;
-
-// 7 byte or smaller.
-.memcpy_short:
- cmp.le p8,p9 = 1,in2
- cmp.le p10,p11 = 2,in2
- cmp.le p12,p13 = 3,in2
- cmp.le p14,p15 = 4,in2
- add src1=1,src0 // second src pointer
- add dst1=1,dst0 // second dest pointer
- ;;
-
-EX(.ex_handler_short, (p8) ld1 t1=[src0],2)
-EK(.ex_handler_short, (p10) ld1 t2=[src1],2)
-(p9) br.ret.dpnt rp // 0 byte copy
- ;;
-
-EX(.ex_handler_short, (p8) st1 [dst0]=t1,2)
-EK(.ex_handler_short, (p10) st1 [dst1]=t2,2)
-(p11) br.ret.dpnt rp // 1 byte copy
-
-EX(.ex_handler_short, (p12) ld1 t3=[src0],2)
-EK(.ex_handler_short, (p14) ld1 t4=[src1],2)
-(p13) br.ret.dpnt rp // 2 byte copy
- ;;
-
- cmp.le p6,p7 = 5,in2
- cmp.le p8,p9 = 6,in2
- cmp.le p10,p11 = 7,in2
-
-EX(.ex_handler_short, (p12) st1 [dst0]=t3,2)
-EK(.ex_handler_short, (p14) st1 [dst1]=t4,2)
-(p15) br.ret.dpnt rp // 3 byte copy
- ;;
-
-EX(.ex_handler_short, (p6) ld1 t5=[src0],2)
-EK(.ex_handler_short, (p8) ld1 t6=[src1],2)
-(p7) br.ret.dpnt rp // 4 byte copy
- ;;
-
-EX(.ex_handler_short, (p6) st1 [dst0]=t5,2)
-EK(.ex_handler_short, (p8) st1 [dst1]=t6,2)
-(p9) br.ret.dptk rp // 5 byte copy
-
-EX(.ex_handler_short, (p10) ld1 t7=[src0],2)
-(p11) br.ret.dptk rp // 6 byte copy
- ;;
-
-EX(.ex_handler_short, (p10) st1 [dst0]=t7,2)
- br.ret.dptk rp // done all cases
-
-
-/* Align dest to nearest 8-byte boundary. We know we have at
- * least 7 bytes to copy, enough to crawl to 8-byte boundary.
- * Actual number of byte to crawl depend on the dest alignment.
- * 7 byte or less is taken care at .memcpy_short
-
- * src0 - source even index
- * src1 - source odd index
- * dst0 - dest even index
- * dst1 - dest odd index
- * r30 - distance to 8-byte boundary
- */
-
-.align_dest:
- add src1=1,in1 // source odd index
- cmp.le p7,p0 = 2,r30 // for .align_dest
- cmp.le p8,p0 = 3,r30 // for .align_dest
-EX(.ex_handler_short, (p6) ld1 t1=[src0],2)
- cmp.le p9,p0 = 4,r30 // for .align_dest
- cmp.le p10,p0 = 5,r30
- ;;
-EX(.ex_handler_short, (p7) ld1 t2=[src1],2)
-EK(.ex_handler_short, (p8) ld1 t3=[src0],2)
- cmp.le p11,p0 = 6,r30
-EX(.ex_handler_short, (p6) st1 [dst0] = t1,2)
- cmp.le p12,p0 = 7,r30
- ;;
-EX(.ex_handler_short, (p9) ld1 t4=[src1],2)
-EK(.ex_handler_short, (p10) ld1 t5=[src0],2)
-EX(.ex_handler_short, (p7) st1 [dst1] = t2,2)
-EK(.ex_handler_short, (p8) st1 [dst0] = t3,2)
- ;;
-EX(.ex_handler_short, (p11) ld1 t6=[src1],2)
-EK(.ex_handler_short, (p12) ld1 t7=[src0],2)
- cmp.eq p6,p7=r28,r29
-EX(.ex_handler_short, (p9) st1 [dst1] = t4,2)
-EK(.ex_handler_short, (p10) st1 [dst0] = t5,2)
- sub in2=in2,r30
- ;;
-EX(.ex_handler_short, (p11) st1 [dst1] = t6,2)
-EK(.ex_handler_short, (p12) st1 [dst0] = t7)
- add dst0=in0,r30 // setup arguments
- add src0=in1,r30
-(p6) br.cond.dptk .aligned_src
-(p7) br.cond.dpnt .unaligned_src
- ;;
-
-/* main loop body in jump table format */
-#define COPYU(shift)  \
-1:  \
-EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */  \
-EK(.ex_handler, (p16) ld8 r36=[src1],8);  \
- (p17) shrp r35=r33,r34,shift;; /* 1 */  \
-EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */  \
- nop.m 0;  \
- (p16) shrp r38=r36,r37,shift;  \
-EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */  \
-EK(.ex_handler, (p17) st8 [dst1]=r39,8);  \
- br.ctop.dptk.few 1b;;  \
- (p7) add src1=-8,src1; /* back out for <8 byte case */  \
- shrp r21=r22,r38,shift; /* speculative work */  \
- br.sptk.few .unaligned_src_tail /* branch out of jump table */  \
- ;;
- TEXT_ALIGN(32)
-.jump_table:
- COPYU(8) // unaligned cases
-.jmp1:
- COPYU(16)
- COPYU(24)
- COPYU(32)
- COPYU(40)
- COPYU(48)
- COPYU(56)
-
-#undef A
-#undef B
-#undef C
-#undef D
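
The comment before the jump table notes that each COPYU expansion is padded to a power-of-two size (1 << LOOP_SIZE bytes) so the entry matching the source alignment can be reached with a shift instead of a multiply. A minimal C sketch of that indexing follows; jump_entry() and table_base are illustrative names only, not part of the deleted code.

    /* Hedged sketch of shift-based jump-table indexing: each handler is
     * exactly (1 << LOOP_SIZE) bytes, so alignment N selects entry N-1. */
    #define LOOP_SIZE 6

    static const unsigned char *
    jump_entry(const unsigned char *table_base, unsigned int src_align)
    {
        /* src_align is 1..7; alignment 1 lands on the first entry (.jmp1) */
        return table_base + ((unsigned long)(src_align - 1) << LOOP_SIZE);
    }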
-
-/*
- * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
- * instruction failed in the bundle. The exception algorithm is that we
- * first figure out the faulting address, then detect if there is any
- * progress made on the copy, if so, redo the copy from last known copied
- * location up to the faulting address (exclusive). In the copy_from_user
- * case, remaining byte in kernel buffer will be zeroed.
- *
- * Take copy_from_user as an example, in the code there are multiple loads
- * in a bundle and those multiple loads could span over two pages, the
- * faulting address is calculated as page_round_down(max(src0, src1)).
- * This is based on knowledge that if we can access one byte in a page, we
- * can access any byte in that page.
- *
- * predicate used in the exception handler:
- * p6-p7: direction
- * p10-p11: src faulting addr calculation
- * p12-p13: dst faulting addr calculation
- */
-
-#define A r19
-#define B r20
-#define C r21
-#define D r22
-#define F r28
-
-#define memset_arg0 r32
-#define memset_arg2 r33
-
-#define saved_retval loc0
-#define saved_rtlink loc1
-#define saved_pfs_stack loc2
-
-.ex_hndlr_s:
- add src0=8,src0
- br.sptk .ex_handler
- ;;
-.ex_hndlr_d:
- add dst0=8,dst0
- br.sptk .ex_handler
- ;;
-.ex_hndlr_lcpy_1:
- mov src1=src_pre_mem
- mov dst1=dst_pre_mem
- cmp.gtu p10,p11=src_pre_mem,saved_in1
- cmp.gtu p12,p13=dst_pre_mem,saved_in0
- ;;
-(p10) add src0=8,saved_in1
-(p11) mov src0=saved_in1
-(p12) add dst0=8,saved_in0
-(p13) mov dst0=saved_in0
- br.sptk .ex_handler
-.ex_handler_lcpy:
- // in line_copy block, the preload addresses should always ahead
- // of the other two src/dst pointers. Furthermore, src1/dst1 should
- // always ahead of src0/dst0.
- mov src1=src_pre_mem
- mov dst1=dst_pre_mem
-.ex_handler:
- mov pr=saved_pr,-1 // first restore pr, lc, and pfs
- mov ar.lc=saved_lc
- mov ar.pfs=saved_pfs
- ;;
-.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs
- cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction
- cmp.ltu p10,p11=src0,src1
- cmp.ltu p12,p13=dst0,dst1
- fcmp.eq p8,p0=f6,f0 // is it memcpy?
- mov tmp = dst0
- ;;
-(p11) mov src1 = src0 // pick the larger of the two
-(p13) mov dst0 = dst1 // make dst0 the smaller one
-(p13) mov dst1 = tmp // and dst1 the larger one
- ;;
-(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
-(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
- ;;
-(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store
-(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load
- mov retval=saved_in2
-(p8) ld1 tmp=[src1] // force an oops for memcpy call
-(p8) st1 [dst1]=r0 // force an oops for memcpy call
-(p14) br.ret.sptk.many rp
-
-/*
- * The remaining byte to copy is calculated as:
- *
- * A = (faulting_addr - orig_src) -> len to faulting ld address
- * or
- * (faulting_addr - orig_dst) -> len to faulting st address
- * B = (cur_dst - orig_dst) -> len copied so far
- * C = A - B -> len need to be copied
- * D = orig_len - A -> len need to be zeroed
- */
-(p6) sub A = F, saved_in0
-(p7) sub A = F, saved_in1
- clrrrb
- ;;
- alloc saved_pfs_stack=ar.pfs,3,3,3,0
- sub B = dst0, saved_in0 // how many byte copied so far
- ;;
- sub C = A, B
- sub D = saved_in2, A
- ;;
- cmp.gt p8,p0=C,r0 // more than 1 byte?
- add memset_arg0=saved_in0, A
-(p6) mov memset_arg2=0 // copy_to_user should not call memset
-(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed
- mov r8=0
- mov saved_retval = D
- mov saved_rtlink = b0
-
- add out0=saved_in0, B
- add out1=saved_in1, B
- mov out2=C
-(p8) br.call.sptk.few b0=__copy_user // recursive call
- ;;
-
- add saved_retval=saved_retval,r8 // above might return non-zero value
- cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte?
- mov out0=memset_arg0 // *s
- mov out1=r0 // c
- mov out2=memset_arg2 // n
-(p8) br.call.sptk.few b0=memset
- ;;
-
- mov retval=saved_retval
- mov ar.pfs=saved_pfs_stack
- mov b0=saved_rtlink
- br.ret.sptk.many rp
-
-/* end of McKinley specific optimization */
-END(__copy_user)
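
The exception path of __copy_user above reduces fault recovery to four lengths: A (distance from the original pointer to the faulting page), B (bytes already stored), C = A - B (bytes still to copy) and D = len - A (tail to zero for copy_from_user). A hedged C restatement of that arithmetic, with every identifier invented for illustration:

    /* Illustrative only: mirrors the A/B/C/D bookkeeping described in the
     * deleted exception-handler comment; not code from the patch. */
    struct copy_fault {
        unsigned long to_fault; /* A: faulting addr - original src (or dst) */
        unsigned long copied;   /* B: current dst - original dst            */
        unsigned long to_redo;  /* C = A - B: bytes still to be copied      */
        unsigned long to_zero;  /* D = len - A: kernel-buffer tail to zero  */
    };

    static struct copy_fault
    classify_fault(unsigned long fault_addr, unsigned long orig_ptr,
                   unsigned long cur_dst, unsigned long orig_dst,
                   unsigned long orig_len)
    {
        struct copy_fault f;
        f.to_fault = fault_addr - orig_ptr;
        f.copied   = cur_dst - orig_dst;
        f.to_redo  = f.to_fault - f.copied;
        f.to_zero  = orig_len - f.to_fault;
        return f;
    }

The handler then re-runs __copy_user over the C bytes and, in the copy_from_user case, memsets the remaining D bytes, exactly as the recursive call and the memset call above show.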
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/memset.S
--- a/xen/arch/ia64/linux/lib/memset.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,362 +0,0 @@
-/* Optimized version of the standard memset() function.
-
- Copyright (c) 2002 Hewlett-Packard Co/CERN
- Sverre Jarp <Sverre.Jarp@xxxxxxx>
-
- Return: dest
-
- Inputs:
- in0: dest
- in1: value
- in2: count
-
- The algorithm is fairly straightforward: set byte by byte until we
- get to a 16B-aligned address, then loop on 128 B chunks using an
- early store as prefetching, then loop on 32B chunks, then clear remaining
- words, finally clear remaining bytes.
- Since a stf.spill f0 can store 16B in one go, we use this instruction
- to get peak speed when value = 0. */
-
-#include <asm/asmmacro.h>
-#undef ret
-
-#define dest in0
-#define value in1
-#define cnt in2
-
-#define tmp r31
-#define save_lc r30
-#define ptr0 r29
-#define ptr1 r28
-#define ptr2 r27
-#define ptr3 r26
-#define ptr9 r24
-#define loopcnt r23
-#define linecnt r22
-#define bytecnt r21
-
-#define fvalue f6
-
-// This routine uses only scratch predicate registers (p6 - p15)
-#define p_scr p6 // default register for same-cycle branches
-#define p_nz p7
-#define p_zr p8
-#define p_unalgn p9
-#define p_y p11
-#define p_n p12
-#define p_yy p13
-#define p_nn p14
-
-#define MIN1 15
-#define MIN1P1HALF 8
-#define LINE_SIZE 128
-#define LSIZE_SH 7 // shift amount
-#define PREF_AHEAD 8
-
-GLOBAL_ENTRY(memset)
-{ .mmi
- .prologue
- alloc tmp = ar.pfs, 3, 0, 0, 0
- lfetch.nt1 [dest] //
- .save ar.lc, save_lc
- mov.i save_lc = ar.lc
- .body
-} { .mmi
- mov ret0 = dest // return value
- cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
- cmp.eq p_scr, p0 = cnt, r0
-;; }
-{ .mmi
- and ptr2 = -(MIN1+1), dest // aligned address
- and tmp = MIN1, dest // prepare to check for correct alignment
- tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
-} { .mib
- mov ptr1 = dest
- mux1 value = value, @brcst // create 8 identical bytes in word
-(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
-;; }
-{ .mib
- cmp.ne p_unalgn, p0 = tmp, r0 //
-} { .mib
- sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt
- cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
-(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
-;; }
-{ .mmi
-(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
-(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
-;; }
-{ .mib
-(p_y) add cnt = -8, cnt //
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
-} { .mib
-(p_y) st8 [ptr2] = value,-4 //
-(p_n) add ptr2 = 4, ptr2 //
-;; }
-{ .mib
-(p_yy) add cnt = -4, cnt //
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
-} { .mib
-(p_yy) st4 [ptr2] = value,-2 //
-(p_nn) add ptr2 = 2, ptr2 //
-;; }
-{ .mmi
- mov tmp = LINE_SIZE+1 // for compare
-(p_y) add cnt = -2, cnt //
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
-} { .mmi
- setf.sig fvalue=value // transfer value to FLP side
-(p_y) st2 [ptr2] = value,-1 //
-(p_n) add ptr2 = 1, ptr2 //
-;; }
-
-{ .mmi
-(p_yy) st1 [ptr2] = value //
- cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
-} { .mbb
-(p_yy) add cnt = -1, cnt //
-(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
-;; }
-
-{ .mib
- nop.m 0
- shr.u linecnt = cnt, LSIZE_SH
-(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
-;; }
-
- TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later
-{ .mmi
- and tmp = -(LINE_SIZE), cnt // compute end of range
- mov ptr9 = ptr1 // used for prefetching
- and cnt = (LINE_SIZE-1), cnt // remainder
-} { .mmi
- mov loopcnt = PREF_AHEAD-1 // default prefetch loop
- cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
-;; }
-{ .mmi
-(p_scr) add loopcnt = -1, linecnt //
- add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores)
- add ptr1 = tmp, ptr1 // first address beyond total range
-;; }
-{ .mmi
- add tmp = -1, linecnt // next loop count
- mov.i ar.lc = loopcnt //
-;; }
-.pref_l1a:
-{ .mib
- stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart
- nop.i 0
- br.cloop.dptk.few .pref_l1a
-;; }
-{ .mmi
- add ptr0 = 16, ptr2 // Two stores in parallel
- mov.i ar.lc = tmp //
-;; }
-.l1ax:
- { .mmi
- stf8 [ptr2] = fvalue, 8
- stf8 [ptr0] = fvalue, 8
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 24
- stf8 [ptr0] = fvalue, 24
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 8
- stf8 [ptr0] = fvalue, 8
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 24
- stf8 [ptr0] = fvalue, 24
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 8
- stf8 [ptr0] = fvalue, 8
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 24
- stf8 [ptr0] = fvalue, 24
- ;; }
- { .mmi
- stf8 [ptr2] = fvalue, 8
- stf8 [ptr0] = fvalue, 32
- cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
- ;; }
-{ .mmb
- stf8 [ptr2] = fvalue, 24
-(p_scr) stf8 [ptr9] = fvalue, 128
- br.cloop.dptk.few .l1ax
-;; }
-{ .mbb
- cmp.le p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
- br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
-;; }
-
- TEXT_ALIGN(32)
-.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later
-{ .mmi
- and tmp = -(LINE_SIZE), cnt // compute end of range
- mov ptr9 = ptr1 // used for prefetching
- and cnt = (LINE_SIZE-1), cnt // remainder
-} { .mmi
- mov loopcnt = PREF_AHEAD-1 // default prefetch loop
- cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
-;; }
-{ .mmi
-(p_scr) add loopcnt = -1, linecnt
- add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
- add ptr1 = tmp, ptr1 // first address beyond total range
-;; }
-{ .mmi
- add tmp = -1, linecnt // next loop count
- mov.i ar.lc = loopcnt
-;; }
-.pref_l1b:
-{ .mib
- stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
- nop.i 0
- br.cloop.dptk.few .pref_l1b
-;; }
-{ .mmi
- add ptr0 = 16, ptr2 // Two stores in parallel
- mov.i ar.lc = tmp
-;; }
-.l1bx:
- { .mmi
- stf.spill [ptr2] = f0, 32
- stf.spill [ptr0] = f0, 32
- ;; }
- { .mmi
- stf.spill [ptr2] = f0, 32
- stf.spill [ptr0] = f0, 32
- ;; }
- { .mmi
- stf.spill [ptr2] = f0, 32
- stf.spill [ptr0] = f0, 64
- cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
- ;; }
-{ .mmb
- stf.spill [ptr2] = f0, 32
-(p_scr) stf.spill [ptr9] = f0, 128
- br.cloop.dptk.few .l1bx
-;; }
-{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment //
-;; }
-
-.fraction_of_line:
-{ .mib
- add ptr2 = 16, ptr1
- shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
-;; }
-{ .mib
- cmp.eq p_scr, p0 = loopcnt, r0
- add loopcnt = -1, loopcnt
-(p_scr) br.cond.dpnt.many .store_words
-;; }
-{ .mib
- and cnt = 0x1f, cnt // compute the remaining cnt
- mov.i ar.lc = loopcnt
-;; }
- TEXT_ALIGN(32)
-.l2: // ------------------------------------ // L2A: store 32B in 2 cycles
-{ .mmb
- stf8 [ptr1] = fvalue, 8
- stf8 [ptr2] = fvalue, 8
-;; } { .mmb
- stf8 [ptr1] = fvalue, 24
- stf8 [ptr2] = fvalue, 24
- br.cloop.dptk.many .l2
-;; }
-.store_words:
-{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
-;; }
-
-{ .mmi
- stf8 [ptr1] = fvalue, 8 // store
- cmp.le p_y, p_n = 16, cnt
- add cnt = -8, cnt // subtract
-;; }
-{ .mmi
-(p_y) stf8 [ptr1] = fvalue, 8 // store
-(p_y) cmp.le.unc p_yy, p_nn = 16, cnt
-(p_y) add cnt = -8, cnt // subtract
-;; }
-{ .mmi // store
-(p_yy) stf8 [ptr1] = fvalue, 8
-(p_yy) add cnt = -8, cnt // subtract
-;; }
-
-.move_bytes_from_alignment:
-{ .mib
- cmp.eq p_scr, p0 = cnt, r0
- tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
-(p_scr) br.cond.dpnt.few .restore_and_exit
-;; }
-{ .mib
-(p_y) st4 [ptr1] = value,4
- tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
-;; }
-{ .mib
-(p_yy) st2 [ptr1] = value,2
- tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
-;; }
-
-{ .mib
-(p_y) st1 [ptr1] = value
-;; }
-.restore_and_exit:
-{ .mib
- nop.m 0
- mov.i ar.lc = save_lc
- br.ret.sptk.many rp
-;; }
-
-.move_bytes_unaligned:
-{ .mmi
- .pred.rel "mutex",p_y, p_n
- .pred.rel "mutex",p_yy, p_nn
-(p_n) cmp.le p_yy, p_nn = 4, cnt
-(p_y) cmp.le p_yy, p_nn = 5, cnt
-(p_n) add ptr2 = 2, ptr1
-} { .mmi
-(p_y) add ptr2 = 3, ptr1
-(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left]
-(p_y) add cnt = -1, cnt
-;; }
-{ .mmi
-(p_yy) cmp.le.unc p_y, p0 = 8, cnt
- add ptr3 = ptr1, cnt // prepare last store
- mov.i ar.lc = save_lc
-} { .mmi
-(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (or less) left]
-(p_yy) add cnt = -4, cnt
-;; }
-{ .mmi
-(p_y) cmp.le.unc p_yy, p0 = 8, cnt
- add ptr3 = -1, ptr3 // last store
- tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
-} { .mmi
-(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left]
-(p_y) add cnt = -4, cnt
-;; }
-{ .mmi
-(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left]
- tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
-} { .mmi
-(p_yy) add cnt = -4, cnt
-;; }
-{ .mmb
-(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
-(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
- br.ret.sptk.many rp
-}
-END(memset)
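
The header comment of the deleted memset.S describes a tiered strategy: store byte by byte up to a 16-byte boundary, stream 128-byte cache lines with an early store used as a prefetch, then fall back to word and byte tails, with stf.spill f0 as a 16-byte fast path when the value is zero. A rough C sketch of that shape (not the deleted implementation, and without the prefetch and zero special case):

    #include <stddef.h>
    #include <stdint.h>

    /* Rough sketch of the tiering described above; the real routine uses
     * stf8/stf.spill stores and explicit lfetch-style prefetching. */
    void *memset_sketch(void *dest, int value, size_t count)
    {
        unsigned char *p = dest;
        uint64_t pattern = 0x0101010101010101ULL * (unsigned char)value;

        while (((uintptr_t)p & 15) && count) {        /* crawl to 16B boundary */
            *p++ = (unsigned char)value;
            count--;
        }
        for (; count >= 128; count -= 128, p += 128)  /* whole cache lines */
            for (int i = 0; i < 128; i += 8)
                *(uint64_t *)(p + i) = pattern;
        for (; count >= 8; count -= 8, p += 8)        /* word tail */
            *(uint64_t *)p = pattern;
        while (count--)                               /* byte tail */
            *p++ = (unsigned char)value;
        return dest;
    }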
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/strlen.S
--- a/xen/arch/ia64/linux/lib/strlen.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,192 +0,0 @@
-/*
- *
- * Optimized version of the standard strlen() function
- *
- *
- * Inputs:
- * in0 address of string
- *
- * Outputs:
- * ret0 the number of characters in the string (0 if empty string)
- * does not count the \0
- *
- * Copyright (C) 1999, 2001 Hewlett-Packard Co
- * Stephane Eranian <eranian@xxxxxxxxxx>
- *
- * 09/24/99 S.Eranian add speculation recovery code
- */
-
-#include <asm/asmmacro.h>
-
-//
-//
-// This is an enhanced version of the basic strlen. it includes a combination
-// of compute zero index (czx), parallel comparisons, speculative loads and
-// loop unroll using rotating registers.
-//
-// General Ideas about the algorithm:
-// The goal is to look at the string in chunks of 8 bytes.
-// so we need to do a few extra checks at the beginning because the
-// string may not be 8-byte aligned. In this case we load the 8byte
-// quantity which includes the start of the string and mask the unused
-// bytes with 0xff to avoid confusing czx.
-// We use speculative loads and software pipelining to hide memory
-// latency and do read ahead safely. This way we defer any exception.
-//
-// Because we don't want the kernel to be relying on particular
-// settings of the DCR register, we provide recovery code in case
-// speculation fails. The recovery code is going to "redo" the work using
-// only normal loads. If we still get a fault then we generate a
-// kernel panic. Otherwise we return the strlen as usual.
-//
-// The fact that speculation may fail can be caused, for instance, by
-// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
-// a NaT bit will be set if the translation is not present. The normal
-// load, on the other hand, will cause the translation to be inserted
-// if the mapping exists.
-//
-// It should be noted that we execute recovery code only when we need
-// to use the data that has been speculatively loaded: we don't execute
-// recovery code on pure read ahead data.
-//
-// Remarks:
-// - the cmp r0,r0 is used as a fast way to initialize a predicate
-// register to 1. This is required to make sure that we get the parallel
-// compare correct.
-//
-// - we don't use the epilogue counter to exit the loop but we need to set
-// it to zero beforehand.
-//
-// - after the loop we must test for Nat values because neither the
-// czx nor cmp instruction raise a NaT consumption fault. We must be
-// careful not to look too far for a Nat for which we don't care.
-// For instance we don't need to look at a NaT in val2 if the zero byte
-// was in val1.
-//
-// - Clearly performance tuning is required.
-//
-//
-//
-#define saved_pfs r11
-#define tmp r10
-#define base r16
-#define orig r17
-#define saved_pr r18
-#define src r19
-#define mask r20
-#define val r21
-#define val1 r22
-#define val2 r23
-
-GLOBAL_ENTRY(strlen)
- .prologue
- .save ar.pfs, saved_pfs
- alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
-
- .rotr v[2], w[2] // declares our 4 aliases
-
- extr.u tmp=in0,0,3 // tmp=least significant 3 bits
- mov orig=in0 // keep trackof initial byte address
- dep src=0,in0,0,3 // src=8byte-aligned in0 address
- .save pr, saved_pr
- mov saved_pr=pr // preserve predicates (rotation)
- ;;
-
- .body
-
- ld8 v[1]=[src],8 // must not speculate: can fail here
- shl tmp=tmp,3 // multiply by 8bits/byte
- mov mask=-1 // our mask
- ;;
- ld8.s w[1]=[src],8 // speculatively load next
- cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and
- sub tmp=64,tmp // how many bits to shift our mask on the right
- ;;
- shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
- mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
- ;;
- add base=-16,src // keep track of aligned base
- or v[1]=v[1],mask // now we have a safe initial byte pattern
- ;;
-1:
- ld8.s v[0]=[src],8 // speculatively load next
- czx1.r val1=v[1] // search 0 byte from right
- czx1.r val2=w[1] // search 0 byte from right following 8bytes
- ;;
- ld8.s w[0]=[src],8 // speculatively load next to next
- cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
- cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8
-(p6) br.wtop.dptk 1b // loop until p6 == 0
- ;;
- //
- // We must return try the recovery code iff
- // val1_is_nat || (val1==8 && val2_is_nat)
- //
- // XXX Fixme
- // - there must be a better way of doing the test
- //
- cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
- tnat.nz p6,p7=val1 // test NaT on val1
-(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
- ;;
- //
- // if we come here p7 is true, i.e., initialized for // cmp
- //
- cmp.eq.and p7,p0=8,val1// val1==8?
- tnat.nz.and p7,p0=val2 // test NaT if val2
-(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
- ;;
-(p8) mov val1=val2 // the other test got us out of the loop
-(p8) adds src=-16,src // correct position when 3 ahead
-(p9) adds src=-24,src // correct position when 4 ahead
- ;;
- sub ret0=src,orig // distance from base
- sub tmp=8,val1 // which byte in word
- mov pr=saved_pr,0xffffffffffff0000
- ;;
- sub ret0=ret0,tmp // adjust
- mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
- br.ret.sptk.many rp // end of normal execution
-
- //
- // Outlined recovery code when speculation failed
- //
- // This time we don't use speculation and rely on the normal exception
- // mechanism. that's why the loop is not as good as the previous one
- // because read ahead is not possible
- //
- // IMPORTANT:
- // Please note that in the case of strlen() as opposed to strlen_user()
- // we don't use the exception mechanism, as this function is not
- // supposed to fail. If that happens it means we have a bug and the
- // code will cause of kernel fault.
- //
- // XXX Fixme
- // - today we restart from the beginning of the string instead
- // of trying to continue where we left off.
- //
-.recover:
- ld8 val=[base],8 // will fail if unrecoverable fault
- ;;
- or val=val,mask // remask first bytes
- cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
- ;;
- //
- // ar.ec is still zero here
- //
-2:
-(p6) ld8 val=[base],8 // will fail if unrecoverable fault
- ;;
- czx1.r val1=val // search 0 byte from right
- ;;
- cmp.eq p6,p0=8,val1 // val1==8 ?
-(p6) br.wtop.dptk 2b // loop until p6 == 0
- ;; // (avoid WAW on p63)
- sub ret0=base,orig // distance from base
- sub tmp=8,val1
- mov pr=saved_pr,0xffffffffffff0000
- ;;
- sub ret0=ret0,tmp // length=now - back -1
- mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
- br.ret.sptk.many rp // end of successful recovery code
-END(strlen)
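
The deleted strlen examines eight bytes per iteration with czx1.r over speculatively loaded words, and re-does the work with plain loads in .recover if speculation left a NaT. The portable counterpart of the per-word zero-byte test is the familiar bit trick; a hedged C sketch (not the IA-64 code) is:

    #include <stddef.h>
    #include <stdint.h>

    /* Word-at-a-time strlen sketch: a byte of v is zero iff
     * (v - 0x01..01) & ~v & 0x80..80 is non-zero. */
    size_t strlen_sketch(const char *s)
    {
        const char *p = s;

        while (((uintptr_t)p & 7) != 0) {     /* align so wide loads stay on one page */
            if (*p == '\0')
                return (size_t)(p - s);
            p++;
        }
        for (const uint64_t *w = (const uint64_t *)p; ; w++) {
            uint64_t v = *w;
            if ((v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL) {
                p = (const char *)w;          /* this word holds the terminator */
                break;
            }
        }
        while (*p)                            /* locate the zero byte in the hit word */
            p++;
        return (size_t)(p - s);
    }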
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mm.c
--- a/xen/arch/ia64/mm.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,152 +0,0 @@
-/******************************************************************************
- * arch/ia64/mm.c
- *
- * Copyright (c) 2002-2005 K A Fraser
- * Copyright (c) 2004 Christian Limpach
- * Copyright (c) 2005, Intel Corporation.
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * A description of the x86 page table API:
- *
- * Domains trap to do_mmu_update with a list of update requests.
- * This is a list of (ptr, val) pairs, where the requested operation
- * is *ptr = val.
- *
- * Reference counting of pages:
- * ----------------------------
- * Each page has two refcounts: tot_count and type_count.
- *
- * TOT_COUNT is the obvious reference count. It counts all uses of a
- * physical page frame by a domain, including uses as a page directory,
- * a page table, or simple mappings via a PTE. This count prevents a
- * domain from releasing a frame back to the free pool when it still holds
- * a reference to it.
- *
- * TYPE_COUNT is more subtle. A frame can be put to one of three
- * mutually-exclusive uses: it might be used as a page directory, or a
- * page table, or it may be mapped writable by the domain [of course, a
- * frame may not be used in any of these three ways!].
- * So, type_count is a count of the number of times a frame is being
- * referred to in its current incarnation. Therefore, a page can only
- * change its type when its type count is zero.
- *
- * Pinning the page type:
- * ----------------------
- * The type of a page can be pinned/unpinned with the commands
- * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
- * pinning is not reference counted, so it can't be nested).
- * This is useful to prevent a page's type count falling to zero, at which
- * point safety checks would need to be carried out next time the count
- * is increased again.
- *
- * A further note on writable page mappings:
- * -----------------------------------------
- * For simplicity, the count of writable mappings for a page may not
- * correspond to reality. The 'writable count' is incremented for every
- * PTE which maps the page with the _PAGE_RW flag set. However, for
- * write access to be possible the page directory entry must also have
- * its _PAGE_RW bit set. We do not check this as it complicates the
- * reference counting considerably [consider the case of multiple
- * directory entries referencing a single page table, some with the RW
- * bit set, others not -- it starts getting a bit messy].
- * In normal use, this simplification shouldn't be a problem.
- * However, the logic can be added if required.
- *
- * One more note on read-only page mappings:
- * -----------------------------------------
- * We want domains to be able to map pages for read-only access. The
- * main reason is that page tables and directories should be readable
- * by a domain, but it would not be safe for them to be writable.
- * However, domains have free access to rings 1 & 2 of the Intel
- * privilege model. In terms of page protection, these are considered
- * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
- * read-only restrictions are respected in supervisor mode -- if the
- * bit is clear then any mapped page is writable.
- *
- * We get round this by always setting the WP bit and disallowing
- * updates to it. This is very unlikely to cause a problem for guest
- * OS's, which will generally use the WP bit to simplify copy-on-write
- * implementation (in that case, OS wants a fault when it writes to
- * an application-supplied buffer).
- */
-
-#include <xen/config.h>
-#include <public/xen.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/mm.h>
-#include <xen/errno.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/vmmu.h>
-#include <asm/regionreg.h>
-#include <asm/vmx_mm_def.h>
-/*
- uregs->ptr is virtual address
- uregs->val is pte value
- */
-#ifdef CONFIG_VTI
-int do_mmu_update(mmu_update_t *ureqs,u64 count,u64 *pdone,u64 foreigndom)
-{
- int i,cmd;
- u64 mfn, gpfn;
- VCPU *vcpu;
- mmu_update_t req;
- ia64_rr rr;
- thash_cb_t *hcb;
- thash_data_t entry={0},*ovl;
- vcpu = current;
- search_section_t sections;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- for ( i = 0; i < count; i++ )
- {
- copy_from_user(&req, ureqs, sizeof(req));
- cmd = req.ptr&3;
- req.ptr &= ~3;
- if(cmd ==MMU_NORMAL_PT_UPDATE){
- entry.page_flags = req.val;
- entry.locked = 1;
- entry.tc = 1;
- entry.cl = DSIDE_TLB;
- rr = vmx_vcpu_rr(vcpu, req.ptr);
- entry.ps = rr.ps;
- entry.key = redistribute_rid(rr.rid);
- entry.rid = rr.rid;
- entry.vadr = PAGEALIGN(req.ptr,entry.ps);
- sections.tr = 1;
- sections.tc = 0;
- ovl = thash_find_overlap(hcb, &entry, sections);
- if (ovl) {
- // generate MCA.
- panic("Tlb conflict!!");
- return;
- }
- thash_purge_and_insert(hcb, &entry);
- }else if(cmd == MMU_MACHPHYS_UPDATE){
- mfn = req.ptr >>PAGE_SHIFT;
- gpfn = req.val;
- set_machinetophys(mfn,gpfn);
- }else{
- printf("Unkown command of mmu_update:ptr: %lx,val: %lx
\n",req.ptr,req.val);
- while(1);
- }
- ureqs ++;
- }
- return 0;
-}
-#endif
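
do_mmu_update above packs the command into the two low bits of req.ptr (cmd = req.ptr & 3) and treats the remaining bits as the target address, dispatching on MMU_NORMAL_PT_UPDATE versus MMU_MACHPHYS_UPDATE. The skeleton below sketches only that decoding; the handler stubs, the numeric command values, and the 12-bit page shift are assumptions for illustration, not taken from the patch.

    /* Hypothetical skeleton of the request-decoding loop; the handlers are
     * empty stubs standing in for the real thash/TLB work above. */
    #define MMU_NORMAL_PT_UPDATE 0   /* assumed value */
    #define MMU_MACHPHYS_UPDATE  1   /* assumed value */

    struct mmu_req { unsigned long ptr; unsigned long val; };

    static void handle_pt_update(unsigned long va, unsigned long pte) { (void)va; (void)pte; }
    static void handle_machphys_update(unsigned long mfn, unsigned long gpfn) { (void)mfn; (void)gpfn; }

    static int process_mmu_updates(const struct mmu_req *reqs, unsigned long count)
    {
        for (unsigned long i = 0; i < count; i++, reqs++) {
            unsigned long cmd = reqs->ptr & 3;       /* command in the low bits */
            unsigned long ptr = reqs->ptr & ~3UL;    /* address in the rest     */

            if (cmd == MMU_NORMAL_PT_UPDATE)
                handle_pt_update(ptr, reqs->val);              /* install PTE value */
            else if (cmd == MMU_MACHPHYS_UPDATE)
                handle_machphys_update(ptr >> 12, reqs->val);  /* mfn -> gpfn; PAGE_SHIFT assumed 12 */
            else
                return -1;                                     /* unknown command */
        }
        return 0;
    }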
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mm_init.c
--- a/xen/arch/ia64/mm_init.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,547 +0,0 @@
-/*
- * Initialize MMU support.
- *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#ifdef XEN
-#include <xen/sched.h>
-#endif
-#include <linux/bootmem.h>
-#include <linux/efi.h>
-#include <linux/elf.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/module.h>
-#ifndef XEN
-#include <linux/personality.h>
-#endif
-#include <linux/reboot.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#ifndef XEN
-#include <linux/proc_fs.h>
-#endif
-
-#ifndef XEN
-#include <asm/a.out.h>
-#endif
-#include <asm/bitops.h>
-#include <asm/dma.h>
-#ifndef XEN
-#include <asm/ia32.h>
-#endif
-#include <asm/io.h>
-#include <asm/machvec.h>
-#include <asm/numa.h>
-#include <asm/patch.h>
-#include <asm/pgalloc.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-#include <asm/tlb.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-#include <asm/mca.h>
-
-#ifndef XEN
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-#endif
-
-extern void ia64_tlb_init (void);
-
-unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-unsigned long vmalloc_end = VMALLOC_END_INIT;
-EXPORT_SYMBOL(vmalloc_end);
-struct page *vmem_map;
-EXPORT_SYMBOL(vmem_map);
-#endif
-
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr; /* map entry for zero page */
-EXPORT_SYMBOL(zero_page_memmap_ptr);
-
-#ifdef XEN
-void *high_memory;
-EXPORT_SYMBOL(high_memory);
-
-/////////////////////////////////////////////
-// following from linux-2.6.7/mm/mmap.c
-/* description of effects of mapping type and prot in current implementation.
- * this is due to the limited x86 page protection hardware. The expected
- * behavior is in parens:
- *
- * map_type prot
- * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
- * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
- * w: (no) no w: (no) no w: (yes) yes w: (no) no
- * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
- *
- * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
- * w: (no) no w: (no) no w: (copy) copy w: (no) no
- * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
- *
- */
-pgprot_t protection_map[16] = {
- __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
- __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
-void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
-{
- printf("insert_vm_struct: called, not implemented yet\n");
-}
-
-/////////////////////////////////////////////
-//following from linux/mm/memory.c
-
-#ifndef __ARCH_HAS_4LEVEL_HACK
-/*
- * Allocate page upper directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
- */
-pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
-{
- pud_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pud_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pgd_present(*pgd)) {
- pud_free(new);
- goto out;
- }
- pgd_populate(mm, pgd, new);
- out:
- return pud_offset(pgd, address);
-}
-
-/*
- * Allocate page middle directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
- */
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
- pmd_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pmd_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pud_present(*pud)) {
- pmd_free(new);
- goto out;
- }
- pud_populate(mm, pud, new);
- out:
- return pmd_offset(pud, address);
-}
-#endif
-
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
-{
- if (!pmd_present(*pmd)) {
- struct page *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pte_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
-
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pmd_present(*pmd)) {
- pte_free(new);
- goto out;
- }
- inc_page_state(nr_page_table_pages);
- pmd_populate(mm, pmd, new);
- }
-out:
- return pte_offset_map(pmd, address);
-}
-/////////////////////////////////////////////
-#endif /* XEN */
-
-void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
-{
- unsigned long addr;
- struct page *page;
-
- if (!pte_exec(pte))
- return; /* not an executable page... */
-
- page = pte_page(pte);
- /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
- addr = (unsigned long) page_address(page);
-
- if (test_bit(PG_arch_1, &page->flags))
- return; /* i-cache is already coherent with d-cache */
-
- flush_icache_range(addr, addr + PAGE_SIZE);
- set_bit(PG_arch_1, &page->flags); /* mark page as clean */
-}
-
-inline void
-ia64_set_rbs_bot (void)
-{
-#ifdef XEN
- unsigned stack_size = MAX_USER_STACK_SIZE;
-#else
- unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16;
-#endif
-
- if (stack_size > MAX_USER_STACK_SIZE)
- stack_size = MAX_USER_STACK_SIZE;
- current->arch._thread.rbs_bot = STACK_TOP - stack_size;
-}
-
-/*
- * This performs some platform-dependent address space initialization.
- * On IA-64, we want to setup the VM area for the register backing
- * store (which grows upwards) and install the gateway page which is
- * used for signal trampolines, etc.
- */
-void
-ia64_init_addr_space (void)
-{
-#ifdef XEN
-printf("ia64_init_addr_space: called, not implemented\n");
-#else
- struct vm_area_struct *vma;
-
- ia64_set_rbs_bot();
-
- /*
- * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
- * the problem. When the process attempts to write to the register backing store
- * for the first time, it will get a SEGFAULT in this case.
- */
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (vma) {
- memset(vma, 0, sizeof(*vma));
- vma->vm_mm = current->mm;
- vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK;
- vma->vm_end = vma->vm_start + PAGE_SIZE;
- vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
- vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
- insert_vm_struct(current->mm, vma);
- }
-
- /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
- if (!(current->personality & MMAP_PAGE_ZERO)) {
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (vma) {
- memset(vma, 0, sizeof(*vma));
- vma->vm_mm = current->mm;
- vma->vm_end = PAGE_SIZE;
- vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
- vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
- insert_vm_struct(current->mm, vma);
- }
- }
-#endif
-}
-
-setup_gate (void)
-{
- printk("setup_gate not-implemented.\n");
-}
-
-void __devinit
-ia64_mmu_init (void *my_cpu_data)
-{
- unsigned long psr, pta, impl_va_bits;
- extern void __devinit tlb_init (void);
- int cpu;
-
-#ifdef CONFIG_DISABLE_VHPT
-# define VHPT_ENABLE_BIT 0
-#else
-# define VHPT_ENABLE_BIT 1
-#endif
-
- /* Pin mapping for percpu area into TLB */
- psr = ia64_clear_ic();
- ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
- pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
- PERCPU_PAGE_SHIFT);
-
- ia64_set_psr(psr);
- ia64_srlz_i();
-
- /*
- * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
- * address space. The IA-64 architecture guarantees that at least 50 bits of
- * virtual address space are implemented but if we pick a large enough page size
- * (e.g., 64KB), the mapped address space is big enough that it will overlap with
- * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages,
- * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
- * problem in practice. Alternatively, we could truncate the top of the mapped
- * address space to not permit mappings that would overlap with the VMLPT.
- * --davidm 00/12/06
- */
-# define pte_bits 3
-# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
- /*
- * The virtual page table has to cover the entire implemented address space within
- * a region even though not all of this space may be mappable. The reason for
- * this is that the Access bit and Dirty bit fault handlers perform
- * non-speculative accesses to the virtual page table, so the address range of the
- * virtual page table itself needs to be covered by virtual page table.
- */
-# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
-# define POW2(n) (1ULL << (n))
-
- impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
-
- if (impl_va_bits < 51 || impl_va_bits > 61)
- panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits
- 1);
-
-#ifdef XEN
- vhpt_init();
-#endif
-#if 0
- /* place the VMLPT at the end of each page-table mapped region: */
- pta = POW2(61) - POW2(vmlpt_bits);
-
- if (POW2(mapped_space_bits) >= pta)
- panic("mm/init: overlap between virtually mapped linear page
table and "
- "mapped kernel space!");
- /*
- * Set the (virtually mapped linear) page table address. Bit
- * 8 selects between the short and long format, bits 2-7 the
- * size of the table, and bit 0 whether the VHPT walker is
- * enabled.
- */
- ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
-#endif
- ia64_tlb_init();
-
-#ifdef CONFIG_HUGETLB_PAGE
- ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
- ia64_srlz_d();
-#endif
-
- cpu = smp_processor_id();
-
-#ifndef XEN
- /* mca handler uses cr.lid as key to pick the right entry */
- ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
-
- /* insert this percpu data information into our list for MCA recovery purposes */
- ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
- /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */
- ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
- ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
- ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
- ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
- ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
-#endif
-}
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-
-int
-create_mem_map_page_table (u64 start, u64 end, void *arg)
-{
- unsigned long address, start_page, end_page;
- struct page *map_start, *map_end;
- int node;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
-
- map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
- map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
-
- start_page = (unsigned long) map_start & PAGE_MASK;
- end_page = PAGE_ALIGN((unsigned long) map_end);
- node = paddr_to_nid(__pa(start));
-
- for (address = start_page; address < end_page; address += PAGE_SIZE) {
- pgd = pgd_offset_k(address);
- if (pgd_none(*pgd))
- pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
- pmd = pmd_offset(pgd, address);
-
- if (pmd_none(*pmd))
- pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
- pte = pte_offset_kernel(pmd, address);
-
- if (pte_none(*pte))
- set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
- PAGE_KERNEL));
- }
- return 0;
-}
-
-struct memmap_init_callback_data {
- struct page *start;
- struct page *end;
- int nid;
- unsigned long zone;
-};
-
-static int
-virtual_memmap_init (u64 start, u64 end, void *arg)
-{
- struct memmap_init_callback_data *args;
- struct page *map_start, *map_end;
-
- args = (struct memmap_init_callback_data *) arg;
-
- map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
- map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
-
- if (map_start < args->start)
- map_start = args->start;
- if (map_end > args->end)
- map_end = args->end;
-
- /*
- * We have to initialize "out of bounds" struct page elements that fit completely
- * on the same pages that were allocated for the "in bounds" elements because they
- * may be referenced later (and found to be "reserved").
- */
- map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
- map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
- / sizeof(struct page));
-
- if (map_start < map_end)
- memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
- args->nid, args->zone, page_to_pfn(map_start));
- return 0;
-}
-
-void
-memmap_init (struct page *start, unsigned long size, int nid,
- unsigned long zone, unsigned long start_pfn)
-{
- if (!vmem_map)
- memmap_init_zone(start, size, nid, zone, start_pfn);
- else {
- struct memmap_init_callback_data args;
-
- args.start = start;
- args.end = start + size;
- args.nid = nid;
- args.zone = zone;
-
- efi_memmap_walk(virtual_memmap_init, &args);
- }
-}
-
-int
-ia64_pfn_valid (unsigned long pfn)
-{
- char byte;
- struct page *pg = pfn_to_page(pfn);
-
- return (__get_user(byte, (char *) pg) == 0)
- && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
- || (__get_user(byte, (char *) (pg + 1) - 1) == 0));
-}
-EXPORT_SYMBOL(ia64_pfn_valid);
-
-int
-find_largest_hole (u64 start, u64 end, void *arg)
-{
- u64 *max_gap = arg;
-
- static u64 last_end = PAGE_OFFSET;
-
- /* NOTE: this algorithm assumes efi memmap table is ordered */
-
-#ifdef XEN
-//printf("find_largest_hole:
start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg);
-#endif
- if (*max_gap < (start - last_end))
- *max_gap = start - last_end;
- last_end = end;
-#ifdef XEN
-//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end);
-#endif
- return 0;
-}
-#endif /* CONFIG_VIRTUAL_MEM_MAP */
-
-static int
-count_reserved_pages (u64 start, u64 end, void *arg)
-{
- unsigned long num_reserved = 0;
- unsigned long *count = arg;
-
- for (; start < end; start += PAGE_SIZE)
- if (PageReserved(virt_to_page(start)))
- ++num_reserved;
- *count += num_reserved;
- return 0;
-}
-
-/*
- * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
- * system call handler. When this option is in effect, all fsyscalls will end up bubbling
- * down into the kernel and calling the normal (heavy-weight) syscall handler. This is
- * useful for performance testing, but conceivably could also come in handy for debugging
- * purposes.
- */
-
-static int nolwsys;
-
-static int __init
-nolwsys_setup (char *s)
-{
- nolwsys = 1;
- return 1;
-}
-
-__setup("nolwsys", nolwsys_setup);
-
-void
-mem_init (void)
-{
-#ifdef CONFIG_PCI
- /*
- * This needs to be called _after_ the command line has been parsed but _before_
- * any drivers that may need the PCI DMA interface are initialized or bootmem has
- * been freed.
- */
- platform_dma_init();
-#endif
-
-}
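
__pud_alloc, __pmd_alloc and pte_alloc_map in the deleted mm_init.c all follow the same pattern: drop mm->page_table_lock around an allocation that may sleep, retake it, and re-check whether another CPU populated the entry in the meantime, freeing the duplicate if so. A generic sketch of that pattern using pthreads; the names are hypothetical and the allocation size arbitrary, so this is not the kernel API.

    #include <pthread.h>
    #include <stdlib.h>

    struct slot { void *entry; };

    /* Caller holds *lock on entry and on return, just as the page-table
     * allocators above hold page_table_lock. */
    void *slot_alloc(struct slot *s, pthread_mutex_t *lock)
    {
        void *new_entry;

        pthread_mutex_unlock(lock);          /* allocation may sleep */
        new_entry = calloc(1, 64);
        pthread_mutex_lock(lock);
        if (!new_entry)
            return NULL;

        if (s->entry) {                      /* raced: somebody else filled it */
            free(new_entry);
            return s->entry;
        }
        s->entry = new_entry;
        return s->entry;
    }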
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mmio.c
--- a/xen/arch/ia64/mmio.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,515 +0,0 @@
-
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
- */
-
-#include <linux/sched.h>
-#include <asm/tlb.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/gcc_intrin.h>
-#include <linux/interrupt.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/privop.h>
-#include <asm/types.h>
-#include <public/io/ioreq.h>
-#include <asm/mm.h>
-#include <asm/vmx.h>
-
-/*
-struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base)
-{
- int i;
- for (i=0; mio_base[i].iot != NOT_IO; i++ ) {
- if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end )
- return &mio_base[i];
- }
- return NULL;
-}
-*/
-
-#define PIB_LOW_HALF(ofst) !(ofst&(1<<20))
-#define PIB_OFST_INTA 0x1E0000
-#define PIB_OFST_XTP 0x1E0008
-
-static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma)
-{
- switch (pib_off) {
- case PIB_OFST_INTA:
- panic("Undefined write on PIB INTA\n");
- break;
- case PIB_OFST_XTP:
- if ( s == 1 && ma == 4 /* UC */) {
- vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src;
- }
- else {
- panic("Undefined write on PIB XTP\n");
- }
- break;
- default:
- if ( PIB_LOW_HALF(pib_off) ) { // lower half
- if ( s != 8 || ma != 0x4 /* UC */ ) {
- panic("Undefined IPI-LHF write with s %d, ma %d!\n", s, ma);
- }
- else {
- write_ipi(vcpu, pib_off, *(uint64_t *)src);
- // TODO for SM-VP
- }
- }
- else { // upper half
- printf("IPI-UHF write %lx\n",pib_off);
- panic("Not support yet for SM-VP\n");
- }
- break;
- }
-}
-
-static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma)
-{
- switch (pib_off) {
- case PIB_OFST_INTA:
- // todo --- emit on processor system bus.
- if ( s == 1 && ma == 4) { // 1 byte load
- // TODO: INTA read from IOSAPIC
- }
- else {
- panic("Undefined read on PIB INTA\n");
- }
- break;
- case PIB_OFST_XTP:
- if ( s == 1 && ma == 4) {
- *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp;
- }
- else {
- panic("Undefined read on PIB XTP\n");
- }
- break;
- default:
- if ( PIB_LOW_HALF(pib_off) ) { // lower half
- if ( s != 8 || ma != 4 ) {
- panic("Undefined IPI-LHF read!\n");
- }
- else {
-#ifdef IPI_DEBUG
- printf("IPI-LHF read %lx\n",pib_off);
-#endif
- *(uint64_t *)dest = 0; // TODO for SM-VP
- }
- }
- else { // upper half
- if ( s != 1 || ma != 4 ) {
- panic("Undefined PIB-UHF read!\n");
- }
- else {
-#ifdef IPI_DEBUG
- printf("IPI-UHF read %lx\n",pib_off);
-#endif
- *(uint8_t *)dest = 0; // TODO for SM-VP
- }
- }
- break;
- }
-}
-
-static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
-{
- struct vcpu *v = current;
- vcpu_iodata_t *vio;
- ioreq_t *p;
- unsigned long addr;
-
- vio = get_vio(v->domain, v->vcpu_id);
- if (vio == 0) {
- panic("bad shared page: %lx", (unsigned long)vio);
- }
- p = &vio->vp_ioreq;
- p->addr = pa;
- p->size = s;
- p->count = 1;
- p->dir = dir;
- if(dir==IOREQ_WRITE) //write;
- p->u.data = *val;
- p->pdata_valid = 0;
- p->port_mm = 1;
- p->df = 0;
-
- set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
- p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
- vmx_wait_io();
- if(dir==IOREQ_READ){ //read
- *val=p->u.data;
- }
- return;
-}
-#define TO_LEGACY_IO(pa) (((pa)>>12<<2)|((pa)&0x3))
-
-static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir)
-{
- struct vcpu *v = current;
- vcpu_iodata_t *vio;
- ioreq_t *p;
- unsigned long addr;
-
- vio = get_vio(v->domain, v->vcpu_id);
- if (vio == 0) {
- panic("bad shared page: %lx");
- }
- p = &vio->vp_ioreq;
- p->addr = TO_LEGACY_IO(pa&0x3ffffffUL);
- p->size = s;
- p->count = 1;
- p->dir = dir;
- if(dir==IOREQ_WRITE) //write;
- p->u.data = *val;
- p->pdata_valid = 0;
- p->port_mm = 0;
- p->df = 0;
-
- set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
- p->state = STATE_IOREQ_READY;
- evtchn_send(iopacket_port(v->domain));
-
- vmx_wait_io();
- if(dir==IOREQ_READ){ //read
- *val=p->u.data;
- }
-#ifdef DEBUG_PCI
- if(dir==IOREQ_WRITE)
- if(p->addr == 0xcf8UL)
- printk("Write 0xcf8, with val [0x%lx]\n", p->u.data);
- else
- if(p->addr == 0xcfcUL)
- printk("Read 0xcfc, with val [0x%lx]\n", p->u.data);
-#endif //DEBUG_PCI
- return;
-}
-
-static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir)
-{
- struct virutal_platform_def *v_plat;
- //mmio_type_t iot;
- unsigned long iot;
- iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT);
- v_plat = vmx_vcpu_get_plat(vcpu);
-
- switch (iot) {
- case GPFN_PIB:
- if(!dir)
- pib_write(vcpu, dest, src_pa - v_plat->pib_base, s, ma);
- else
- pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma);
- break;
- case GPFN_GFW:
- break;
- case GPFN_IOSAPIC:
- case GPFN_FRAME_BUFFER:
- case GPFN_LOW_MMIO:
- low_mmio_access(vcpu, src_pa, dest, s, dir);
- break;
- case GPFN_LEGACY_IO:
- legacy_io_access(vcpu, src_pa, dest, s, dir);
- break;
- default:
- panic("Bad I/O access\n");
- break;
- }
- return;
-}
-
-/*
- * Read or write data in guest virtual address mode.
- */
-/*
-void
-memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
-{
- uint64_t pa;
-
- if (!vtlb->nomap)
- panic("Normal memory write shouldn't go to this point!");
- pa = PPN_2_PA(vtlb->ppn);
- pa += POFFSET((u64)dest, vtlb->ps);
- mmio_write (vcpu, src, pa, s, vtlb->ma);
-}
-
-
-void
-memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
-{
- uint64_t pa = (uint64_t)dest;
- int ma;
-
- if ( pa & (1UL <<63) ) {
- // UC
- ma = 4;
- pa <<=1;
- pa >>=1;
- }
- else {
- // WBL
- ma = 0; // using WB for WBL
- }
- mmio_write (vcpu, src, pa, s, ma);
-}
-
-void
-memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s)
-{
- uint64_t pa;
-
- if (!vtlb->nomap)
- panic("Normal memory write shouldn't go to this point!");
- pa = PPN_2_PA(vtlb->ppn);
- pa += POFFSET((u64)src, vtlb->ps);
-
- mmio_read(vcpu, pa, dest, s, vtlb->ma);
-}
-
-void
-memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s)
-{
- uint64_t pa = (uint64_t)src;
- int ma;
-
- if ( pa & (1UL <<63) ) {
- // UC
- ma = 4;
- pa <<=1;
- pa >>=1;
- }
- else {
- // WBL
- ma = 0; // using WB for WBL
- }
- mmio_read(vcpu, pa, dest, s, ma);
-}
-*/
-
-
-/*
- * Deliver IPI message. (Only U-VP is supported now)
- * offset: address offset to IPI space.
- * value: deliver value.
- */
-static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector)
-{
-#ifdef IPI_DEBUG
- printf ("deliver_ipi %lx %lx\n",dm,vector);
-#endif
- switch ( dm ) {
- case 0: // INT
- vmx_vcpu_pend_interrupt (vcpu, vector);
- break;
- case 2: // PMI
- // TODO -- inject guest PMI
- panic ("Inject guest PMI!\n");
- break;
- case 4: // NMI
- vmx_vcpu_pend_interrupt (vcpu, 2);
- break;
- case 5: // INIT
- // TODO -- inject guest INIT
- panic ("Inject guest INIT!\n");
- break;
- case 7: // ExtINT
- vmx_vcpu_pend_interrupt (vcpu, 0);
- break;
- case 1:
- case 3:
- case 6:
- default:
- panic ("Deliver reserved IPI!\n");
- break;
- }
-}
-
-/*
- * TODO: Use hash table for the lookup.
- */
-static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid)
-{
- int i;
- VCPU *vcpu;
- LID lid;
- for (i=0; i<MAX_VIRT_CPUS; i++) {
- vcpu = d->vcpu[i];
- if (!vcpu)
- continue;
- lid.val = VPD_CR(vcpu, lid);
- if ( lid.id == id && lid.eid == eid ) {
- return vcpu;
- }
- }
- return NULL;
-}
-
-/*
- * execute write IPI op.
- */
-static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value)
-{
- VCPU *target_cpu;
-
- target_cpu = lid_2_vcpu(vcpu->domain,
- ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid);
- if ( target_cpu == NULL ) panic("Unknown IPI cpu\n");
- if ( target_cpu == vcpu ) {
- // IPI to self
- deliver_ipi (vcpu, ((ipi_d_t)value).dm,
- ((ipi_d_t)value).vector);
- return 1;
- }
- else {
- // TODO: send Host IPI to inject guest SMP IPI interruption
- panic ("No SM-VP supported!\n");
- return 0;
- }
-}
-
-
-/*
- dir 1: read 0:write
- inst_type 0:integer 1:floating point
- */
-extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip);
-#define SL_INTEGER 0 // store/load interger
-#define SL_FLOATING 1 // store/load floating
-
-void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma)
-{
- REGS *regs;
- IA64_BUNDLE bundle;
- int slot, dir, inst_type;
- size_t size;
- u64 data, value,post_update, slot1a, slot1b, temp;
- INST64 inst;
- regs=vcpu_regs(vcpu);
- bundle = __vmx_get_domain_bundle(regs->cr_iip);
- slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
- if (!slot) inst.inst = bundle.slot0;
- else if (slot == 1){
- slot1a=bundle.slot1a;
- slot1b=bundle.slot1b;
- inst.inst =slot1a + (slot1b<<18);
- }
- else if (slot == 2) inst.inst = bundle.slot2;
-
-
- // Integer Load/Store
- if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){
- inst_type = SL_INTEGER; //
- size=(inst.M1.x6&0x3);
- if((inst.M1.x6>>2)>0xb){ // write
- dir=IOREQ_WRITE; //write
- vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data);
- }else if((inst.M1.x6>>2)<0xb){ // read
- dir=IOREQ_READ;
- vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value);
- }
- }
- // Integer Load + Reg update
- else if(inst.M2.major==4&&inst.M2.m==1&&inst.M2.x==0){
- inst_type = SL_INTEGER;
- dir = IOREQ_READ; //write
- size = (inst.M2.x6&0x3);
- vmx_vcpu_get_gr(vcpu,inst.M2.r1,&value);
- vmx_vcpu_get_gr(vcpu,inst.M2.r3,&temp);
- vmx_vcpu_get_gr(vcpu,inst.M2.r2,&post_update);
- temp += post_update;
- vmx_vcpu_set_gr(vcpu,inst.M2.r3,temp,0);
- }
- // Integer Load/Store + Imm update
- else if(inst.M3.major==5){
- inst_type = SL_INTEGER; //
- size=(inst.M3.x6&0x3);
- if((inst.M5.x6>>2)>0xb){ // write
- dir=IOREQ_WRITE; //write
- vmx_vcpu_get_gr(vcpu,inst.M5.r2,&data);
- vmx_vcpu_get_gr(vcpu,inst.M5.r3,&temp);
- post_update = (inst.M5.i<<7)+inst.M5.imm7;
- if(inst.M5.s)
- temp -= post_update;
- else
- temp += post_update;
- vmx_vcpu_set_gr(vcpu,inst.M5.r3,temp,0);
-
- }else if((inst.M3.x6>>2)<0xb){ // read
- dir=IOREQ_READ;
- vmx_vcpu_get_gr(vcpu,inst.M3.r1,&value);
- vmx_vcpu_get_gr(vcpu,inst.M3.r3,&temp);
- post_update = (inst.M3.i<<7)+inst.M3.imm7;
- if(inst.M3.s)
- temp -= post_update;
- else
- temp += post_update;
- vmx_vcpu_set_gr(vcpu,inst.M3.r3,temp,0);
-
- }
- }
- // Floating-point Load/Store
-// else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){
-// inst_type=SL_FLOATING; //fp
-// dir=IOREQ_READ;
-// size=3; //ldfd
-// }
- else{
- printf("This memory access instruction can't be emulated two: %lx\n
",inst.inst);
- while(1);
- }
-
- size = 1 << size;
- if(dir==IOREQ_WRITE){
- mmio_access(vcpu, padr, &data, size, ma, dir);
- }else{
- mmio_access(vcpu, padr, &data, size, ma, dir);
- if(size==0)
- data = (value & 0xffffffffffffff00U) | (data & 0xffU);
- else if(size==1)
- data = (value & 0xffffffffffff0000U) | (data & 0xffffU);
- else if(size==2)
- data = (value & 0xffffffff00000000U) | (data & 0xffffffffU);
-
- if(inst_type==SL_INTEGER){ //gp
- vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0);
- }else{
- panic("Don't support ldfd now !");
-/* switch(inst.M6.f1){
-
- case 6:
- regs->f6=(struct ia64_fpreg)data;
- case 7:
- regs->f7=(struct ia64_fpreg)data;
- case 8:
- regs->f8=(struct ia64_fpreg)data;
- case 9:
- regs->f9=(struct ia64_fpreg)data;
- case 10:
- regs->f10=(struct ia64_fpreg)data;
- case 11:
- regs->f11=(struct ia64_fpreg)data;
- default :
- ia64_ldfs(inst.M6.f1,&data);
- }
-*/
- }
- }
- vmx_vcpu_increment_iip(vcpu);
-}
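
The deleted emulate_io_inst() above merges a sub-8-byte MMIO read into the previous contents of the target register according to the encoded access size. A minimal standalone sketch of that merge follows; the helper name is hypothetical and not part of this changeset.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the size-based merge in the deleted
 * emulate_io_inst(): the low bytes covered by the access come from the
 * MMIO read, the remaining bytes keep the register's previous value.
 * 'size' is the encoded access size: 0=1 byte, 1=2, 2=4, 3=8 bytes. */
static uint64_t merge_mmio_read(uint64_t prev, uint64_t data, int size)
{
	switch (size) {
	case 0:  return (prev & 0xffffffffffffff00ULL) | (data & 0xffULL);
	case 1:  return (prev & 0xffffffffffff0000ULL) | (data & 0xffffULL);
	case 2:  return (prev & 0xffffffff00000000ULL) | (data & 0xffffffffULL);
	default: return data;	/* an 8-byte access replaces the whole register */
	}
}

int main(void)
{
	/* A 2-byte MMIO read of 0xbeef merged into a register holding all ones. */
	printf("%016llx\n",
	       (unsigned long long)merge_mmio_read(~0ULL, 0xbeef, 1));
	return 0;
}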
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/pal_emul.c
--- a/xen/arch/ia64/pal_emul.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,280 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@xxxxxxxxx>
- * Copyright (c) 2005 Yu Ke <ke.yu@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <asm/vmx_vcpu.h>
-
-static void
-get_pal_parameters (VCPU *vcpu, UINT64 *gr29,
- UINT64 *gr30, UINT64 *gr31) {
-
- vmx_vcpu_get_gr(vcpu,29,gr29);
- vmx_vcpu_get_gr(vcpu,30,gr30);
- vmx_vcpu_get_gr(vcpu,31,gr31);
-}
-
-static void
-set_pal_result (VCPU *vcpu,struct ia64_pal_retval result) {
-
- vmx_vcpu_set_gr(vcpu,8, result.status,0);
- vmx_vcpu_set_gr(vcpu,9, result.v0,0);
- vmx_vcpu_set_gr(vcpu,10, result.v1,0);
- vmx_vcpu_set_gr(vcpu,11, result.v2,0);
-}
-
-
-static struct ia64_pal_retval
-pal_cache_flush (VCPU *vcpu) {
- UINT64 gr28,gr29, gr30, gr31;
- struct ia64_pal_retval result;
-
- get_pal_parameters (vcpu, &gr29, &gr30, &gr31);
- vmx_vcpu_get_gr(vcpu,28,&gr28);
-
- /* Always call Host Pal in int=1 */
- gr30 = gr30 &(~(0x2UL));
-
- /* call Host PAL cache flush */
-	result=ia64_pal_call_static(gr28, gr29, gr30, gr31, 1); // Clear psr.ic when calling PAL_CACHE_FLUSH
-
- /* If host PAL call is interrupted, then loop to complete it */
-// while (result.status == 1) {
-// ia64_pal_call_static(gr28 ,gr29, gr30,
-// result.v1,1LL);
-// }
- while (result.status != 0) {
- panic("PAL_CACHE_FLUSH ERROR, status %d", result.status);
- }
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_vm_tr_read (VCPU *vcpu ) {
-#warning pal_vm_tr_read: to be implemented
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-
-static struct ia64_pal_retval
-pal_prefetch_visibility (VCPU *vcpu) {
-	/* Due to the current MM virtualization algorithm,
-	 * we do not allow the guest to change mapping attributes.
-	 * Thus we do not support PAL_PREFETCH_VISIBILITY.
-	 */
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_platform_addr(VCPU *vcpu) {
- struct ia64_pal_retval result;
-
- result.status= 0; //success
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_halt (VCPU *vcpu) {
-#warning pal_halt: to be implemented
-	//bugbug: to be implemented.
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-
-static struct ia64_pal_retval
-pal_halt_light (VCPU *vcpu) {
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_cache_read (VCPU *vcpu) {
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_cache_write (VCPU *vcpu) {
- struct ia64_pal_retval result;
-
- result.status= -1; //unimplemented
-
- return result;
-}
-
-static struct ia64_pal_retval
-pal_bus_get_features(VCPU *vcpu){
-
-}
-
-static struct ia64_pal_retval
-pal_cache_summary(VCPU *vcpu){
-
-}
-
-static struct ia64_pal_retval
-pal_cache_init(VCPU *vcpu){
- struct ia64_pal_retval result;
- result.status=0;
- return result;
-}
-
-static struct ia64_pal_retval
-pal_cache_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_cache_prot_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_cache_shared_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_mem_attrib(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_debug_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_fixed_addr(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_freq_base(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_freq_ratios(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_halt_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_logical_to_physica(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_perf_mon_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_proc_get_features(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_ptce_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_register_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_rse_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_test_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_vm_summary(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_vm_info(VCPU *vcpu){
-}
-
-static struct ia64_pal_retval
-pal_vm_page_size(VCPU *vcpu){
-}
-
-void
-pal_emul( VCPU *vcpu) {
- UINT64 gr28;
- struct ia64_pal_retval result;
-
-
- vmx_vcpu_get_gr(vcpu,28,&gr28); //bank1
-
- switch (gr28) {
- case PAL_CACHE_FLUSH:
- result = pal_cache_flush (vcpu);
- break;
-
- case PAL_PREFETCH_VISIBILITY:
- result = pal_prefetch_visibility (vcpu);
- break;
-
- case PAL_VM_TR_READ:
- result = pal_vm_tr_read (vcpu);
- break;
-
- case PAL_HALT:
- result = pal_halt (vcpu);
- break;
-
- case PAL_HALT_LIGHT:
- result = pal_halt_light (vcpu);
- break;
-
- case PAL_CACHE_READ:
- result = pal_cache_read (vcpu);
- break;
-
- case PAL_CACHE_WRITE:
- result = pal_cache_write (vcpu);
- break;
-
- case PAL_PLATFORM_ADDR:
- result = pal_platform_addr (vcpu);
- break;
-
- default:
- panic("pal_emul(): guest call unsupported pal" );
- }
- set_pal_result (vcpu, result);
-}
-
-
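
The deleted pal_emul() reads the PAL index the guest placed in GR28, dispatches to a per-call handler, and copies status/v0/v1/v2 back to GR8-GR11. Below is a minimal standalone sketch of that dispatch shape only; the types, handler bodies, and index values are illustrative stand-ins, not the real Xen or PAL definitions.

#include <stdint.h>
#include <stdio.h>

/* Illustrative index values and return record; the real ones live in the
 * ia64 PAL headers, not in this sketch. */
enum { PAL_CACHE_FLUSH = 1, PAL_HALT_LIGHT = 29 };
struct pal_retval { int64_t status, v0, v1, v2; };

/* Dispatch shape of the deleted pal_emul(): choose a handler by the index
 * the guest put in GR28; unknown or unhandled calls report status -1. */
static struct pal_retval pal_dispatch(uint64_t index)
{
	struct pal_retval r = { -1, 0, 0, 0 };

	switch (index) {
	case PAL_CACHE_FLUSH:
		r.status = 0;	/* delegated to the host PAL in the real code */
		break;
	case PAL_HALT_LIGHT:
		r.status = -1;	/* unimplemented */
		break;
	default:
		r.status = -1;
		break;
	}
	return r;
}

int main(void)
{
	uint64_t gr[32] = { 0 };

	gr[28] = PAL_CACHE_FLUSH;	/* guest passes the PAL index in GR28 */
	struct pal_retval r = pal_dispatch(gr[28]);
	gr[8]  = (uint64_t)r.status;	/* result returned in GR8..GR11 */
	gr[9]  = (uint64_t)r.v0;
	gr[10] = (uint64_t)r.v1;
	gr[11] = (uint64_t)r.v2;
	printf("PAL_CACHE_FLUSH status=%lld\n", (long long)(int64_t)gr[8]);
	return 0;
}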
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/pcdp.c
--- a/xen/arch/ia64/pcdp.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,120 +0,0 @@
-/*
- * Parse the EFI PCDP table to locate the console device.
- *
- * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P.
- * Khalid Aziz <khalid.aziz@xxxxxx>
- * Alex Williamson <alex.williamson@xxxxxx>
- * Bjorn Helgaas <bjorn.helgaas@xxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/acpi.h>
-#include <linux/console.h>
-#include <linux/efi.h>
-#include <linux/serial.h>
-#ifdef XEN
-#include <linux/errno.h>
-#endif
-#include "pcdp.h"
-
-static int __init
-setup_serial_console(struct pcdp_uart *uart)
-{
-#ifdef XEN
- extern struct ns16550_defaults ns16550_com1;
- ns16550_com1.baud = uart->baud;
- ns16550_com1.io_base = uart->addr.address;
- if (uart->bits)
- ns16550_com1.data_bits = uart->bits;
- return 0;
-#else
-#ifdef CONFIG_SERIAL_8250_CONSOLE
- int mmio;
- static char options[64];
-
- mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY);
- snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d",
- mmio ? "mmio" : "io", uart->addr.address, uart->baud,
- uart->bits ? uart->bits : 8);
-
- return early_serial_console_init(options);
-#else
- return -ENODEV;
-#endif
-#endif
-}
-
-#ifndef XEN
-static int __init
-setup_vga_console(struct pcdp_vga *vga)
-{
-#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
- if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) {
-		printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n");
- return -ENODEV;
- }
-
- conswitchp = &vga_con;
- printk(KERN_INFO "PCDP: VGA console\n");
- return 0;
-#else
- return -ENODEV;
-#endif
-}
-#endif
-
-int __init
-efi_setup_pcdp_console(char *cmdline)
-{
- struct pcdp *pcdp;
- struct pcdp_uart *uart;
- struct pcdp_device *dev, *end;
- int i, serial = 0;
-
- pcdp = efi.hcdp;
- if (!pcdp)
- return -ENODEV;
-
-#ifndef XEN
- printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp));
-#endif
-
- if (strstr(cmdline, "console=hcdp")) {
- if (pcdp->rev < 3)
- serial = 1;
- } else if (strstr(cmdline, "console=")) {
-#ifndef XEN
- printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n");
-#endif
- return -ENODEV;
- }
-
- if (pcdp->rev < 3 && efi_uart_console_only())
- serial = 1;
-
- for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) {
- if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) {
- if (uart->type == PCDP_CONSOLE_UART) {
- return setup_serial_console(uart);
- }
- }
- }
-
-#ifndef XEN
- end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length);
- for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts);
- dev < end;
- dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) {
- if (dev->flags & PCDP_PRIMARY_CONSOLE) {
- if (dev->type == PCDP_CONSOLE_VGA) {
-				return setup_vga_console((struct pcdp_vga *) dev);
- }
- }
- }
-#endif
-
- return -ENODEV;
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/privop.c
--- a/xen/arch/ia64/privop.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1130 +0,0 @@
-/*
- * Privileged operation "API" handling functions.
- *
- * Copyright (C) 2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <asm/privop.h>
-#include <asm/vcpu.h>
-#include <asm/processor.h>
-#include <asm/delay.h> // Debug only
-//#include <debug.h>
-
-long priv_verbose=0;
-
-/**************************************************************************
-Hypercall bundle creation
-**************************************************************************/
-
-
-void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret)
-{
- INST64_A5 slot0;
- INST64_I19 slot1;
- INST64_B4 slot2;
- IA64_BUNDLE bundle;
-
-	// slot0: mov r2 = hypnum (low 20 bits)
- slot0.inst = 0;
- slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9;
- slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7;
- slot0.imm5c = hypnum >> 16; slot0.s = 0;
- // slot1: break brkimm
- slot1.inst = 0;
- slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0;
- slot1.imm20 = brkimm; slot1.i = brkimm >> 20;
- // if ret slot2: br.ret.sptk.many rp
- // else slot2: br.cond.sptk.many rp
- slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0;
- slot2.wh = 0; slot2.d = 0; slot2.major = 0x0;
- if (ret) {
- slot2.btype = 4; slot2.x6 = 0x21;
- }
- else {
- slot2.btype = 0; slot2.x6 = 0x20;
- }
-
- bundle.i64[0] = 0; bundle.i64[1] = 0;
- bundle.template = 0x11;
- bundle.slot0 = slot0.inst; bundle.slot2 = slot2.inst;
- bundle.slot1a = slot1.inst; bundle.slot1b = slot1.inst >> 18;
-
- *imva++ = bundle.i64[0]; *imva = bundle.i64[1];
-}
-
-/**************************************************************************
-Privileged operation emulation routines
-**************************************************************************/
-
-IA64FAULT priv_rfi(VCPU *vcpu, INST64 inst)
-{
- return vcpu_rfi(vcpu);
-}
-
-IA64FAULT priv_bsw0(VCPU *vcpu, INST64 inst)
-{
- return vcpu_bsw0(vcpu);
-}
-
-IA64FAULT priv_bsw1(VCPU *vcpu, INST64 inst)
-{
- return vcpu_bsw1(vcpu);
-}
-
-IA64FAULT priv_cover(VCPU *vcpu, INST64 inst)
-{
- return vcpu_cover(vcpu);
-}
-
-IA64FAULT priv_ptc_l(VCPU *vcpu, INST64 inst)
-{
- UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
- UINT64 addr_range;
-
- addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
- return vcpu_ptc_l(vcpu,vadr,addr_range);
-}
-
-IA64FAULT priv_ptc_e(VCPU *vcpu, INST64 inst)
-{
- UINT src = inst.M28.r3;
-
- // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64)
- if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64)));
- return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src));
-}
-
-IA64FAULT priv_ptc_g(VCPU *vcpu, INST64 inst)
-{
- UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
- UINT64 addr_range;
-
- addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
- return vcpu_ptc_g(vcpu,vadr,addr_range);
-}
-
-IA64FAULT priv_ptc_ga(VCPU *vcpu, INST64 inst)
-{
- UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
- UINT64 addr_range;
-
- addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
- return vcpu_ptc_ga(vcpu,vadr,addr_range);
-}
-
-IA64FAULT priv_ptr_d(VCPU *vcpu, INST64 inst)
-{
- UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
- UINT64 addr_range;
-
- addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
- return vcpu_ptr_d(vcpu,vadr,addr_range);
-}
-
-IA64FAULT priv_ptr_i(VCPU *vcpu, INST64 inst)
-{
- UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3);
- UINT64 addr_range;
-
- addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2);
- return vcpu_ptr_i(vcpu,vadr,addr_range);
-}
-
-IA64FAULT priv_tpa(VCPU *vcpu, INST64 inst)
-{
- UINT64 padr;
- UINT fault;
- UINT src = inst.M46.r3;
-
- // NOTE: tpa with source gr > 63 is emulated as a ttag rx=r(y-64)
- if (src > 63)
- fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr);
- else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M46.r1, padr);
- else return fault;
-}
-
-IA64FAULT priv_tak(VCPU *vcpu, INST64 inst)
-{
- UINT64 key;
- UINT fault;
- UINT src = inst.M46.r3;
-
- // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64)
- if (src > 63)
- fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key);
- else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M46.r1, key);
- else return fault;
-}
-
-/************************************
- * Insert translation register/cache
-************************************/
-
-IA64FAULT priv_itr_d(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte, slot;
-
- //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M42.r2);
- slot = vcpu_get_gr(vcpu,inst.M42.r3);
-
- return (vcpu_itr_d(vcpu,slot,pte,itir,ifa));
-}
-
-IA64FAULT priv_itr_i(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte, slot;
-
- //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M42.r2);
- slot = vcpu_get_gr(vcpu,inst.M42.r3);
-
- return (vcpu_itr_i(vcpu,slot,pte,itir,ifa));
-}
-
-IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte;
-
- //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M41.r2);
-
- return (vcpu_itc_d(vcpu,pte,itir,ifa));
-}
-
-IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte;
-
- //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
- return(IA64_ILLOP_FAULT);
- pte = vcpu_get_gr(vcpu,inst.M41.r2);
-
- return (vcpu_itc_i(vcpu,pte,itir,ifa));
-}
-
-/*************************************
- * Moves to semi-privileged registers
-*************************************/
-
-IA64FAULT priv_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
-{
- // I27 and M30 are identical for these fields
- UINT64 ar3 = inst.M30.ar3;
- UINT64 imm = vcpu_get_gr(vcpu,inst.M30.imm);
- return (vcpu_set_ar(vcpu,ar3,imm));
-}
-
-IA64FAULT priv_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
-{
- // I26 and M29 are identical for these fields
- UINT64 ar3 = inst.M29.ar3;
-
- if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr
- UINT64 val;
- if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT)
- return vcpu_set_gr(vcpu, inst.M29.r2-64, val);
- else return IA64_ILLOP_FAULT;
- }
- else {
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M29.r2);
- return (vcpu_set_ar(vcpu,ar3,r2));
- }
-}
-
-/********************************
- * Moves to privileged registers
-********************************/
-
-IA64FAULT priv_mov_to_pkr(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_pkr(vcpu,r3,r2));
-}
-
-IA64FAULT priv_mov_to_rr(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_rr(vcpu,r3,r2));
-}
-
-IA64FAULT priv_mov_to_dbr(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_dbr(vcpu,r3,r2));
-}
-
-IA64FAULT priv_mov_to_ibr(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_ibr(vcpu,r3,r2));
-}
-
-IA64FAULT priv_mov_to_pmc(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_pmc(vcpu,r3,r2));
-}
-
-IA64FAULT priv_mov_to_pmd(VCPU *vcpu, INST64 inst)
-{
- UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3);
- UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2);
- return (vcpu_set_pmd(vcpu,r3,r2));
-}
-
-unsigned long to_cr_cnt[128] = { 0 };
-
-IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2);
- to_cr_cnt[inst.M32.cr3]++;
- switch (inst.M32.cr3) {
- case 0: return vcpu_set_dcr(vcpu,val);
- case 1: return vcpu_set_itm(vcpu,val);
- case 2: return vcpu_set_iva(vcpu,val);
- case 8: return vcpu_set_pta(vcpu,val);
- case 16:return vcpu_set_ipsr(vcpu,val);
- case 17:return vcpu_set_isr(vcpu,val);
- case 19:return vcpu_set_iip(vcpu,val);
- case 20:return vcpu_set_ifa(vcpu,val);
- case 21:return vcpu_set_itir(vcpu,val);
- case 22:return vcpu_set_iipa(vcpu,val);
- case 23:return vcpu_set_ifs(vcpu,val);
- case 24:return vcpu_set_iim(vcpu,val);
- case 25:return vcpu_set_iha(vcpu,val);
- case 64:return vcpu_set_lid(vcpu,val);
- case 65:return IA64_ILLOP_FAULT;
- case 66:return vcpu_set_tpr(vcpu,val);
- case 67:return vcpu_set_eoi(vcpu,val);
- case 68:return IA64_ILLOP_FAULT;
- case 69:return IA64_ILLOP_FAULT;
- case 70:return IA64_ILLOP_FAULT;
- case 71:return IA64_ILLOP_FAULT;
- case 72:return vcpu_set_itv(vcpu,val);
- case 73:return vcpu_set_pmv(vcpu,val);
- case 74:return vcpu_set_cmcv(vcpu,val);
- case 80:return vcpu_set_lrr0(vcpu,val);
- case 81:return vcpu_set_lrr1(vcpu,val);
- default: return IA64_ILLOP_FAULT;
- }
-}
-
-IA64FAULT priv_rsm(VCPU *vcpu, INST64 inst)
-{
- UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
- return vcpu_reset_psr_sm(vcpu,imm24);
-}
-
-IA64FAULT priv_ssm(VCPU *vcpu, INST64 inst)
-{
- UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
- return vcpu_set_psr_sm(vcpu,imm24);
-}
-
-/**
- * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
- */
-IA64FAULT priv_mov_to_psr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val = vcpu_get_gr(vcpu, inst.M35.r2);
- return vcpu_set_psr_l(vcpu,val);
-}
-
-/**********************************
- * Moves from privileged registers
- **********************************/
-
-IA64FAULT priv_mov_from_rr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
-
- if (inst.M43.r1 > 63) { // privified mov from cpuid
- fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
- }
- else {
- fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- }
- return fault;
-}
-
-IA64FAULT priv_mov_from_pkr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
-
- fault = vcpu_get_pkr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- else return fault;
-}
-
-IA64FAULT priv_mov_from_dbr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
-
- fault = vcpu_get_dbr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- else return fault;
-}
-
-IA64FAULT priv_mov_from_ibr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
-
- fault = vcpu_get_ibr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- else return fault;
-}
-
-IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
-
- if (inst.M43.r1 > 63) { // privified mov from pmd
- fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1-64, val);
- }
- else {
- fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val);
- if (fault == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, inst.M43.r1, val);
- }
- return fault;
-}
-
-unsigned long from_cr_cnt[128] = { 0 };
-
-#define cr_get(cr) \
- ((fault = vcpu_get_##cr(vcpu,&val)) == IA64_NO_FAULT) ? \
- vcpu_set_gr(vcpu, tgt, val) : fault;
-
-IA64FAULT priv_mov_from_cr(VCPU *vcpu, INST64 inst)
-{
- UINT64 tgt = inst.M33.r1;
- UINT64 val;
- IA64FAULT fault;
-
- from_cr_cnt[inst.M33.cr3]++;
- switch (inst.M33.cr3) {
- case 0: return cr_get(dcr);
- case 1: return cr_get(itm);
- case 2: return cr_get(iva);
- case 8: return cr_get(pta);
- case 16:return cr_get(ipsr);
- case 17:return cr_get(isr);
- case 19:return cr_get(iip);
- case 20:return cr_get(ifa);
- case 21:return cr_get(itir);
- case 22:return cr_get(iipa);
- case 23:return cr_get(ifs);
- case 24:return cr_get(iim);
- case 25:return cr_get(iha);
- case 64:return cr_get(lid);
- case 65:return cr_get(ivr);
- case 66:return cr_get(tpr);
- case 67:return vcpu_set_gr(vcpu,tgt,0L);
- case 68:return cr_get(irr0);
- case 69:return cr_get(irr1);
- case 70:return cr_get(irr2);
- case 71:return cr_get(irr3);
- case 72:return cr_get(itv);
- case 73:return cr_get(pmv);
- case 74:return cr_get(cmcv);
- case 80:return cr_get(lrr0);
- case 81:return cr_get(lrr1);
- default: return IA64_ILLOP_FAULT;
- }
- return IA64_ILLOP_FAULT;
-}
-
-IA64FAULT priv_mov_from_psr(VCPU *vcpu, INST64 inst)
-{
- UINT64 tgt = inst.M33.r1;
- UINT64 val;
- IA64FAULT fault;
-
- if ((fault = vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
- return vcpu_set_gr(vcpu, tgt, val);
- else return fault;
-}
-
-/**************************************************************************
-Privileged operation decode and dispatch routines
-**************************************************************************/
-
-IA64_SLOT_TYPE slot_types[0x20][3] = {
- {M, I, I}, {M, I, I}, {M, I, I}, {M, I, I},
- {M, I, ILLEGAL}, {M, I, ILLEGAL},
- {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
- {M, M, I}, {M, M, I}, {M, M, I}, {M, M, I},
- {M, F, I}, {M, F, I},
- {M, M, F}, {M, M, F},
- {M, I, B}, {M, I, B},
- {M, B, B}, {M, B, B},
- {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
- {B, B, B}, {B, B, B},
- {M, M, B}, {M, M, B},
- {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL},
- {M, F, B}, {M, F, B},
- {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}
-};
-
-// pointer to privileged emulation function
-typedef IA64FAULT (*PPEFCN)(VCPU *vcpu, INST64 inst);
-
-PPEFCN Mpriv_funcs[64] = {
- priv_mov_to_rr, priv_mov_to_dbr, priv_mov_to_ibr, priv_mov_to_pkr,
- priv_mov_to_pmc, priv_mov_to_pmd, 0, 0,
- 0, priv_ptc_l, priv_ptc_g, priv_ptc_ga,
- priv_ptr_d, priv_ptr_i, priv_itr_d, priv_itr_i,
- priv_mov_from_rr, priv_mov_from_dbr, priv_mov_from_ibr, priv_mov_from_pkr,
- priv_mov_from_pmc, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, priv_tpa, priv_tak,
- 0, 0, 0, 0,
- priv_mov_from_cr, priv_mov_from_psr, 0, 0,
- 0, 0, 0, 0,
- priv_mov_to_cr, priv_mov_to_psr, priv_itc_d, priv_itc_i,
- 0, 0, 0, 0,
- priv_ptc_e, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-struct {
- unsigned long mov_to_ar_imm;
- unsigned long mov_to_ar_reg;
- unsigned long mov_from_ar;
- unsigned long ssm;
- unsigned long rsm;
- unsigned long rfi;
- unsigned long bsw0;
- unsigned long bsw1;
- unsigned long cover;
- unsigned long fc;
- unsigned long cpuid;
- unsigned long Mpriv_cnt[64];
-} privcnt = { 0 };
-
-unsigned long privop_trace = 0;
-
-IA64FAULT
-priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl)
-{
- IA64_BUNDLE bundle;
- IA64_BUNDLE __get_domain_bundle(UINT64);
- int slot;
- IA64_SLOT_TYPE slot_type;
- INST64 inst;
- PPEFCN pfunc;
- unsigned long ipsr = regs->cr_ipsr;
- UINT64 iip = regs->cr_iip;
- int x6;
-
- // make a local copy of the bundle containing the privop
-#if 1
- bundle = __get_domain_bundle(iip);
- if (!bundle.i64[0] && !bundle.i64[1])
-#else
- if (__copy_from_user(&bundle,iip,sizeof(bundle)))
-#endif
- {
-//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip);
- return vcpu_force_data_miss(vcpu,regs->cr_iip);
- }
-#if 0
- if (iip==0xa000000100001820) {
- static int firstpagefault = 1;
- if (firstpagefault) {
-			printf("*** First time to domain page fault!\n"); firstpagefault=0;
- }
- }
-#endif
- if (privop_trace) {
- static long i = 400;
- //if (i > 0) printf("privop @%p\n",iip);
- if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
- iip,ia64_get_itc(),ia64_get_itm());
- i--;
- }
- slot = ((struct ia64_psr *)&ipsr)->ri;
- if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41;
- else if (slot == 1)
- inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41;
- else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41;
- else printf("priv_handle_op: illegal slot: %d\n", slot);
-
- slot_type = slot_types[bundle.template][slot];
- if (priv_verbose) {
-		printf("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n",
- iip, (UINT64)inst.inst, slot, slot_type);
- }
- if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) {
- // break instr for privified cover
- }
- else if (privlvl != 2) return (IA64_ILLOP_FAULT);
- switch (slot_type) {
- case M:
- if (inst.generic.major == 0) {
-#if 0
- if (inst.M29.x6 == 0 && inst.M29.x3 == 0) {
- privcnt.cover++;
- return priv_cover(vcpu,inst);
- }
-#endif
- if (inst.M29.x3 != 0) break;
- if (inst.M30.x4 == 8 && inst.M30.x2 == 2) {
- privcnt.mov_to_ar_imm++;
- return priv_mov_to_ar_imm(vcpu,inst);
- }
- if (inst.M44.x4 == 6) {
- privcnt.ssm++;
- return priv_ssm(vcpu,inst);
- }
- if (inst.M44.x4 == 7) {
- privcnt.rsm++;
- return priv_rsm(vcpu,inst);
- }
- break;
- }
- else if (inst.generic.major != 1) break;
- x6 = inst.M29.x6;
- if (x6 == 0x2a) {
- if (inst.M29.r2 > 63 && inst.M29.ar3 < 8)
- privcnt.mov_from_ar++; // privified mov from kr
- else privcnt.mov_to_ar_reg++;
- return priv_mov_to_ar_reg(vcpu,inst);
- }
- if (inst.M29.x3 != 0) break;
- if (!(pfunc = Mpriv_funcs[x6])) break;
- if (x6 == 0x1e || x6 == 0x1f) { // tpa or tak are "special"
- if (inst.M46.r3 > 63) {
- if (x6 == 0x1e) x6 = 0x1b;
- else x6 = 0x1a;
- }
- }
- if (x6 == 52 && inst.M28.r3 > 63)
- privcnt.fc++;
- else if (x6 == 16 && inst.M43.r3 > 63)
- privcnt.cpuid++;
- else privcnt.Mpriv_cnt[x6]++;
- return (*pfunc)(vcpu,inst);
- break;
- case B:
- if (inst.generic.major != 0) break;
- if (inst.B8.x6 == 0x08) {
- IA64FAULT fault;
- privcnt.rfi++;
- fault = priv_rfi(vcpu,inst);
-			if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS;
- return fault;
- }
- if (inst.B8.x6 == 0x0c) {
- privcnt.bsw0++;
- return priv_bsw0(vcpu,inst);
- }
- if (inst.B8.x6 == 0x0d) {
- privcnt.bsw1++;
- return priv_bsw1(vcpu,inst);
- }
- if (inst.B8.x6 == 0x0) { // break instr for privified cover
- privcnt.cover++;
- return priv_cover(vcpu,inst);
- }
- break;
- case I:
- if (inst.generic.major != 0) break;
-#if 0
- if (inst.I26.x6 == 0 && inst.I26.x3 == 0) {
- privcnt.cover++;
- return priv_cover(vcpu,inst);
- }
-#endif
- if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3
- if (inst.I26.x6 == 0x2a) {
- if (inst.I26.r2 > 63 && inst.I26.ar3 < 8)
- privcnt.mov_from_ar++; // privified mov from kr
- else privcnt.mov_to_ar_reg++;
- return priv_mov_to_ar_reg(vcpu,inst);
- }
- if (inst.I27.x6 == 0x0a) {
- privcnt.mov_to_ar_imm++;
- return priv_mov_to_ar_imm(vcpu,inst);
- }
- break;
- default:
- break;
- }
- //printf("We who are about do die salute you\n");
-	printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=%p\n",
- iip, (UINT64)inst.inst, slot, slot_type, ipsr);
- //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip));
- //thread_mozambique("privop fault\n");
- return (IA64_ILLOP_FAULT);
-}
-
-/** Emulate a privileged operation.
- *
- * This should probably return 0 on success and the "trap number"
- * (e.g. illegal operation for bad register, priv op for an
- * instruction that isn't allowed, etc.) on "failure"
- *
- * @param vcpu virtual cpu
- * @param isrcode interrupt service routine code
- * @return fault
- */
-IA64FAULT
-priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr)
-{
- IA64FAULT fault;
- UINT64 ipsr = regs->cr_ipsr;
- UINT64 isrcode = (isr >> 4) & 0xf;
- int privlvl;
-
- // handle privops masked as illops? and breaks (6)
- if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) {
- printf("priv_emulate: isrcode != 0 or 1 or 2\n");
- printf("priv_emulate: returning ILLOP, not implemented!\n");
- while (1);
- return IA64_ILLOP_FAULT;
- }
- //if (isrcode != 1 && isrcode != 2) return 0;
- vcpu_set_regs(vcpu,regs);
- privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT;
-	// it's OK for a privified-cover to be executed in user-land
- fault = priv_handle_op(vcpu,regs,privlvl);
-	if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!!
- // update iip/ipsr to point to the next instruction
- (void)vcpu_increment_iip(vcpu);
- }
- if (fault == IA64_ILLOP_FAULT)
- printf("priv_emulate: priv_handle_op fails, isr=%p\n",isr);
- return fault;
-}
-
-
-// FIXME: Move these to include/public/arch-ia64?
-#define HYPERPRIVOP_RFI 0x1
-#define HYPERPRIVOP_RSM_DT 0x2
-#define HYPERPRIVOP_SSM_DT 0x3
-#define HYPERPRIVOP_COVER 0x4
-#define HYPERPRIVOP_ITC_D 0x5
-#define HYPERPRIVOP_ITC_I 0x6
-#define HYPERPRIVOP_SSM_I 0x7
-#define HYPERPRIVOP_GET_IVR 0x8
-#define HYPERPRIVOP_GET_TPR 0x9
-#define HYPERPRIVOP_SET_TPR 0xa
-#define HYPERPRIVOP_EOI 0xb
-#define HYPERPRIVOP_SET_ITM 0xc
-#define HYPERPRIVOP_THASH 0xd
-#define HYPERPRIVOP_PTC_GA 0xe
-#define HYPERPRIVOP_ITR_D 0xf
-#define HYPERPRIVOP_GET_RR 0x10
-#define HYPERPRIVOP_SET_RR 0x11
-#define HYPERPRIVOP_MAX 0x11
-
-char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
- 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
- "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d",
- "=rr", "rr=",
- 0
-};
-
-unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
-unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 };
-
-/* hyperprivops are generally executed in assembly (with physical psr.ic off)
- * so this code is primarily used for debugging them */
-int
-ia64_hyperprivop(unsigned long iim, REGS *regs)
-{
-	struct vcpu *v = (struct vcpu *) current;
- INST64 inst;
- UINT64 val;
- UINT64 itir, ifa;
-
-// FIXME: Handle faults appropriately for these
- if (!iim || iim > HYPERPRIVOP_MAX) {
- printf("bad hyperprivop; ignored\n");
- printf("iim=%d, iip=%p\n",iim,regs->cr_iip);
- return 1;
- }
- slow_hyperpriv_cnt[iim]++;
- switch(iim) {
- case HYPERPRIVOP_RFI:
- (void)vcpu_rfi(v);
- return 0; // don't update iip
- case HYPERPRIVOP_RSM_DT:
- (void)vcpu_reset_psr_dt(v);
- return 1;
- case HYPERPRIVOP_SSM_DT:
- (void)vcpu_set_psr_dt(v);
- return 1;
- case HYPERPRIVOP_COVER:
- (void)vcpu_cover(v);
- return 1;
- case HYPERPRIVOP_ITC_D:
- (void)vcpu_get_itir(v,&itir);
- (void)vcpu_get_ifa(v,&ifa);
- (void)vcpu_itc_d(v,regs->r8,itir,ifa);
- return 1;
- case HYPERPRIVOP_ITC_I:
- (void)vcpu_get_itir(v,&itir);
- (void)vcpu_get_ifa(v,&ifa);
- (void)vcpu_itc_i(v,regs->r8,itir,ifa);
- return 1;
- case HYPERPRIVOP_SSM_I:
- (void)vcpu_set_psr_i(v);
- return 1;
- case HYPERPRIVOP_GET_IVR:
- (void)vcpu_get_ivr(v,&val);
- regs->r8 = val;
- return 1;
- case HYPERPRIVOP_GET_TPR:
- (void)vcpu_get_tpr(v,&val);
- regs->r8 = val;
- return 1;
- case HYPERPRIVOP_SET_TPR:
- (void)vcpu_set_tpr(v,regs->r8);
- return 1;
- case HYPERPRIVOP_EOI:
- (void)vcpu_set_eoi(v,0L);
- return 1;
- case HYPERPRIVOP_SET_ITM:
- (void)vcpu_set_itm(v,regs->r8);
- return 1;
- case HYPERPRIVOP_THASH:
- (void)vcpu_thash(v,regs->r8,&val);
- regs->r8 = val;
- return 1;
- case HYPERPRIVOP_PTC_GA:
- (void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2)));
- return 1;
- case HYPERPRIVOP_ITR_D:
- (void)vcpu_get_itir(v,&itir);
- (void)vcpu_get_ifa(v,&ifa);
- (void)vcpu_itr_d(v,regs->r8,regs->r9,itir,ifa);
- return 1;
- case HYPERPRIVOP_GET_RR:
- (void)vcpu_get_rr(v,regs->r8,&val);
- regs->r8 = val;
- return 1;
- case HYPERPRIVOP_SET_RR:
- (void)vcpu_set_rr(v,regs->r8,regs->r9);
- return 1;
- }
- return 0;
-}
-
-
-/**************************************************************************
-Privileged operation instrumentation routines
-**************************************************************************/
-
-char *Mpriv_str[64] = {
- "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr",
- "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>",
- "<0x08>", "ptc_l", "ptc_g", "ptc_ga",
- "ptr_d", "ptr_i", "itr_d", "itr_i",
- "mov_from_rr", "mov_from_dbr", "mov_from_ibr", "mov_from_pkr",
- "mov_from_pmc", "<0x15>", "<0x16>", "<0x17>",
- "<0x18>", "<0x19>", "privified-thash", "privified-ttag",
- "<0x1c>", "<0x1d>", "tpa", "tak",
- "<0x20>", "<0x21>", "<0x22>", "<0x23>",
- "mov_from_cr", "mov_from_psr", "<0x26>", "<0x27>",
- "<0x28>", "<0x29>", "<0x2a>", "<0x2b>",
- "mov_to_cr", "mov_to_psr", "itc_d", "itc_i",
- "<0x30>", "<0x31>", "<0x32>", "<0x33>",
- "ptc_e", "<0x35>", "<0x36>", "<0x37>",
- "<0x38>", "<0x39>", "<0x3a>", "<0x3b>",
- "<0x3c>", "<0x3d>", "<0x3e>", "<0x3f>"
-};
-
-#define RS "Rsvd"
-char *cr_str[128] = {
- "dcr","itm","iva",RS,RS,RS,RS,RS,
- "pta",RS,RS,RS,RS,RS,RS,RS,
- "ipsr","isr",RS,"iip","ifa","itir","iipa","ifs",
- "iim","iha",RS,RS,RS,RS,RS,RS,
- RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
- RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
- "lid","ivr","tpr","eoi","irr0","irr1","irr2","irr3",
- "itv","pmv","cmcv",RS,RS,RS,RS,RS,
- "lrr0","lrr1",RS,RS,RS,RS,RS,RS,
- RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
- RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS,
- RS,RS,RS,RS,RS,RS,RS,RS
-};
-
-// FIXME: should use snprintf to ensure no buffer overflow
-int dump_privop_counts(char *buf)
-{
- int i, j;
- UINT64 sum = 0;
- char *s = buf;
-
- // this is ugly and should probably produce sorted output
- // but it will have to do for now
- sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg;
- sum += privcnt.ssm; sum += privcnt.rsm;
- sum += privcnt.rfi; sum += privcnt.bsw0;
- sum += privcnt.bsw1; sum += privcnt.cover;
- for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i];
- s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum);
- if (privcnt.mov_to_ar_imm)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_imm,
- "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum);
- if (privcnt.mov_to_ar_reg)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg,
- "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum);
- if (privcnt.mov_from_ar)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar,
-			"privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum);
- if (privcnt.ssm)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm,
- "ssm", (privcnt.ssm*100L)/sum);
- if (privcnt.rsm)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm,
- "rsm", (privcnt.rsm*100L)/sum);
- if (privcnt.rfi)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi,
- "rfi", (privcnt.rfi*100L)/sum);
- if (privcnt.bsw0)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0,
- "bsw0", (privcnt.bsw0*100L)/sum);
- if (privcnt.bsw1)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1,
- "bsw1", (privcnt.bsw1*100L)/sum);
- if (privcnt.cover)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover,
- "cover", (privcnt.cover*100L)/sum);
- if (privcnt.fc)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc,
- "privified-fc", (privcnt.fc*100L)/sum);
- if (privcnt.cpuid)
- s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid,
- "privified-getcpuid", (privcnt.cpuid*100L)/sum);
- for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) {
- if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n");
- else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i],
- Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum);
- if (i == 0x24) { // mov from CR
- s += sprintf(s," [");
- for (j=0; j < 128; j++) if (from_cr_cnt[j]) {
- if (!cr_str[j])
- s += sprintf(s,"PRIVSTRING NULL!!\n");
-				s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]);
- }
- s += sprintf(s,"]\n");
- }
- else if (i == 0x2c) { // mov to CR
- s += sprintf(s," [");
- for (j=0; j < 128; j++) if (to_cr_cnt[j]) {
- if (!cr_str[j])
- s += sprintf(s,"PRIVSTRING NULL!!\n");
-				s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]);
- }
- s += sprintf(s,"]\n");
- }
- }
- return s - buf;
-}
-
-int zero_privop_counts(char *buf)
-{
- int i, j;
- char *s = buf;
-
- // this is ugly and should probably produce sorted output
- // but it will have to do for now
- privcnt.mov_to_ar_imm = 0; privcnt.mov_to_ar_reg = 0;
- privcnt.mov_from_ar = 0;
- privcnt.ssm = 0; privcnt.rsm = 0;
- privcnt.rfi = 0; privcnt.bsw0 = 0;
- privcnt.bsw1 = 0; privcnt.cover = 0;
- privcnt.fc = 0; privcnt.cpuid = 0;
- for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0;
- for (j=0; j < 128; j++) from_cr_cnt[j] = 0;
- for (j=0; j < 128; j++) to_cr_cnt[j] = 0;
- s += sprintf(s,"All privop statistics zeroed\n");
- return s - buf;
-}
-
-#ifdef PRIVOP_ADDR_COUNT
-
-extern struct privop_addr_count privop_addr_counter[];
-
-void privop_count_addr(unsigned long iip, int inst)
-{
- struct privop_addr_count *v = &privop_addr_counter[inst];
- int i;
-
- for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) {
- if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
- else if (v->addr[i] == iip) { v->count[i]++; return; }
- }
-	v->overflow++;
-}
-
-int dump_privop_addrs(char *buf)
-{
- int i,j;
- char *s = buf;
- s += sprintf(s,"Privop addresses:\n");
- for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
- struct privop_addr_count *v = &privop_addr_counter[i];
- s += sprintf(s,"%s:\n",v->instname);
- for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) {
- if (!v->addr[j]) break;
- s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]);
- }
- if (v->overflow)
- s += sprintf(s," other #%ld\n",v->overflow);
- }
- return s - buf;
-}
-
-void zero_privop_addrs(void)
-{
- int i,j;
- for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
- struct privop_addr_count *v = &privop_addr_counter[i];
- for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
- v->addr[j] = v->count[j] = 0;
- v->overflow = 0;
- }
-}
-#endif
-
-extern unsigned long dtlb_translate_count;
-extern unsigned long tr_translate_count;
-extern unsigned long phys_translate_count;
-extern unsigned long vhpt_translate_count;
-extern unsigned long lazy_cover_count;
-extern unsigned long idle_when_pending;
-extern unsigned long pal_halt_light_count;
-extern unsigned long context_switch_count;
-
-int dump_misc_stats(char *buf)
-{
- char *s = buf;
- s += sprintf(s,"Virtual TR translations: %d\n",tr_translate_count);
- s += sprintf(s,"Virtual VHPT translations: %d\n",vhpt_translate_count);
- s += sprintf(s,"Virtual DTLB translations: %d\n",dtlb_translate_count);
- s += sprintf(s,"Physical translations: %d\n",phys_translate_count);
- s += sprintf(s,"Idle when pending: %d\n",idle_when_pending);
-	s += sprintf(s,"PAL_HALT_LIGHT (no pending): %d\n",pal_halt_light_count);
- s += sprintf(s,"context switches: %d\n",context_switch_count);
- s += sprintf(s,"Lazy covers: %d\n",lazy_cover_count);
- return s - buf;
-}
-
-void zero_misc_stats(void)
-{
- dtlb_translate_count = 0;
- tr_translate_count = 0;
- phys_translate_count = 0;
- vhpt_translate_count = 0;
- lazy_cover_count = 0;
- pal_halt_light_count = 0;
- idle_when_pending = 0;
- context_switch_count = 0;
-}
-
-int dump_hyperprivop_counts(char *buf)
-{
- int i;
- char *s = buf;
- unsigned long total = 0;
- for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i];
- s += sprintf(s,"Slow hyperprivops (total %d):\n",total);
- for (i = 1; i <= HYPERPRIVOP_MAX; i++)
- if (slow_hyperpriv_cnt[i])
- s += sprintf(s,"%10d %s\n",
- slow_hyperpriv_cnt[i], hyperpriv_str[i]);
- total = 0;
- for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i];
- s += sprintf(s,"Fast hyperprivops (total %d):\n",total);
- for (i = 1; i <= HYPERPRIVOP_MAX; i++)
- if (fast_hyperpriv_cnt[i])
- s += sprintf(s,"%10d %s\n",
- fast_hyperpriv_cnt[i], hyperpriv_str[i]);
- return s - buf;
-}
-
-void zero_hyperprivop_counts(void)
-{
- int i;
- for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0;
- for (i = 0; i <= HYPERPRIVOP_MAX; i++) fast_hyperpriv_cnt[i] = 0;
-}
-
-#define TMPBUFLEN 8*1024
-int dump_privop_counts_to_user(char __user *ubuf, int len)
-{
- char buf[TMPBUFLEN];
- int n = dump_privop_counts(buf);
-
- n += dump_hyperprivop_counts(buf + n);
- n += dump_reflect_counts(buf + n);
-#ifdef PRIVOP_ADDR_COUNT
- n += dump_privop_addrs(buf + n);
-#endif
- n += dump_misc_stats(buf + n);
- if (len < TMPBUFLEN) return -1;
- if (__copy_to_user(ubuf,buf,n)) return -1;
- return n;
-}
-
-int zero_privop_counts_to_user(char __user *ubuf, int len)
-{
- char buf[TMPBUFLEN];
- int n = zero_privop_counts(buf);
-
- zero_hyperprivop_counts();
-#ifdef PRIVOP_ADDR_COUNT
- zero_privop_addrs();
-#endif
- zero_misc_stats();
- zero_reflect_counts();
- if (len < TMPBUFLEN) return -1;
- if (__copy_to_user(ubuf,buf,n)) return -1;
- return n;
-}
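
The PRIVOP_ADDR_COUNT instrumentation removed above records, per privileged instruction, the first few distinct faulting instruction addresses plus a hit count for each, and spills anything further into an overflow counter. A minimal standalone sketch of that bookkeeping, with hypothetical names and a reduced table size:

#include <stdint.h>
#include <stdio.h>

#define NADDRS 4	/* stands in for PRIVOP_COUNT_NADDRS */

/* Hypothetical reduction of struct privop_addr_count: remember the first
 * NADDRS distinct instruction addresses seen for one privop plus a hit
 * count for each; anything beyond that only bumps the overflow counter. */
struct addr_count {
	uint64_t addr[NADDRS];
	uint64_t count[NADDRS];
	uint64_t overflow;
};

static void count_addr(struct addr_count *v, uint64_t iip)
{
	for (int i = 0; i < NADDRS; i++) {
		if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; }
		if (v->addr[i] == iip) { v->count[i]++; return; }
	}
	v->overflow++;
}

int main(void)
{
	struct addr_count v = { { 0 } };
	uint64_t samples[] = { 0xa000, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000 };

	for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
		count_addr(&v, samples[i]);
	printf("addr %#llx hit %llu times, overflow=%llu\n",
	       (unsigned long long)v.addr[0], (unsigned long long)v.count[0],
	       (unsigned long long)v.overflow);
	return 0;
}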
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/process.c
--- a/xen/arch/ia64/process.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,749 +0,0 @@
-/*
- * Miscellaneous process/domain related routines
- *
- * Copyright (C) 2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <asm/ptrace.h>
-#include <xen/delay.h>
-
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
-#include <asm/sal.h> /* FOR struct ia64_sal_retval */
-
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-//#include <asm/ldt.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-#include <asm/regionreg.h>
-#include <asm/privop.h>
-#include <asm/vcpu.h>
-#include <asm/ia64_int.h>
-#include <asm/dom_fw.h>
-#include "hpsim_ssc.h"
-
-extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64);
-extern struct ia64_sal_retval pal_emulator_static(UINT64);
-extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
-
-extern unsigned long dom0_start, dom0_size;
-
-#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT)
-// note IA64_PSR_PK removed from following, why is this necessary?
-#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \
- IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \
- IA64_PSR_IT | IA64_PSR_BN)
-
-#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \
- IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \
- IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \
- IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \
- IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \
- IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA)
-
-#define PSCB(x,y) VCPU(x,y)
-#define PSCBX(x,y) x->arch.y
-
-extern unsigned long vcpu_verbose;
-
-long do_iopl(domid_t domain, unsigned int new_io_pl)
-{
- dummy();
- return 0;
-}
-
-void schedule_tail(struct vcpu *next)
-{
- unsigned long rr7;
- //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info);
- //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info);
-#ifdef CONFIG_VTI
- /* rr7 will be postponed to last point when resuming back to guest */
- vmx_load_all_rr(current);
-#else // CONFIG_VTI
- if (rr7 = load_region_regs(current)) {
- printk("schedule_tail: change to rr7 not yet implemented\n");
- }
-#endif // CONFIG_VTI
-}
-
-void tdpfoo(void) { }
-
-// given a domain virtual address, pte and pagesize, extract the metaphysical
-// address, convert the pte for a physical address for (possibly different)
-// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use
-// PAGE_SIZE!)
-unsigned long translate_domain_pte(unsigned long pteval,
- unsigned long address, unsigned long itir)
-{
- struct domain *d = current->domain;
- unsigned long mask, pteval2, mpaddr;
- unsigned long lookup_domain_mpa(struct domain *,unsigned long);
- extern struct domain *dom0;
- extern unsigned long dom0_start, dom0_size;
-
- // FIXME address had better be pre-validated on insert
- mask = (1L << ((itir >> 2) & 0x3f)) - 1;
- mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
- if (d == dom0) {
- if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
-			//printk("translate_domain_pte: out-of-bounds dom0 mpaddr %p! itc=%lx...\n",mpaddr,ia64_get_itc());
- tdpfoo();
- }
- }
- else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) {
-		printf("translate_domain_pte: bad mpa=%p (> %p),vadr=%p,pteval=%p,itir=%p\n",
- mpaddr,d->max_pages<<PAGE_SHIFT,address,pteval,itir);
- tdpfoo();
- }
- pteval2 = lookup_domain_mpa(d,mpaddr);
- pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
- pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
- pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
- return pteval2;
-}
-
-// given a current domain metaphysical address, return the physical address
-unsigned long translate_domain_mpaddr(unsigned long mpaddr)
-{
- extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
- unsigned long pteval;
-
- if (current->domain == dom0) {
- if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
-			printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr);
- tdpfoo();
- }
- }
- pteval = lookup_domain_mpa(current->domain,mpaddr);
- return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
-}
-
-unsigned long slow_reflect_count[0x80] = { 0 };
-unsigned long fast_reflect_count[0x80] = { 0 };
-
-#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++;
-
-void zero_reflect_counts(void)
-{
- int i;
- for (i=0; i<0x80; i++) slow_reflect_count[i] = 0;
- for (i=0; i<0x80; i++) fast_reflect_count[i] = 0;
-}
-
-int dump_reflect_counts(char *buf)
-{
- int i,j,cnt;
- char *s = buf;
-
- s += sprintf(s,"Slow reflections by vector:\n");
- for (i = 0, j = 0; i < 0x80; i++) {
- if (cnt = slow_reflect_count[i]) {
- s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
- if ((j++ & 3) == 3) s += sprintf(s,"\n");
- }
- }
- if (j & 3) s += sprintf(s,"\n");
- s += sprintf(s,"Fast reflections by vector:\n");
- for (i = 0, j = 0; i < 0x80; i++) {
- if (cnt = fast_reflect_count[i]) {
- s += sprintf(s,"0x%02x00:%10d, ",i,cnt);
- if ((j++ & 3) == 3) s += sprintf(s,"\n");
- }
- }
- if (j & 3) s += sprintf(s,"\n");
- return s - buf;
-}
-
-void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long itiriim, struct pt_regs *regs, unsigned long vector)
-{
- unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long);
- unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long);
- struct domain *d = current->domain;
- struct vcpu *v = current;
-
- if (vector == IA64_EXTINT_VECTOR) {
-
- extern unsigned long vcpu_verbose, privop_trace;
-		static int first_extint = 1;
- if (first_extint) {
-			printf("Delivering first extint to domain: ifa=%p, isr=%p, itir=%p, iip=%p\n",ifa,isr,itiriim,regs->cr_iip);
- //privop_trace = 1; vcpu_verbose = 1;
- first_extint = 0;
- }
- }
- if (!PSCB(v,interrupt_collection_enabled)) {
- if (!(PSCB(v,ipsr) & IA64_PSR_DT)) {
-			panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n");
- }
- vector &= ~0xf;
- if (vector != IA64_DATA_TLB_VECTOR &&
- vector != IA64_ALT_DATA_TLB_VECTOR &&
- vector != IA64_VHPT_TRANS_VECTOR) {
-panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n",
- vector,regs->cr_ipsr,regs->cr_iip,ifa,isr,PSCB(v,iip));
-
- }
-//printf("Delivering NESTED DATA TLB fault\n");
- vector = IA64_DATA_NESTED_TLB_VECTOR;
-		regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
-		regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
-// NOTE: nested trap must NOT pass PSCB address
- //regs->r31 = (unsigned long) &PSCB(v);
- inc_slow_reflect_count(vector);
- return;
-
- }
- if ((vector & 0xf) == IA64_FORCED_IFA)
- ifa = PSCB(v,tmp[0]);
- vector &= ~0xf;
- PSCB(v,ifa) = ifa;
-	if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt TLB miss */
- vcpu_thash(v,ifa,&PSCB(current,iha));
- PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed?
- PSCB(v,precover_ifs) = regs->cr_ifs;
- vcpu_bsw0(v);
- PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr);
- if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
- PSCB(v,iim) = itiriim;
- else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa);
- PSCB(v,isr) = isr; // this is unnecessary except for interrupts!
- PSCB(v,iip) = regs->cr_iip;
- PSCB(v,ifs) = 0;
- PSCB(v,incomplete_regframe) = 0;
-
- regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL;
- regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
-#ifdef CONFIG_SMP
-#warning "SMP FIXME: sharedinfo doesn't handle smp yet, need page per vcpu"
-#endif
- regs->r31 = &(((mapped_regs_t *)SHARED_ARCHINFO_ADDR)->ipsr);
-
- PSCB(v,interrupt_delivery_enabled) = 0;
- PSCB(v,interrupt_collection_enabled) = 0;
-
- inc_slow_reflect_count(vector);
-}
-
-void foodpi(void) {}
-
-unsigned long pending_false_positive = 0;
-
-// ONLY gets called from ia64_leave_kernel
-// ONLY call with interrupts disabled?? (else might miss one?)
-// NEVER successful if already reflecting a trap/fault because psr.i==0
-void deliver_pending_interrupt(struct pt_regs *regs)
-{
- struct domain *d = current->domain;
- struct vcpu *v = current;
- // FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) && user_mode(regs)) {
- //vcpu_poke_timer(v);
- if (vcpu_deliverable_interrupts(v)) {
- unsigned long isr = regs->cr_ipsr & IA64_PSR_RI;
- if (vcpu_timer_pending_early(v))
-printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id);
- reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR);
- }
- else if (PSCB(v,pending_interruption))
- ++pending_false_positive;
- }
-}
-unsigned long lazy_cover_count = 0;
-
-int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs)
-{
- if (!PSCB(v,interrupt_collection_enabled)) {
- PSCB(v,ifs) = regs->cr_ifs;
- PSCB(v,incomplete_regframe) = 1;
- regs->cr_ifs = 0;
- lazy_cover_count++;
- return(1); // retry same instruction with cr.ifs off
- }
- return(0);
-}
-
-void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir)
-{
- unsigned long iip = regs->cr_iip;
- // FIXME should validate address here
- unsigned long pteval;
- unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
- IA64FAULT fault;
-
-	if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return;
- if ((isr & IA64_ISR_SP)
-	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
- {
- /*
-		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
-		 * bit in the psr to ensure forward progress.  (Target register will get a
- * NaT for ld.s, lfetch will be canceled.)
- */
- ia64_psr(regs)->ed = 1;
- return;
- }
-
- fault = vcpu_translate(current,address,is_data,&pteval,&itir);
- if (fault == IA64_NO_FAULT)
- {
- pteval = translate_domain_pte(pteval,address,itir);
-
-		vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f);
- return;
- }
- else if (IS_VMM_ADDRESS(iip))
- {
- if (!ia64_done_with_exception(regs)) {
- // should never happen. If it does, region 0 addr may
- // indicate a bad xen pointer
- printk("*** xen_handle_domain_access: exception table"
- " lookup failed, iip=%p, addr=%p, spinning...\n",
- iip,address);
-			panic_domain(regs,"*** xen_handle_domain_access: exception table"
- " lookup failed, iip=%p, addr=%p, spinning...\n",
- iip,address);
- }
- return;
- }
-
- reflect_interruption(address, isr, 0, regs, fault);
-}
-
-void
-ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
- unsigned long iim, unsigned long itir, unsigned long arg5,
- unsigned long arg6, unsigned long arg7, unsigned long stack)
-{
- struct pt_regs *regs = (struct pt_regs *) &stack;
- unsigned long code, error = isr;
- char buf[128];
- int result, sig;
- static const char *reason[] = {
- "IA-64 Illegal Operation fault",
- "IA-64 Privileged Operation fault",
- "IA-64 Privileged Register fault",
- "IA-64 Reserved Register/Field fault",
- "Disabled Instruction Set Transition fault",
-		"Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
-		"Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
- "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
- };
-#if 0
-printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n",
- vector, ifa, regs->cr_iip, regs->cr_ipsr, isr);
-#endif
-
-	if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
- /*
-		 * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
- * the lfetch.
- */
- ia64_psr(regs)->ed = 1;
- printf("ia64_fault: handled lfetch.fault\n");
- return;
- }
-
- switch (vector) {
- case 24: /* General Exception */
- code = (isr >> 4) & 0xf;
- sprintf(buf, "General Exception: %s%s", reason[code],
- (code == 3) ? ((isr & (1UL << 37))
-			       ? " (RSE access)" : " (data access)") : "");
- if (code == 8) {
-# ifdef CONFIG_IA64_PRINT_HAZARDS
-			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
-			       current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri,
- regs->pr);
-# endif
- printf("ia64_fault: returning on hazard\n");
- return;
- }
- break;
-
- case 25: /* Disabled FP-Register */
- if (isr & 2) {
- //disabled_fph_fault(regs);
- //return;
- }
- sprintf(buf, "Disabled FPL fault---not supposed to happen!");
- break;
-
- case 26: /* NaT Consumption */
- if (user_mode(regs)) {
- void *addr;
-
- if (((isr >> 4) & 0xf) == 2) {
- /* NaT page consumption */
- //sig = SIGSEGV;
- //code = SEGV_ACCERR;
- addr = (void *) ifa;
- } else {
- /* register NaT consumption */
- //sig = SIGILL;
- //code = ILL_ILLOPN;
-				addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- }
- //siginfo.si_signo = sig;
- //siginfo.si_code = code;
- //siginfo.si_errno = 0;
- //siginfo.si_addr = addr;
- //siginfo.si_imm = vector;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(sig, &siginfo, current);
- //return;
- } //else if (ia64_done_with_exception(regs))
- //return;
- sprintf(buf, "NaT consumption");
- break;
-
- case 31: /* Unsupported Data Reference */
- if (user_mode(regs)) {
- //siginfo.si_signo = SIGILL;
- //siginfo.si_code = ILL_ILLOPN;
- //siginfo.si_errno = 0;
-			//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //siginfo.si_imm = vector;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(SIGILL, &siginfo, current);
- //return;
- }
- sprintf(buf, "Unsupported data reference");
- break;
-
- case 29: /* Debug */
- case 35: /* Taken Branch Trap */
- case 36: /* Single Step Trap */
- //if (fsys_mode(current, regs)) {}
- switch (vector) {
- case 29:
- //siginfo.si_code = TRAP_HWBKPT;
-#ifdef CONFIG_ITANIUM
- /*
-			 * Erratum 10 (IFA may contain incorrect address) now has
- * "NoFix" status. There are no plans for fixing this.
- */
- if (ia64_psr(regs)->is == 0)
- ifa = regs->cr_iip;
-#endif
- break;
- case 35: ifa = 0; break;
- case 36: ifa = 0; break;
- //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
- //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
- }
- //siginfo.si_signo = SIGTRAP;
- //siginfo.si_errno = 0;
- //siginfo.si_addr = (void *) ifa;
- //siginfo.si_imm = 0;
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //force_sig_info(SIGTRAP, &siginfo, current);
- //return;
-
- case 32: /* fp fault */
- case 33: /* fp trap */
- //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
-		//if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
- //siginfo.si_signo = SIGFPE;
- //siginfo.si_errno = 0;
- //siginfo.si_code = FPE_FLTINV;
-			//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //siginfo.si_flags = __ISR_VALID;
- //siginfo.si_isr = isr;
- //siginfo.si_imm = 0;
- //force_sig_info(SIGFPE, &siginfo, current);
- //}
- //return;
- sprintf(buf, "FP fault/trap");
- break;
-
- case 34:
- if (isr & 0x2) {
- /* Lower-Privilege Transfer Trap */
- /*
-			 * Just clear PSR.lp and then return immediately: all the
-			 * interesting work (e.g., signal delivery is done in the kernel
- * exit path).
- */
- //ia64_psr(regs)->lp = 0;
- //return;
- sprintf(buf, "Lower-Privilege Transfer trap");
- } else {
- /* Unimplemented Instr. Address Trap */
- if (user_mode(regs)) {
- //siginfo.si_signo = SIGILL;
- //siginfo.si_code = ILL_BADIADDR;
- //siginfo.si_errno = 0;
- //siginfo.si_flags = 0;
- //siginfo.si_isr = 0;
- //siginfo.si_imm = 0;
-				//siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- //force_sig_info(SIGILL, &siginfo, current);
- //return;
- }
- sprintf(buf, "Unimplemented Instruction Address fault");
- }
- break;
-
- case 45:
- printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
- printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
- regs->cr_iip, ifa, isr);
- //force_sig(SIGSEGV, current);
- break;
-
- case 46:
- printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
- printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim -
0x%lx\n",
- regs->cr_iip, ifa, isr, iim);
- //force_sig(SIGSEGV, current);
- return;
-
- case 47:
- sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
- break;
-
- default:
- sprintf(buf, "Fault %lu", vector);
- break;
- }
- //die_if_kernel(buf, regs, error);
-printk("ia64_fault: %s: reflecting\n",buf);
-reflect_interruption(ifa,isr,iim,regs,IA64_GENEX_VECTOR);
-//while(1);
- //force_sig(SIGILL, current);
-}
-
-unsigned long running_on_sim = 0;
-
-void
-do_ssc(unsigned long ssc, struct pt_regs *regs)
-{
- extern unsigned long lookup_domain_mpa(struct domain *,unsigned long);
- unsigned long arg0, arg1, arg2, arg3, retval;
- char buf[2];
-/**/ static int last_fd, last_count; // FIXME FIXME FIXME
-/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP
-/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat;
- extern unsigned long vcpu_verbose, privop_trace;
-
- arg0 = vcpu_get_gr(current,32);
- switch(ssc) {
- case SSC_PUTCHAR:
- buf[0] = arg0;
- buf[1] = '\0';
- printf(buf);
- break;
- case SSC_GETCHAR:
- retval = ia64_ssc(0,0,0,0,ssc);
- vcpu_set_gr(current,8,retval);
- break;
- case SSC_WAIT_COMPLETION:
- if (arg0) { // metaphysical address
-
- arg0 = translate_domain_mpaddr(arg0);
-/**/ stat = (struct ssc_disk_stat *)__va(arg0);
-///**/ if (stat->fd == last_fd) stat->count = last_count;
-/**/ stat->count = last_count;
-//if (last_count >= PAGE_SIZE) printf("ssc_wait:
stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count);
-///**/ retval = ia64_ssc(arg0,0,0,0,ssc);
-/**/ retval = 0;
- }
- else retval = -1L;
- vcpu_set_gr(current,8,retval);
- break;
- case SSC_OPEN:
- arg1 = vcpu_get_gr(current,33); // access rights
-if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. (ignoring...)\n"); arg0 = 0; }
- if (arg0) { // metaphysical address
- arg0 = translate_domain_mpaddr(arg0);
- retval = ia64_ssc(arg0,arg1,0,0,ssc);
- }
- else retval = -1L;
- vcpu_set_gr(current,8,retval);
- break;
- case SSC_WRITE:
- case SSC_READ:
-//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n");
- arg1 = vcpu_get_gr(current,33);
- arg2 = vcpu_get_gr(current,34);
- arg3 = vcpu_get_gr(current,35);
- if (arg2) { // metaphysical address of descriptor
- struct ssc_disk_req *req;
- unsigned long mpaddr, paddr;
- long len;
-
- arg2 = translate_domain_mpaddr(arg2);
- req = (struct disk_req *)__va(arg2);
- req->len &= 0xffffffffL; // avoid strange bug
- len = req->len;
-/**/ last_fd = arg1;
-/**/ last_count = len;
- mpaddr = req->addr;
-//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx
",last_fd,mpaddr,len);
- retval = 0;
-			if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) {
- // do partial page first
- req->addr = translate_domain_mpaddr(mpaddr);
- req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK);
- len -= req->len; mpaddr += req->len;
- retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
- arg3 += req->len; // file offset
-/**/ last_stat.fd = last_fd;
-/**/				(void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
-//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval);
- }
- if (retval >= 0) while (len > 0) {
- req->addr = translate_domain_mpaddr(mpaddr);
- req->len = (len > PAGE_SIZE) ? PAGE_SIZE : len;
- len -= PAGE_SIZE; mpaddr += PAGE_SIZE;
- retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc);
- arg3 += req->len; // file offset
-// TEMP REMOVED AGAIN arg3 += req->len; // file offset
-/**/ last_stat.fd = last_fd;
-/**/				(void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION);
-//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval);
- }
- // set it back to the original value
- req->len = last_count;
- }
- else retval = -1L;
- vcpu_set_gr(current,8,retval);
-//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval);
- break;
- case SSC_CONNECT_INTERRUPT:
- arg1 = vcpu_get_gr(current,33);
- arg2 = vcpu_get_gr(current,34);
- arg3 = vcpu_get_gr(current,35);
- if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not
implemented on hardware. (ignoring...)\n"); break; }
- (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc);
- break;
- case SSC_NETDEV_PROBE:
- vcpu_set_gr(current,8,-1L);
- break;
- default:
- printf("ia64_handle_break: bad ssc code %lx, iip=%p, b0=%p...
spinning\n",ssc,regs->cr_iip,regs->b0);
- while(1);
- break;
- }
- vcpu_increment_iip(current);
-}
-
-int first_break = 1;
-
-void
-ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
-{
- struct domain *d = (struct domain *) current->domain;
- struct vcpu *v = (struct domain *) current;
- extern unsigned long running_on_sim;
-
- if (first_break) {
- if (platform_is_hp_ski()) running_on_sim = 1;
- else running_on_sim = 0;
- first_break = 0;
- }
- if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
- if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
- else do_ssc(vcpu_get_gr(current,36), regs);
- }
- else if (iim == d->arch.breakimm) {
- if (ia64_hypercall(regs))
- vcpu_increment_iip(current);
- }
- else if (!PSCB(v,interrupt_collection_enabled)) {
- if (ia64_hyperprivop(iim,regs))
- vcpu_increment_iip(current);
- }
- else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR);
-}
-
-void
-ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir)
-{
- IA64FAULT vector;
- struct domain *d = current->domain;
- struct vcpu *v = current;
- // FIXME: no need to pass itir in to this routine as we need to
- // compute the virtual itir anyway (based on domain's RR.ps)
- // AND ACTUALLY reflect_interruption doesn't use it anyway!
- itir = vcpu_get_itir_on_fault(v,ifa);
- vector = priv_emulate(current,regs,isr);
- if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) {
- reflect_interruption(ifa,isr,itir,regs,vector);
- }
-}
-
-#define INTR_TYPE_MAX 10
-UINT64 int_counts[INTR_TYPE_MAX];
-
-void
-ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector)
-{
- struct domain *d = (struct domain *) current->domain;
- struct vcpu *v = (struct domain *) current;
- unsigned long check_lazy_cover = 0;
- unsigned long psr = regs->cr_ipsr;
- unsigned long itir = vcpu_get_itir_on_fault(v,ifa);
-
- if (!(psr & IA64_PSR_CPL)) {
- printk("ia64_handle_reflection: reflecting with priv=0!!\n");
- }
- // FIXME: no need to pass itir in to this routine as we need to
- // compute the virtual itir anyway (based on domain's RR.ps)
- // AND ACTUALLY reflect_interruption doesn't use it anyway!
- itir = vcpu_get_itir_on_fault(v,ifa);
- switch(vector) {
- case 8:
- vector = IA64_DIRTY_BIT_VECTOR; break;
- case 9:
- vector = IA64_INST_ACCESS_BIT_VECTOR; break;
- case 10:
- check_lazy_cover = 1;
- vector = IA64_DATA_ACCESS_BIT_VECTOR; break;
- case 20:
- check_lazy_cover = 1;
- vector = IA64_PAGE_NOT_PRESENT_VECTOR; break;
- case 22:
- vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break;
- case 23:
- check_lazy_cover = 1;
- vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break;
- case 25:
- vector = IA64_DISABLED_FPREG_VECTOR;
- break;
- case 26:
-printf("*** NaT fault... attempting to handle as privop\n");
-printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr);
- vector = priv_emulate(v,regs,isr);
- if (vector == IA64_NO_FAULT) {
-printf("*** Handled privop masquerading as NaT fault\n");
- return;
- }
- vector = IA64_NAT_CONSUMPTION_VECTOR; break;
- case 27:
-//printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc());
- itir = iim;
- vector = IA64_SPECULATION_VECTOR; break;
- case 30:
- // FIXME: Should we handle unaligned refs in Xen??
- vector = IA64_UNALIGNED_REF_VECTOR; break;
- default:
- printf("ia64_handle_reflection: unhandled
vector=0x%lx\n",vector);
- while(vector);
- return;
- }
-	if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return;
- reflect_interruption(ifa,isr,itir,regs,vector);
-}
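Reading note, not part of the patch: the ia64_handle_reflection() code removed above amounts to a mapping from hardware fault numbers to the vector reflected into the guest, plus a flag marking the data faults that get the lazy-cover check. Below is a minimal, standalone table-driven sketch of that mapping in plain C; the enum names and the small fault subset are illustrative stand-ins, not the real IVT offsets or the full table from the removed file.

#include <stdio.h>
#include <stddef.h>

enum guest_vector { DIRTY_BIT, INST_ACCESS_BIT, DATA_ACCESS_BIT,
		    PAGE_NOT_PRESENT, DATA_ACCESS_RIGHTS, UNKNOWN };

struct reflect_map {
	unsigned long fault;	/* hardware fault number */
	enum guest_vector vec;	/* vector reflected into the guest */
	int lazy_cover;		/* lazy-cover check applies (data faults) */
};

static const struct reflect_map table[] = {
	{  8, DIRTY_BIT,          0 },
	{  9, INST_ACCESS_BIT,    0 },
	{ 10, DATA_ACCESS_BIT,    1 },
	{ 20, PAGE_NOT_PRESENT,   1 },
	{ 23, DATA_ACCESS_RIGHTS, 1 },
};

static enum guest_vector reflect(unsigned long fault, int *lazy)
{
	/* Linear scan mirrors the switch in the removed handler. */
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].fault == fault) {
			*lazy = table[i].lazy_cover;
			return table[i].vec;
		}
	return UNKNOWN;
}

int main(void)
{
	int lazy;
	enum guest_vector v = reflect(20, &lazy);
	printf("fault 20 -> guest vector %d (lazy cover=%d)\n", v, lazy);
	return 0;
}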
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/regionreg.c
--- a/xen/arch/ia64/regionreg.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,376 +0,0 @@
-/*
- * Region register and region id management
- *
- * Copyright (C) 2001-2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx
- * Bret Mckee (bret.mckee@xxxxxx)
- *
- */
-
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <asm/page.h>
-#include <asm/regionreg.h>
-#include <asm/vhpt.h>
-#include <asm/vcpu.h>
-extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info);
-
-
-#define IA64_MIN_IMPL_RID_BITS (IA64_MIN_IMPL_RID_MSB+1)
-#define IA64_MAX_IMPL_RID_BITS 24
-
-#define MIN_RIDS (1 << IA64_MIN_IMPL_RID_BITS)
-#define MIN_RID_MAX (MIN_RIDS - 1)
-#define MIN_RID_MASK (MIN_RIDS - 1)
-#define MAX_RIDS (1 << (IA64_MAX_IMPL_RID_BITS))
-#define MAX_RID (MAX_RIDS - 1)
-#define MAX_RID_BLOCKS	(1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
-#define RIDS_PER_RIDBLOCK MIN_RIDS
-
-#if 0
-// following already defined in include/asm-ia64/gcc_intrin.h
-// it should probably be ifdef'd out from there to ensure all region
-// register usage is encapsulated in this file
-static inline unsigned long
-ia64_get_rr (unsigned long rr)
-{
- unsigned long r;
- __asm__ __volatile__ (";;mov %0=rr[%1];;":"=r"(r):"r"(rr):"memory");
- return r;
-}
-
-static inline void
-ia64_set_rr (unsigned long rr, unsigned long rrv)
-{
- __asm__ __volatile__ (";;mov
rr[%0]=%1;;"::"r"(rr),"r"(rrv):"memory");
-}
-#endif
-
-// use this to allocate a rid out of the "Xen reserved rid block"
-unsigned long allocate_reserved_rid(void)
-{
- static unsigned long currentrid = XEN_DEFAULT_RID;
- unsigned long t = currentrid;
-
- unsigned long max = RIDS_PER_RIDBLOCK;
-
- if (++currentrid >= max) return(-1UL);
- return t;
-}
-
-
-// returns -1 if none available
-unsigned long allocate_metaphysical_rr(void)
-{
- ia64_rr rrv;
-
- rrv.rid = allocate_reserved_rid();
- rrv.ps = PAGE_SHIFT;
- rrv.ve = 0;
- return rrv.rrval;
-}
-
-int deallocate_metaphysical_rid(unsigned long rid)
-{
- // fix this when the increment allocation mechanism is fixed.
- return 1;
-}
-
-/*************************************
- Region Block setup/management
-*************************************/
-
-static int implemented_rid_bits = 0;
-static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 };
-
-void get_impl_rid_bits(void)
-{
- // FIXME (call PAL)
-//#ifdef CONFIG_MCKINLEY
- implemented_rid_bits = IA64_MAX_IMPL_RID_BITS;
-//#else
-//#error "rid ranges won't work on Merced"
-//#endif
- if (implemented_rid_bits <= IA64_MIN_IMPL_RID_BITS ||
- implemented_rid_bits > IA64_MAX_IMPL_RID_BITS)
- BUG();
-}
-
-
-/*
- * Allocate a power-of-two-sized chunk of region id space -- one or more
- * "rid blocks"
- */
-int allocate_rid_range(struct domain *d, unsigned long ridbits)
-{
- int i, j, n_rid_blocks;
-
- if (implemented_rid_bits == 0) get_impl_rid_bits();
-
- if (ridbits >= IA64_MAX_IMPL_RID_BITS)
- ridbits = IA64_MAX_IMPL_RID_BITS - 1;
-
- if (ridbits < IA64_MIN_IMPL_RID_BITS)
- ridbits = IA64_MIN_IMPL_RID_BITS;
-
- // convert to rid_blocks and find one
- n_rid_blocks = ridbits - IA64_MIN_IMPL_RID_BITS + 1;
-
- // skip over block 0, reserved for "meta-physical mappings (and Xen)"
- for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) {
- if (ridblock_owner[i] == NULL) {
- for (j = i; j < i + n_rid_blocks; ++j) {
- if (ridblock_owner[j]) break;
- }
- if (ridblock_owner[j] == NULL) break;
- }
- }
-
- if (i >= MAX_RID_BLOCKS) return 0;
-
- // found an unused block:
- // (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits)
- // mark this block as owned
- for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d;
-
- // setup domain struct
- d->arch.rid_bits = ridbits;
-	d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS; d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
-printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n",
-d,d->arch.starting_rid, d->arch.ending_rid);
-
- return 1;
-}
-
-
-int deallocate_rid_range(struct domain *d)
-{
- int i;
- int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS;
- int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS;
-
- return 1; // KLUDGE ALERT
- //
-	// not all domains will have allocated RIDs (physical mode loaders for instance)
- //
- if (d->arch.rid_bits == 0) return 1;
-
-#ifdef DEBUG
- for (i = rid_block_start; i < rid_block_end; ++i) {
- ASSERT(ridblock_owner[i] == d);
- }
-#endif
-
- for (i = rid_block_start; i < rid_block_end; ++i)
- ridblock_owner[i] = NULL;
-
- d->arch.rid_bits = 0;
- d->arch.starting_rid = 0;
- d->arch.ending_rid = 0;
- return 1;
-}
-
-
-static inline void
-set_rr_no_srlz(unsigned long rr, unsigned long rrval)
-{
- ia64_set_rr(rr, vmMangleRID(rrval));
-}
-
-void
-set_rr(unsigned long rr, unsigned long rrval)
-{
- ia64_set_rr(rr, vmMangleRID(rrval));
- ia64_srlz_d();
-}
-
-unsigned long
-get_rr(unsigned long rr)
-{
- return vmUnmangleRID(ia64_get_rr(rr));
-}
-
-static inline int validate_page_size(unsigned long ps)
-{
- switch(ps) {
- case 12: case 13: case 14: case 16: case 18:
- case 20: case 22: case 24: case 26: case 28:
- return 1;
- default:
- return 0;
- }
-}
-
-// validates and changes a single region register
-// in the currently executing domain
-// Passing a value of -1 is a (successful) no-op
-// NOTE: DOES NOT SET VCPU's rrs[x] value!!
-int set_one_rr(unsigned long rr, unsigned long val)
-{
- struct vcpu *v = current;
- unsigned long rreg = REGION_NUMBER(rr);
- ia64_rr rrv, newrrv, memrrv;
- unsigned long newrid;
-
- if (val == -1) return 1;
-
- rrv.rrval = val;
- newrrv.rrval = 0;
- newrid = v->arch.starting_rid + rrv.rid;
-
- if (newrid > v->arch.ending_rid) {
- printk("can't set rr%d to %lx, starting_rid=%lx,"
- "ending_rid=%lx, val=%lx\n", rreg, newrid,
- v->arch.starting_rid,v->arch.ending_rid,val);
- return 0;
- }
-
-#ifdef CONFIG_VTI
- memrrv.rrval = rrv.rrval;
- if (rreg == 7) {
- newrrv.rid = newrid;
- newrrv.ve = VHPT_ENABLED_REGION_7;
- newrrv.ps = IA64_GRANULE_SHIFT;
- ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
- v->vcpu_info->arch.privregs);
- }
- else {
- newrrv.rid = newrid;
- // FIXME? region 6 needs to be uncached for EFI to work
- if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7;
- else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6;
- newrrv.ps = PAGE_SHIFT;
- if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
- set_rr(rr,newrrv.rrval);
- }
-#else
- memrrv.rrval = rrv.rrval;
- newrrv.rid = newrid;
- newrrv.ve = 1; // VHPT now enabled for region 7!!
- newrrv.ps = PAGE_SHIFT;
- if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval;
- if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info,
- v->vcpu_info->arch.privregs);
- else set_rr(rr,newrrv.rrval);
-#endif
- return 1;
-}
-
-// set rr0 to the passed rid (for metaphysical mode so don't use domain offset
-int set_metaphysical_rr0(void)
-{
- struct vcpu *v = current;
- ia64_rr rrv;
-
-// rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING
- set_rr(0,v->arch.metaphysical_rr0);
-}
-
-// validates/changes region registers 0-6 in the currently executing domain
-// Note that this is the one and only SP API (other than executing a privop)
-// for a domain to use to change region registers
-int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3,
- u64 rr4, u64 rr5, u64 rr6, u64 rr7)
-{
- if (!set_one_rr(0x0000000000000000L, rr0)) return 0;
- if (!set_one_rr(0x2000000000000000L, rr1)) return 0;
- if (!set_one_rr(0x4000000000000000L, rr2)) return 0;
- if (!set_one_rr(0x6000000000000000L, rr3)) return 0;
- if (!set_one_rr(0x8000000000000000L, rr4)) return 0;
- if (!set_one_rr(0xa000000000000000L, rr5)) return 0;
- if (!set_one_rr(0xc000000000000000L, rr6)) return 0;
- if (!set_one_rr(0xe000000000000000L, rr7)) return 0;
- return 1;
-}
-
-void init_all_rr(struct vcpu *v)
-{
- ia64_rr rrv;
-
- rrv.rrval = 0;
- rrv.rrval = v->domain->arch.metaphysical_rr0;
- rrv.ps = PAGE_SHIFT;
- rrv.ve = 1;
-if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); }
- VCPU(v,rrs[0]) = -1;
- VCPU(v,rrs[1]) = rrv.rrval;
- VCPU(v,rrs[2]) = rrv.rrval;
- VCPU(v,rrs[3]) = rrv.rrval;
- VCPU(v,rrs[4]) = rrv.rrval;
- VCPU(v,rrs[5]) = rrv.rrval;
- rrv.ve = 0;
- VCPU(v,rrs[6]) = rrv.rrval;
-// v->shared_info->arch.rrs[7] = rrv.rrval;
-}
-
-
-/* XEN/ia64 INTERNAL ROUTINES */
-
-unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval)
-{
- ia64_rr rrv;
-
- rrv.rrval = rrval;
- rrv.rid += v->arch.starting_rid;
- return rrv.rrval;
-}
-
-unsigned long
-virtualize_rid(struct vcpu *v, unsigned long rrval)
-{
- ia64_rr rrv;
-
- rrv.rrval = rrval;
- rrv.rid -= v->arch.starting_rid;
- return rrv.rrval;
-}
-
-// loads a thread's region register (0-6) state into
-// the real physical region registers. Returns the
-// (possibly mangled) bits to store into rr7
-// iff it is different than what is currently in physical
-// rr7 (because we have to go to assembly and physical mode
-// to change rr7). If no change to rr7 is required, returns 0.
-//
-unsigned long load_region_regs(struct vcpu *v)
-{
- unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7;
- // TODO: These probably should be validated
- unsigned long bad = 0;
-
- if (VCPU(v,metaphysical_mode)) {
- ia64_rr rrv;
-
- rrv.rrval = 0;
- rrv.rid = v->domain->arch.metaphysical_rr0;
- rrv.ps = PAGE_SHIFT;
- rrv.ve = 1;
- rr0 = rrv.rrval;
- set_rr_no_srlz(0x0000000000000000L, rr0);
- ia64_srlz_d();
- }
- else {
- rr0 = VCPU(v,rrs[0]);
- if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1;
- }
- rr1 = VCPU(v,rrs[1]);
- rr2 = VCPU(v,rrs[2]);
- rr3 = VCPU(v,rrs[3]);
- rr4 = VCPU(v,rrs[4]);
- rr5 = VCPU(v,rrs[5]);
- rr6 = VCPU(v,rrs[6]);
- rr7 = VCPU(v,rrs[7]);
- if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2;
- if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4;
- if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8;
- if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10;
- if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20;
- if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40;
- if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80;
- if (bad) {
- panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad);
- }
- return 0;
-}
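Reading note, not part of the patch: allocate_rid_range() in the removed regionreg.c does a first-fit scan for a power-of-two run of free RID blocks, aligned to the run length and skipping block 0 (reserved for metaphysical mappings and Xen). A minimal standalone sketch of that scan follows; MAX_BLOCKS and the integer owner encoding are illustrative stand-ins for MAX_RID_BLOCKS and the ridblock_owner domain pointers. The original re-checks the owner slot again after the inner loop; the sketch folds that into the loop-exit test.

#include <stdio.h>

#define MAX_BLOCKS 64

static int owner[MAX_BLOCKS];	/* 0 = free, otherwise a domain id */

/* Find n contiguous free blocks aligned to n, skipping block 0.
 * Returns the first block index, or -1 if no run is free. */
static int find_rid_blocks(int n)
{
	for (int i = n; i + n <= MAX_BLOCKS; i += n) {
		int j;
		for (j = i; j < i + n; j++)
			if (owner[j])
				break;
		if (j == i + n)
			return i;	/* whole run is free */
	}
	return -1;
}

int main(void)
{
	owner[2] = 1;			/* pretend block 2 is already taken */
	int start = find_rid_blocks(2);
	if (start >= 0) {
		for (int j = start; j < start + 2; j++)
			owner[j] = 7;	/* mark the run as owned */
		printf("allocated blocks %d..%d\n", start, start + 1);
	}
	return 0;
}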
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/sn_console.c
--- a/xen/arch/ia64/sn_console.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,84 +0,0 @@
-/*
- * C-Brick Serial Port (and console) driver for SGI Altix machines.
- *
- * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
- */
-
-#include <asm/acpi.h>
-#include <asm/sn/sn_sal.h>
-#include <xen/serial.h>
-
-void sn_putc(struct serial_port *, char);
-
-static struct uart_driver sn_sal_console = {
- .putc = sn_putc,
-};
-
-/**
- * early_sn_setup - early setup routine for SN platforms
- *
- * pulled from arch/ia64/sn/kernel/setup.c
- */
-static void __init early_sn_setup(void)
-{
- efi_system_table_t *efi_systab;
- efi_config_table_t *config_tables;
- struct ia64_sal_systab *sal_systab;
- struct ia64_sal_desc_entry_point *ep;
- char *p;
- int i, j;
-
- /*
-	 * Parse enough of the SAL tables to locate the SAL entry point. Since console
-	 * IO on SN2 is done via SAL calls, early_printk won't work without this.
- *
-	 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
-	 * Any changes to those files may have to be made here as well.
- */
- efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
- config_tables = __va(efi_systab->tables);
- for (i = 0; i < efi_systab->nr_tables; i++) {
- if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
- 0) {
- sal_systab = __va(config_tables[i].table);
- p = (char *)(sal_systab + 1);
- for (j = 0; j < sal_systab->entry_count; j++) {
- if (*p == SAL_DESC_ENTRY_POINT) {
- ep = (struct ia64_sal_desc_entry_point
- *)p;
- ia64_sal_handler_init(__va
- (ep->sal_proc),
- __va(ep->gp));
- return;
- }
- p += SAL_DESC_SIZE(*p);
- }
- }
- }
- /* Uh-oh, SAL not available?? */
- printk(KERN_ERR "failed to find SAL entry point\n");
-}
-
-/**
- * sn_serial_console_early_setup - Sets up early console output support
- *
- * pulled from drivers/serial/sn_console.c
- */
-int __init sn_serial_console_early_setup(void)
-{
- if (strcmp("sn2",acpi_get_sysname()))
- return -1;
-
- early_sn_setup(); /* Find SAL entry points */
- serial_register_uart(0, &sn_sal_console, NULL);
-
- return 0;
-}
-
-/*
- * sn_putc - Send a character to the console, polled or interrupt mode
- */
-void sn_putc(struct serial_port *port, char c)
-{
- return ia64_sn_console_putc(c);
-}
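Reading note, not part of the patch: early_sn_setup() in the removed sn_console.c walks the EFI configuration tables looking for the SAL system table GUID, then walks the SAL descriptors for the entry point. The first half reduces to a linear search over (guid, table) pairs; the sketch below restates just that search, with a string GUID as an illustrative stand-in for efi_guid_t and efi_guidcmp().

#include <stdio.h>
#include <string.h>

struct config_table {
	const char *guid;	/* stand-in for efi_guid_t */
	void *table;
};

/* Return the table whose GUID matches, or NULL (the "SAL not available" case). */
static void *find_table(const struct config_table *tables, int n, const char *wanted)
{
	for (int i = 0; i < n; i++)
		if (strcmp(tables[i].guid, wanted) == 0)
			return tables[i].table;
	return NULL;
}

int main(void)
{
	static int sal_systab;	/* placeholder payload */
	const struct config_table tables[] = {
		{ "ACPI-20-GUID",          NULL },
		{ "SAL-SYSTEM-TABLE-GUID", &sal_systab },
	};
	void *hit = find_table(tables, 2, "SAL-SYSTEM-TABLE-GUID");
	printf("SAL system table %sfound\n", hit ? "" : "not ");
	return 0;
}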
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vcpu.c
--- a/xen/arch/ia64/vcpu.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1843 +0,0 @@
-/*
- * Virtualized CPU functions
- *
- * Copyright (C) 2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <linux/sched.h>
-#include <public/arch-ia64.h>
-#include <asm/ia64_int.h>
-#include <asm/vcpu.h>
-#include <asm/regionreg.h>
-#include <asm/tlb.h>
-#include <asm/processor.h>
-#include <asm/delay.h>
-#include <asm/vmx_vcpu.h>
-
-typedef union {
- struct ia64_psr ia64_psr;
- unsigned long i64;
-} PSR;
-
-//typedef struct pt_regs REGS;
-//typedef struct domain VCPU;
-
-// this def for vcpu_regs won't work if kernel stack is present
-#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs)
-#define PSCB(x,y) VCPU(x,y)
-#define PSCBX(x,y) x->arch.y
-
-#define TRUE 1
-#define FALSE 0
-#define IA64_PTA_SZ_BIT 2
-#define IA64_PTA_VF_BIT 8
-#define IA64_PTA_BASE_BIT 15
-#define IA64_PTA_LFMT (1UL << IA64_PTA_VF_BIT)
-#define IA64_PTA_SZ(x) (x##UL << IA64_PTA_SZ_BIT)
-
-#define STATIC
-
-#ifdef PRIVOP_ADDR_COUNT
-struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = {
- { "=ifa", { 0 }, { 0 }, 0 },
- { "thash", { 0 }, { 0 }, 0 },
- 0
-};
-extern void privop_count_addr(unsigned long addr, int inst);
-#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst)
-#else
-#define PRIVOP_COUNT_ADDR(x,y) do {} while (0)
-#endif
-
-unsigned long dtlb_translate_count = 0;
-unsigned long tr_translate_count = 0;
-unsigned long phys_translate_count = 0;
-
-unsigned long vcpu_verbose = 0;
-#define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0)
-
-extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa);
-extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa);
-
-/**************************************************************************
- VCPU general register access routines
-**************************************************************************/
-
-UINT64
-vcpu_get_gr(VCPU *vcpu, unsigned reg)
-{
- REGS *regs = vcpu_regs(vcpu);
- UINT64 val;
-
- if (!reg) return 0;
- getreg(reg,&val,0,regs); // FIXME: handle NATs later
- return val;
-}
-
-// returns:
-// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
-// IA64_NO_FAULT otherwise
-IA64FAULT
-vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value)
-{
- REGS *regs = vcpu_regs(vcpu);
- long sof = (regs->cr_ifs) & 0x7f;
-
- if (!reg) return IA64_ILLOP_FAULT;
- if (reg >= sof + 32) return IA64_ILLOP_FAULT;
- setreg(reg,value,0,regs); // FIXME: handle NATs later
- return IA64_NO_FAULT;
-}
-
-/**************************************************************************
- VCPU privileged application register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- if (reg == 44) return (vcpu_set_itc(vcpu,val));
- else if (reg == 27) return (IA64_ILLOP_FAULT);
- else if (reg == 24)
- printf("warning: setting ar.eflg is a no-op; no IA-32 support\n");
- else if (reg > 7) return (IA64_ILLOP_FAULT);
- else PSCB(vcpu,krs[reg]) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val)
-{
- if (reg == 24)
- printf("warning: getting ar.eflg is a no-op; no IA-32 support\n");
- else if (reg > 7) return (IA64_ILLOP_FAULT);
- else *val = PSCB(vcpu,krs[reg]);
- return IA64_NO_FAULT;
-}
-
-/**************************************************************************
- VCPU processor status register access routines
-**************************************************************************/
-
-void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode)
-{
- /* only do something if mode changes */
- if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) {
- if (newmode) set_metaphysical_rr0();
- else if (PSCB(vcpu,rrs[0]) != -1)
- set_one_rr(0, PSCB(vcpu,rrs[0]));
- PSCB(vcpu,metaphysical_mode) = newmode;
- }
-}
-
-IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu)
-{
- vcpu_set_metaphysical_mode(vcpu,TRUE);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
-{
- struct ia64_psr psr, imm, *ipsr;
- REGS *regs = vcpu_regs(vcpu);
-
- //PRIVOP_COUNT_ADDR(regs,_RSM);
- // TODO: All of these bits need to be virtualized
- // TODO: Only allowed for current vcpu
- __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
-	ipsr = (struct ia64_psr *)&regs->cr_ipsr;
- imm = *(struct ia64_psr *)&imm24;
- // interrupt flag
- if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0;
- if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 0;
- // interrupt collection flag
- //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0;
- // just handle psr.up and psr.pp for now
- if (imm24 & ~(IA64_PSR_BE | IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP
- | IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT
- | IA64_PSR_DFL | IA64_PSR_DFH))
- return (IA64_ILLOP_FAULT);
- if (imm.dfh) ipsr->dfh = 0;
- if (imm.dfl) ipsr->dfl = 0;
- if (imm.pp) { ipsr->pp = 0; psr.pp = 0; }
- if (imm.up) { ipsr->up = 0; psr.up = 0; }
- if (imm.sp) { ipsr->sp = 0; psr.sp = 0; }
- if (imm.be) ipsr->be = 0;
- if (imm.dt) vcpu_set_metaphysical_mode(vcpu,TRUE);
- __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
- return IA64_NO_FAULT;
-}
-
-extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu);
-#define SPURIOUS_VECTOR 0xf
-
-IA64FAULT vcpu_set_psr_dt(VCPU *vcpu)
-{
- vcpu_set_metaphysical_mode(vcpu,FALSE);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_psr_i(VCPU *vcpu)
-{
- PSCB(vcpu,interrupt_delivery_enabled) = 1;
- PSCB(vcpu,interrupt_collection_enabled) = 1;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
-{
- struct ia64_psr psr, imm, *ipsr;
- REGS *regs = vcpu_regs(vcpu);
- UINT64 mask, enabling_interrupts = 0;
-
- //PRIVOP_COUNT_ADDR(regs,_SSM);
- // TODO: All of these bits need to be virtualized
- __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
- imm = *(struct ia64_psr *)&imm24;
-	ipsr = (struct ia64_psr *)&regs->cr_ipsr;
- // just handle psr.sp,pp and psr.i,ic (and user mask) for now
- mask = IA64_PSR_PP|IA64_PSR_SP|IA64_PSR_I|IA64_PSR_IC|IA64_PSR_UM |
- IA64_PSR_DT|IA64_PSR_DFL|IA64_PSR_DFH;
- if (imm24 & ~mask) return (IA64_ILLOP_FAULT);
- if (imm.dfh) ipsr->dfh = 1;
- if (imm.dfl) ipsr->dfl = 1;
- if (imm.pp) { ipsr->pp = 1; psr.pp = 1; }
- if (imm.sp) { ipsr->sp = 1; psr.sp = 1; }
- if (imm.i) {
- if (!PSCB(vcpu,interrupt_delivery_enabled)) {
-//printf("vcpu_set_psr_sm: psr.ic 0->1 ");
- enabling_interrupts = 1;
- }
- PSCB(vcpu,interrupt_delivery_enabled) = 1;
- }
- if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
- // TODO: do this faster
- if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
- if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
- if (imm.ac) { ipsr->ac = 1; psr.ac = 1; }
- if (imm.up) { ipsr->up = 1; psr.up = 1; }
- if (imm.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
- }
- if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE);
- __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
-#if 0 // now done with deliver_pending_interrupts
- if (enabling_interrupts) {
- if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) {
-//printf("with interrupts pending\n");
- return IA64_EXTINT_VECTOR;
- }
-//else printf("but nothing pending\n");
- }
-#endif
- if (enabling_interrupts &&
- vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
-{
- struct ia64_psr psr, newpsr, *ipsr;
- REGS *regs = vcpu_regs(vcpu);
- UINT64 enabling_interrupts = 0;
-
- // TODO: All of these bits need to be virtualized
- __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
- newpsr = *(struct ia64_psr *)&val;
-	ipsr = (struct ia64_psr *)&regs->cr_ipsr;
- // just handle psr.up and psr.pp for now
-	//if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP)) return (IA64_ILLOP_FAULT);
- // however trying to set other bits can't be an error as it is in ssm
- if (newpsr.dfh) ipsr->dfh = 1;
- if (newpsr.dfl) ipsr->dfl = 1;
- if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; }
- if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
- if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; }
- if (newpsr.i) {
- if (!PSCB(vcpu,interrupt_delivery_enabled))
- enabling_interrupts = 1;
- PSCB(vcpu,interrupt_delivery_enabled) = 1;
- }
- if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
- if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; }
- if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; }
- if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; }
- if (newpsr.up) { ipsr->up = 1; psr.up = 1; }
- if (newpsr.dt && newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE);
- else vcpu_set_metaphysical_mode(vcpu,TRUE);
- if (newpsr.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
- }
- //__asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory");
-#if 0 // now done with deliver_pending_interrupts
- if (enabling_interrupts) {
- if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- return IA64_EXTINT_VECTOR;
- }
-#endif
- if (enabling_interrupts &&
- vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval)
-{
- UINT64 psr;
- struct ia64_psr newpsr;
-
- // TODO: This needs to return a "filtered" view of
- // the psr, not the actual psr. Probably the psr needs
- // to be a field in regs (in addition to ipsr).
- __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory");
- newpsr = *(struct ia64_psr *)&psr;
- if (newpsr.cpl == 2) newpsr.cpl = 0;
- if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1;
- else newpsr.i = 0;
- if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1;
- else newpsr.ic = 0;
- *pval = *(unsigned long *)&newpsr;
- return IA64_NO_FAULT;
-}
-
-BOOLEAN vcpu_get_psr_ic(VCPU *vcpu)
-{
- return !!PSCB(vcpu,interrupt_collection_enabled);
-}
-
-BOOLEAN vcpu_get_psr_i(VCPU *vcpu)
-{
- return !!PSCB(vcpu,interrupt_delivery_enabled);
-}
-
-UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr)
-{
- UINT64 dcr = PSCBX(vcpu,dcr);
- PSR psr = {0};
-
- //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr);
- psr.i64 = prevpsr;
- psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1;
- psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1;
- psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled);
- psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled);
- psr.ia64_psr.bn = PSCB(vcpu,banknum);
- psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1;
- if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! fool domain
- // psr.pk = 1;
- //printf("returns 0x%016lx...",psr.i64);
- return psr.i64;
-}
-
-/**************************************************************************
- VCPU control register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval)
-{
-extern unsigned long privop_trace;
-//privop_trace=0;
-//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip));
- // Reads of cr.dcr on Xen always have the sign bit set, so
- // a domain can differentiate whether it is running on SP or not
- *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCBX(vcpu,iva) & ~0x7fffL;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,pta);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval)
-{
- //REGS *regs = vcpu_regs(vcpu);
- //*pval = regs->cr_ipsr;
- *pval = PSCB(vcpu,ipsr);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,isr);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval)
-{
- //REGS *regs = vcpu_regs(vcpu);
- //*pval = regs->cr_iip;
- *pval = PSCB(vcpu,iip);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval)
-{
- UINT64 val = PSCB(vcpu,ifa);
- REGS *regs = vcpu_regs(vcpu);
- PRIVOP_COUNT_ADDR(regs,_GET_IFA);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr)
-{
- ia64_rr rr;
-
- rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
- return(rr.ps);
-}
-
-unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr)
-{
- ia64_rr rr;
-
- rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
- return(rr.rid);
-}
-
-unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa)
-{
- ia64_rr rr;
-
- rr.rrval = 0;
- rr.ps = vcpu_get_rr_ps(vcpu,ifa);
- rr.rid = vcpu_get_rr_rid(vcpu,ifa);
- return (rr.rrval);
-}
-
-
-IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval)
-{
- UINT64 val = PSCB(vcpu,itir);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval)
-{
- UINT64 val = PSCB(vcpu,iipa);
- // SP entry code does not save iipa yet nor does it get
- // properly delivered in the pscb
- printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n");
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval)
-{
- //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs;
- //*pval = PSCB(vcpu,regs).cr_ifs;
- *pval = PSCB(vcpu,ifs);
- PSCB(vcpu,incomplete_regframe) = 0;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval)
-{
- UINT64 val = PSCB(vcpu,iim);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval)
-{
- //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval);
- UINT64 val = PSCB(vcpu,iha);
- REGS *regs = vcpu_regs(vcpu);
- PRIVOP_COUNT_ADDR(regs,_THASH);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val)
-{
-extern unsigned long privop_trace;
-//privop_trace=1;
- // Reads of cr.dcr on SP always have the sign bit set, so
- // a domain can differentiate whether it is running on SP or not
- // Thus, writes of DCR should ignore the sign bit
-//verbose("vcpu_set_dcr: called\n");
- PSCBX(vcpu,dcr) = val & ~0x8000000000000000L;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val)
-{
- PSCBX(vcpu,iva) = val & ~0x7fffL;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val)
-{
- if (val & IA64_PTA_LFMT) {
- printf("*** No support for VHPT long format yet!!\n");
- return (IA64_ILLOP_FAULT);
- }
- if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT;
- if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu,pta) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,ipsr) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,isr) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,iip) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_increment_iip(VCPU *vcpu)
-{
- REGS *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
- if (ipsr->ri == 2) { ipsr->ri=0; regs->cr_iip += 16; }
- else ipsr->ri++;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,ifa) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,itir) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val)
-{
- // SP entry code does not save iipa yet nor does it get
- // properly delivered in the pscb
- printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n");
- PSCB(vcpu,iipa) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val)
-{
- //REGS *regs = vcpu_regs(vcpu);
- PSCB(vcpu,ifs) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,iim) = val;
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val)
-{
- PSCB(vcpu,iha) = val;
- return IA64_NO_FAULT;
-}
-
-/**************************************************************************
- VCPU interrupt control register access routines
-**************************************************************************/
-
-void vcpu_pend_unspecified_interrupt(VCPU *vcpu)
-{
- PSCB(vcpu,pending_interruption) = 1;
-}
-
-void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
-{
- if (vector & ~0xff) {
- printf("vcpu_pend_interrupt: bad vector\n");
- return;
- }
-#ifdef CONFIG_VTI
- if ( VMX_DOMAIN(vcpu) ) {
- set_bit(vector,VPD_CR(vcpu,irr));
- } else
-#endif // CONFIG_VTI
- {
- /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */
- if (test_bit(vector,PSCBX(vcpu,irr))) {
-//printf("vcpu_pend_interrupt: overrun\n");
- }
- set_bit(vector,PSCBX(vcpu,irr));
- PSCB(vcpu,pending_interruption) = 1;
- }
-
-#if 0
- /* Keir: I think you should unblock when an interrupt is pending. */
- {
- int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
- vcpu_unblock(vcpu);
- if ( running )
- smp_send_event_check_cpu(vcpu->processor);
- }
-#endif
-}
-
-void early_tick(VCPU *vcpu)
-{
- UINT64 *p = &PSCBX(vcpu,irr[3]);
- printf("vcpu_check_pending: about to deliver early tick\n");
- printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p);
-}
-
-#define IA64_TPR_MMI 0x10000
-#define IA64_TPR_MIC 0x000f0
-
-/* checks to see if a VCPU has any unmasked pending interrupts
- * if so, returns the highest, else returns SPURIOUS_VECTOR */
-/* NOTE: Since this gets called from vcpu_get_ivr() and the
- * semantics of "mov rx=cr.ivr" ignore the setting of the psr.i bit,
- * this routine also ignores pscb.interrupt_delivery_enabled
- * and this must be checked independently; see vcpu_deliverable_interrupts() */
-UINT64 vcpu_check_pending_interrupts(VCPU *vcpu)
-{
- UINT64 *p, *q, *r, bits, bitnum, mask, i, vector;
-
- p = &PSCBX(vcpu,irr[3]);
- /* q = &PSCB(vcpu,delivery_mask[3]); */
- r = &PSCBX(vcpu,insvc[3]);
- for (i = 3; ; p--, q--, r--, i--) {
- bits = *p /* & *q */;
- if (bits) break; // got a potential interrupt
- if (*r) {
- // nothing in this word which is pending+inservice
- // but there is one inservice which masks lower
- return SPURIOUS_VECTOR;
- }
- if (i == 0) {
- // checked all bits... nothing pending+inservice
- return SPURIOUS_VECTOR;
- }
- }
- // have a pending,deliverable interrupt... see if it is masked
- bitnum = ia64_fls(bits);
-//printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...",bitnum);
- vector = bitnum+(i*64);
- mask = 1L << bitnum;
-//printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...",vector);
- if (*r >= mask) {
- // masked by equal inservice
-//printf("but masked by equal inservice\n");
- return SPURIOUS_VECTOR;
- }
- if (PSCB(vcpu,tpr) & IA64_TPR_MMI) {
- // tpr.mmi is set
-//printf("but masked by tpr.mmi\n");
- return SPURIOUS_VECTOR;
- }
- if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) {
- //tpr.mic masks class
-//printf("but masked by tpr.mic\n");
- return SPURIOUS_VECTOR;
- }
-
-//printf("returned to caller\n");
-#if 0
-if (vector == (PSCB(vcpu,itv) & 0xff)) {
- UINT64 now = ia64_get_itc();
- UINT64 itm = PSCBX(vcpu,domain_itm);
- if (now < itm) early_tick(vcpu);
-
-}
-#endif
- return vector;
-}
-
-UINT64 vcpu_deliverable_interrupts(VCPU *vcpu)
-{
- return (vcpu_get_psr_i(vcpu) &&
- vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR);
-}
-
-UINT64 vcpu_deliverable_timer(VCPU *vcpu)
-{
- return (vcpu_get_psr_i(vcpu) &&
- vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv));
-}
-
-IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval)
-{
-extern unsigned long privop_trace;
-//privop_trace=1;
- //TODO: Implement this
- printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n");
- //*pval = 0;
- *pval = ia64_getreg(_IA64_REG_CR_LID);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval)
-{
- int i;
- UINT64 vector, mask;
-
-#define HEARTBEAT_FREQ 16 // period in seconds
-#ifdef HEARTBEAT_FREQ
-#define N_DOMS 16 // period in seconds
- static long count[N_DOMS] = { 0 };
- static long nonclockcount[N_DOMS] = { 0 };
- REGS *regs = vcpu_regs(vcpu);
- unsigned domid = vcpu->domain->domain_id;
-#endif
-#ifdef IRQ_DEBUG
- static char firstivr = 1;
- static char firsttime[256];
- if (firstivr) {
- int i;
- for (i=0;i<256;i++) firsttime[i]=1;
- firstivr=0;
- }
-#endif
-
- vector = vcpu_check_pending_interrupts(vcpu);
- if (vector == SPURIOUS_VECTOR) {
- PSCB(vcpu,pending_interruption) = 0;
- *pval = vector;
- return IA64_NO_FAULT;
- }
-#ifdef HEARTBEAT_FREQ
- if (domid >= N_DOMS) domid = N_DOMS-1;
- if (vector == (PSCB(vcpu,itv) & 0xff)) {
- if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) {
- printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n",
- domid, count[domid], nonclockcount[domid]);
- //count[domid] = 0;
- //dump_runq();
- }
- }
- else nonclockcount[domid]++;
-#endif
- // now have an unmasked, pending, deliverable vector!
- // getting ivr has "side effects"
-#ifdef IRQ_DEBUG
- if (firsttime[vector]) {
- printf("*** First get_ivr on vector=%d,itc=%lx\n",
- vector,ia64_get_itc());
- firsttime[vector]=0;
- }
-#endif
- i = vector >> 6;
- mask = 1L << (vector & 0x3f);
-//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector);
- PSCBX(vcpu,insvc[i]) |= mask;
- PSCBX(vcpu,irr[i]) &= ~mask;
- //PSCB(vcpu,pending_interruption)--;
- *pval = vector;
- // if delivering a timer interrupt, remember domain_itm
- if (vector == (PSCB(vcpu,itv) & 0xff)) {
- PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm);
- }
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,tpr);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval)
-{
- *pval = 0L; // reads of eoi always return 0
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval)
-{
-#ifndef IRR_USE_FIXED
- printk("vcpu_get_irr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
- *pval = vcpu->irr[0];
- return (IA64_NO_FAULT);
-#endif
-}
-
-IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval)
-{
-#ifndef IRR_USE_FIXED
- printk("vcpu_get_irr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
- *pval = vcpu->irr[1];
- return (IA64_NO_FAULT);
-#endif
-}
-
-IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval)
-{
-#ifndef IRR_USE_FIXED
- printk("vcpu_get_irr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
- *pval = vcpu->irr[2];
- return (IA64_NO_FAULT);
-#endif
-}
-
-IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval)
-{
-#ifndef IRR_USE_FIXED
- printk("vcpu_get_irr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
- *pval = vcpu->irr[3];
- return (IA64_NO_FAULT);
-#endif
-}
-
-IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,itv);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,pmv);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval)
-{
- *pval = PSCB(vcpu,cmcv);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval)
-{
- // fix this when setting values other than m-bit is supported
- printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n");
- *pval = (1L << 16);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval)
-{
- // fix this when setting values other than m-bit is supported
- printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n");
- *pval = (1L << 16);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val)
-{
- printf("vcpu_set_lid: Setting cr.lid is unsupported\n");
- return (IA64_ILLOP_FAULT);
-}
-
-IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val)
-{
- if (val & 0xff00) return IA64_RSVDREG_FAULT;
- PSCB(vcpu,tpr) = val;
- if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val)
-{
- UINT64 *p, bits, vec, bitnum;
- int i;
-
- p = &PSCBX(vcpu,insvc[3]);
- for (i = 3; (i >= 0) && !(bits = *p); i--, p--);
- if (i < 0) {
- printf("Trying to EOI interrupt when none are in-service.\r\n");
- return;
- }
- bitnum = ia64_fls(bits);
- vec = bitnum + (i*64);
- /* clear the correct bit */
- bits &= ~(1L << bitnum);
- *p = bits;
- /* clearing an eoi bit may unmask another pending interrupt... */
- if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled...
- // worry about this later... Linux only calls eoi
- // with interrupts disabled
- printf("Trying to EOI interrupt with interrupts enabled\r\n");
- }
- if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR)
- PSCB(vcpu,pending_interruption) = 1;
-//printf("YYYYY vcpu_set_eoi: Successful\n");
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val)
-{
- if (!(val & (1L << 16))) {
- printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
- return (IA64_ILLOP_FAULT);
- }
- // no place to save this state but nothing to do anyway
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val)
-{
- if (!(val & (1L << 16))) {
- printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n");
- return (IA64_ILLOP_FAULT);
- }
- // no place to save this state but nothing to do anyway
- return (IA64_NO_FAULT);
-}
-
-// parameter is a time interval specified in cycles
-void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles)
-{
- PSCBX(vcpu,xen_timer_interval) = cycles;
- vcpu_set_next_timer(vcpu);
- printf("vcpu_enable_timer(%d): interval set to %d cycles\n",
- PSCBX(vcpu,xen_timer_interval));
- __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask));
-}
-
-IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val)
-{
-extern unsigned long privop_trace;
-//privop_trace=1;
- if (val & 0xef00) return (IA64_ILLOP_FAULT);
- PSCB(vcpu,itv) = val;
- if (val & 0x10000) {
-printf("**** vcpu_set_itv(%d): vitm=%lx, setting to
0\n",val,PSCBX(vcpu,domain_itm));
- PSCBX(vcpu,domain_itm) = 0;
- }
- else vcpu_enable_timer(vcpu,1000000L);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val)
-{
- if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu,pmv) = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val)
-{
- if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT;
- PSCB(vcpu,cmcv) = val;
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU temporary register access routines
-**************************************************************************/
-UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index)
-{
- if (index > 7) return 0;
- return PSCB(vcpu,tmp[index]);
-}
-
-void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val)
-{
- if (index <= 7) PSCB(vcpu,tmp[index]) = val;
-}
-
-/**************************************************************************
-Interval timer routines
-**************************************************************************/
-
-BOOLEAN vcpu_timer_disabled(VCPU *vcpu)
-{
- UINT64 itv = PSCB(vcpu,itv);
- return(!itv || !!(itv & 0x10000));
-}
-
-BOOLEAN vcpu_timer_inservice(VCPU *vcpu)
-{
- UINT64 itv = PSCB(vcpu,itv);
- return (test_bit(itv, PSCBX(vcpu,insvc)));
-}
-
-BOOLEAN vcpu_timer_expired(VCPU *vcpu)
-{
- unsigned long domain_itm = PSCBX(vcpu,domain_itm);
- unsigned long now = ia64_get_itc();
-
- if (!domain_itm) return FALSE;
- if (now < domain_itm) return FALSE;
- if (vcpu_timer_disabled(vcpu)) return FALSE;
- return TRUE;
-}
-
-void vcpu_safe_set_itm(unsigned long val)
-{
- unsigned long epsilon = 100;
- UINT64 now = ia64_get_itc();
-
- local_irq_disable();
- while (1) {
-//printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now);
- ia64_set_itm(val);
- if (val > (now = ia64_get_itc())) break;
- val = now + epsilon;
- epsilon <<= 1;
- }
- local_irq_enable();
-}
-
-void vcpu_set_next_timer(VCPU *vcpu)
-{
- UINT64 d = PSCBX(vcpu,domain_itm);
- //UINT64 s = PSCBX(vcpu,xen_itm);
- UINT64 s = local_cpu_data->itm_next;
- UINT64 now = ia64_get_itc();
- //UINT64 interval = PSCBX(vcpu,xen_timer_interval);
-
- /* gloss over the wraparound problem for now... we know it exists
- * but it doesn't matter right now */
-
-#if 0
- /* ensure at least next SP tick is in the future */
- if (!interval) PSCBX(vcpu,xen_itm) = now +
-#if 0
- (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE :
- DEFAULT_CLOCK_RATE);
-#else
- 3000000;
-//printf("vcpu_set_next_timer: HACK!\n");
-#endif
-#if 0
- if (PSCBX(vcpu,xen_itm) < now)
- while (PSCBX(vcpu,xen_itm) < now + (interval>>1))
- PSCBX(vcpu,xen_itm) += interval;
-#endif
-#endif
-
- if (is_idle_task(vcpu->domain)) {
- printf("****** vcpu_set_next_timer called during idle!!\n");
- }
- //s = PSCBX(vcpu,xen_itm);
- if (d && (d > now) && (d < s)) {
- vcpu_safe_set_itm(d);
- //using_domain_as_itm++;
- }
- else {
- vcpu_safe_set_itm(s);
- //using_xen_as_itm++;
- }
-}
-
-IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val)
-{
- UINT now = ia64_get_itc();
-
- //if (val < now) val = now + 1000;
-//printf("*** vcpu_set_itm: called with %lx\n",val);
- PSCBX(vcpu,domain_itm) = val;
- vcpu_set_next_timer(vcpu);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val)
-{
-
- UINT64 oldnow = ia64_get_itc();
- UINT64 olditm = PSCBX(vcpu,domain_itm);
- unsigned long d = olditm - oldnow;
- unsigned long x = local_cpu_data->itm_next - oldnow;
-
- UINT64 newnow = val, min_delta;
-
-#define DISALLOW_SETTING_ITC_FOR_NOW
-#ifdef DISALLOW_SETTING_ITC_FOR_NOW
-printf("vcpu_set_itc: Setting ar.itc is currently disabled\n");
-#else
- local_irq_disable();
- if (olditm) {
-printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d);
- PSCBX(vcpu,domain_itm) = newnow + d;
- }
- local_cpu_data->itm_next = newnow + x;
- d = PSCBX(vcpu,domain_itm);
- x = local_cpu_data->itm_next;
-
- ia64_set_itc(newnow);
- if (d && (d > newnow) && (d < x)) {
- vcpu_safe_set_itm(d);
- //using_domain_as_itm++;
- }
- else {
- vcpu_safe_set_itm(x);
- //using_xen_as_itm++;
- }
- local_irq_enable();
-#endif
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval)
-{
- //FIXME: Implement this
- printf("vcpu_get_itm: Getting cr.itm is unsupported... continuing\n");
- return (IA64_NO_FAULT);
- //return (IA64_ILLOP_FAULT);
-}
-
-IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval)
-{
- //TODO: Implement this
- printf("vcpu_get_itc: Getting ar.itc is unsupported\n");
- return (IA64_ILLOP_FAULT);
-}
-
-void vcpu_pend_timer(VCPU *vcpu)
-{
- UINT64 itv = PSCB(vcpu,itv) & 0xff;
-
- if (vcpu_timer_disabled(vcpu)) return;
- //if (vcpu_timer_inservice(vcpu)) return;
- if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) {
- // already delivered an interrupt for this so
- // don't deliver another
- return;
- }
-#if 0
- // attempt to flag "timer tick before its due" source
- {
- UINT64 itm = PSCBX(vcpu,domain_itm);
- UINT64 now = ia64_get_itc();
- if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n");
- }
-#endif
- vcpu_pend_interrupt(vcpu, itv);
-}
-
-// returns true if ready to deliver a timer interrupt too early
-UINT64 vcpu_timer_pending_early(VCPU *vcpu)
-{
- UINT64 now = ia64_get_itc();
- UINT64 itm = PSCBX(vcpu,domain_itm);
-
- if (vcpu_timer_disabled(vcpu)) return 0;
- if (!itm) return 0;
- return (vcpu_deliverable_timer(vcpu) && (now < itm));
-}
-
-//FIXME: This is a hack because everything dies if a timer tick is lost
-void vcpu_poke_timer(VCPU *vcpu)
-{
- UINT64 itv = PSCB(vcpu,itv) & 0xff;
- UINT64 now = ia64_get_itc();
- UINT64 itm = PSCBX(vcpu,domain_itm);
- UINT64 irr;
-
- if (vcpu_timer_disabled(vcpu)) return;
- if (!itm) return;
- if (itv != 0xefL) {
- printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv);
- while(1);
- }
- // using 0xef instead of itv so can get real irr
- if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) {
- if (!test_bit(0xefL,PSCBX(vcpu,irr))) {
- irr = ia64_getreg(_IA64_REG_CR_IRR3);
- if (irr & (1L<<(0xef-0xc0))) return;
-if (now-itm>0x800000)
-printf("*** poking timer:
now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm());
- vcpu_pend_timer(vcpu);
- }
- }
-}
-
-
-/**************************************************************************
-Privileged operation emulation routines
-**************************************************************************/
-
-IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa)
-{
-	PSCB(vcpu,tmp[0]) = ifa;	// save ifa in vcpu structure, then specify IA64_FORCED_IFA
-	return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA;
-}
-
-
-IA64FAULT vcpu_rfi(VCPU *vcpu)
-{
- // TODO: Only allowed for current vcpu
- PSR psr;
- UINT64 int_enable, regspsr = 0;
- UINT64 ifs;
- REGS *regs = vcpu_regs(vcpu);
- extern void dorfirfi(void);
-
- psr.i64 = PSCB(vcpu,ipsr);
- if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2;
- if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1;
- int_enable = psr.ia64_psr.i;
- if (psr.ia64_psr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1;
-	if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE);
- else vcpu_set_metaphysical_mode(vcpu,TRUE);
- psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1;
- psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1;
- psr.ia64_psr.bn = 1;
- //psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken
- if (psr.ia64_psr.be) {
- printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n");
- return (IA64_ILLOP_FAULT);
- }
- PSCB(vcpu,incomplete_regframe) = 0; // is this necessary?
- ifs = PSCB(vcpu,ifs);
-	//if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
- //if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) {
- if (ifs & regs->cr_ifs & 0x8000000000000000L) {
- // TODO: validate PSCB(vcpu,iip)
- // TODO: PSCB(vcpu,ipsr) = psr;
- PSCB(vcpu,ipsr) = psr.i64;
- // now set up the trampoline
- regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!!
- __asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory");
-		regs->cr_ipsr = regspsr & ~(IA64_PSR_I | IA64_PSR_IC | IA64_PSR_BN);
- }
- else {
- regs->cr_ipsr = psr.i64;
- regs->cr_iip = PSCB(vcpu,iip);
- }
- PSCB(vcpu,interrupt_collection_enabled) = 1;
- vcpu_bsw1(vcpu);
- PSCB(vcpu,interrupt_delivery_enabled) = int_enable;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_cover(VCPU *vcpu)
-{
- // TODO: Only allowed for current vcpu
- REGS *regs = vcpu_regs(vcpu);
-
- if (!PSCB(vcpu,interrupt_collection_enabled)) {
- if (!PSCB(vcpu,incomplete_regframe))
- PSCB(vcpu,ifs) = regs->cr_ifs;
- else PSCB(vcpu,incomplete_regframe) = 0;
- }
- regs->cr_ifs = 0;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
-{
- UINT64 pta = PSCB(vcpu,pta);
- UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT;
- UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1);
- UINT64 Mask = (1L << pta_sz) - 1;
- UINT64 Mask_60_15 = (Mask >> 15) & 0x3fffffffffff;
- UINT64 compMask_60_15 = ~Mask_60_15;
- //UINT64 rr_ps = RR_TO_PS(get_rr(vadr));
- UINT64 rr_ps = vcpu_get_rr_ps(vcpu,vadr);
- UINT64 VHPT_offset = (vadr >> rr_ps) << 3;
- UINT64 VHPT_addr1 = vadr & 0xe000000000000000L;
- UINT64 VHPT_addr2a =
- ((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15;
- UINT64 VHPT_addr2b =
- ((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;;
- UINT64 VHPT_addr3 = VHPT_offset & 0x7fff;
- UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) |
- VHPT_addr3;
-
-#if 0
- if (VHPT_addr1 == 0xe000000000000000L) {
- printf("vcpu_thash: thash unsupported with rr7 @%lx\n",
- PSCB(vcpu,iip));
- return (IA64_ILLOP_FAULT);
- }
-#endif
-//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr);
- *pval = VHPT_addr;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
-{
- printf("vcpu_ttag: ttag instruction unsupported\n");
- return (IA64_ILLOP_FAULT);
-}
-
-#define itir_ps(itir) ((itir >> 2) & 0x3f)
-#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1))
-
-unsigned long vhpt_translate_count = 0;
-
-IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir)
-{
- unsigned long pta, pta_mask, iha, pte, ps;
- TR_ENTRY *trp;
- ia64_rr rr;
-
- if (!(address >> 61)) {
- if (!PSCB(vcpu,metaphysical_mode)) {
- REGS *regs = vcpu_regs(vcpu);
- unsigned long viip = PSCB(vcpu,iip);
- unsigned long vipsr = PSCB(vcpu,ipsr);
- unsigned long iip = regs->cr_iip;
- unsigned long ipsr = regs->cr_ipsr;
- printk("vcpu_translate: bad address %p, viip=%p,
vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr);
- }
-
- *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX;
- *itir = PAGE_SHIFT << 2;
- phys_translate_count++;
- return IA64_NO_FAULT;
- }
-
- /* check translation registers */
- if ((trp = match_tr(vcpu,address))) {
- tr_translate_count++;
- *pteval = trp->page_flags;
- *itir = trp->itir;
- return IA64_NO_FAULT;
- }
-
- /* check 1-entry TLB */
- if ((trp = match_dtlb(vcpu,address))) {
- dtlb_translate_count++;
- *pteval = trp->page_flags;
- *itir = trp->itir;
- return IA64_NO_FAULT;
- }
-
- /* check guest VHPT */
- pta = PSCB(vcpu,pta);
- rr.rrval = PSCB(vcpu,rrs)[address>>61];
- if (rr.ve && (pta & IA64_PTA_VE))
- {
- if (pta & IA64_PTA_VF)
- {
- /* long format VHPT - not implemented */
- return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
- }
- else
- {
- /* short format VHPT */
-
- /* avoid recursively walking VHPT */
- pta_mask = (itir_mask(pta) << 3) >> 3;
- if (((address ^ pta) & pta_mask) == 0)
- return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
-
- vcpu_thash(vcpu, address, &iha);
- if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 0)
- return IA64_VHPT_TRANS_VECTOR;
-
- /*
- * Optimisation: this VHPT walker aborts on not-present pages
- * instead of inserting a not-present translation, this allows
- * vectoring directly to the miss handler.
- \ */
- if (pte & _PAGE_P)
- {
- *pteval = pte;
- *itir = vcpu_get_itir_on_fault(vcpu,address);
- vhpt_translate_count++;
- return IA64_NO_FAULT;
- }
- return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR);
- }
- }
- return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR);
-}
-
-IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
-{
- UINT64 pteval, itir, mask;
- IA64FAULT fault;
-
- fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir);
- if (fault == IA64_NO_FAULT)
- {
- mask = itir_mask(itir);
- *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
- return (IA64_NO_FAULT);
- }
- else
- {
- PSCB(vcpu,tmp[0]) = vadr; // save ifa in vcpu structure, then specify IA64_FORCED_IFA
- return (fault | IA64_FORCED_IFA);
- }
-}
-
-IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
-{
- printf("vcpu_tak: tak instruction unsupported\n");
- return (IA64_ILLOP_FAULT);
- // HACK ALERT: tak does a thash for now
- //return vcpu_thash(vcpu,vadr,key);
-}
-
-/**************************************************************************
- VCPU debug breakpoint register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- // TODO: unimplemented DBRs return a reserved register fault
- // TODO: Should set Logical CPU state, not just physical
- ia64_set_dbr(reg,val);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- // TODO: unimplemented IBRs return a reserved register fault
- // TODO: Should set Logical CPU state, not just physical
- ia64_set_ibr(reg,val);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- // TODO: unimplemented DBRs return a reserved register fault
- UINT64 val = ia64_get_dbr(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- // TODO: unimplemented IBRs return a reserved register fault
- UINT64 val = ia64_get_ibr(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU performance monitor register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- // TODO: Should set Logical CPU state, not just physical
- // NOTE: Writes to unimplemented PMC registers are discarded
- ia64_set_pmc(reg,val);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- // TODO: Should set Logical CPU state, not just physical
- // NOTE: Writes to unimplemented PMD registers are discarded
- ia64_set_pmd(reg,val);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- // NOTE: Reads from unimplemented PMC registers return zero
- UINT64 val = (UINT64)ia64_get_pmc(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- // NOTE: Reads from unimplemented PMD registers return zero
- UINT64 val = (UINT64)ia64_get_pmd(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU banked general register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_bsw0(VCPU *vcpu)
-{
- // TODO: Only allowed for current vcpu
- REGS *regs = vcpu_regs(vcpu);
- unsigned long *r = &regs->r16;
- unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
- unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
- int i;
-
- if (PSCB(vcpu,banknum)) {
- for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; }
- PSCB(vcpu,banknum) = 0;
- }
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_bsw1(VCPU *vcpu)
-{
- // TODO: Only allowed for current vcpu
- REGS *regs = vcpu_regs(vcpu);
- unsigned long *r = &regs->r16;
- unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]);
- unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]);
- int i;
-
- if (!PSCB(vcpu,banknum)) {
- for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; }
- PSCB(vcpu,banknum) = 1;
- }
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU cpuid access routines
-**************************************************************************/
-
-
-IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- // FIXME: This could get called as a result of a rsvd-reg fault
- // if reg > 3
- switch(reg) {
- case 0:
- memcpy(pval,"Xen/ia64",8);
- break;
- case 1:
- *pval = 0;
- break;
- case 2:
- *pval = 0;
- break;
- case 3:
- *pval = ia64_get_cpuid(3);
- break;
- case 4:
- *pval = ia64_get_cpuid(4);
- break;
- default:
- if (reg > (ia64_get_cpuid(3) & 0xff))
- return IA64_RSVDREG_FAULT;
- *pval = ia64_get_cpuid(reg);
- break;
- }
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU region register access routines
-**************************************************************************/
-
-unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr)
-{
- ia64_rr rr;
-
- rr.rrval = PSCB(vcpu,rrs)[vadr>>61];
- return(rr.ve);
-}
-
-IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- PSCB(vcpu,rrs)[reg>>61] = val;
- // warning: set_one_rr() does it "live"
- set_one_rr(reg,val);
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- UINT val = PSCB(vcpu,rrs)[reg>>61];
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-/**************************************************************************
- VCPU protection key register access routines
-**************************************************************************/
-
-IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
-#ifndef PKR_USE_FIXED
- printk("vcpu_get_pkr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
- UINT64 val = (UINT64)ia64_get_pkr(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-#endif
-}
-
-IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
-#ifndef PKR_USE_FIXED
- printk("vcpu_set_pkr: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-#else
-// if (reg >= NPKRS) return (IA64_ILLOP_FAULT);
- vcpu->pkrs[reg] = val;
- ia64_set_pkr(reg,val);
- return (IA64_NO_FAULT);
-#endif
-}
-
-/**************************************************************************
- VCPU translation register access routines
-**************************************************************************/
-
-static void vcpu_purge_tr_entry(TR_ENTRY *trp)
-{
- trp->p = 0;
-}
-
-static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa)
-{
- UINT64 ps;
-
- trp->itir = itir;
- trp->rid = virtualize_rid(current, get_rr(ifa) & RR_RID_MASK);
- trp->p = 1;
- ps = trp->ps;
- trp->page_flags = pte;
- if (trp->pl < 2) trp->pl = 2;
- trp->vadr = ifa & ~0xfff;
- if (ps > 12) { // "ignore" relevant low-order bits
- trp->ppn &= ~((1UL<<(ps-12))-1);
- trp->vadr &= ~((1UL<<ps)-1);
- }
-}
-
-TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count)
-{
- unsigned long rid = (get_rr(ifa) & RR_RID_MASK);
- int i;
-
- for (i = 0; i < count; i++, trp++) {
- if (!trp->p) continue;
- if (physicalize_rid(vcpu,trp->rid) != rid) continue;
- if (ifa < trp->vadr) continue;
- if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue;
- //if (trp->key && !match_pkr(vcpu,trp->key)) continue;
- return trp;
- }
- return 0;
-}
-
-TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa)
-{
- TR_ENTRY *trp;
-
- trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS);
- if (trp) return trp;
- trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS);
- if (trp) return trp;
- return 0;
-}
-
-IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte,
- UINT64 itir, UINT64 ifa)
-{
- TR_ENTRY *trp;
-
- if (slot >= NDTRS) return IA64_RSVDREG_FAULT;
- trp = &PSCBX(vcpu,dtrs[slot]);
-//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa);
- vcpu_set_tr_entry(trp,pte,itir,ifa);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte,
- UINT64 itir, UINT64 ifa)
-{
- TR_ENTRY *trp;
-
- if (slot >= NITRS) return IA64_RSVDREG_FAULT;
- trp = &PSCBX(vcpu,itrs[slot]);
-//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa);
- vcpu_set_tr_entry(trp,pte,itir,ifa);
- return IA64_NO_FAULT;
-}
-
-/**************************************************************************
- VCPU translation cache access routines
-**************************************************************************/
-
-void foobar(void) { /*vcpu_verbose = 1;*/ }
-
-extern struct domain *dom0;
-
-void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
-{
- unsigned long psr;
- unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
-
- // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
- // FIXME, must be inlined or potential for nested fault here!
- if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) {
- printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n");
- //FIXME: kill domain here
- while(1);
- }
- psr = ia64_clear_ic();
- ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
- ia64_set_psr(psr);
- // ia64_srlz_i(); // no srls req'd, will rfi later
-#ifdef VHPT_GLOBAL
- if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) {
- // FIXME: this is dangerous... vhpt_flush_address ensures these
- // addresses never get flushed. More work needed if this
- // ever happens.
-//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps);
- if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps);
- else vhpt_insert(vaddr,pte,logps<<2);
- }
- // even if domain pagesize is larger than PAGE_SIZE, just put
- // PAGE_SIZE mapping in the vhpt for now, else purging is complicated
- else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
-#endif
- if (IorD & 0x4) return; // don't place in 1-entry TLB
- if (IorD & 0x1) {
- vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr);
- PSCBX(vcpu,itlb_pte) = mp_pte;
- }
- if (IorD & 0x2) {
- vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr);
- PSCBX(vcpu,dtlb_pte) = mp_pte;
- }
-}
-
-// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check
-// the physical address contained for correctness
-TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa)
-{
- TR_ENTRY *trp;
-
- if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1))
- return (&vcpu->arch.dtlb);
- return 0UL;
-}
-
-IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
-{
- unsigned long pteval, logps = (itir >> 2) & 0x3f;
- unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
-
- if (logps < PAGE_SHIFT) {
- printf("vcpu_itc_d: domain trying to use smaller page size!\n");
- //FIXME: kill domain here
- while(1);
- }
- //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
- pteval = translate_domain_pte(pte,ifa,itir);
- if (!pteval) return IA64_ILLOP_FAULT;
- vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
-{
- unsigned long pteval, logps = (itir >> 2) & 0x3f;
- unsigned long translate_domain_pte(UINT64,UINT64,UINT64);
-
- // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK!
- if (logps < PAGE_SHIFT) {
- printf("vcpu_itc_i: domain trying to use smaller page size!\n");
- //FIXME: kill domain here
- while(1);
- }
- //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize
- pteval = translate_domain_pte(pte,ifa,itir);
- // FIXME: what to do if bad physical address? (machine check?)
- if (!pteval) return IA64_ILLOP_FAULT;
- vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
-{
- printk("vcpu_ptc_l: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-}
-
-// At privlvl=0, fc performs no access rights or protection key checks, while
-// at privlvl!=0, fc performs access rights checks as if it were a 1-byte
-// read but no protection key check. Thus in order to avoid an unexpected
-// access rights fault, we have to translate the virtual address to a
-// physical address (possibly via a metaphysical address) and do the fc
-// on the physical address, which is guaranteed to flush the same cache line
-IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr)
-{
- // TODO: Only allowed for current vcpu
- UINT64 mpaddr, paddr;
- IA64FAULT fault;
- unsigned long translate_domain_mpaddr(unsigned long);
- IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *);
-
- fault = vcpu_tpa(vcpu, vadr, &mpaddr);
- if (fault == IA64_NO_FAULT) {
- paddr = translate_domain_mpaddr(mpaddr);
- ia64_fc(__va(paddr));
- }
- return fault;
-}
-
-int ptce_count = 0;
-IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
-{
- // Note that this only needs to be called once, i.e. the
- // architected loop to purge the entire TLB, should use
- // base = stride1 = stride2 = 0, count0 = count 1 = 1
-
-#ifdef VHPT_GLOBAL
- vhpt_flush(); // FIXME: This is overdoing it
-#endif
- local_flush_tlb_all();
- // just invalidate the "whole" tlb
- vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
- vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range)
-{
- printk("vcpu_ptc_g: called, not implemented yet\n");
- return IA64_ILLOP_FAULT;
-}
-
-IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
-{
- extern ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits);
- // FIXME: validate not flushing Xen addresses
- // if (Xen address) return(IA64_ILLOP_FAULT);
- // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE
-//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range);
-#ifdef VHPT_GLOBAL
- vhpt_flush_address(vadr,addr_range);
-#endif
- ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT);
- vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb));
- vcpu_purge_tr_entry(&PSCBX(vcpu,itlb));
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
-{
- printf("vcpu_ptr_d: Purging TLB is unsupported\n");
- return (IA64_ILLOP_FAULT);
-}
-
-IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range)
-{
- printf("vcpu_ptr_i: Purging TLB is unsupported\n");
- return (IA64_ILLOP_FAULT);
-}
-
-void vcpu_set_regs(VCPU *vcpu, REGS *regs)
-{
- vcpu->arch.regs = regs;
-}
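
As an aside on the vcpu.c code removed above: the short-format VHPT hash computed by vcpu_thash is easier to follow when the bit surgery is pulled out into a standalone sketch. The helper below is illustrative only (plain C, hypothetical name, no Xen types); it assumes pta_base has already been masked to the table alignment and rr_ps is the page-size field of the region register covering vadr.

#include <stdint.h>

/* Illustrative only: mirrors the bit layout used by vcpu_thash above for a
 * short-format VHPT. pta_size is PTA.size (log2 of the table size in bytes),
 * pta_base is the PTA base already masked to its alignment, and rr_ps is the
 * page-size field of the region register covering vadr. */
static uint64_t short_format_thash(uint64_t vadr, uint64_t pta_base,
                                   uint64_t pta_size, uint64_t rr_ps)
{
    uint64_t mask       = (1UL << pta_size) - 1;
    uint64_t mask_60_15 = (mask >> 15) & 0x3fffffffffffUL;      /* bits 60..15 */
    uint64_t comp_60_15 = ~mask_60_15;
    uint64_t vhpt_off   = (vadr >> rr_ps) << 3;                  /* 8-byte PTEs */
    uint64_t addr1      = vadr & 0xe000000000000000UL;           /* keep the VRN */
    uint64_t addr2a     = ((pta_base >> 15) & 0x3fffffffffffUL) & comp_60_15;
    uint64_t addr2b     = ((vhpt_off >> 15) & 0x3fffffffffffUL) & mask_60_15;
    uint64_t addr3      = vhpt_off & 0x7fff;                     /* low 15 bits */

    return addr1 | ((addr2a | addr2b) << 15) | addr3;
}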
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vhpt.c
--- a/xen/arch/ia64/vhpt.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,151 +0,0 @@
-/*
- * Initialize VHPT support.
- *
- * Copyright (C) 2004 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/pgalloc.h>
-#include <asm/page.h>
-#include <asm/dma.h>
-#include <asm/vhpt.h>
-
-unsigned long vhpt_paddr, vhpt_pend, vhpt_pte;
-
-void vhpt_flush(void)
-{
- struct vhpt_lf_entry *v = (void *)VHPT_ADDR;
- int i, cnt = 0;
-#if 0
-static int firsttime = 2;
-
-if (firsttime) firsttime--;
-else {
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-printf("vhpt_flush: *********************************************\n");
-}
-#endif
- for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
- v->itir = 0;
- v->CChain = 0;
- v->page_flags = 0;
- v->ti_tag = INVALID_TI_TAG;
- }
- // initialize cache too???
-}
-
-#ifdef VHPT_GLOBAL
-void vhpt_flush_address(unsigned long vadr, unsigned long addr_range)
-{
- unsigned long ps;
- struct vhpt_lf_entry *vlfe;
-
- if ((vadr >> 61) == 7) {
- // no vhpt for region 7 yet, see vcpu_itc_no_srlz
- printf("vhpt_flush_address: region 7, spinning...\n");
- while(1);
- }
-#if 0
- // this only seems to occur at shutdown, but it does occur
- if ((!addr_range) || addr_range & (addr_range - 1)) {
- printf("vhpt_flush_address: weird range, spinning...\n");
- while(1);
- }
-//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range);
-#endif
- while ((long)addr_range > 0) {
- vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr);
- // FIXME: for now, just blow it away even if it belongs to
- // another domain. Later, use ttag to check for match
-//if (!(vlfe->ti_tag & INVALID_TI_TAG)) {
-//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr);
-//}
- vlfe->ti_tag |= INVALID_TI_TAG;
- addr_range -= PAGE_SIZE;
- vadr += PAGE_SIZE;
- }
-}
-#endif
-
-void vhpt_map(void)
-{
- unsigned long psr;
-
- psr = ia64_clear_ic();
- ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2);
- ia64_set_psr(psr);
- ia64_srlz_i();
-}
-
-void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps)
-{
- unsigned long mask = (1L << logps) - 1;
- extern long running_on_sim;
- int i;
-
- if (logps-PAGE_SHIFT > 10 && !running_on_sim) {
- // if this happens, we may want to revisit this algorithm
- printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n");
- while(1);
- }
- if (logps-PAGE_SHIFT > 2) {
- // FIXME: Should add counter here to see how often this
- // happens (e.g. for 16MB pages!) and determine if it
- // is a performance problem. On a quick look, it takes
- // about 39000 instrs for a 16MB page and it seems to occur
- // only a few times/second, so OK for now.
- // An alternate solution would be to just insert the one
- // 16KB in the vhpt (but with the full mapping)?
- //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d,"
- //"va=%p, pa=%p, pa-masked=%p\n",
- //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK,
- //(pte&_PFN_MASK)&~mask);
- }
- vaddr &= ~mask;
- pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK);
- for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) {
- vhpt_insert(vaddr,pte,logps<<2);
- vaddr += PAGE_SIZE;
- }
-}
-
-void vhpt_init(void)
-{
- unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva;
-#if !VHPT_ENABLED
- return;
-#endif
- // allocate a huge chunk of physical memory.... how???
- vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
- vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB
- printf("vhpt_init: vhpt size=%p,
align=%p\n",vhpt_total_size,vhpt_alignment);
- /* This allocation only holds true if vhpt table is unique for
- * all domains. Or else later new vhpt table should be allocated
- * from domain heap when each domain is created. Assume xen buddy
- * allocator can provide natural aligned page by order?
- */
- vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT);
- if (!vhpt_imva) {
- printf("vhpt_init: can't allocate VHPT!\n");
- while(1);
- }
- vhpt_paddr = __pa(vhpt_imva);
- vhpt_pend = vhpt_paddr + vhpt_total_size - 1;
- printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend);
- vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL));
- vhpt_map();
- ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) |
- VHPT_ENABLED);
- vhpt_flush();
-}
-
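
For reference, the large-page handling in vhpt_multiple_insert above boils down to fanning one big mapping out into PAGE_SIZE-sized hash entries. A minimal standalone sketch follows; insert_one() is a hypothetical stand-in for vhpt_insert(), PFN_MASK is an assumed value for the ia64 PTE ppn field, and 16KB Xen pages are assumed.

#include <stdint.h>

#define PAGE_SHIFT 14                    /* assumption: 16KB Xen pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_MASK   0x0003fffffffff000UL  /* assumed ppn field of an ia64 PTE */

extern void insert_one(uint64_t vaddr, uint64_t pte, uint64_t itir);  /* stands in for vhpt_insert() */

/* Fan a 2^logps-byte mapping out into (2^logps / PAGE_SIZE) hash entries,
 * mirroring vhpt_multiple_insert above: the virtual address and the ppn are
 * aligned to the large page, every entry keeps the full page_flags, and
 * itir.ps carries the original (large) page size. */
static void fan_out_large_page(uint64_t vaddr, uint64_t pte, uint64_t logps)
{
    uint64_t mask = (1UL << logps) - 1;
    uint64_t i;

    vaddr &= ~mask;
    pte = ((pte & PFN_MASK) & ~mask) | (pte & ~PFN_MASK);
    for (i = 1UL << (logps - PAGE_SHIFT); i > 0; i--) {
        insert_one(vaddr, pte, logps << 2);   /* itir.ps in bits 7:2 */
        vaddr += PAGE_SIZE;
    }
}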
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vlsapic.c
--- a/xen/arch/ia64/vlsapic.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,620 +0,0 @@
-
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vlsapic.c: virtual lsapic model including ITC timer.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- */
-
-#include <linux/sched.h>
-#include <public/arch-ia64.h>
-#include <asm/ia64_int.h>
-#include <asm/vcpu.h>
-#include <asm/regionreg.h>
-#include <asm/tlb.h>
-#include <asm/processor.h>
-#include <asm/delay.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/vmx.h>
-#include <asm/hw_irq.h>
-#include <asm/vmx_pal_vsa.h>
-#include <asm/kregs.h>
-
-#define SHARED_VLAPIC_INF
-#ifdef V_IOSAPIC_READY
-static inline vl_apic_info* get_psapic(VCPU *vcpu)
-{
- shared_iopage_t *sp = get_sp(vcpu->domain);
- return &(sp->vcpu_iodata[vcpu->vcpu_id].apic_intr);
-}
-#endif
-//u64 fire_itc;
-//u64 fire_itc2;
-//u64 fire_itm;
-//u64 fire_itm2;
-/*
- * Update the checked last_itc.
- */
-static void update_last_itc(vtime_t *vtm, uint64_t cur_itc)
-{
- vtm->last_itc = cur_itc;
-}
-
-/*
- * ITC value saw in guest (host+offset+drift).
- */
-static uint64_t now_itc(vtime_t *vtm)
-{
- uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc();
-
- if ( vtm->vtm_local_drift ) {
-// guest_itc -= vtm->vtm_local_drift;
- }
- if ( (long)(guest_itc - vtm->last_itc) > 0 ) {
- return guest_itc;
-
- }
- else {
- /* guest ITC backwarded due after LP switch */
- return vtm->last_itc;
- }
-}
-
-/*
- * Interval time components reset.
- */
-static void vtm_reset(VCPU *vcpu)
-{
- uint64_t cur_itc;
- vtime_t *vtm;
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
- vtm->vtm_offset = 0;
- vtm->vtm_local_drift = 0;
- VPD_CR(vcpu, itm) = 0;
- VPD_CR(vcpu, itv) = 0x10000;
- cur_itc = ia64_get_itc();
- vtm->last_itc = vtm->vtm_offset + cur_itc;
-}
-
-/* callback function when vtm_timer expires */
-static void vtm_timer_fn(void *data)
-{
- vtime_t *vtm;
- VCPU *vcpu = data;
- u64 cur_itc,vitm;
-
- UINT64 vec;
-
- vec = VPD_CR(vcpu, itv) & 0xff;
- vmx_vcpu_pend_interrupt(vcpu, vec);
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
- cur_itc = now_itc(vtm);
- vitm =VPD_CR(vcpu, itm);
- //fire_itc2 = cur_itc;
- //fire_itm2 = vitm;
- update_last_itc(vtm,cur_itc); // pseudo read to update vITC
-}
-
-void vtm_init(VCPU *vcpu)
-{
- vtime_t *vtm;
- uint64_t itc_freq;
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
-
- itc_freq = local_cpu_data->itc_freq;
- vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000;
- vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000;
- init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0);
- vtm_reset(vcpu);
-}
-
-/*
- * Action when guest read ITC.
- */
-uint64_t vtm_get_itc(VCPU *vcpu)
-{
- uint64_t guest_itc, spsr;
- vtime_t *vtm;
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
- // FIXME: should use local_irq_disable & local_irq_enable ??
- local_irq_save(spsr);
- guest_itc = now_itc(vtm);
-// update_last_itc(vtm, guest_itc);
-
- local_irq_restore(spsr);
- return guest_itc;
-}
-
-void vtm_set_itc(VCPU *vcpu, uint64_t new_itc)
-{
- uint64_t spsr;
- vtime_t *vtm;
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
- local_irq_save(spsr);
- vtm->vtm_offset = new_itc - ia64_get_itc();
- vtm->last_itc = new_itc;
- vtm_interruption_update(vcpu, vtm);
- local_irq_restore(spsr);
-}
-
-void vtm_set_itv(VCPU *vcpu)
-{
- uint64_t spsr,itv;
- vtime_t *vtm;
-
- vtm=&(vcpu->arch.arch_vmx.vtm);
- local_irq_save(spsr);
- itv = VPD_CR(vcpu, itv);
- if ( ITV_IRQ_MASK(itv) )
- rem_ac_timer(&vtm->vtm_timer);
- vtm_interruption_update(vcpu, vtm);
- local_irq_restore(spsr);
-}
-
-
-/*
- * Update interrupt or hook the vtm ac_timer for fire
- * At this point vtm_timer should be removed if itv is masked.
- */
-/* Interrupt must be disabled at this point */
-
-extern u64 tick_to_ns(u64 tick);
-#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */
-void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm)
-{
- uint64_t cur_itc,vitm,vitv;
- uint64_t expires;
- long diff_now, diff_last;
- uint64_t spsr;
-
- vitv = VPD_CR(vcpu, itv);
- if ( ITV_IRQ_MASK(vitv) ) {
- return;
- }
-
- vitm =VPD_CR(vcpu, itm);
- local_irq_save(spsr);
- cur_itc =now_itc(vtm);
- diff_last = vtm->last_itc - vitm;
- diff_now = cur_itc - vitm;
- update_last_itc (vtm,cur_itc);
-
- if ( diff_last >= 0 ) {
- // interrupt already fired.
- rem_ac_timer(&vtm->vtm_timer);
- }
- else if ( diff_now >= 0 ) {
- // ITV is fired.
- vmx_vcpu_pend_interrupt(vcpu, vitv&0xff);
- }
- /* Both last_itc & cur_itc < itm, wait for fire condition */
- else {
- expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP;
- set_ac_timer(&vtm->vtm_timer, expires);
- }
- local_irq_restore(spsr);
-}
-
-/*
- * Action for vtm when the domain is scheduled out.
- * Remove the ac_timer for vtm.
- */
-void vtm_domain_out(VCPU *vcpu)
-{
- if(!is_idle_task(vcpu->domain))
- rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer);
-}
-
-/*
- * Action for vtm when the domain is scheduled in.
- * Fire vtm IRQ or add the ac_timer for vtm.
- */
-void vtm_domain_in(VCPU *vcpu)
-{
- vtime_t *vtm;
-
- if(!is_idle_task(vcpu->domain)) {
- vtm=&(vcpu->arch.arch_vmx.vtm);
- vtm_interruption_update(vcpu, vtm);
- }
-}
-
-/*
- * Next for vLSapic
- */
-
-#define NMI_VECTOR 2
-#define ExtINT_VECTOR 0
-#define NULL_VECTOR -1
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i])
-static void update_vhpi(VCPU *vcpu, int vec)
-{
- u64 vhpi;
- if ( vec == NULL_VECTOR ) {
- vhpi = 0;
- }
- else if ( vec == NMI_VECTOR ) { // NMI
- vhpi = 32;
- } else if (vec == ExtINT_VECTOR) { //ExtINT
- vhpi = 16;
- }
- else {
- vhpi = vec / 16;
- }
-
- VMX_VPD(vcpu,vhpi) = vhpi;
- // TODO: Add support for XENO
- if ( VMX_VPD(vcpu,vac).a_int ) {
- ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT,
- (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0);
- }
-}
-
-#ifdef V_IOSAPIC_READY
-void vlapic_update_shared_info(VCPU *vcpu)
-{
- //int i;
-
- vl_apic_info *ps;
-
- if (vcpu->domain == dom0)
- return;
-
- ps = get_psapic(vcpu);
- ps->vl_lapic_id = ((VPD_CR(vcpu, lid) >> 16) & 0xffff) << 16;
- printf("vl_lapic_id = %x\n", ps->vl_lapic_id);
- ps->vl_apr = 0;
- // skip ps->vl_logical_dest && ps->vl_dest_format
- // IPF support physical destination mode only
- ps->vl_arb_id = 0;
- /*
- for ( i=0; i<4; i++ ) {
- ps->tmr[i] = 0; // edge trigger
- }
- */
-}
-
-void vlapic_update_ext_irq(VCPU *vcpu)
-{
- int vec;
-
- vl_apic_info *ps = get_psapic(vcpu);
- while ( (vec = highest_bits(ps->irr)) != NULL_VECTOR ) {
- clear_bit (vec, ps->irr);
- vmx_vcpu_pend_interrupt(vcpu, vec);
- }
-}
-#endif
-
-void vlsapic_reset(VCPU *vcpu)
-{
- int i;
-#ifdef V_IOSAPIC_READY
- vl_apic_info *psapic; // shared lapic inf.
-#endif
-
- VPD_CR(vcpu, lid) = ia64_getreg(_IA64_REG_CR_LID);
- VPD_CR(vcpu, ivr) = 0;
- VPD_CR(vcpu,tpr) = 0x10000;
- VPD_CR(vcpu, eoi) = 0;
- VPD_CR(vcpu, irr[0]) = 0;
- VPD_CR(vcpu, irr[1]) = 0;
- VPD_CR(vcpu, irr[2]) = 0;
- VPD_CR(vcpu, irr[3]) = 0;
- VPD_CR(vcpu, pmv) = 0x10000;
- VPD_CR(vcpu, cmcv) = 0x10000;
- VPD_CR(vcpu, lrr0) = 0x10000; // default reset value?
- VPD_CR(vcpu, lrr1) = 0x10000; // default reset value?
- update_vhpi(vcpu, NULL_VECTOR);
- for ( i=0; i<4; i++) {
- VLSAPIC_INSVC(vcpu,i) = 0;
- }
-#ifdef V_IOSAPIC_READY
- vlapic_update_shared_info(vcpu);
- //vlapic_update_shared_irr(vcpu);
-#endif
- DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) );
-}
-
-/*
- * Find highest signaled bits in 4 words (long).
- *
- * return 0-255: highest bits.
- * -1 : Not found.
- */
-static __inline__ int highest_bits(uint64_t *dat)
-{
- uint64_t bits, bitnum;
- int i;
-
- /* loop for all 256 bits */
- for ( i=3; i >= 0 ; i -- ) {
- bits = dat[i];
- if ( bits ) {
- bitnum = ia64_fls(bits);
- return i*64+bitnum;
- }
- }
- return NULL_VECTOR;
-}
-
-/*
- * Return 0-255 for pending irq.
- * NULL_VECTOR: when no pending.
- */
-static int highest_pending_irq(VCPU *vcpu)
-{
- if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
- if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
- return highest_bits(&VPD_CR(vcpu, irr[0]));
-}
-
-static int highest_inservice_irq(VCPU *vcpu)
-{
- if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR;
- if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR;
- return highest_bits(&(VLSAPIC_INSVC(vcpu, 0)));
-}
-
-/*
- * The pending irq is higher than the inservice one.
- *
- */
-static int is_higher_irq(int pending, int inservice)
-{
- return ( (pending >> 4) > (inservice>>4) ||
- ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) );
-}
-
-static int is_higher_class(int pending, int mic)
-{
- return ( (pending >> 4) > mic );
-}
-
-static int is_invalid_irq(int vec)
-{
- return (vec == 1 || ((vec <= 14 && vec >= 3)));
-}
-
-#define IRQ_NO_MASKED 0
-#define IRQ_MASKED_BY_VTPR 1
-#define IRQ_MASKED_BY_INSVC 2 // masked by inservice IRQ
-
-/* See Table 5-8 in SDM vol2 for the definition */
-static int
-_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice)
-{
- tpr_t vtpr;
- uint64_t mmi;
-
- vtpr.val = VPD_CR(vcpu, tpr);
-
- if ( h_inservice == NMI_VECTOR ) {
- return IRQ_MASKED_BY_INSVC;
- }
- if ( h_pending == NMI_VECTOR ) {
- // Non Maskable Interrupt
- return IRQ_NO_MASKED;
- }
- if ( h_inservice == ExtINT_VECTOR ) {
- return IRQ_MASKED_BY_INSVC;
- }
- mmi = vtpr.mmi;
- if ( h_pending == ExtINT_VECTOR ) {
- if ( mmi ) {
- // mask all external IRQ
- return IRQ_MASKED_BY_VTPR;
- }
- else {
- return IRQ_NO_MASKED;
- }
- }
-
- if ( is_higher_irq(h_pending, h_inservice) ) {
- if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) {
- return IRQ_NO_MASKED;
- }
- else {
- return IRQ_MASKED_BY_VTPR;
- }
- }
- else {
- return IRQ_MASKED_BY_INSVC;
- }
-}
-
-static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice)
-{
- int mask;
-
- mask = _xirq_masked(vcpu, h_pending, h_inservice);
- return mask;
-}
-
-
-/*
- * May come from virtualization fault or
- * nested host interrupt.
- */
-void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector)
-{
- uint64_t spsr;
-
- if (vector & ~0xff) {
- DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n");
- return;
- }
- local_irq_save(spsr);
- VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63);
- //vlapic_update_shared_irr(vcpu);
- local_irq_restore(spsr);
- vcpu->arch.irq_new_pending = 1;
-}
-
-/*
- * Add batch of pending interrupt.
- * The interrupt source is contained in pend_irr[0-3] with
- * each bits stand for one interrupt.
- */
-void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr)
-{
- uint64_t spsr;
- int i;
-
- local_irq_save(spsr);
- for (i=0 ; i<4; i++ ) {
- VPD_CR(vcpu,irr[i]) |= pend_irr[i];
- }
- //vlapic_update_shared_irr(vcpu);
- local_irq_restore(spsr);
- vcpu->arch.irq_new_pending = 1;
-}
-
-/*
- * If the new pending interrupt is enabled and not masked, we directly inject
- * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when
- * the interrupt becomes unmasked, it gets injected.
- * RETURN:
- * TRUE: Interrupt is injected.
- * FALSE: Not injected but may be in VHPI when vac.a_int=1
- *
- * Optimization: We defer setting the VHPI until the EOI time, if a higher
- * priority interrupt is in-service. The idea is to reduce the
- * number of unnecessary calls to inject_vhpi.
- */
-int vmx_check_pending_irq(VCPU *vcpu)
-{
- uint64_t spsr, mask;
- int h_pending, h_inservice;
- int injected=0;
- uint64_t isr;
- IA64_PSR vpsr;
-
- local_irq_save(spsr);
- h_pending = highest_pending_irq(vcpu);
- if ( h_pending == NULL_VECTOR ) goto chk_irq_exit;
- h_inservice = highest_inservice_irq(vcpu);
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- mask = irq_masked(vcpu, h_pending, h_inservice);
- if ( vpsr.i && IRQ_NO_MASKED == mask ) {
- isr = vpsr.val & IA64_PSR_RI;
- if ( !vpsr.ic )
- panic("Interrupt when IC=0\n");
- vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
- injected = 1;
- }
- else if ( mask == IRQ_MASKED_BY_INSVC ) {
- // cann't inject VHPI
-// DPRINTK("IRQ masked by higher inservice\n");
- }
- else {
- // masked by vpsr.i or vtpr.
- update_vhpi(vcpu,h_pending);
- }
-
-chk_irq_exit:
- local_irq_restore(spsr);
- return injected;
-}
-
-/*
- * Only coming from virtualization fault.
- */
-void guest_write_eoi(VCPU *vcpu)
-{
- int vec;
- uint64_t spsr;
-
- vec = highest_inservice_irq(vcpu);
- if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n");
- local_irq_save(spsr);
- VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
- local_irq_restore(spsr);
- VPD_CR(vcpu, eoi)=0; // overwrite the data
- vmx_check_pending_irq(vcpu);
-}
-
-uint64_t guest_read_vivr(VCPU *vcpu)
-{
- int vec, next, h_inservice;
- uint64_t spsr;
-
- local_irq_save(spsr);
- vec = highest_pending_irq(vcpu);
- h_inservice = highest_inservice_irq(vcpu);
- if ( vec == NULL_VECTOR ||
- irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) {
- local_irq_restore(spsr);
- return IA64_SPURIOUS_INT_VECTOR;
- }
-
- VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
- VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
- update_vhpi(vcpu, NULL_VECTOR); // clear VHPI till EOI or IRR write
- //vlapic_update_shared_irr(vcpu);
- local_irq_restore(spsr);
- return (uint64_t)vec;
-}
-
-static void generate_exirq(VCPU *vcpu)
-{
- IA64_PSR vpsr;
- uint64_t isr;
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- update_vhpi(vcpu, NULL_VECTOR);
- isr = vpsr.val & IA64_PSR_RI;
- if ( !vpsr.ic )
- panic("Interrupt when IC=0\n");
- vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ
-}
-
-vhpi_detection(VCPU *vcpu)
-{
- uint64_t threshold,vhpi;
- tpr_t vtpr;
- IA64_PSR vpsr;
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- vtpr.val = VPD_CR(vcpu, tpr);
-
- threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
- vhpi = VMX_VPD(vcpu,vhpi);
- if ( vhpi > threshold ) {
- // interrupt actived
- generate_exirq (vcpu);
- }
-}
-
-vmx_vexirq(VCPU *vcpu)
-{
- static uint64_t vexirq_count=0;
-
- vexirq_count ++;
- printk("Virtual ex-irq %ld\n", vexirq_count);
- generate_exirq (vcpu);
-}
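
A quick sketch of the guest ITC virtualization in now_itc()/vtm_set_itc() above: the guest sees the host ITC plus a per-vcpu offset, clamped so it never appears to run backwards after a switch between physical CPUs. This sketch folds update_last_itc() into the read path for brevity; read_host_itc() is a hypothetical stand-in for ia64_get_itc().

#include <stdint.h>

struct vtime {
    uint64_t vtm_offset;   /* guest ITC = host ITC + offset */
    uint64_t last_itc;     /* largest value the guest has already seen */
};

extern uint64_t read_host_itc(void);   /* stands in for ia64_get_itc() */

static uint64_t guest_read_itc(struct vtime *vtm)
{
    uint64_t itc = vtm->vtm_offset + read_host_itc();

    if ((int64_t)(itc - vtm->last_itc) > 0)
        vtm->last_itc = itc;            /* moved forward: remember it */
    return vtm->last_itc;               /* otherwise return the clamped value */
}

static void guest_write_itc(struct vtime *vtm, uint64_t new_itc)
{
    vtm->vtm_offset = new_itc - read_host_itc();
    vtm->last_itc   = new_itc;
}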
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmmu.c
--- a/xen/arch/ia64/vmmu.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,846 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmmu.c: virtual memory management unit components.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/tlb.h>
-#include <asm/gcc_intrin.h>
-#include <asm/vcpu.h>
-#include <linux/interrupt.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/vmx.h>
-#include <asm/hw_irq.h>
-#include <asm/vmx_pal_vsa.h>
-#include <asm/kregs.h>
-
-/*
- * Architecture ppn is in 4KB unit while XEN
- * page may be different(1<<PAGE_SHIFT).
- */
-static inline u64 arch_ppn_to_xen_ppn(u64 appn)
-{
- return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT;
-}
-
-static inline u64 xen_ppn_to_arch_ppn(u64 xppn)
-{
- return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT;
-}
-
-
-/*
- * Get the machine page frame number in 16KB unit
- * Input:
- * d:
- */
-u64 get_mfn(domid_t domid, u64 gpfn, u64 pages)
-{
- struct domain *d;
- u64 i, xen_gppn, xen_mppn, mpfn;
-
- if ( domid == DOMID_SELF ) {
- d = current->domain;
- }
- else {
- d = find_domain_by_id(domid);
- }
- xen_gppn = arch_ppn_to_xen_ppn(gpfn);
- xen_mppn = __gpfn_to_mfn(d, xen_gppn);
-/*
- for (i=0; i<pages; i++) {
- if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) {
- return INVALID_MFN;
- }
- }
-*/
- mpfn= xen_ppn_to_arch_ppn(xen_mppn);
- mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn);
- return mpfn;
-
-}
-
-/*
- * The VRN bits of va stand for which rr to get.
- */
-ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va)
-{
- ia64_rr vrr;
- vmx_vcpu_get_rr(vcpu, va, &vrr.rrval);
- return vrr;
-}
-
-
-void recycle_message(thash_cb_t *hcb, u64 para)
-{
- printk("hcb=%p recycled with %lx\n",hcb,para);
-}
-
-
-/*
- * Purge all guest TCs in logical processor.
- * Instead of purging all LP TCs, we should only purge
- * TCs that belong to this guest.
- */
-void
-purge_machine_tc_by_domid(domid_t domid)
-{
-#ifndef PURGE_GUEST_TC_ONLY
- // purge all TCs
- struct ia64_pal_retval result;
- u64 addr;
- u32 count1,count2;
- u32 stride1,stride2;
- u32 i,j;
- u64 psr;
-
-
- result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0);
- if ( result.status != 0 ) {
- panic ("PAL_PTCE_INFO failed\n");
- }
- addr = result.v0;
- count1 = HIGH_32BITS(result.v1);
- count2 = LOW_32BITS (result.v1);
- stride1 = HIGH_32BITS(result.v2);
- stride2 = LOW_32BITS (result.v2);
-
- local_irq_save(psr);
- for (i=0; i<count1; i++) {
- for (j=0; j<count2; j++) {
- ia64_ptce(addr);
- addr += stride2;
- }
- addr += stride1;
- }
- local_irq_restore(psr);
-#else
- // purge all TCs belong to this guest.
-#endif
-}
-
-static thash_cb_t *init_domain_vhpt(struct vcpu *d)
-{
- struct pfn_info *page;
- void *vbase,*vcur;
- vhpt_special *vs;
- thash_cb_t *vhpt;
- PTA pta_value;
-
- page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
- if ( page == NULL ) {
- panic("No enough contiguous memory for init_domain_mm\n");
- }
- vbase = page_to_virt(page);
- printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase);
- memset(vbase, 0, VCPU_TLB_SIZE);
- vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
- vhpt = --((thash_cb_t*)vcur);
- vhpt->ht = THASH_VHPT;
- vhpt->vcpu = d;
- vhpt->hash_func = machine_thash;
- vs = --((vhpt_special *)vcur);
-
- /* Setup guest pta */
- pta_value.val = 0;
- pta_value.ve = 1;
- pta_value.vf = 1;
- pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */
- pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT;
- d->arch.arch_vmx.mpta = pta_value.val;
-
- vhpt->vs = vs;
- vhpt->vs->get_mfn = get_mfn;
- vhpt->vs->tag_func = machine_ttag;
- vhpt->hash = vbase;
- vhpt->hash_sz = VCPU_TLB_SIZE/2;
- vhpt->cch_buf = (u64)vbase + vhpt->hash_sz;
- vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf;
- vhpt->recycle_notifier = recycle_message;
- thash_init(vhpt,VCPU_TLB_SHIFT-1);
- return vhpt;
-}
-
-
-thash_cb_t *init_domain_tlb(struct vcpu *d)
-{
- struct pfn_info *page;
- void *vbase,*vcur;
- tlb_special_t *ts;
- thash_cb_t *tlb;
-
- page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
- if ( page == NULL ) {
- panic("No enough contiguous memory for init_domain_mm\n");
- }
- vbase = page_to_virt(page);
- printk("Allocate domain tlb at 0x%lx\n", (u64)vbase);
- memset(vbase, 0, VCPU_TLB_SIZE);
- vcur = (void*)((u64)vbase + VCPU_TLB_SIZE);
- tlb = --((thash_cb_t*)vcur);
- tlb->ht = THASH_TLB;
- tlb->vcpu = d;
- ts = --((tlb_special_t *)vcur);
- tlb->ts = ts;
- tlb->ts->vhpt = init_domain_vhpt(d);
- tlb->hash_func = machine_thash;
- tlb->hash = vbase;
- tlb->hash_sz = VCPU_TLB_SIZE/2;
- tlb->cch_buf = (u64)vbase + tlb->hash_sz;
- tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf;
- tlb->recycle_notifier = recycle_message;
- thash_init(tlb,VCPU_TLB_SHIFT-1);
- return tlb;
-}
-
-/* Allocate physical to machine mapping table for domN
- * FIXME: Later this interface may be removed, if that table is provided
- * by control panel. Dom0 has gpfn identical to mfn, which doesn't need
- * this interface at all.
- */
-void
-alloc_pmt(struct domain *d)
-{
- struct pfn_info *page;
-
- /* Only called once */
- ASSERT(d->arch.pmt);
-
- page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0);
- ASSERT(page);
-
- d->arch.pmt = page_to_virt(page);
- memset(d->arch.pmt, 0x55, d->max_pages * 8);
-}
-
-/*
- * Insert guest TLB to machine TLB.
- * data: In TLB format
- */
-void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb)
-{
- u64 saved_itir, saved_ifa, saved_rr;
- u64 pages;
- thash_data_t mtlb;
- ia64_rr vrr;
- unsigned int cl = tlb->cl;
-
- mtlb.ifa = tlb->vadr;
- mtlb.itir = tlb->itir & ~ITIR_RV_MASK;
- vrr = vmmu_get_rr(d,mtlb.ifa);
- //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value);
- pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
- mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
- mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages);
- if (mtlb.ppn == INVALID_MFN)
- panic("Machine tlb insert with invalid mfn number.\n");
-
- __asm __volatile("rsm psr.ic|psr.i;; srlz.i" );
-
- saved_itir = ia64_getreg(_IA64_REG_CR_ITIR);
- saved_ifa = ia64_getreg(_IA64_REG_CR_IFA);
- saved_rr = ia64_get_rr(mtlb.ifa);
-
- ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir);
- ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa);
- /* Only access memory stack which is mapped by TR,
- * after rr is switched.
- */
- ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval));
- ia64_srlz_d();
- if ( cl == ISIDE_TLB ) {
- ia64_itci(mtlb.page_flags);
- ia64_srlz_i();
- }
- else {
- ia64_itcd(mtlb.page_flags);
- ia64_srlz_d();
- }
- ia64_set_rr(mtlb.ifa,saved_rr);
- ia64_srlz_d();
- ia64_setreg(_IA64_REG_CR_IFA, saved_ifa);
- ia64_setreg(_IA64_REG_CR_ITIR, saved_itir);
- __asm __volatile("ssm psr.ic|psr.i;; srlz.i" );
-}
-
-u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps)
-{
- u64 saved_pta, saved_rr0;
- u64 hash_addr, tag;
- unsigned long psr;
- struct vcpu *v = current;
- ia64_rr vrr;
-
-
- saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
- saved_rr0 = ia64_get_rr(0);
- vrr.rrval = saved_rr0;
- vrr.rid = rid;
- vrr.ps = ps;
-
- va = (va << 3) >> 3; // set VRN to 0.
- // TODO: Set to enforce lazy mode
- local_irq_save(psr);
- ia64_setreg(_IA64_REG_CR_PTA, pta.val);
- ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
- ia64_srlz_d();
-
- hash_addr = ia64_thash(va);
- ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
-
- ia64_set_rr(0, saved_rr0);
- ia64_srlz_d();
- local_irq_restore(psr);
- return hash_addr;
-}
-
-u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps)
-{
- u64 saved_pta, saved_rr0;
- u64 hash_addr, tag;
- u64 psr;
- struct vcpu *v = current;
- ia64_rr vrr;
-
- // TODO: Set to enforce lazy mode
- saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
- saved_rr0 = ia64_get_rr(0);
- vrr.rrval = saved_rr0;
- vrr.rid = rid;
- vrr.ps = ps;
-
- va = (va << 3) >> 3; // set VRN to 0.
- local_irq_save(psr);
- ia64_setreg(_IA64_REG_CR_PTA, pta.val);
- ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
- ia64_srlz_d();
-
- tag = ia64_ttag(va);
- ia64_setreg(_IA64_REG_CR_PTA, saved_pta);
-
- ia64_set_rr(0, saved_rr0);
- ia64_srlz_d();
- local_irq_restore(psr);
- return tag;
-}
-
-/*
- * Purge machine tlb.
- * INPUT
- * rr: guest rr.
- * va: only bits 0:60 is valid
- * size: bits format (1<<size) for the address range to purge.
- *
- */
-void machine_tlb_purge(u64 rid, u64 va, u64 ps)
-{
- u64 saved_rr0;
- u64 psr;
- ia64_rr vrr;
-
- va = (va << 3) >> 3; // set VRN to 0.
- saved_rr0 = ia64_get_rr(0);
- vrr.rrval = saved_rr0;
- vrr.rid = rid;
- vrr.ps = ps;
- local_irq_save(psr);
- ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) );
- ia64_srlz_d();
- ia64_ptcl(va, ps << 2);
- ia64_set_rr( 0, saved_rr0 );
- ia64_srlz_d();
- local_irq_restore(psr);
-}
-
-
-int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref)
-{
- ia64_rr vrr;
- PTA vpta;
- IA64_PSR vpsr;
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- vrr = vmx_vcpu_rr(vcpu, vadr);
- vmx_vcpu_get_pta(vcpu,&vpta.val);
-
- if ( vrr.ve & vpta.ve ) {
- switch ( ref ) {
- case DATA_REF:
- case NA_REF:
- return vpsr.dt;
- case INST_REF:
- return vpsr.dt && vpsr.it && vpsr.ic;
- case RSE_REF:
- return vpsr.dt && vpsr.rt;
-
- }
- }
- return 0;
-}
-
-
-int unimplemented_gva(VCPU *vcpu,u64 vadr)
-{
- int bit=vcpu->domain->arch.imp_va_msb;
- u64 ladr =(vadr<<3)>>(3+bit);
- if(!ladr||ladr==(1U<<(61-bit))-1){
- return 0;
- }else{
- return 1;
- }
-}
-
-
-/*
- * Prefetch guest bundle code.
- * INPUT:
- * code: buffer pointer to hold the read data.
- * num: number of dword (8byts) to read.
- */
-int
-fetch_code(VCPU *vcpu, u64 gip, u64 *code)
-{
- u64 gpip; // guest physical IP
- u64 mpa;
- thash_data_t *tlb;
- ia64_rr vrr;
- u64 mfn;
-
- if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode
- gpip = gip;
- }
- else {
- vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval);
- tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu),
- vrr.rid, gip, ISIDE_TLB );
- if ( tlb == NULL ) panic("No entry found in ITLB\n");
- gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) );
- }
- mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT);
- if ( mfn == INVALID_MFN ) return 0;
-
- mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT);
- *code = *(u64*)__va(mpa);
- return 1;
-}
-
-IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
-{
-
- thash_data_t data, *ovl;
- thash_cb_t *hcb;
- search_section_t sections;
- ia64_rr vrr;
-
- hcb = vmx_vcpu_get_vtlb(vcpu);
- data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
- data.itir=itir;
- data.vadr=PAGEALIGN(ifa,data.ps);
- data.tc = 1;
- data.cl=ISIDE_TLB;
- vmx_vcpu_get_rr(vcpu, ifa, &vrr);
- data.rid = vrr.rid;
-
- sections.tr = 1;
- sections.tc = 0;
-
- ovl = thash_find_overlap(hcb, &data, sections);
- while (ovl) {
- // generate MCA.
- panic("Tlb conflict!!");
- return;
- }
- thash_purge_and_insert(hcb, &data);
- return IA64_NO_FAULT;
-}
-
-
-
-
-IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa)
-{
-
- thash_data_t data, *ovl;
- thash_cb_t *hcb;
- search_section_t sections;
- ia64_rr vrr;
-
- hcb = vmx_vcpu_get_vtlb(vcpu);
- data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
- data.itir=itir;
- data.vadr=PAGEALIGN(ifa,data.ps);
- data.tc = 1;
- data.cl=DSIDE_TLB;
- vmx_vcpu_get_rr(vcpu, ifa, &vrr);
- data.rid = vrr.rid;
- sections.tr = 1;
- sections.tc = 0;
-
- ovl = thash_find_overlap(hcb, &data, sections);
- if (ovl) {
- // generate MCA.
- panic("Tlb conflict!!");
- return;
- }
- thash_purge_and_insert(hcb, &data);
- return IA64_NO_FAULT;
-}
-
-/*
- * Return TRUE/FALSE for success of lock operation
- */
-int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock)
-{
-
- thash_cb_t *hcb;
- ia64_rr vrr;
- u64 preferred_size;
-
- vmx_vcpu_get_rr(vcpu, va, &vrr);
- hcb = vmx_vcpu_get_vtlb(vcpu);
- va = PAGEALIGN(va,vrr.ps);
- preferred_size = PSIZE(vrr.ps);
- return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock);
-}
-
-IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
-{
-
- thash_data_t data, *ovl;
- thash_cb_t *hcb;
- search_section_t sections;
- ia64_rr vrr;
-
- hcb = vmx_vcpu_get_vtlb(vcpu);
- data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
- data.itir=itir;
- data.vadr=PAGEALIGN(ifa,data.ps);
- data.tc = 0;
- data.cl=ISIDE_TLB;
- vmx_vcpu_get_rr(vcpu, ifa, &vrr);
- data.rid = vrr.rid;
- sections.tr = 1;
- sections.tc = 0;
-
- ovl = thash_find_overlap(hcb, &data, sections);
- if (ovl) {
- // generate MCA.
- panic("Tlb conflict!!");
- return;
- }
- sections.tr = 0;
- sections.tc = 1;
- thash_purge_entries(hcb, &data, sections);
- thash_tr_insert(hcb, &data, ifa, idx);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx)
-{
-
- thash_data_t data, *ovl;
- thash_cb_t *hcb;
- search_section_t sections;
- ia64_rr vrr;
-
-
- hcb = vmx_vcpu_get_vtlb(vcpu);
- data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
- data.itir=itir;
- data.vadr=PAGEALIGN(ifa,data.ps);
- data.tc = 0;
- data.cl=DSIDE_TLB;
- vmx_vcpu_get_rr(vcpu, ifa, &vrr);
- data.rid = vrr.rid;
- sections.tr = 1;
- sections.tc = 0;
-
- ovl = thash_find_overlap(hcb, &data, sections);
- while (ovl) {
- // generate MCA.
- panic("Tlb conflict!!");
- return;
- }
- sections.tr = 0;
- sections.tc = 1;
- thash_purge_entries(hcb, &data, sections);
- thash_tr_insert(hcb, &data, ifa, idx);
- return IA64_NO_FAULT;
-}
-
-
-
-IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps)
-{
- thash_cb_t *hcb;
- ia64_rr rr;
- search_section_t sections;
-
- hcb = vmx_vcpu_get_vtlb(vcpu);
- rr=vmx_vcpu_rr(vcpu,vadr);
- sections.tr = 1;
- sections.tc = 1;
- thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps)
-{
- thash_cb_t *hcb;
- ia64_rr rr;
- search_section_t sections;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- rr=vmx_vcpu_rr(vcpu,vadr);
- sections.tr = 1;
- sections.tc = 1;
- thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps)
-{
- thash_cb_t *hcb;
- ia64_rr vrr;
- search_section_t sections;
- thash_data_t data, *ovl;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- vrr=vmx_vcpu_rr(vcpu,vadr);
- sections.tr = 0;
- sections.tc = 1;
- vadr = PAGEALIGN(vadr, ps);
-
- thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB);
- thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB);
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr)
-{
- thash_cb_t *hcb;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- thash_purge_all(hcb);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps)
-{
- vmx_vcpu_ptc_l(vcpu, vadr, ps);
- return IA64_ILLOP_FAULT;
-}
-
-IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps)
-{
- vmx_vcpu_ptc_l(vcpu, vadr, ps);
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
-{
- PTA vpta;
- ia64_rr vrr;
- u64 vhpt_offset,tmp;
- vmx_vcpu_get_pta(vcpu, &vpta.val);
- vrr=vmx_vcpu_rr(vcpu, vadr);
- if(vpta.vf){
- panic("THASH,Don't support long format VHPT");
- *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0);
- }else{
- vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1);
- *pval = (vadr&VRN_MASK)|
- (vpta.val<<3>>(vpta.size+3)<<(vpta.size))|
- vhpt_offset;
- }
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval)
-{
- ia64_rr vrr;
- PTA vpta;
- vmx_vcpu_get_pta(vcpu, &vpta.val);
- vrr=vmx_vcpu_rr(vcpu, vadr);
- if(vpta.vf){
- panic("THASH,Don't support long format VHPT");
- *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0);
- }else{
- *pval = 1;
- }
- return IA64_NO_FAULT;
-}
-
-
-
-IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr)
-{
- thash_data_t *data;
- thash_cb_t *hcb;
- ia64_rr vrr;
- ISR visr,pt_isr;
- REGS *regs;
- u64 vhpt_adr;
- IA64_PSR vpsr;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- vrr=vmx_vcpu_rr(vcpu,vadr);
- regs=vcpu_regs(vcpu);
- pt_isr.val=regs->cr_isr;
- visr.val=0;
- visr.ei=pt_isr.ei;
- visr.ir=pt_isr.ir;
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- if(vpsr.ic==0){
- visr.ni=1;
- }
- visr.na=1;
- data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB);
- if(data){
- if(data->p==0){
- visr.na=1;
- vmx_vcpu_set_isr(vcpu,visr.val);
- page_not_present(vcpu, vadr);
- return IA64_FAULT;
- }else if(data->ma == VA_MATTR_NATPAGE){
- visr.na = 1;
- vmx_vcpu_set_isr(vcpu, visr.val);
- dnat_page_consumption(vcpu, vadr);
- return IA64_FAULT;
- }else{
- *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1));
- return IA64_NO_FAULT;
- }
- }else{
- if(!vhpt_enabled(vcpu, vadr, NA_REF)){
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, visr.val);
- alt_dtlb(vcpu, vadr);
- return IA64_FAULT;
- }
- else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- else{
- vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
- vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
- data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB);
- if(data){
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, visr.val);
- dtlb_fault(vcpu, vadr);
- return IA64_FAULT;
- }
- else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- else{
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, visr.val);
- dvhpt_fault(vcpu, vadr);
- return IA64_FAULT;
- }
- else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- }
- }
-}
-
-IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
-{
- thash_data_t *data;
- thash_cb_t *hcb;
- ia64_rr rr;
- PTA vpta;
- vmx_vcpu_get_pta(vcpu, &vpta.val);
- if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){
- *key=1;
- return IA64_NO_FAULT;
- }
- hcb = vmx_vcpu_get_vtlb(vcpu);
- rr=vmx_vcpu_rr(vcpu,vadr);
- data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB);
- if(!data||!data->p){
- *key=1;
- }else{
- *key=data->key;
- }
- return IA64_NO_FAULT;
-}
-
-/*
- * [FIXME] Is there any effective way to move this routine
- * into vmx_uaccess.h? struct exec_domain is incomplete type
- * in that way...
- *
- * This is the interface to lookup virtual TLB, and then
- * return corresponding machine address in 2nd parameter.
- * The 3rd parameter contains how many bytes mapped by
- * matched vTLB entry, thus to allow caller copy more once.
- *
- * If failed to lookup, -EFAULT is returned. Or else reutrn
- * 0. All upper domain access utilities rely on this routine
- * to determine the real machine address.
- *
- * Yes, put_user and get_user seems to somhow slow upon it.
- * However it's the necessary steps for any vmx domain virtual
- * address, since that's difference address space as HV's one.
- * Later some short-circuit may be created for special case
- */
-long
-__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len)
-{
- unsigned long mpfn, gpfn, m, n = *len;
- thash_cb_t *vtlb;
- unsigned long end; /* end of the area mapped by current entry */
- thash_data_t *entry;
- struct vcpu *v = current;
- ia64_rr vrr;
-
- vtlb = vmx_vcpu_get_vtlb(v);
- vrr = vmx_vcpu_rr(v, va);
- entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB);
- if (entry == NULL)
- return -EFAULT;
-
- gpfn =(entry->ppn>>(PAGE_SHIFT-12));
- gpfn =PAGEALIGN(gpfn,(entry->ps-PAGE_SHIFT));
- gpfn = gpfn | POFFSET(va>>PAGE_SHIFT,(entry->ps-PAGE_SHIFT));
-
- mpfn = __gpfn_to_mfn(v->domain, gpfn);
- m = (mpfn<<PAGE_SHIFT) | (va & (PAGE_SIZE - 1));
- /* machine addresses may not be contiguous */
- end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE;
- /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/
- /* Current entry can't map the whole requested area */
- if ((m + n) > end)
- n = end - m;
-
- *ma = m;
- *len = n;
- return 0;
-}
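As the routine's comment notes, a matched vTLB entry may cover only part of the requested range, so callers are expected to loop, consuming *len bytes per lookup. A minimal sketch of such a caller follows; copy_from_domain_va is a hypothetical helper, and the memcpy through __va() merely stands in for however the hypervisor actually accesses the returned machine address:

/* Sketch only: copy 'size' bytes from guest virtual address 'va' into 'dst',
 * one vTLB-mapped chunk at a time, using __domain_va_to_ma() above. */
static long copy_from_domain_va(void *dst, unsigned long va, unsigned long size)
{
    while (size > 0) {
        unsigned long ma, chunk = size;

        if (__domain_va_to_ma(va, &ma, &chunk))
            return -EFAULT;                 /* no vTLB entry covers 'va' */

        /* assumption: machine memory is reachable through __va() */
        memcpy(dst, (void *)__va(ma), chunk);

        dst   = (char *)dst + chunk;
        va   += chunk;
        size -= chunk;
    }
    return 0;
}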
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_entry.S
--- a/xen/arch/ia64/vmx_entry.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,611 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_entry.S:
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
- * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
- */
-
-#ifndef VCPU_TLB_SHIFT
-#define VCPU_TLB_SHIFT 22
-#endif
-#include <linux/config.h>
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/kregs.h>
-#include <asm/offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-
-#include "vmx_minstate.h"
-
-/*
- * prev_task <- vmx_ia64_switch_to(struct task_struct *next)
- * With Ingo's new scheduler, interrupts are disabled when this routine gets
- * called. The code starting at .map relies on this. The rest of the code
- * doesn't care about the interrupt masking status.
- *
- * Since we allocate the domain stack in the xenheap, there is no need to map
- * the new domain's stack, because all of the xenheap is mapped by a TR. The
- * other task for vmx_ia64_switch_to is to switch to bank0 and update the
- * current pointer.
- */
-GLOBAL_ENTRY(vmx_ia64_switch_to)
- .prologue
- alloc r16=ar.pfs,1,0,0,0
- DO_SAVE_SWITCH_STACK
- .body
-
- bsw.0 // Switch to bank0, because bank0 r21 is current pointer
- ;;
- adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
- movl r25=init_task
- adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0
- ;;
- st8 [r22]=sp // save kernel stack pointer of old task
- ;;
- /*
- * A TR already maps this task's page, so we can skip doing it again.
- */
- ld8 sp=[r26] // load kernel stack pointer of new task
- mov r21=in0 // update "current" application register
- mov r8=r13 // return pointer to previously running task
- mov r13=in0 // set "current" pointer
- ;;
- bsw.1
- ;;
- DO_LOAD_SWITCH_STACK
-
-#ifdef CONFIG_SMP
- sync.i // ensure "fc"s done by this CPU are visible on other CPUs
-#endif
- br.ret.sptk.many rp // boogie on out in new context
-END(vmx_ia64_switch_to)
-
-GLOBAL_ENTRY(ia64_leave_nested)
- rsm psr.i
- ;;
- adds r21=PT(PR)+16,r12
- ;;
-
- lfetch [r21],PT(CR_IPSR)-PT(PR)
- adds r2=PT(B6)+16,r12
- adds r3=PT(R16)+16,r12
- ;;
- lfetch [r21]
- ld8 r28=[r2],8 // load b6
- adds r29=PT(R24)+16,r12
-
- ld8.fill r16=[r3]
- adds r3=PT(AR_CSD)-PT(R16),r3
- adds r30=PT(AR_CCV)+16,r12
- ;;
- ld8.fill r24=[r29]
- ld8 r15=[r30] // load ar.ccv
- ;;
- ld8 r29=[r2],16 // load b7
- ld8 r30=[r3],16 // load ar.csd
- ;;
- ld8 r31=[r2],16 // load ar.ssd
- ld8.fill r8=[r3],16
- ;;
- ld8.fill r9=[r2],16
- ld8.fill r10=[r3],PT(R17)-PT(R10)
- ;;
- ld8.fill r11=[r2],PT(R18)-PT(R11)
- ld8.fill r17=[r3],16
- ;;
- ld8.fill r18=[r2],16
- ld8.fill r19=[r3],16
- ;;
- ld8.fill r20=[r2],16
- ld8.fill r21=[r3],16
- mov ar.csd=r30
- mov ar.ssd=r31
- ;;
- rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
- invala // invalidate ALAT
- ;;
- ld8.fill r22=[r2],24
- ld8.fill r23=[r3],24
- mov b6=r28
- ;;
- ld8.fill r25=[r2],16
- ld8.fill r26=[r3],16
- mov b7=r29
- ;;
- ld8.fill r27=[r2],16
- ld8.fill r28=[r3],16
- ;;
- ld8.fill r29=[r2],16
- ld8.fill r30=[r3],24
- ;;
- ld8.fill r31=[r2],PT(F9)-PT(R31)
- adds r3=PT(F10)-PT(F6),r3
- ;;
- ldf.fill f9=[r2],PT(F6)-PT(F9)
- ldf.fill f10=[r3],PT(F8)-PT(F10)
- ;;
- ldf.fill f6=[r2],PT(F7)-PT(F6)
- ;;
- ldf.fill f7=[r2],PT(F11)-PT(F7)
- ldf.fill f8=[r3],32
- ;;
- srlz.i // ensure interruption collection is off
- mov ar.ccv=r15
- ;;
- bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
- ;;
- ldf.fill f11=[r2]
-// mov r18=r13
-// mov r21=r13
- adds r16=PT(CR_IPSR)+16,r12
- adds r17=PT(CR_IIP)+16,r12
- ;;
- ld8 r29=[r16],16 // load cr.ipsr
- ld8 r28=[r17],16 // load cr.iip
- ;;
- ld8 r30=[r16],16 // load cr.ifs
- ld8 r25=[r17],16 // load ar.unat
- ;;
- ld8 r26=[r16],16 // load ar.pfs
- ld8 r27=[r17],16 // load ar.rsc
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
- ;;
- ld8 r24=[r16],16 // load ar.rnat (may be garbage)
- ld8 r23=[r17],16// load ar.bspstore (may be garbage)
- ;;
- ld8 r31=[r16],16 // load predicates
- ld8 r22=[r17],16 // load b0
- ;;
- ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
- ld8.fill r1=[r17],16 // load r1
- ;;
- ld8.fill r12=[r16],16
- ld8.fill r13=[r17],16
- ;;
- ld8 r20=[r16],16 // ar.fpsr
- ld8.fill r15=[r17],16
- ;;
- ld8.fill r14=[r16],16
- ld8.fill r2=[r17]
- ;;
- ld8.fill r3=[r16]
- ;;
- mov r16=ar.bsp // get existing backing store pointer
- ;;
- mov b0=r22
- mov ar.pfs=r26
- mov cr.ifs=r30
- mov cr.ipsr=r29
- mov ar.fpsr=r20
- mov cr.iip=r28
- ;;
- mov ar.rsc=r27
- mov ar.unat=r25
- mov pr=r31,-1
- rfi
-END(ia64_leave_nested)
-
-
-
-GLOBAL_ENTRY(ia64_leave_hypervisor)
- PT_REGS_UNWIND_INFO(0)
- /*
- * work.need_resched etc. mustn't get changed by this CPU before it returns to
- ;;
- * user- or fsys-mode, hence we disable interrupts early on:
- */
- rsm psr.i
- ;;
- alloc loc0=ar.pfs,0,1,1,0
- adds out0=16,r12
- ;;
- br.call.sptk.many b0=leave_hypervisor_tail
- mov ar.pfs=loc0
- adds r8=IA64_VPD_BASE_OFFSET,r13
- ;;
- ld8 r8=[r8]
- ;;
- adds r9=VPD(VPSR),r8
- ;;
- ld8 r9=[r9]
- ;;
- tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT
- ;;
-(pBN0) add r7=VPD(VBNAT),r8;
-(pBN1) add r7=VPD(VNAT),r8;
- ;;
- ld8 r7=[r7]
- ;;
- mov ar.unat=r7
-(pBN0) add r4=VPD(VBGR),r8;
-(pBN1) add r4=VPD(VGR),r8;
-(pBN0) add r5=VPD(VBGR)+0x8,r8;
-(pBN1) add r5=VPD(VGR)+0x8,r8;
- ;;
- ld8.fill r16=[r4],16
- ld8.fill r17=[r5],16
- ;;
- ld8.fill r18=[r4],16
- ld8.fill r19=[r5],16
- ;;
- ld8.fill r20=[r4],16
- ld8.fill r21=[r5],16
- ;;
- ld8.fill r22=[r4],16
- ld8.fill r23=[r5],16
- ;;
- ld8.fill r24=[r4],16
- ld8.fill r25=[r5],16
- ;;
- ld8.fill r26=[r4],16
- ld8.fill r27=[r5],16
- ;;
- ld8.fill r28=[r4],16
- ld8.fill r29=[r5],16
- ;;
- ld8.fill r30=[r4],16
- ld8.fill r31=[r5],16
- ;;
- bsw.0
- ;;
- mov r18=r8 //vpd
- mov r19=r9 //vpsr
- adds r20=PT(PR)+16,r12
- ;;
- lfetch [r20],PT(CR_IPSR)-PT(PR)
- adds r16=PT(B6)+16,r12
- adds r17=PT(B7)+16,r12
- ;;
- lfetch [r20]
- mov r21=r13 // get current
- ;;
- ld8 r30=[r16],16 // load b6
- ld8 r31=[r17],16 // load b7
- add r20=PT(EML_UNAT)+16,r12
- ;;
- ld8 r29=[r20] //load ar_unat
- mov b6=r30
- mov b7=r31
- ld8 r30=[r16],16 //load ar_csd
- ld8 r31=[r17],16 //load ar_ssd
- ;;
- mov ar.unat=r29
- mov ar.csd=r30
- mov ar.ssd=r31
- ;;
- ld8.fill r8=[r16],16 //load r8
- ld8.fill r9=[r17],16 //load r9
- ;;
- ld8.fill r10=[r16],PT(R1)-PT(R10) //load r10
- ld8.fill r11=[r17],PT(R12)-PT(R11) //load r11
- ;;
- ld8.fill r1=[r16],16 //load r1
- ld8.fill r12=[r17],16 //load r12
- ;;
- ld8.fill r13=[r16],16 //load r13
- ld8 r30=[r17],16 //load ar_fpsr
- ;;
- ld8.fill r15=[r16],16 //load r15
- ld8.fill r14=[r17],16 //load r14
- mov ar.fpsr=r30
- ;;
- ld8.fill r2=[r16],16 //load r2
- ld8.fill r3=[r17],16 //load r3
- ;;
-/*
-(pEml) ld8.fill r4=[r16],16 //load r4
-(pEml) ld8.fill r5=[r17],16 //load r5
- ;;
-(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6
-(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7
- ;;
-(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16
-(pNonEml) adds r17=PT(F7)-PT(R5),r17
- ;;
-*/
- ld8.fill r4=[r16],16 //load r4
- ld8.fill r5=[r17],16 //load r5
- ;;
- ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6
- ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7
- ;;
-
- ld8 r30=[r16],PT(F6)-PT(AR_CCV)
- rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
- ;;
- srlz.i // ensure interruption collection is off
- ;;
- invala // invalidate ALAT
- ;;
- ldf.fill f6=[r16],32
- ldf.fill f7=[r17],32
- ;;
- ldf.fill f8=[r16],32
- ldf.fill f9=[r17],32
- ;;
- ldf.fill f10=[r16]
- ldf.fill f11=[r17]
- ;;
- mov ar.ccv=r30
- adds r16=PT(CR_IPSR)-PT(F10),r16
- adds r17=PT(CR_IIP)-PT(F11),r17
- ;;
- ld8 r31=[r16],16 // load cr.ipsr
- ld8 r30=[r17],16 // load cr.iip
- ;;
- ld8 r29=[r16],16 // load cr.ifs
- ld8 r28=[r17],16 // load ar.unat
- ;;
- ld8 r27=[r16],16 // load ar.pfs
- ld8 r26=[r17],16 // load ar.rsc
- ;;
- ld8 r25=[r16],16 // load ar.rnat (may be garbage)
- ld8 r24=[r17],16// load ar.bspstore (may be garbage)
- ;;
- ld8 r23=[r16],16 // load predicates
- ld8 r22=[r17],PT(RFI_PFS)-PT(B0) // load b0
- ;;
- ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
- ;;
-//rbs_switch
- // loadrs has already been shifted
- alloc r16=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- mov ar.rsc=r20
- ;;
- loadrs
- ;;
- mov ar.bspstore=r24
- ;;
- ld8 r24=[r17] //load rfi_pfs
- mov ar.unat=r28
- mov ar.rnat=r25
- mov ar.rsc=r26
- ;;
- mov cr.ipsr=r31
- mov cr.iip=r30
- mov cr.ifs=r29
- cmp.ne p6,p0=r24,r0
-(p6)br.sptk vmx_dorfirfi
- ;;
-vmx_dorfirfi_back:
- mov ar.pfs=r27
-
-//vsa_sync_write_start
- movl r20=__vsa_base
- ;;
- ld8 r20=[r20] // read entry point
- mov r25=r18
- ;;
- add r16=PAL_VPS_SYNC_WRITE,r20
- movl r24=switch_rr7 // calculate return address
- ;;
- mov b0=r16
- br.cond.sptk b0 // call the service
- ;;
-// switch rr7 and rr5
-switch_rr7:
- adds r24=SWITCH_MRR5_OFFSET, r21
- adds r26=SWITCH_MRR6_OFFSET, r21
- adds r16=SWITCH_MRR7_OFFSET ,r21
- movl r25=(5<<61)
- movl r27=(6<<61)
- movl r17=(7<<61)
- ;;
- ld8 r24=[r24]
- ld8 r26=[r26]
- ld8 r16=[r16]
- ;;
- mov rr[r25]=r24
- mov rr[r27]=r26
- mov rr[r17]=r16
- ;;
- srlz.i
- ;;
- add r24=SWITCH_MPTA_OFFSET, r21
- ;;
- ld8 r24=[r24]
- ;;
- mov cr.pta=r24
- ;;
- srlz.i
- ;;
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- * must be at bank 0
- * parameter:
- * r18:vpd
- * r19:vpsr
- * r20:__vsa_base
- * r22:b0
- * r23:predicate
- */
- mov r24=r22
- mov r25=r18
- tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
- ;;
- (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
- (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
- ;;
- mov pr=r23,-2
- mov b0=r29
- ;;
- br.cond.sptk b0 // call pal service
-END(ia64_leave_hypervisor)
-
-//r24 rfi_pfs
-//r17 address of rfi_pfs
-GLOBAL_ENTRY(vmx_dorfirfi)
- mov r16=ar.ec
- movl r20 = vmx_dorfirfi_back
- ;;
-// clean rfi_pfs
- st8 [r17]=r0
- mov b0=r20
-// pfs.pec=ar.ec
- dep r24 = r16, r24, 52, 6
- ;;
- mov ar.pfs=r24
- ;;
- br.ret.sptk b0
- ;;
-END(vmx_dorfirfi)
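For reference, the "dep r24 = r16, r24, 52, 6" above deposits the low 6 bits of ar.ec into bits 52..57 of the saved ar.pfs value, i.e. the pfs.pec field mentioned in the comment. A C sketch of the same bit manipulation (pfs_set_pec is a hypothetical helper, not part of this patch):

/* Sketch: C equivalent of "dep r24 = r16, r24, 52, 6" -
 * deposit the low 6 bits of 'ec' into bits 52..57 of 'pfs' (pfs.pec). */
static inline unsigned long pfs_set_pec(unsigned long pfs, unsigned long ec)
{
    const unsigned long mask = 0x3fUL << 52;
    return (pfs & ~mask) | ((ec & 0x3fUL) << 52);
}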
-
-
-#define VMX_PURGE_RR7 0
-#define VMX_INSERT_RR7 1
-/*
- * in0: old rr7
- * in1: virtual address of xen image
- * in2: virtual address of vhpt table
- */
-GLOBAL_ENTRY(vmx_purge_double_mapping)
- alloc loc1 = ar.pfs,5,9,0,0
- mov loc0 = rp
- movl r8 = 1f
- ;;
- movl loc4 = KERNEL_TR_PAGE_SHIFT
- movl loc5 = VCPU_TLB_SHIFT
- mov loc6 = psr
- movl loc7 = XEN_RR7_SWITCH_STUB
- mov loc8 = (1<<VMX_PURGE_RR7)
- ;;
- srlz.i
- ;;
- rsm psr.i | psr.ic
- ;;
- srlz.i
- ;;
- mov ar.rsc = 0
- mov b6 = loc7
- mov rp = r8
- ;;
- br.sptk b6
-1:
- mov ar.rsc = 3
- mov rp = loc0
- ;;
- mov psr.l = loc6
- ;;
- srlz.i
- ;;
- br.ret.sptk rp
-END(vmx_purge_double_mapping)
-
-/*
- * in0: new rr7
- * in1: virtual address of xen image
- * in2: virtual address of vhpt table
- * in3: pte entry of xen image
- * in4: pte entry of vhpt table
- */
-GLOBAL_ENTRY(vmx_insert_double_mapping)
- alloc loc1 = ar.pfs,5,9,0,0
- mov loc0 = rp
- movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image
- ;;
- movl loc3 = IA64_TR_VHPT_IN_DOM // TR number for vhpt table
- movl r8 = 1f
- movl loc4 = KERNEL_TR_PAGE_SHIFT
- ;;
- movl loc5 = VCPU_TLB_SHIFT
- mov loc6 = psr
- movl loc7 = XEN_RR7_SWITCH_STUB
- ;;
- srlz.i
- ;;
- rsm psr.i | psr.ic
- mov loc8 = (1<<VMX_INSERT_RR7)
- ;;
- srlz.i
- ;;
- mov ar.rsc = 0
- mov b6 = loc7
- mov rp = r8
- ;;
- br.sptk b6
-1:
- mov ar.rsc = 3
- mov rp = loc0
- ;;
- mov psr.l = loc6
- ;;
- srlz.i
- ;;
- br.ret.sptk rp
-END(vmx_insert_double_mapping)
-
- .align PAGE_SIZE
-/*
- * Stub to add a double mapping for a new domain; it shouldn't
- * access any memory while active. Before reaching this point,
- * both psr.i/ic are cleared and the RSE is set to lazy mode.
- *
- * in0: new rr7
- * in1: virtual address of xen image
- * in2: virtual address of vhpt table
- * in3: pte entry of xen image
- * in4: pte entry of vhpt table
- * loc2: TR number for xen image
- * loc3: TR number for vhpt table
- * loc4: page size for xen image
- * loc5: page size of vhpt table
- * loc7: free to use
- * loc8: purge or insert
- * r8: will contain old rid value
- */
-GLOBAL_ENTRY(vmx_switch_rr7)
- movl loc7 = (7<<61)
- dep.z loc4 = loc4, 2, 6
- dep.z loc5 = loc5, 2, 6
- ;;
- tbit.nz p6,p7=loc8, VMX_INSERT_RR7
- mov r8 = rr[loc7]
- ;;
- mov rr[loc7] = in0
-(p6)mov cr.ifa = in1
-(p6)mov cr.itir = loc4
- ;;
- srlz.i
- ;;
-(p6)itr.i itr[loc2] = in3
-(p7)ptr.i in1, loc4
- ;;
-(p6)itr.d dtr[loc2] = in3
-(p7)ptr.d in1, loc4
- ;;
- srlz.i
- ;;
-(p6)mov cr.ifa = in2
-(p6)mov cr.itir = loc5
- ;;
-(p6)itr.d dtr[loc3] = in4
-(p7)ptr.d in2, loc5
- ;;
- srlz.i
- ;;
- mov rr[loc7] = r8
- ;;
- srlz.i
- br.sptk rp
-END(vmx_switch_rr7)
- .align PAGE_SIZE
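A note on the "dep.z loc4 = loc4, 2, 6" / "dep.z loc5 = loc5, 2, 6" pair in vmx_switch_rr7 above: each places a 6-bit page-size shift into bits 2..7 and zeroes the rest, which is the layout cr.itir expects for its ps field before the itr inserts. A C sketch of that conversion (itir_ps is a hypothetical helper; the ps field position is taken from the IA-64 architecture, not from this patch):

/* Sketch: equivalent of "dep.z rX = page_shift, 2, 6" - build an itir value
 * whose ps field (bits 2..7) holds the page-size shift, all other bits zero. */
static inline unsigned long itir_ps(unsigned long page_shift)
{
    return (page_shift & 0x3fUL) << 2;
}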
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_hypercall.c
--- a/xen/arch/ia64/vmx_hypercall.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,235 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_hypercall.c: handling hypercalls from a domain
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <asm/vmx_vcpu.h>
-#include <public/xen.h>
-#include <public/event_channel.h>
-#include <asm/vmmu.h>
-#include <asm/tlb.h>
-#include <asm/regionreg.h>
-#include <asm/page.h>
-#include <xen/mm.h>
-#include <xen/multicall.h>
-
-
-void hyper_not_support(void)
-{
- VCPU *vcpu=current;
- vmx_vcpu_set_gr(vcpu, 8, -1, 0);
- vmx_vcpu_increment_iip(vcpu);
-}
-
-void hyper_mmu_update(void)
-{
- VCPU *vcpu=current;
- u64 r32,r33,r34,r35,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- vmx_vcpu_get_gr(vcpu,17,&r33);
- vmx_vcpu_get_gr(vcpu,18,&r34);
- vmx_vcpu_get_gr(vcpu,19,&r35);
- ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
- vmx_vcpu_increment_iip(vcpu);
-}
-
-unsigned long __hypercall_create_continuation(
- unsigned int op, unsigned int nr_args, ...)
-{
- struct mc_state *mcs = &mc_state[smp_processor_id()];
- VCPU *vcpu = current;
- struct cpu_user_regs *regs = vcpu_regs(vcpu);
- unsigned int i;
- va_list args;
-
- va_start(args, nr_args);
- if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) {
- panic("PREEMPT happen in multicall\n"); // Not support yet
- } else {
- vmx_vcpu_set_gr(vcpu, 15, op, 0);
- for ( i = 0; i < nr_args; i++) {
- switch (i) {
- case 0: vmx_vcpu_set_gr(vcpu, 16, va_arg(args, unsigned long), 0);
- break;
- case 1: vmx_vcpu_set_gr(vcpu, 17, va_arg(args, unsigned long), 0);
- break;
- case 2: vmx_vcpu_set_gr(vcpu, 18, va_arg(args, unsigned long), 0);
- break;
- case 3: vmx_vcpu_set_gr(vcpu, 19, va_arg(args, unsigned long), 0);
- break;
- case 4: vmx_vcpu_set_gr(vcpu, 20, va_arg(args, unsigned long), 0);
- break;
- default: panic("Too many args for hypercall continuation\n");
- break;
- }
- }
- }
- vcpu->arch.hypercall_continuation = 1;
- va_end(args);
- return op;
-}
-
-void hyper_dom_mem_op(void)
-{
- VCPU *vcpu=current;
- u64 r32,r33,r34,r35,r36;
- u64 ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- vmx_vcpu_get_gr(vcpu,17,&r33);
- vmx_vcpu_get_gr(vcpu,18,&r34);
- vmx_vcpu_get_gr(vcpu,19,&r35);
- vmx_vcpu_get_gr(vcpu,20,&r36);
- ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36);
- printf("do_dom_mem return value: %lx\n", ret);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-
- /* It is hard to define a special return value to indicate a hypercall
- * restart, so just add a new mark, which is SMP safe.
- */
- if (vcpu->arch.hypercall_continuation == 1)
- vcpu->arch.hypercall_continuation = 0;
- else
- vmx_vcpu_increment_iip(vcpu);
-}
-
-
-void hyper_sched_op(void)
-{
- VCPU *vcpu=current;
- u64 r32,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- ret=do_sched_op(r32);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-
- vmx_vcpu_increment_iip(vcpu);
-}
-
-void hyper_dom0_op(void)
-{
- VCPU *vcpu=current;
- u64 r32,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- ret=do_dom0_op((dom0_op_t *)r32);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-
- vmx_vcpu_increment_iip(vcpu);
-}
-
-void hyper_event_channel_op(void)
-{
- VCPU *vcpu=current;
- u64 r32,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- ret=do_event_channel_op((evtchn_op_t *)r32);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
- vmx_vcpu_increment_iip(vcpu);
-}
-
-void hyper_xen_version(void)
-{
- VCPU *vcpu=current;
- u64 r32,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- ret=do_xen_version((int )r32);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
- vmx_vcpu_increment_iip(vcpu);
-}
-
-static int do_lock_page(VCPU *vcpu, u64 va, u64 lock)
-{
- int i;
- ia64_rr rr;
- thash_cb_t *hcb;
- hcb = vmx_vcpu_get_vtlb(vcpu);
- rr = vmx_vcpu_rr(vcpu, va);
- return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock);
-}
-
-/*
- * Lock the guest page in the vTLB, so that it is not relinquished by a
- * recycle session while the HV is servicing that hypercall.
- */
-void hyper_lock_page(void)
-{
-//TODO:
- VCPU *vcpu=current;
- u64 va,lock, ret;
- vmx_vcpu_get_gr(vcpu,16,&va);
- vmx_vcpu_get_gr(vcpu,17,&lock);
- ret=do_lock_page(vcpu, va, lock);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-
- vmx_vcpu_increment_iip(vcpu);
-}
-
-static int do_set_shared_page(VCPU *vcpu, u64 gpa)
-{
- u64 shared_info, o_info;
- struct domain *d = vcpu->domain;
- struct vcpu *v;
- if(vcpu->domain!=dom0)
- return -EPERM;
- shared_info = __gpa_to_mpa(vcpu->domain, gpa);
- o_info = (u64)vcpu->domain->shared_info;
- d->shared_info= (shared_info_t *)__va(shared_info);
-
- /* Copy existing shared info into new page */
- if (o_info) {
- memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE);
- for_each_vcpu(d, v) {
- v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
- }
- /* If the original page belongs to the xen heap, then relinquish it
- * back to the xen heap. Otherwise, leave it to the domain itself to decide.
- */
- if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
- free_xenheap_page(o_info);
- } else
- memset(d->shared_info, 0, PAGE_SIZE);
- return 0;
-}
-
-void hyper_set_shared_page(void)
-{
- VCPU *vcpu=current;
- u64 gpa,ret;
- vmx_vcpu_get_gr(vcpu,16,&gpa);
-
- ret=do_set_shared_page(vcpu, gpa);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-
- vmx_vcpu_increment_iip(vcpu);
-}
-
-/*
-void hyper_grant_table_op(void)
-{
- VCPU *vcpu=current;
- u64 r32,r33,r34,ret;
- vmx_vcpu_get_gr(vcpu,16,&r32);
- vmx_vcpu_get_gr(vcpu,17,&r33);
- vmx_vcpu_get_gr(vcpu,18,&r34);
-
- ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34);
- vmx_vcpu_set_gr(vcpu, 8, ret, 0);
-}
-*/
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_init.c
--- a/xen/arch/ia64/vmx_init.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,375 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_init.c: initialization work for vt specific domain
- * Copyright (c) 2005, Intel Corporation.
- * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx>
- * Fred Yang <fred.yang@xxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- * 05/08/16 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
- * Disable double mapping
- *
- * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
- * Simplified design in first step:
- * - One virtual environment
- * - Domain is bound to one LP
- * Later to support guest SMP:
- * - Need interface to handle VP scheduled to different LP
- */
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <asm/pal.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/vmx_vcpu.h>
-#include <xen/lib.h>
-#include <asm/vmmu.h>
-#include <public/arch-ia64.h>
-#include <public/io/ioreq.h>
-#include <asm/vmx_phy_mode.h>
-#include <asm/processor.h>
-#include <asm/vmx.h>
-#include <xen/mm.h>
-
-/* Global flag to identify whether Intel vmx feature is on */
-u32 vmx_enabled = 0;
-static u32 vm_order;
-static u64 buffer_size;
-static u64 vp_env_info;
-static u64 vm_buffer = 0; /* Buffer required to bring up VMX feature */
-u64 __vsa_base = 0; /* Run-time service base of VMX */
-
-/* Check whether vt feature is enabled or not. */
-void
-identify_vmx_feature(void)
-{
- pal_status_t ret;
- u64 avail = 1, status = 1, control = 1;
-
- vmx_enabled = 0;
- /* Check VT-i feature */
- ret = ia64_pal_proc_get_features(&avail, &status, &control);
- if (ret != PAL_STATUS_SUCCESS) {
- printk("Get proc features failed.\n");
- goto no_vti;
- }
-
- /* FIXME: do we need to check the status field to see whether
- * PSR.vm is actually enabled? If so, another call to
- * ia64_pal_proc_set_features may be required then.
- */
- printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n",
- avail, status, control, avail & PAL_PROC_VM_BIT);
- if (!(avail & PAL_PROC_VM_BIT)) {
- printk("No VT feature supported.\n");
- goto no_vti;
- }
-
- ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
- if (ret != PAL_STATUS_SUCCESS) {
- printk("Get vp environment info failed.\n");
- goto no_vti;
- }
-
- /* Does xen have the ability to decode itself? */
- if (!(vp_env_info & VP_OPCODE))
- printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info);
- vm_order = get_order(buffer_size);
- printk("vm buffer size: %d, order: %d\n", buffer_size, vm_order);
-
- vmx_enabled = 1;
-no_vti:
- return;
-}
-
-/*
- * Init virtual environment on current LP
- * vsa_base indicates whether this is the first LP to be initialized
- * for the current domain.
- */
-void
-vmx_init_env(void)
-{
- u64 status, tmp_base;
-
- if (!vm_buffer) {
- vm_buffer = alloc_xenheap_pages(vm_order);
- ASSERT(vm_buffer);
- printk("vm_buffer: 0x%lx\n", vm_buffer);
- }
-
- status=ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
- __pa(vm_buffer),
- vm_buffer,
- &tmp_base);
-
- if (status != PAL_STATUS_SUCCESS) {
- printk("ia64_pal_vp_init_env failed.\n");
- return -1;
- }
-
- if (!__vsa_base)
- __vsa_base = tmp_base;
- else
- ASSERT(tmp_base != __vsa_base);
-
-#ifdef XEN_DBL_MAPPING
- /* Init stub for rr7 switch */
- vmx_init_double_mapping_stub();
-#endif
-}
-
-void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c)
-{
- struct domain *d = v->domain;
- shared_iopage_t *sp;
-
- ASSERT(d != dom0); /* only for non-privileged vti domain */
- d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
- sp = get_sp(d);
- memset((char *)sp,0,PAGE_SIZE);
- /* FIXME: temp due to old CP */
- sp->sp_global.eport = 2;
-#ifdef V_IOSAPIC_READY
- sp->vcpu_number = 1;
-#endif
- /* TEMP */
- d->arch.vmx_platform.pib_base = 0xfee00000UL;
-
- /* One more step to enable interrupt assist */
- set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
- /* Only open one port for I/O and interrupt emulation */
- if (v == d->vcpu[0]) {
- memset(&d->shared_info->evtchn_mask[0], 0xff,
- sizeof(d->shared_info->evtchn_mask));
- clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
- }
-
- /* FIXME: only a contiguous PMT table is supported so far */
- d->arch.pmt = __va(c->pt_base);
- d->arch.max_pfn = c->pt_max_pfn;
-
- vmx_final_setup_domain(d);
-}
-
-typedef union {
- u64 value;
- struct {
- u64 number : 8;
- u64 revision : 8;
- u64 model : 8;
- u64 family : 8;
- u64 archrev : 8;
- u64 rv : 24;
- };
-} cpuid3_t;
-
-/* Allocate vpd from xenheap */
-static vpd_t *alloc_vpd(void)
-{
- int i;
- cpuid3_t cpuid3;
- vpd_t *vpd;
-
- vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
- if (!vpd) {
- printk("VPD allocation failed.\n");
- return NULL;
- }
-
- printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t));
- memset(vpd, 0, VPD_SIZE);
- /* CPUID init */
- for (i = 0; i < 5; i++)
- vpd->vcpuid[i] = ia64_get_cpuid(i);
-
- /* Limit the CPUID number to 5 */
- cpuid3.value = vpd->vcpuid[3];
- cpuid3.number = 4; /* 5 - 1 */
- vpd->vcpuid[3] = cpuid3.value;
-
- vpd->vdc.d_vmsw = 1;
- return vpd;
-}
-
-
-#ifdef CONFIG_VTI
-/*
- * Create a VP on an initialized VMX environment.
- */
-static void
-vmx_create_vp(struct vcpu *v)
-{
- u64 ret;
- vpd_t *vpd = v->arch.arch_vmx.vpd;
- u64 ivt_base;
- extern char vmx_ia64_ivt;
- /* ia64_ivt is a function pointer, so this translation is needed */
- ivt_base = (u64) &vmx_ia64_ivt;
- printk("ivt_base: 0x%lx\n", ivt_base);
- ret = ia64_pal_vp_create(vpd, ivt_base, 0);
- if (ret != PAL_STATUS_SUCCESS)
- panic("ia64_pal_vp_create failed. \n");
-}
-
-#ifdef XEN_DBL_MAPPING
-void vmx_init_double_mapping_stub(void)
-{
- u64 base, psr;
- extern void vmx_switch_rr7(void);
-
- base = (u64) &vmx_switch_rr7;
- base = *((u64*)base);
-
- psr = ia64_clear_ic();
- ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB,
- pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)),
- RR7_SWITCH_SHIFT);
- ia64_set_psr(psr);
- ia64_srlz_i();
- printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", (u64)(__pa(base)));
-}
-#endif
-
-/* Other non-context related tasks can be done in context switch */
-void
-vmx_save_state(struct vcpu *v)
-{
- u64 status, psr;
- u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
-
- /* FIXME: about setting of pal_proc_vector... time consuming */
- status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0);
- if (status != PAL_STATUS_SUCCESS)
- panic("Save vp status failed\n");
-
-#ifdef XEN_DBL_MAPPING
- /* FIXME: Do we really need to purge the double mapping for the old vcpu?
- * Since the rid is completely different between prev and next,
- * they do not overlap and thus no MCA is possible... */
- dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
- vmx_purge_double_mapping(dom_rr7, KERNEL_START,
- (u64)v->arch.vtlb->ts->vhpt->hash);
-#endif
-
- /* Need to save the KRs on domain switch, though the HV itself doesn't
- * use them.
- */
- v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
- v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
- v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
- v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
- v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
- v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
- v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
- v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
-}
-
-/* Even if the guest is in physical mode, we still need such a double mapping */
-void
-vmx_load_state(struct vcpu *v)
-{
- u64 status, psr;
- u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
- u64 pte_xen, pte_vhpt;
- int i;
-
- status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0);
- if (status != PAL_STATUS_SUCCESS)
- panic("Restore vp status failed\n");
-
-#ifdef XEN_DBL_MAPPING
- dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
- pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
- pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL));
- vmx_insert_double_mapping(dom_rr7, KERNEL_START,
- (u64)v->arch.vtlb->ts->vhpt->hash,
- pte_xen, pte_vhpt);
-#endif
-
- ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
- ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
- ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
- ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
- ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
- ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
- ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
- ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
- /* Guest vTLB is not required to be switched explicitly, since
- * it is anchored in the vcpu */
-}
-
-#ifdef XEN_DBL_MAPPING
-/* Purge old double mapping and insert new one, due to rr7 change */
-void
-vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7)
-{
- u64 pte_xen, pte_vhpt, vhpt_base;
-
- vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash;
- vmx_purge_double_mapping(oldrr7, KERNEL_START,
- vhpt_base);
-
- pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
- pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL));
- vmx_insert_double_mapping(newrr7, KERNEL_START,
- vhpt_base,
- pte_xen, pte_vhpt);
-}
-#endif // XEN_DBL_MAPPING
-#endif // CONFIG_VTI
-
-/*
- * Initialize the VMX environment for the guest. Only the 1st vp/vcpu
- * is registered here.
- */
-void
-vmx_final_setup_domain(struct domain *d)
-{
- struct vcpu *v = d->vcpu[0];
- vpd_t *vpd;
-
- /* Allocate resources for vcpu 0 */
- //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));
-
- vpd = alloc_vpd();
- ASSERT(vpd);
-
- v->arch.arch_vmx.vpd = vpd;
- vpd->virt_env_vaddr = vm_buffer;
-
-#ifdef CONFIG_VTI
- /* v->arch.schedule_tail = arch_vmx_do_launch; */
- vmx_create_vp(v);
-
- /* Set this ed to be vmx */
- set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags);
-
- /* Physical mode emulation initialization, including
- * emulation ID allocation and related memory requests
- */
- physical_mode_init(v);
-
- vlsapic_reset(v);
- vtm_init(v);
-#endif
-
- /* Other vmx specific initialization work */
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_interrupt.c
--- a/xen/arch/ia64/vmx_interrupt.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,388 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_interrupt.c: handle inject interruption.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
- * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-
-#include <xen/types.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/vmx_pal_vsa.h>
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
-void
-collect_interruption(VCPU *vcpu)
-{
- u64 ipsr;
- u64 vdcr;
- u64 vifs;
- IA64_PSR vpsr;
- REGS * regs = vcpu_regs(vcpu);
- vpsr.val = vmx_vcpu_get_psr(vcpu);
-
- if(vpsr.ic){
- extern void vmx_dorfirfi(void);
- if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi)
- panic("COLLECT interruption for vmx_dorfirfi\n");
-
- /* Sync mpsr id/da/dd/ss/ed bits to vipsr
- * since after the guest does rfi, we still want these bits on in
- * mpsr
- */
-
- ipsr = regs->cr_ipsr;
- vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
- | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED));
- vmx_vcpu_set_ipsr(vcpu, vpsr.val);
-
- /* Currently, for trap, we do not advance IIP to next
- * instruction. That's because we assume caller already
- * set up IIP correctly
- */
-
- vmx_vcpu_set_iip(vcpu , regs->cr_iip);
-
- /* set vifs.v to zero */
- vifs = VPD_CR(vcpu,ifs);
- vifs &= ~IA64_IFS_V;
- vmx_vcpu_set_ifs(vcpu, vifs);
-
- vmx_vcpu_set_iipa(vcpu, regs->cr_iipa);
- }
-
- vdcr = VPD_CR(vcpu,dcr);
-
- /* Set guest psr
- * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
- * be: set to the value of dcr.be
- * pp: set to the value of dcr.pp
- */
- vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
- vpsr.val |= ( vdcr & IA64_DCR_BE);
-
- /* VDCR pp bit position is different from VPSR pp bit */
- if ( vdcr & IA64_DCR_PP ) {
- vpsr.val |= IA64_PSR_PP;
- } else {
- vpsr.val &= ~IA64_PSR_PP;;
- }
-
- vmx_vcpu_set_psr(vcpu, vpsr.val);
-
-}
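The INITIAL_PSR_VALUE_AT_INTERRUPTION mask applied above, 0x0000001808028034, keeps exactly the bits the comment lists. Spelled out with the usual IA-64 PSR bit positions (bit numbers come from the architecture manual, not from this patch; shown here only as a cross-check):

/* Bits preserved by "vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION":
 *   bit  2 up, bit  4 mfl, bit  5 mfh, bit 15 pk,
 *   bit 17 dt, bit 27 rt,  bit 35 mc,  bit 36 it
 * which is the up/mfl/mfh/pk/dt/rt/mc/it set named in the comment. */
#define PSR_KEEP_MASK ((1UL <<  2) | (1UL <<  4) | (1UL <<  5) | (1UL << 15) | \
                       (1UL << 17) | (1UL << 27) | (1UL << 35) | (1UL << 36))
/* PSR_KEEP_MASK == 0x0000001808028034 */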
-int
-inject_guest_interruption(VCPU *vcpu, u64 vec)
-{
- u64 viva;
- REGS *regs;
- regs=vcpu_regs(vcpu);
-
- collect_interruption(vcpu);
-
- vmx_vcpu_get_iva(vcpu,&viva);
- regs->cr_iip = viva + vec;
-}
-
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- * set_ifa: if true, set vIFA
- * set_itir: if true, set vITIR
- * set_iha: if true, set vIHA
- */
-void
-set_ifa_itir_iha (VCPU *vcpu, u64 vadr,
- int set_ifa, int set_itir, int set_iha)
-{
- IA64_PSR vpsr;
- u64 value;
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- /* Vol2, Table 8-1 */
- if ( vpsr.ic ) {
- if ( set_ifa){
- vmx_vcpu_set_ifa(vcpu, vadr);
- }
- if ( set_itir) {
- value = vmx_vcpu_get_itir_on_fault(vcpu, vadr);
- vmx_vcpu_set_itir(vcpu, value);
- }
-
- if ( set_iha) {
- vmx_vcpu_thash(vcpu, vadr, &value);
- vmx_vcpu_set_iha(vcpu, value);
- }
- }
-
-
-}
-
-/*
- * Data TLB Fault
- * @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-dtlb_fault (VCPU *vcpu, u64 vadr)
-{
- /* If vPSR.ic, IFA, ITIR, IHA */
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
- inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- * @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-itlb_fault (VCPU *vcpu, u64 vadr)
-{
- /* If vPSR.ic, IFA, ITIR, IHA */
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
- inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR);
-}
-
-
-
-/*
- * Data Nested TLB Fault
- * @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-nested_dtlb (VCPU *vcpu)
-{
- inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- * @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-alt_dtlb (VCPU *vcpu, u64 vadr)
-{
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
- inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR);
-}
-
-
-/*
- * Data TLB Fault
- * @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-alt_itlb (VCPU *vcpu, u64 vadr)
-{
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
- inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- * VHPT Translation Vector
- */
-static void
-_vhpt_fault(VCPU *vcpu, u64 vadr)
-{
- /* If vPSR.ic, IFA, ITIR, IHA*/
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
- inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR);
-
-
-}
-
-/*
- * VHPT Instruction Fault
- * @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-ivhpt_fault (VCPU *vcpu, u64 vadr)
-{
- _vhpt_fault(vcpu, vadr);
-}
-
-
-/*
- * VHPT Data Fault
- * @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-dvhpt_fault (VCPU *vcpu, u64 vadr)
-{
- _vhpt_fault(vcpu, vadr);
-}
-
-
-
-/*
- * Deal with:
- * General Exception vector
- */
-void
-_general_exception (VCPU *vcpu)
-{
- inject_guest_interruption(vcpu,IA64_GENEX_VECTOR);
-}
-
-
-/*
- * Illegal Operation Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-illegal_op (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-illegal_dep (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-rsv_reg_field (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void
-privilege_op (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-
-/*
- * Unimplemented Data Address Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-unimpl_daddr (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- * @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-privilege_reg (VCPU *vcpu)
-{
- _general_exception(vcpu);
-}
-
-/* Deal with
- * Nat consumption vector
- * Parameter:
- * vaddr: Optional, if t == REGISTER
- */
-static void
-_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t)
-{
- /* If vPSR.ic && t == DATA/INST, IFA */
- if ( t == DATA || t == INSTRUCTION ) {
- /* IFA */
- set_ifa_itir_iha (vcpu, vadr, 1, 0, 0);
- }
-
- inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * IR Data Nat Page Consumption Fault
- * @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-static void
-ir_nat_page_consumption (VCPU *vcpu, u64 vadr)
-{
- _nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- * @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-inat_page_consumption (VCPU *vcpu, u64 vadr)
-{
- _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- * @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-rnat_consumption (VCPU *vcpu)
-{
- _nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- * @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void
-dnat_page_consumption (VCPU *vcpu, uint64_t vadr)
-{
- _nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- * Page not present vector
- */
-void
-page_not_present(VCPU *vcpu, u64 vadr)
-{
- /* If vPSR.ic, IFA, ITIR */
- set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
- inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_irq_ia64.c
--- a/xen/arch/ia64/vmx_irq_ia64.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,127 +0,0 @@
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/jiffies.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/kernel_stat.h>
-#include <linux/slab.h>
-#include <linux/ptrace.h>
-#include <linux/random.h> /* for rand_initialize_irq() */
-#include <linux/signal.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/threads.h>
-#include <linux/bitops.h>
-
-#include <asm/delay.h>
-#include <asm/intrinsics.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/machvec.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_PERFMON
-# include <asm/perfmon.h>
-#endif
-
-#define IRQ_DEBUG 0
-
-#ifdef CONFIG_VTI
-#define vmx_irq_enter() \
- add_preempt_count(HARDIRQ_OFFSET);
-
-/* Now softirq will be checked when leaving hypervisor, or else
- * scheduler irq will be executed too early.
- */
-#define vmx_irq_exit(void) \
- sub_preempt_count(HARDIRQ_OFFSET);
-/*
- * That's where the IVT branches when we get an external
- * interrupt. This branches to the correct hardware IRQ handler via
- * function ptr.
- */
-void
-vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
-{
- unsigned long saved_tpr;
- int wake_dom0 = 0;
-
-
-#if IRQ_DEBUG
- {
- unsigned long bsp, sp;
-
- /*
- * Note: if the interrupt happened while executing in
- * the context switch routine (ia64_switch_to), we may
- * get a spurious stack overflow here. This is
- * because the register and the memory stack are not
- * switched atomically.
- */
- bsp = ia64_getreg(_IA64_REG_AR_BSP);
- sp = ia64_getreg(_IA64_REG_AR_SP);
-
- if ((sp - bsp) < 1024) {
- static unsigned char count;
- static long last_time;
-
- if (jiffies - last_time > 5*HZ)
- count = 0;
- if (++count < 5) {
- last_time = jiffies;
- printk("ia64_handle_irq: DANGER: less than "
- "1KB of free stack space!!\n"
- "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
- }
- }
- }
-#endif /* IRQ_DEBUG */
-
- /*
- * Always set TPR to limit maximum interrupt nesting depth to
- * 16 (without this, it would be ~240, which could easily lead
- * to kernel stack overflows).
- */
- vmx_irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
- while (vector != IA64_SPURIOUS_INT_VECTOR) {
- if (!IS_RESCHEDULE(vector)) {
- ia64_setreg(_IA64_REG_CR_TPR, vector);
- ia64_srlz_d();
-
- if (vector != IA64_TIMER_VECTOR) {
- /* FIXME: Leave IRQ re-route later */
- vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
- wake_dom0 = 1;
- }
- else { // FIXME: Handle Timer only now
- __do_IRQ(local_vector_to_irq(vector), regs);
- }
-
- /*
- * Disable interrupts and send EOI:
- */
- local_irq_disable();
- ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
- }
- else {
- printf("Oops: RESCHEDULE IPI absorbed by HV\n");
- }
- ia64_eoi();
- vector = ia64_get_ivr();
- }
- /*
- * This must be done *after* the ia64_eoi(). For example, the keyboard softirq
- * handler needs to be able to wait for further keyboard interrupts, which can't
- * come through until ia64_eoi() has been done.
- */
- vmx_irq_exit();
- if ( wake_dom0 && current != dom0 )
- vcpu_wake(dom0->vcpu[0]);
-}
-#endif
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_ivt.S
--- a/xen/arch/ia64/vmx_ivt.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1085 +0,0 @@
-/*
- * arch/ia64/kernel/vmx_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- * Stephane Eranian <eranian@xxxxxxxxxx>
- * David Mosberger <davidm@xxxxxxxxxx>
- * Copyright (C) 2000, 2002-2003 Intel Co
- * Asit Mallick <asit.k.mallick@xxxxxxxxx>
- * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
- * Kenneth Chen <kenneth.w.chen@xxxxxxxxx>
- * Fenghua Yu <fenghua.yu@xxxxxxxxx>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP
- * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT.
- *
- * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
- * Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for critical
- * interruptions like TLB misses.
- *
- * For each entry, the comment is as follows:
- *
- * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- * entry offset ----/ / / / /
- * entry number ---------/ / / /
- * size of the entry -------------/ / /
- * vector name -------------------------------------/ /
- * interruptions triggering this vector ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
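The sizes quoted in the comment are easy to verify: an IA-64 bundle is 16 bytes, so the first 20 entries take 64*16 = 0x400 bytes each and the remaining 48 entries 16*16 = 0x100 bytes each, giving 20*0x400 + 48*0x100 = 0x8000 bytes = 32KB; that is also why entry 7 sits at 0x1c00 and entry 20 at 0x5000 in the .org directives below. A small informational sketch of that layout (ivt_entry_offset is hypothetical, used only to restate the arithmetic):

/* Sketch: byte offset of IVT entry 'n', matching the .org directives below.
 * Entries 0..19 are 64 bundles (0x400 bytes); entries 20..67 are 16 bundles
 * (0x100 bytes); the table therefore ends at 0x8000 = 32KB. */
static unsigned long ivt_entry_offset(unsigned int n)
{
    return (n < 20) ? n * 0x400UL
                    : 20 * 0x400UL + (n - 20) * 0x100UL;
}
/* ivt_entry_offset(7) == 0x1c00, ivt_entry_offset(20) == 0x5000 */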
-#include <linux/config.h>
-
-#include <asm/asmmacro.h>
-#include <asm/break.h>
-#include <asm/ia32.h>
-#include <asm/kregs.h>
-#include <asm/offsets.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/vhpt.h>
-
-
-#if 0
- /*
- * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
- * needed for something else before enabling this...
- */
-# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
-#else
-# define VMX_DBG_FAULT(i)
-#endif
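When the #if 0 branch above is enabled, VMX_DBG_FAULT shifts ar.k2 left by 8 and adds the vector number, so ar.k2 ends up holding the last eight fault numbers, one per byte, with the most recent in the low byte. A sketch of reading that history back (decode_fault_history is a hypothetical helper, shown in C only for clarity):

/* Sketch: unpack the eight 8-bit fault numbers that VMX_DBG_FAULT packs
 * into ar.k2; out[0] receives the most recent fault vector. */
static void decode_fault_history(unsigned long k2, unsigned char out[8])
{
    int i;
    for (i = 0; i < 8; i++) {
        out[i] = k2 & 0xff;
        k2 >>= 8;
    }
}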
-
-#include "vmx_minstate.h"
-
-
-
-#define VMX_FAULT(n) \
-vmx_fault_##n:; \
- br.sptk vmx_fault_##n; \
- ;; \
-
-
-#define VMX_REFLECT(n) \
- mov r31=pr; \
- mov r19=n; /* prepare to save predicates */ \
- mov r29=cr.ipsr; \
- ;; \
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
-(p7) br.sptk.many vmx_dispatch_reflection; \
- VMX_FAULT(n); \
-
-
-GLOBAL_ENTRY(vmx_panic)
- br.sptk.many vmx_panic
- ;;
-END(vmx_panic)
-
-
-
-
-
- .section .text.ivt,"ax"
-
- .align 32768 // align on 32KB boundary
- .global vmx_ia64_ivt
-vmx_ia64_ivt:
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(vmx_vhpt_miss)
- VMX_FAULT(0)
-END(vmx_vhpt_miss)
-
- .org vmx_ia64_ivt+0x400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(vmx_itlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6) br.sptk vmx_fault_1
- mov r16 = cr.ifa
- ;;
- thash r17 = r16
- ttag r20 = r16
- ;;
-vmx_itlb_loop:
- cmp.eq p6,p0 = r0, r17
-(p6) br vmx_itlb_out
- ;;
- adds r22 = VLE_TITAG_OFFSET, r17
- adds r23 = VLE_CCHAIN_OFFSET, r17
- ;;
- ld8 r24 = [r22]
- ld8 r25 = [r23]
- ;;
- lfetch [r25]
- cmp.eq p6,p7 = r20, r24
- ;;
-(p7) mov r17 = r25;
-(p7) br.sptk vmx_itlb_loop
- ;;
- adds r23 = VLE_PGFLAGS_OFFSET, r17
- adds r24 = VLE_ITIR_OFFSET, r17
- ;;
- ld8 r26 = [r23]
- ld8 r25 = [r24]
- ;;
- mov cr.itir = r25
- ;;
- itc.i r26
- ;;
- srlz.i
- ;;
- mov r23=r31
- mov r22=b0
- adds r16=IA64_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r16]
- ;;
- adds r19=VPD(VPSR),r18
- movl r20=__vsa_base
- ;;
- ld8 r19=[r19]
- ld8 r20=[r20]
- ;;
- br.sptk ia64_vmm_entry
- ;;
-vmx_itlb_out:
- mov r19 = 1
- br.sptk vmx_dispatch_tlb_miss
- VMX_FAULT(1);
-END(vmx_itlb_miss)
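vmx_itlb_loop above (and vmx_dtlb_loop below) walks the guest VHPT collision chain: it compares the tag stored at VLE_TITAG_OFFSET with the ttag of the faulting address and follows the pointer at VLE_CCHAIN_OFFSET until it either finds a match, whose pgflags/itir it then inserts with itc, or reaches a NULL link and falls out to vmx_dispatch_tlb_miss. A C-level sketch of the same walk; the struct and function names are hypothetical stand-ins for the VLE_* offsets:

/* Sketch of the collision-chain lookup performed by vmx_itlb_loop/vmx_dtlb_loop. */
struct vhpt_long_entry {
    unsigned long pgflags;              /* VLE_PGFLAGS_OFFSET: pte to insert   */
    unsigned long itir;                 /* VLE_ITIR_OFFSET: page size etc.     */
    unsigned long titag;                /* VLE_TITAG_OFFSET: translation tag   */
    struct vhpt_long_entry *cchain;     /* VLE_CCHAIN_OFFSET: collision chain  */
};

static struct vhpt_long_entry *
vhpt_chain_lookup(struct vhpt_long_entry *head, unsigned long tag)
{
    struct vhpt_long_entry *e = head;   /* head comes from thash(ifa)          */

    while (e != NULL) {
        if (e->titag == tag)            /* tag comes from ttag(ifa)            */
            return e;                   /* hit: caller inserts pgflags/itir    */
        e = e->cchain;
    }
    return NULL;                        /* miss: go to vmx_dispatch_tlb_miss   */
}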
-
- .org vmx_ia64_ivt+0x0800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(vmx_dtlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)br.sptk vmx_fault_2
- mov r16 = cr.ifa
- ;;
- thash r17 = r16
- ttag r20 = r16
- ;;
-vmx_dtlb_loop:
- cmp.eq p6,p0 = r0, r17
-(p6)br vmx_dtlb_out
- ;;
- adds r22 = VLE_TITAG_OFFSET, r17
- adds r23 = VLE_CCHAIN_OFFSET, r17
- ;;
- ld8 r24 = [r22]
- ld8 r25 = [r23]
- ;;
- lfetch [r25]
- cmp.eq p6,p7 = r20, r24
- ;;
-(p7)mov r17 = r25;
-(p7)br.sptk vmx_dtlb_loop
- ;;
- adds r23 = VLE_PGFLAGS_OFFSET, r17
- adds r24 = VLE_ITIR_OFFSET, r17
- ;;
- ld8 r26 = [r23]
- ld8 r25 = [r24]
- ;;
- mov cr.itir = r25
- ;;
- itc.d r26
- ;;
- srlz.d;
- ;;
- mov r23=r31
- mov r22=b0
- adds r16=IA64_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r16]
- ;;
- adds r19=VPD(VPSR),r18
- movl r20=__vsa_base
- ;;
- ld8 r19=[r19]
- ld8 r20=[r20]
- ;;
- br.sptk ia64_vmm_entry
- ;;
-vmx_dtlb_out:
- mov r19 = 2
- br.sptk vmx_dispatch_tlb_miss
- VMX_FAULT(2);
-END(vmx_dtlb_miss)
-
- .org vmx_ia64_ivt+0x0c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(vmx_alt_itlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p7)br.sptk vmx_fault_3
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r24=cr.ipsr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- shr.u r18=r16,55 // move address bit 59 to bit 4
- ;;
- and r18=0x10,r18 // bit 4=address-bit(61)
- or r19=r17,r19 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
- ;;
- itc.i r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
- VMX_FAULT(3);
-END(vmx_alt_itlb_miss)
-
-
- .org vmx_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(vmx_alt_dtlb_miss)
- mov r31=pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p7)br.sptk vmx_fault_4
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r20=cr.isr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r24=cr.ipsr
- ;;
- and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
- tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
- shr.u r18=r16,55 // move address bit 59 to bit 4
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
- ;;
- and r18=0x10,r18 // bit 4=address-bit(61)
-(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
- dep r24=-1,r24,IA64_PSR_ED_BIT,1
- or r19=r19,r17 // insert PTE control bits into r19
- ;;
- or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
-(p6) mov cr.ipsr=r24
- ;;
-(p7) itc.d r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
- VMX_FAULT(4);
-END(vmx_alt_dtlb_miss)
-
- .org vmx_ia64_ivt+0x1400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(vmx_nested_dtlb_miss)
- VMX_FAULT(5)
-END(vmx_nested_dtlb_miss)
-
- .org vmx_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(vmx_ikey_miss)
- VMX_REFLECT(6)
-END(vmx_ikey_miss)
-
- .org vmx_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(vmx_dkey_miss)
- VMX_REFLECT(7)
-END(vmx_dkey_miss)
-
- .org vmx_ia64_ivt+0x2000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(vmx_dirty_bit)
- VMX_REFLECT(8)
-END(vmx_idirty_bit)
-
- .org vmx_ia64_ivt+0x2400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(vmx_iaccess_bit)
- VMX_REFLECT(9)
-END(vmx_iaccess_bit)
-
- .org vmx_ia64_ivt+0x2800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(vmx_daccess_bit)
- VMX_REFLECT(10)
-END(vmx_daccess_bit)
-
- .org vmx_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(vmx_break_fault)
- mov r31=pr
- mov r19=11
- mov r30=cr.iim
- movl r29=0x1100
- ;;
- cmp.eq p6,p7=r30,r0
- (p6) br.sptk vmx_fault_11
- ;;
- cmp.eq p6,p7=r29,r30
- (p6) br.dptk.few vmx_hypercall_dispatch
- (p7) br.sptk.many vmx_dispatch_break_fault
- ;;
- VMX_FAULT(11);
-END(vmx_break_fault)
-
- .org vmx_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(vmx_interrupt)
- mov r31=pr // prepare to save predicates
- mov r19=12
- mov r29=cr.ipsr
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT
- tbit.z p0,p15=r29,IA64_PSR_I_BIT
- ;;
-(p7) br.sptk vmx_dispatch_interrupt
- ;;
- mov r27=ar.rsc /* M */
- mov r20=r1 /* A */
- mov r25=ar.unat /* M */
- mov r26=ar.pfs /* I */
- mov r28=cr.iip /* M */
- cover /* B (or nothing) */
- ;;
- mov r1=sp
- ;;
- invala /* M */
- mov r30=cr.ifs
- ;;
- addl r1=-IA64_PT_REGS_SIZE,r1
- ;;
- adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
- adds r16=PT(CR_IPSR),r1
- ;;
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
- st8 [r16]=r29 /* save cr.ipsr */
- ;;
- lfetch.fault.excl.nt1 [r17]
- mov r29=b0
- ;;
- adds r16=PT(R8),r1 /* initialize first base pointer */
- adds r17=PT(R9),r1 /* initialize second base pointer */
- mov r18=r0 /* make sure r18 isn't NaT */
- ;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
- ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
- ;;
- st8 [r16]=r28,16 /* save cr.iip */
- st8 [r17]=r30,16 /* save cr.ifs */
- mov r8=ar.fpsr /* M */
- mov r9=ar.csd
- mov r10=ar.ssd
- movl r11=FPSR_DEFAULT /* L-unit */
- ;;
- st8 [r16]=r25,16 /* save ar.unat */
- st8 [r17]=r26,16 /* save ar.pfs */
- shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
- ;;
- st8 [r16]=r27,16 /* save ar.rsc */
- adds r17=16,r17 /* skip over ar_rnat field */
- ;; /* avoid RAW on r16 & r17 */
- st8 [r17]=r31,16 /* save predicates */
- adds r16=16,r16 /* skip over ar_bspstore field */
- ;;
- st8 [r16]=r29,16 /* save b0 */
- st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
- ;;
-.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
- adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */
- ;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
- mov r13=r21 /* establish `current' */
- ;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
- dep r14=-1,r0,60,4
- ;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
- adds r2=IA64_PT_REGS_R16_OFFSET,r1
- ;;
- mov r8=ar.ccv
- movl r1=__gp /* establish kernel global pointer */
- ;; \
- bsw.1
- ;;
- alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
- mov out0=cr.ivr // pass cr.ivr as first arg
- add out1=16,sp // pass pointer to pt_regs as second arg
-
- ssm psr.ic
- ;;
- srlz.i
- ;;
- (p15) ssm psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
- mov r18=b6
- ;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
- mov r19=b7
- ;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
- ;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
- ;;
- mov ar.fpsr=r11 /* M-unit */
- st8 [r2]=r8,8 /* ar.ccv */
- adds r24=PT(B6)-PT(F7),r3
- ;;
- stf.spill [r2]=f6,32
- stf.spill [r3]=f7,32
- ;;
- stf.spill [r2]=f8,32
- stf.spill [r3]=f9,32
- ;;
- stf.spill [r2]=f10
- stf.spill [r3]=f11
- adds r25=PT(B7)-PT(F11),r3
- ;;
- st8 [r24]=r18,16 /* b6 */
- st8 [r25]=r19,16 /* b7 */
- ;;
- st8 [r24]=r9 /* ar.csd */
- st8 [r25]=r10 /* ar.ssd */
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- movl r14=ia64_leave_nested
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_ia64_handle_irq
- ;;
-END(vmx_interrupt)
-
- .org vmx_ia64_ivt+0x3400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(vmx_virtual_exirq)
- VMX_DBG_FAULT(13)
- mov r31=pr
- mov r19=13
- br.sptk vmx_dispatch_vexirq
-END(vmx_virtual_exirq)
-
- .org vmx_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
- VMX_DBG_FAULT(14)
- VMX_FAULT(14)
-
-
- .org vmx_ia64_ivt+0x3c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
- VMX_DBG_FAULT(15)
- VMX_FAULT(15)
-
-
- .org vmx_ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
- VMX_DBG_FAULT(16)
- VMX_FAULT(16)
-
- .org vmx_ia64_ivt+0x4400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
- VMX_DBG_FAULT(17)
- VMX_FAULT(17)
-
- .org vmx_ia64_ivt+0x4800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
- VMX_DBG_FAULT(18)
- VMX_FAULT(18)
-
- .org vmx_ia64_ivt+0x4c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
- VMX_DBG_FAULT(19)
- VMX_FAULT(19)
-
- .org vmx_ia64_ivt+0x5000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(vmx_page_not_present)
- VMX_REFLECT(20)
-END(vmx_page_not_present)
-
- .org vmx_ia64_ivt+0x5100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(vmx_key_permission)
- VMX_REFLECT(21)
-END(vmx_key_permission)
-
- .org vmx_ia64_ivt+0x5200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(vmx_iaccess_rights)
- VMX_REFLECT(22)
-END(vmx_iaccess_rights)
-
- .org vmx_ia64_ivt+0x5300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(vmx_daccess_rights)
- VMX_REFLECT(23)
-END(vmx_daccess_rights)
-
- .org vmx_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(vmx_general_exception)
- VMX_FAULT(24)
-// VMX_REFLECT(24)
-END(vmx_general_exception)
-
- .org vmx_ia64_ivt+0x5500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(vmx_disabled_fp_reg)
- VMX_REFLECT(25)
-END(vmx_disabled_fp_reg)
-
- .org vmx_ia64_ivt+0x5600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(vmx_nat_consumption)
- VMX_REFLECT(26)
-END(vmx_nat_consumption)
-
- .org vmx_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(vmx_speculation_vector)
- VMX_REFLECT(27)
-END(vmx_speculation_vector)
-
- .org vmx_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
- VMX_DBG_FAULT(28)
- VMX_FAULT(28)
-
- .org vmx_ia64_ivt+0x5900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(vmx_debug_vector)
- VMX_DBG_FAULT(29)
- VMX_FAULT(29)
-END(vmx_debug_vector)
-
- .org vmx_ia64_ivt+0x5a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(vmx_unaligned_access)
- VMX_REFLECT(30)
-END(vmx_unaligned_access)
-
- .org vmx_ia64_ivt+0x5b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(vmx_unsupported_data_reference)
- VMX_REFLECT(31)
-END(vmx_unsupported_data_reference)
-
- .org vmx_ia64_ivt+0x5c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
-ENTRY(vmx_floating_point_fault)
- VMX_REFLECT(32)
-END(vmx_floating_point_fault)
-
- .org vmx_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(vmx_floating_point_trap)
- VMX_REFLECT(33)
-END(vmx_floating_point_trap)
-
- .org vmx_ia64_ivt+0x5e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(vmx_lower_privilege_trap)
- VMX_REFLECT(34)
-END(vmx_lower_privilege_trap)
-
- .org vmx_ia64_ivt+0x5f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(vmx_taken_branch_trap)
- VMX_REFLECT(35)
-END(vmx_taken_branch_trap)
-
- .org vmx_ia64_ivt+0x6000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(vmx_single_step_trap)
- VMX_REFLECT(36)
-END(vmx_single_step_trap)
-
- .org vmx_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(vmx_virtualization_fault)
- VMX_DBG_FAULT(37)
- mov r31=pr
- mov r19=37
- br.sptk vmx_dispatch_virtualization_fault
-END(vmx_virtualization_fault)
-
- .org vmx_ia64_ivt+0x6200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
- VMX_DBG_FAULT(38)
- VMX_FAULT(38)
-
- .org vmx_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
- VMX_DBG_FAULT(39)
- VMX_FAULT(39)
-
- .org vmx_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
- VMX_DBG_FAULT(40)
- VMX_FAULT(40)
-
- .org vmx_ia64_ivt+0x6500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
- VMX_DBG_FAULT(41)
- VMX_FAULT(41)
-
- .org vmx_ia64_ivt+0x6600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
- VMX_DBG_FAULT(42)
- VMX_FAULT(42)
-
- .org vmx_ia64_ivt+0x6700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
- VMX_DBG_FAULT(43)
- VMX_FAULT(43)
-
- .org vmx_ia64_ivt+0x6800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
- VMX_DBG_FAULT(44)
- VMX_FAULT(44)
-
- .org vmx_ia64_ivt+0x6900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(vmx_ia32_exception)
- VMX_DBG_FAULT(45)
- VMX_FAULT(45)
-END(vmx_ia32_exception)
-
- .org vmx_ia64_ivt+0x6a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
-ENTRY(vmx_ia32_intercept)
- VMX_DBG_FAULT(46)
- VMX_FAULT(46)
-END(vmx_ia32_intercept)
-
- .org vmx_ia64_ivt+0x6b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
-ENTRY(vmx_ia32_interrupt)
- VMX_DBG_FAULT(47)
- VMX_FAULT(47)
-END(vmx_ia32_interrupt)
-
- .org vmx_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
- VMX_DBG_FAULT(48)
- VMX_FAULT(48)
-
- .org vmx_ia64_ivt+0x6d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
- VMX_DBG_FAULT(49)
- VMX_FAULT(49)
-
- .org vmx_ia64_ivt+0x6e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
- VMX_DBG_FAULT(50)
- VMX_FAULT(50)
-
- .org vmx_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
- VMX_DBG_FAULT(51)
- VMX_FAULT(51)
-
- .org vmx_ia64_ivt+0x7000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7000 Entry 52 (size 16 bundles) Reserved
- VMX_DBG_FAULT(52)
- VMX_FAULT(52)
-
- .org vmx_ia64_ivt+0x7100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
- VMX_DBG_FAULT(53)
- VMX_FAULT(53)
-
- .org vmx_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
- VMX_DBG_FAULT(54)
- VMX_FAULT(54)
-
- .org vmx_ia64_ivt+0x7300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
- VMX_DBG_FAULT(55)
- VMX_FAULT(55)
-
- .org vmx_ia64_ivt+0x7400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
- VMX_DBG_FAULT(56)
- VMX_FAULT(56)
-
- .org vmx_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
- VMX_DBG_FAULT(57)
- VMX_FAULT(57)
-
- .org vmx_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
- VMX_DBG_FAULT(58)
- VMX_FAULT(58)
-
- .org vmx_ia64_ivt+0x7700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
- VMX_DBG_FAULT(59)
- VMX_FAULT(59)
-
- .org vmx_ia64_ivt+0x7800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
- VMX_DBG_FAULT(60)
- VMX_FAULT(60)
-
- .org vmx_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
- VMX_DBG_FAULT(61)
- VMX_FAULT(61)
-
- .org vmx_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
- VMX_DBG_FAULT(62)
- VMX_FAULT(62)
-
- .org vmx_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
- VMX_DBG_FAULT(63)
- VMX_FAULT(63)
-
- .org vmx_ia64_ivt+0x7c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
- VMX_DBG_FAULT(64)
- VMX_FAULT(64)
-
- .org vmx_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
- VMX_DBG_FAULT(65)
- VMX_FAULT(65)
-
- .org vmx_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
- VMX_DBG_FAULT(66)
- VMX_FAULT(66)
-
- .org vmx_ia64_ivt+0x7f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
- VMX_DBG_FAULT(67)
- VMX_FAULT(67)
-
- .org vmx_ia64_ivt+0x8000
- // There is no particular reason for this code to be here, other than that
- // there happens to be space here that would go unused otherwise. If this
- // fault ever gets "unreserved", simply move the following code to a more
- // suitable spot...
-
-
-ENTRY(vmx_dispatch_reflection)
- /*
- * Input:
- * psr.ic: off
- * r19: intr type (offset into ivt, see ia64_int.h)
- * r31: contains saved predicates (pr)
- */
- VMX_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,4,0
- mov out0=cr.ifa
- mov out1=cr.isr
- mov out2=cr.iim
- mov out3=r15
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15) ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_reflect_interruption
-END(vmx_dispatch_reflection)
-
-ENTRY(vmx_dispatch_virtualization_fault)
- VMX_SAVE_MIN_WITH_COVER_R19
- ;;
- alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!)
- mov out0=r13 //vcpu
- mov out1=r4 //cause
- mov out2=r5 //opcode
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15) ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_emulate
-END(vmx_dispatch_virtualization_fault)
-
-
-ENTRY(vmx_dispatch_vexirq)
- VMX_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,1,0
- mov out0=r13
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15) ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_vexirq
-END(vmx_dispatch_vexirq)
-
-ENTRY(vmx_dispatch_tlb_miss)
- VMX_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,3,0
- mov out0=r13
- mov out1=r15
- mov out2=cr.ifa
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15) ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_hpw_miss
-END(vmx_dispatch_tlb_miss)
-
-
-ENTRY(vmx_dispatch_break_fault)
- VMX_SAVE_MIN_WITH_COVER_R19
- ;;
- ;;
- alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- adds out1=16,sp
- mov out2=cr.isr // FIXME: pity to make this slow access twice
- mov out3=cr.iim // FIXME: pity to make this slow access twice
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15)ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_ia64_handle_break
- ;;
-END(vmx_dispatch_break_fault)
-
-
-ENTRY(vmx_hypercall_dispatch)
- VMX_SAVE_MIN_WITH_COVER
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- (p15) ssm psr.i // restore psr.i
- adds r3=16,r2 // set up second base pointer
- ;;
- VMX_SAVE_REST
- ;;
- movl r14=ia64_leave_hypervisor
- movl r2=hyper_call_table
- ;;
- mov rp=r14
- shladd r2=r15,3,r2
- ;;
- ld8 r2=[r2]
- ;;
- mov b6=r2
- ;;
- br.call.sptk.many b6=b6
- ;;
-END(vmx_hypercall_dispatch)
-
-
-
-ENTRY(vmx_dispatch_interrupt)
- VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
- ;;
- alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
- mov out0=cr.ivr // pass cr.ivr as first arg
- add out1=16,sp // pass pointer to pt_regs as second arg
-
- ssm psr.ic
- ;;
- srlz.i
- ;;
- (p15) ssm psr.i
- adds r3=16,r2 // set up second base pointer for SAVE_REST
- ;;
- VMX_SAVE_REST
- movl r14=ia64_leave_hypervisor
- ;;
- mov rp=r14
- br.call.sptk.many b6=vmx_ia64_handle_irq
-END(vmx_dispatch_interrupt)
-
-
-
- .rodata
- .align 8
- .globl hyper_call_table
-hyper_call_table:
- data8 hyper_not_support //hyper_set_trap_table /* 0 */
- data8 hyper_mmu_update
- data8 hyper_not_support //hyper_set_gdt
- data8 hyper_not_support //hyper_stack_switch
- data8 hyper_not_support //hyper_set_callbacks
- data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */
- data8 hyper_sched_op
- data8 hyper_dom0_op
- data8 hyper_not_support //hyper_set_debugreg
- data8 hyper_not_support //hyper_get_debugreg
- data8 hyper_not_support //hyper_update_descriptor /* 10 */
- data8 hyper_not_support //hyper_set_fast_trap
- data8 hyper_dom_mem_op
- data8 hyper_not_support //hyper_multicall
- data8 hyper_not_support //hyper_update_va_mapping
- data8 hyper_not_support //hyper_set_timer_op /* 15 */
- data8 hyper_event_channel_op
- data8 hyper_xen_version
- data8 hyper_not_support //hyper_console_io
- data8 hyper_not_support //hyper_physdev_op
- data8 hyper_not_support //hyper_grant_table_op /* 20 */
- data8 hyper_not_support //hyper_vm_assist
- data8 hyper_not_support //hyper_update_va_mapping_otherdomain
- data8 hyper_not_support //hyper_switch_vm86
- data8 hyper_not_support //hyper_boot_vcpu
- data8 hyper_not_support //hyper_ni_hypercall /* 25 */
- data8 hyper_not_support //hyper_mmuext_op
- data8 hyper_lock_page
- data8 hyper_set_shared_page
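
For reference, vmx_hypercall_dispatch above converts the hypercall number in r15 into a table entry with shladd r2=r15,3,r2 (an 8-byte-stride index into hyper_call_table) and then branches through b6. A minimal C sketch of the same lookup follows; the stub name is hypothetical and only the indexing mirrors the assembly:

typedef long (*hypercall_fn_t)(void);

static long hyper_not_support_stub(void)      /* hypothetical stand-in */
{
    return -1;
}

static hypercall_fn_t hyper_call_table_c[] = {
    hyper_not_support_stub,    /* 0: set_trap_table (unsupported) */
    hyper_not_support_stub,    /* 1: mmu_update -> hyper_mmu_update in the real table */
    /* ... one entry per data8 line above, 29 in total ... */
};

static long dispatch_hypercall(unsigned long r15)
{
    /* shladd r2=r15,3,r2 computes &hyper_call_table[0] + (r15 << 3),
     * i.e. hyper_call_table[r15]; the br.call through b6 is the call below. */
    return hyper_call_table_c[r15]();
}
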
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_minstate.h
--- a/xen/arch/ia64/vmx_minstate.h Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,333 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_minstate.h:
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <linux/config.h>
-
-#include <asm/asmmacro.h>
-#include <asm/fpu.h>
-#include <asm/mmu_context.h>
-#include <asm/offsets.h>
-#include <asm/pal.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/vmx_pal_vsa.h>
-#include <asm/vmx_vpd.h>
-#include <asm/cache.h>
-#include "entry.h"
-
-#define VMX_MINSTATE_START_SAVE_MIN \
- mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
- ;; \
- mov.m r28=ar.rnat; \
- addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
- ;; \
- lfetch.fault.excl.nt1 [r22]; \
- addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
- mov r23=ar.bspstore; /* save ar.bspstore */ \
- ;; \
- mov ar.bspstore=r22; /* switch to kernel RBS */ \
- ;; \
- mov r18=ar.bsp; \
- mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
-
-
-
-#define VMX_MINSTATE_END_SAVE_MIN \
- bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
- ;;
-
-
-#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
- /* begin to call pal vps sync_read and cleanup psr.pl */ \
- add r25=IA64_VPD_BASE_OFFSET, r21; \
- movl r20=__vsa_base; \
- ;; \
- ld8 r25=[r25]; /* read vpd base */ \
- ld8 r20=[r20]; /* read entry point */ \
- ;; \
- mov r6=r25; \
- add r20=PAL_VPS_SYNC_READ,r20; \
- ;; \
-{ .mii; \
- add r22=VPD(VPSR),r25; \
- mov r24=ip; \
- mov b0=r20; \
- ;; \
-}; \
-{ .mmb; \
- add r24 = 0x20, r24; \
- mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \
- br.cond.sptk b0; /* call the service */ \
- ;; \
-}; \
- ld8 r7=[r22]; \
- /* deposit the ipsr cpl bits into vpd.vpsr, since epc will change them */ \
- extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \
- ;; \
- dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \
- ;; \
- extr.u r30=r16, IA64_PSR_BE_BIT, 5; \
- ;; \
- dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \
- ;; \
- extr.u r30=r16, IA64_PSR_RI_BIT, 2; \
- ;; \
- dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \
- ;; \
- st8 [r22]=r7; \
- ;;
-
-
-
-#define IA64_CURRENT_REG IA64_KR(CURRENT) /* r21 is reserved for current pointer */
-//#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=IA64_CURRENT_REG
-#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=r21
-
-/*
- * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- * psr.ic: off
- * r31: contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- * psr.ic: off
- * r2 = points to &pt_regs.r16
- * r8 = contents of ar.ccv
- * r9 = contents of ar.csd
- * r10 = contents of ar.ssd
- * r11 = FPSR_DEFAULT
- * r12 = kernel sp (kernel virtual address)
- * r13 = points to current task_struct (kernel virtual address)
- * p15 = TRUE if psr.i is set in cr.ipsr
- * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- * preserved
- *
- * Note that psr.ic is NOT turned on by this macro. This is so that
- * we can pass interruption state as arguments to a handler.
- */
-#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
-/* switch rr7 */ \
- movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
- movl r17=(7<<61); \
- movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \
- movl r22=(6<<61); \
- movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \
- movl r23=(5<<61); \
- ;; \
- mov rr[r17]=r16; \
- mov rr[r22]=r20; \
- mov rr[r23]=r18; \
- ;; \
- srlz.i; \
- ;; \
- VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
- mov r27=ar.rsc; /* M */ \
- mov r20=r1; /* A */ \
- mov r26=ar.unat; /* M */ \
- mov r29=cr.ipsr; /* M */ \
- mov r18=cr.isr; \
- COVER; /* B;; (or nothing) */ \
- ;; \
- tbit.z p6,p0=r29,IA64_PSR_VM_BIT; \
- tbit.nz.or p6,p0 = r18,39; \
- ;; \
-(p6) br.sptk.few vmx_panic; \
- tbit.z p0,p15=r29,IA64_PSR_I_BIT; \
- mov r1=r16; \
-/* mov r21=r16; */ \
- /* switch from user to kernel RBS: */ \
- ;; \
- invala; /* M */ \
- SAVE_IFS; \
- ;; \
- VMX_MINSTATE_START_SAVE_MIN \
- adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
- adds r16=PT(CR_IPSR),r1; \
- ;; \
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
- st8 [r16]=r29; /* save cr.ipsr */ \
- ;; \
- lfetch.fault.excl.nt1 [r17]; \
- tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
- mov r29=b0 \
- ;; \
- adds r16=PT(R8),r1; /* initialize first base pointer */ \
- adds r17=PT(R9),r1; /* initialize second base pointer */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r8,16; \
-.mem.offset 8,0; st8.spill [r17]=r9,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r10,24; \
-.mem.offset 8,0; st8.spill [r17]=r11,24; \
- ;; \
- mov r8=ar.pfs; /* I */ \
- mov r9=cr.iip; /* M */ \
- mov r10=ar.fpsr; /* M */ \
- ;; \
- st8 [r16]=r9,16; /* save cr.iip */ \
- st8 [r17]=r30,16; /* save cr.ifs */ \
- sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
- ;; \
- st8 [r16]=r26,16; /* save ar.unat */ \
- st8 [r17]=r8,16; /* save ar.pfs */ \
- shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
- ;; \
- st8 [r16]=r27,16; /* save ar.rsc */ \
- st8 [r17]=r28,16; /* save ar.rnat */ \
- ;; /* avoid RAW on r16 & r17 */ \
- st8 [r16]=r23,16; /* save ar.bspstore */ \
- st8 [r17]=r31,16; /* save predicates */ \
- ;; \
- st8 [r16]=r29,16; /* save b0 */ \
- st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
-.mem.offset 8,0; st8.spill [r17]=r12,16; \
- adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r13,16; \
-.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \
- mov r13=r21; /* establish `current' */ \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r15,16; \
-.mem.offset 8,0; st8.spill [r17]=r14,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r16]=r2,16; \
-.mem.offset 8,0; st8.spill [r17]=r3,16; \
- adds r2=PT(F6),r1; \
- ;; \
- .mem.offset 0,0; st8.spill [r16]=r4,16; \
- .mem.offset 8,0; st8.spill [r17]=r5,16; \
- ;; \
- .mem.offset 0,0; st8.spill [r16]=r6,16; \
- .mem.offset 8,0; st8.spill [r17]=r7,16; \
- mov r20=ar.ccv; \
- ;; \
- mov r18=cr.iipa; \
- mov r4=cr.isr; \
- mov r22=ar.unat; \
- ;; \
- st8 [r16]=r18,16; \
- st8 [r17]=r4; \
- ;; \
- adds r16=PT(EML_UNAT),r1; \
- adds r17=PT(AR_CCV),r1; \
- ;; \
- st8 [r16]=r22,8; \
- st8 [r17]=r20; \
- mov r4=r24; \
- mov r5=r25; \
- ;; \
- st8 [r16]=r0; \
- EXTRA; \
- mov r9=ar.csd; \
- mov r10=ar.ssd; \
- movl r11=FPSR_DEFAULT; /* L-unit */ \
- movl r1=__gp; /* establish kernel global pointer */ \
- ;; \
- PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \
- VMX_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- * psr.ic: on
- * r2: points to &pt_regs.f6
- * r3: points to &pt_regs.f7
- * r4, r5: scratch
- * r6: points to vpd
- * r7: vpsr
- * r9: contents of ar.csd
- * r10: contents of ar.ssd
- * r11: FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define VMX_SAVE_REST \
- tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \
- ;; \
-(pBN0) add r4=VPD(VBGR),r6; \
-(pBN0) add r5=VPD(VBGR)+0x8,r6; \
-(pBN0) add r7=VPD(VBNAT),r6; \
- ;; \
-(pBN1) add r5=VPD(VGR)+0x8,r6; \
-(pBN1) add r4=VPD(VGR),r6; \
-(pBN1) add r7=VPD(VNAT),r6; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r16,16; \
-.mem.offset 8,0; st8.spill [r5]=r17,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r18,16; \
-.mem.offset 8,0; st8.spill [r5]=r19,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r20,16; \
-.mem.offset 8,0; st8.spill [r5]=r21,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r22,16; \
-.mem.offset 8,0; st8.spill [r5]=r23,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r24,16; \
-.mem.offset 8,0; st8.spill [r5]=r25,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r26,16; \
-.mem.offset 8,0; st8.spill [r5]=r27,16; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r28,16; \
-.mem.offset 8,0; st8.spill [r5]=r29,16; \
- mov r26=b6; \
- ;; \
-.mem.offset 0,0; st8.spill [r4]=r30,16; \
-.mem.offset 8,0; st8.spill [r5]=r31,16; \
- mov r27=b7; \
- ;; \
- mov r30=ar.unat; \
- ;; \
- st8 [r7]=r30; \
- mov ar.fpsr=r11; /* M-unit */ \
- ;; \
- stf.spill [r2]=f6,32; \
- stf.spill [r3]=f7,32; \
- ;; \
- stf.spill [r2]=f8,32; \
- stf.spill [r3]=f9,32; \
- ;; \
- stf.spill [r2]=f10; \
- stf.spill [r3]=f11; \
- ;; \
- adds r2=PT(B6)-PT(F10),r2; \
- adds r3=PT(B7)-PT(F11),r3; \
- ;; \
- st8 [r2]=r26,16; /* b6 */ \
- st8 [r3]=r27,16; /* b7 */ \
- ;; \
- st8 [r2]=r9; /* ar.csd */ \
- st8 [r3]=r10; /* ar.ssd */ \
- ;;
-
-#define VMX_SAVE_MIN_WITH_COVER VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#define VMX_SAVE_MIN VMX_DO_SAVE_MIN( , mov r30=r0, )
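
The PAL_VSA_SYNC_READ_CLEANUP_PSR_PL macro above copies the cpl, be and ri fields of cr.ipsr into vpd.vpsr with extr.u/dep pairs (the in-line comments explain why). A small C sketch of what each extr.u/dep pair does, parameterised on the bit positions so that no particular IA64_PSR_*_BIT values are assumed:

#include <stdint.h>

/* dep r7=r30,r7,pos,len after extr.u r30=r16,pos,len copies the len-bit
 * field at bit `pos` from src (cr.ipsr) into dst (vpd.vpsr). */
static uint64_t copy_field(uint64_t dst, uint64_t src, unsigned pos, unsigned len)
{
    uint64_t mask = ((1UL << len) - 1) << pos;
    return (dst & ~mask) | (src & mask);
}

static uint64_t sync_vpsr_from_ipsr(uint64_t vpsr, uint64_t ipsr,
                                    unsigned cpl0_bit, unsigned be_bit, unsigned ri_bit)
{
    vpsr = copy_field(vpsr, ipsr, cpl0_bit, 2);  /* privilege level, 2 bits */
    vpsr = copy_field(vpsr, ipsr, be_bit, 5);    /* 5-bit field, as in the macro */
    vpsr = copy_field(vpsr, ipsr, ri_bit, 2);    /* restart instruction slot */
    return vpsr;
}
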
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx_phy_mode.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,433 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_phy_mode.c: emulating domain physical mode.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Arun Sharma (arun.sharma@xxxxxxxxx)
- * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx)
- * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx)
- */
-
-
-#include <asm/processor.h>
-#include <asm/gcc_intrin.h>
-#include <asm/vmx_phy_mode.h>
-#include <xen/sched.h>
-#include <asm/pgtable.h>
-
-
-int valid_mm_mode[8] = {
- GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */
- INV_MODE,
- INV_MODE,
- GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */
- INV_MODE,
- GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */
- INV_MODE,
- GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/
-};
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- * mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
- /* 2004/09/12(Kevin): Allow switch to self */
- /*
- * (it,dt,rt): (0,0,0) -> (1,1,1)
- * This kind of transition usually occurs in the very early
- * stage of Linux boot up procedure. Another case is in efi
- * and pal calls. (see "arch/ia64/kernel/head.S")
- *
- * (it,dt,rt): (0,0,0) -> (0,1,1)
- * This kind of transition is found when OSYa exits efi boot
- * service. Due to gva = gpa in this case (Same region),
- * data access can be satisfied though itlb entry for physical
- * emulation is hit.
- */
- SW_SELF,0, 0, SW_NOP, 0, 0, 0, SW_P2V,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- /*
- * (it,dt,rt): (0,1,1) -> (1,1,1)
- * This kind of transition is found in OSYa.
- *
- * (it,dt,rt): (0,1,1) -> (0,0,0)
- * This kind of transition is found in OSYa
- */
- SW_NOP, 0, 0, SW_SELF,0, 0, 0, SW_P2V,
- /* (1,0,0)->(1,1,1) */
- 0, 0, 0, 0, 0, 0, 0, SW_P2V,
- /*
- * (it,dt,rt): (1,0,1) -> (1,1,1)
- * This kind of transition usually occurs when Linux returns
- * from the low level TLB miss handlers.
- * (see "arch/ia64/kernel/ivt.S")
- */
- 0, 0, 0, 0, 0, SW_SELF,0, SW_P2V,
- 0, 0, 0, 0, 0, 0, 0, 0,
- /*
- * (it,dt,rt): (1,1,1) -> (1,0,1)
- * This kind of transition usually occurs in Linux low level
- * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
- *
- * (it,dt,rt): (1,1,1) -> (0,0,0)
- * This kind of transition usually occurs in pal and efi calls,
- * which requires running in physical mode.
- * (see "arch/ia64/kernel/head.S")
- * (1,1,1)->(1,0,0)
- */
-
- SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF,
-};
-
-void
-physical_mode_init(VCPU *vcpu)
-{
- UINT64 psr;
- struct domain * d = vcpu->domain;
-
- vcpu->arch.old_rsc = 0;
- vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
-#if 0
-void
-physical_itlb_miss_domn(VCPU *vcpu, u64 vadr)
-{
- u64 psr;
- IA64_PSR vpsr;
- u64 mppn,gppn,mpp1,gpp1;
- struct domain *d;
- static u64 test=0;
- d=vcpu->domain;
- if(test)
- panic("domn physical itlb miss happen\n");
- else
- test=1;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- gppn=(vadr<<1)>>13;
- mppn = get_mfn(DOMID_SELF,gppn,1);
- mppn=(mppn<<12)|(vpsr.cpl<<7);
- gpp1=0;
- mpp1 = get_mfn(DOMID_SELF,gpp1,1);
- mpp1=(mpp1<<12)|(vpsr.cpl<<7);
-// if(vadr>>63)
-// mppn |= PHY_PAGE_UC;
-// else
-// mppn |= PHY_PAGE_WB;
- mpp1 |= PHY_PAGE_WB;
- psr=ia64_clear_ic();
- ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
- ia64_srlz_i();
- ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24);
- ia64_stop();
- ia64_srlz_i();
- ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
- ia64_srlz_i();
- ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24);
- ia64_stop();
- ia64_srlz_i();
- ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
- ia64_srlz_i();
- ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28);
- ia64_stop();
- ia64_srlz_i();
- ia64_set_psr(psr);
- ia64_srlz_i();
- return;
-}
-#endif
-
-void
-physical_itlb_miss(VCPU *vcpu, u64 vadr)
-{
- physical_itlb_miss_dom0(vcpu, vadr);
-}
-
-
-void
-physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr)
-{
- u64 psr;
- IA64_PSR vpsr;
- u64 mppn,gppn;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- gppn=(vadr<<1)>>13;
- mppn = get_mfn(DOMID_SELF,gppn,1);
- mppn=(mppn<<12)|(vpsr.cpl<<7);
-// if(vadr>>63)
-// mppn |= PHY_PAGE_UC;
-// else
- mppn |= PHY_PAGE_WB;
-
- psr=ia64_clear_ic();
- ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
- ia64_set_psr(psr);
- ia64_srlz_i();
- return;
-}
-
-
-void
-physical_dtlb_miss(VCPU *vcpu, u64 vadr)
-{
- u64 psr;
- IA64_PSR vpsr;
- u64 mppn,gppn;
-// if(vcpu->domain!=dom0)
-// panic("dom n physical dtlb miss happen\n");
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- gppn=(vadr<<1)>>13;
- mppn = get_mfn(DOMID_SELF,gppn,1);
- mppn=(mppn<<12)|(vpsr.cpl<<7);
- if(vadr>>63)
- mppn |= PHY_PAGE_UC;
- else
- mppn |= PHY_PAGE_WB;
-
- psr=ia64_clear_ic();
- ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT);
- ia64_set_psr(psr);
- ia64_srlz_i();
- return;
-}
-
-void
-vmx_init_all_rr(VCPU *vcpu)
-{
- VMX(vcpu,vrr[VRN0]) = 0x38;
- VMX(vcpu,vrr[VRN1]) = 0x38;
- VMX(vcpu,vrr[VRN2]) = 0x38;
- VMX(vcpu,vrr[VRN3]) = 0x38;
- VMX(vcpu,vrr[VRN4]) = 0x38;
- VMX(vcpu,vrr[VRN5]) = 0x38;
- VMX(vcpu,vrr[VRN6]) = 0x60;
- VMX(vcpu,vrr[VRN7]) = 0x60;
-
- VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38);
- VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60);
- VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60);
-}
-
-void
-vmx_load_all_rr(VCPU *vcpu)
-{
- unsigned long psr;
- ia64_rr phy_rr;
-
- psr = ia64_clear_ic();
-
- phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
- phy_rr.ve = 1;
-
- /* WARNING: not allow co-exist of both virtual mode and physical
- * mode in same region
- */
- if (is_physical_mode(vcpu)) {
- if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
- panic("Unexpected domain switch in phy emul\n");
- phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
- ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval);
- phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
- ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval);
- } else {
- ia64_set_rr((VRN0 << VRN_SHIFT),
- vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0])));
- ia64_set_rr((VRN4 << VRN_SHIFT),
- vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4])));
- }
-
-#if 1
- /* rr567 will be postponed to last point when resuming back to guest */
- ia64_set_rr((VRN1 << VRN_SHIFT),
- vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1])));
- ia64_set_rr((VRN2 << VRN_SHIFT),
- vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2])));
- ia64_set_rr((VRN3 << VRN_SHIFT),
- vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3])));
-#endif
- ia64_srlz_d();
- ia64_set_psr(psr);
- ia64_srlz_i();
-}
-
-void
-switch_to_physical_rid(VCPU *vcpu)
-{
- UINT64 psr;
- ia64_rr phy_rr;
-
- phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
- phy_rr.ve = 1;
-
- /* Save original virtual mode rr[0] and rr[4] */
- psr=ia64_clear_ic();
- phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
- ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval);
- ia64_srlz_d();
- phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
- ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval);
- ia64_srlz_d();
-
- ia64_set_psr(psr);
- ia64_srlz_i();
- return;
-}
-
-
-void
-switch_to_virtual_rid(VCPU *vcpu)
-{
- UINT64 psr;
- ia64_rr mrr;
-
- psr=ia64_clear_ic();
-
- mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT);
- ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
- ia64_srlz_d();
- mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT);
- ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval));
- ia64_srlz_d();
- ia64_set_psr(psr);
- ia64_srlz_i();
- return;
-}
-
-static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr)
-{
- return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
-
-void
-switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
-{
- int act;
- REGS * regs=vcpu_regs(vcpu);
- act = mm_switch_action(old_psr, new_psr);
- switch (act) {
- case SW_V2P:
- vcpu->arch.old_rsc = regs->ar_rsc;
- switch_to_physical_rid(vcpu);
- /*
- * Set RSE to enforced lazy mode, to prevent active RSE save/restore
- * while in guest physical mode.
- */
- regs->ar_rsc &= ~(IA64_RSC_MODE);
- vcpu->arch.mode_flags |= GUEST_IN_PHY;
- break;
- case SW_P2V:
- switch_to_virtual_rid(vcpu);
- /*
- * recover old mode which is saved when entering
- * guest physical mode
- */
- regs->ar_rsc = vcpu->arch.old_rsc;
- vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
- break;
- case SW_SELF:
- printf("Switch to self-0x%lx!!! MM mode doesn't change...\n",
- old_psr.val);
- break;
- case SW_NOP:
- printf("No action required for mode transition: (0x%lx -> 0x%lx)\n",
- old_psr.val, new_psr.val);
- break;
- default:
- /* Sanity check */
- printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val);
- panic("Unexpected virtual <--> physical mode transition");
- break;
- }
- return;
-}
-
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- * - insertions (itc.*, itr.*)
- * - purges (ptc.* and ptr.*)
- * - tpa
- * - tak
- * - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void
-check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
-{
-
- if ( (old_psr.dt != new_psr.dt ) ||
- (old_psr.it != new_psr.it ) ||
- (old_psr.rt != new_psr.rt )
- ) {
- switch_mm_mode (vcpu, old_psr, new_psr);
- }
-
- return;
-}
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- * - insertions (itc.*, itr.*)
- * - purges (ptc.* and ptr.*)
- * - tpa
- * - tak
- * - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void
-prepare_if_physical_mode(VCPU *vcpu)
-{
- if (is_physical_mode(vcpu)) {
- vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
- switch_to_virtual_rid(vcpu);
- }
- return;
-}
-
-/* Recover always follows prepare */
-void
-recover_if_physical_mode(VCPU *vcpu)
-{
- if (is_physical_mode(vcpu)) {
- vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
- switch_to_physical_rid(vcpu);
- }
- return;
-}
-
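
The 8x8 mm_switch_table above is indexed by the (it,dt,rt) bits of the old and new PSR via MODE_IND(), as mm_switch_action() shows. A minimal sketch of that indexing follows; the bit packing is an assumption read off the table comments ("(it, dt, rt) -> (0, 0, 0)" is index 0, "(1, 1, 1)" is index 7), since MODE_IND itself is defined in a header that is not part of this patch:

struct mode_bits { unsigned it:1, dt:1, rt:1; };

static int mode_index(struct mode_bits m)
{
    return (m.it << 2) | (m.dt << 1) | m.rt;    /* 0..7 */
}

/* Example: a (0,1,1) -> (1,1,1) transition looks up
 * mm_switch_table[3][7], which the table above marks as SW_P2V. */
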
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_process.c
--- a/xen/arch/ia64/vmx_process.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,375 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_process.c: handling VMX architecture-related VM exits
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <asm/ptrace.h>
-#include <xen/delay.h>
-
-#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */
-#include <asm/sal.h> /* FOR struct ia64_sal_retval */
-
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-//#include <asm/ldt.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-#include <asm/regionreg.h>
-#include <asm/privop.h>
-#include <asm/ia64_int.h>
-#include <asm/hpsim_ssc.h>
-#include <asm/dom_fw.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/kregs.h>
-#include <asm/vmx.h>
-#include <asm/vmx_mm_def.h>
-#include <xen/mm.h>
-/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
-
-
-extern struct ia64_sal_retval pal_emulator_static(UINT64);
-extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64);
-extern void rnat_consumption (VCPU *vcpu);
-#define DOMN_PAL_REQUEST 0x110000
-IA64FAULT
-vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim)
-{
- static int first_time = 1;
- struct domain *d = (struct domain *) current->domain;
- struct vcpu *v = (struct vcpu *) current;
- extern unsigned long running_on_sim;
- unsigned long i, sal_param[8];
-
-#if 0
- if (first_time) {
- if (platform_is_hp_ski()) running_on_sim = 1;
- else running_on_sim = 0;
- first_time = 0;
- }
- if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant
- if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs);
- else do_ssc(vcpu_get_gr(current,36), regs);
- }
-#endif
- if (iim == d->arch.breakimm) {
- struct ia64_sal_retval x;
- switch (regs->r2) {
- case FW_HYPERCALL_PAL_CALL:
- //printf("*** PAL hypercall: index=%d\n",regs->r28);
- //FIXME: This should call a C routine
- x = pal_emulator_static(VMX_VPD(v, vgr[12]));
- regs->r8 = x.status; regs->r9 = x.v0;
- regs->r10 = x.v1; regs->r11 = x.v2;
-#if 0
- if (regs->r8)
- printk("Failed vpal emulation, with index:0x%lx\n",
- VMX_VPD(v, vgr[12]));
-#endif
- break;
- case FW_HYPERCALL_SAL_CALL:
- for (i = 0; i < 8; i++)
- vmx_vcpu_get_gr(v, 32+i, &sal_param[i]);
- x = sal_emulator(sal_param[0], sal_param[1],
- sal_param[2], sal_param[3],
- sal_param[4], sal_param[5],
- sal_param[6], sal_param[7]);
- regs->r8 = x.status; regs->r9 = x.v0;
- regs->r10 = x.v1; regs->r11 = x.v2;
-#if 0
- if (regs->r8)
- printk("Failed vsal emulation, with index:0x%lx\n",
- sal_param[0]);
-#endif
- break;
- case FW_HYPERCALL_EFI_RESET_SYSTEM:
- printf("efi.reset_system called ");
- if (current->domain == dom0) {
- printf("(by dom0)\n ");
- (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL);
- }
- printf("(not supported for non-0 domain)\n");
- regs->r8 = EFI_UNSUPPORTED;
- break;
- case FW_HYPERCALL_EFI_GET_TIME:
- {
- unsigned long *tv, *tc;
- vmx_vcpu_get_gr(v, 32, &tv);
- vmx_vcpu_get_gr(v, 33, &tc);
- printf("efi_get_time(%p,%p) called...",tv,tc);
- tv = __va(translate_domain_mpaddr(tv));
- if (tc) tc = __va(translate_domain_mpaddr(tc));
- regs->r8 = (*efi.get_time)(tv,tc);
- printf("and returns %lx\n",regs->r8);
- }
- break;
- case FW_HYPERCALL_EFI_SET_TIME:
- case FW_HYPERCALL_EFI_GET_WAKEUP_TIME:
- case FW_HYPERCALL_EFI_SET_WAKEUP_TIME:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP:
- // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED
- // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS
- // POINTER ARGUMENTS WILL BE VIRTUAL!!
- case FW_HYPERCALL_EFI_GET_VARIABLE:
- // FIXME: need fixes in efi.h from 2.6.9
- case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE:
- case FW_HYPERCALL_EFI_SET_VARIABLE:
- case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT:
- // FIXME: need fixes in efi.h from 2.6.9
- regs->r8 = EFI_UNSUPPORTED;
- break;
- }
-#if 0
- if (regs->r8)
- printk("Failed vgfw emulation, with index:0x%lx\n",
- regs->r2);
-#endif
- vmx_vcpu_increment_iip(current);
- }else if(iim == DOMN_PAL_REQUEST){
- pal_emul(current);
- vmx_vcpu_increment_iip(current);
- } else
- vmx_reflect_interruption(ifa,isr,iim,11);
-}
-
-static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800,
- 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000,
- 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600,
- 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000,
- 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00,
- 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400,
- 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00,
- 0x7f00,
-};
-
-
-
-void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim,
- UINT64 vector)
-{
- VCPU *vcpu = current;
- REGS *regs=vcpu_regs(vcpu);
- UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu);
- if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){
- panic("Guest nested fault!");
- }
- VPD_CR(vcpu,isr)=isr;
- VPD_CR(vcpu,iipa) = regs->cr_iip;
- vector=vec2off[vector];
- if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
- VPD_CR(vcpu,iim) = iim;
- else {
- set_ifa_itir_iha(vcpu,ifa,1,1,1);
- }
- inject_guest_interruption(vcpu, vector);
-}
-
-// ONLY gets called from ia64_leave_kernel
-// ONLY call with interrupts disabled?? (else might miss one?)
-// NEVER successful if already reflecting a trap/fault because psr.i==0
-void leave_hypervisor_tail(struct pt_regs *regs)
-{
- struct domain *d = current->domain;
- struct vcpu *v = current;
- // FIXME: Will this work properly if doing an RFI???
- if (!is_idle_task(d) ) { // always comes from guest
- extern void vmx_dorfirfi(void);
- struct pt_regs *user_regs = vcpu_regs(current);
-
- if (local_softirq_pending())
- do_softirq();
- local_irq_disable();
-
- if (user_regs != regs)
- printk("WARNING: checking pending interrupt in nested interrupt!!!\n");
-
- /* VMX Domain N has other interrupt source, saying DM */
- if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags))
- vmx_intr_assist(v);
-
- /* FIXME: Check event pending indicator, and set
- * pending bit if necessary to inject back to guest.
- * Should be careful about window between this check
- * and above assist, since IOPACKET_PORT shouldn't be
- * injected into vmx domain.
- *
- * Now hardcode the vector as 0x10 temporarily
- */
- if (event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) {
- VPD_CR(v, irr[0]) |= 1UL << 0x10;
- v->arch.irq_new_pending = 1;
- }
-
- if ( v->arch.irq_new_pending ) {
- v->arch.irq_new_pending = 0;
- vmx_check_pending_irq(v);
- }
- }
-}
-
-extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr);
-
-/* We came here because the H/W VHPT walker failed to find an entry */
-void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr)
-{
- IA64_PSR vpsr;
- CACHE_LINE_TYPE type;
- u64 vhpt_adr;
- ISR misr;
- ia64_rr vrr;
- REGS *regs;
- thash_cb_t *vtlb, *vhpt;
- thash_data_t *data, me;
- vtlb=vmx_vcpu_get_vtlb(vcpu);
-#ifdef VTLB_DEBUG
- check_vtlb_sanity(vtlb);
- dump_vtlb(vtlb);
-#endif
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- regs = vcpu_regs(vcpu);
- misr.val=regs->cr_isr;
-/* TODO
- if(vcpu->domain->id && vec == 2 &&
- vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){
- emulate_ins(&v);
- return;
- }
-*/
-
- if((vec==1)&&(!vpsr.it)){
- physical_itlb_miss(vcpu, vadr);
- return;
- }
- if((vec==2)&&(!vpsr.dt)){
- if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){
- emulate_io_inst(vcpu,((vadr<<1)>>1),4); // UC
- }else{
- physical_dtlb_miss(vcpu, vadr);
- }
- return;
- }
- vrr = vmx_vcpu_rr(vcpu,vadr);
- if(vec == 1) type = ISIDE_TLB;
- else if(vec == 2) type = DSIDE_TLB;
- else panic("wrong vec\n");
-
-// prepare_if_physical_mode(vcpu);
-
- if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){
- if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, data->ppn>>(PAGE_SHIFT-12))){
- vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps);
- emulate_io_inst(vcpu, vadr, data->ma);
- return IA64_FAULT;
- }
- if ( data->ps != vrr.ps ) {
- machine_tlb_insert(vcpu, data);
- }
- else {
- thash_insert(vtlb->ts->vhpt,data,vadr);
- }
- }else if(type == DSIDE_TLB){
- if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, misr.val);
- alt_dtlb(vcpu, vadr);
- return IA64_FAULT;
- } else{
- if(misr.sp){
- //TODO lds emulation
- panic("Don't support speculation load");
- }else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- } else{
- vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
- vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
- data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
- if(data){
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, misr.val);
- dtlb_fault(vcpu, vadr);
- return IA64_FAULT;
- }else{
- if(misr.sp){
- //TODO lds emulation
- panic("Don't support speculation load");
- }else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- }else{
- if(vpsr.ic){
- vmx_vcpu_set_isr(vcpu, misr.val);
- dvhpt_fault(vcpu, vadr);
- return IA64_FAULT;
- }else{
- if(misr.sp){
- //TODO lds emulation
- panic("Don't support speculation load");
- }else{
- nested_dtlb(vcpu);
- return IA64_FAULT;
- }
- }
- }
- }
- }else if(type == ISIDE_TLB){
- if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){
- if(!vpsr.ic){
- misr.ni=1;
- }
- vmx_vcpu_set_isr(vcpu, misr.val);
- alt_itlb(vcpu, vadr);
- return IA64_FAULT;
- } else{
- vmx_vcpu_thash(vcpu, vadr, &vhpt_adr);
- vrr=vmx_vcpu_rr(vcpu,vhpt_adr);
- data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB);
- if(data){
- if(!vpsr.ic){
- misr.ni=1;
- }
- vmx_vcpu_set_isr(vcpu, misr.val);
- itlb_fault(vcpu, vadr);
- return IA64_FAULT;
- }else{
- if(!vpsr.ic){
- misr.ni=1;
- }
- vmx_vcpu_set_isr(vcpu, misr.val);
- ivhpt_fault(vcpu, vadr);
- return IA64_FAULT;
- }
- }
- }
-}
-
-
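
leave_hypervisor_tail() above hardcodes vector 0x10 and records it with VPD_CR(v, irr[0]) |= 1UL << 0x10 after checking in_service[0]. Both arrays behave as 256-bit bitmaps split into 64-bit words, so vector 0x10 lands in word 0. A small sketch of that mapping for an arbitrary vector (array size and field names are illustrative):

#include <stdint.h>

static void pend_vector(uint64_t irr[4], unsigned vec)
{
    irr[vec >> 6] |= 1UL << (vec & 63);          /* word 0 for vec 0x10 */
}

static int vector_in_service(const uint64_t in_service[4], unsigned vec)
{
    return (in_service[vec >> 6] >> (vec & 63)) & 1;
}
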
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_support.c
--- a/xen/arch/ia64/vmx_support.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,164 +0,0 @@
-
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_support.c: vmx specific support interface.
- * Copyright (c) 2005, Intel Corporation.
- * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <public/io/ioreq.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vcpu.h>
-
-/*
- * I/O emulation should be atomic from domain point of view. However,
- * when emulation code is waiting for I/O completion by do_block,
- * other events like DM interrupt, VBD, etc. may come and unblock
- * current execution flow. So we have to prepare to re-block if unblocked
- * by non I/O completion event.
- */
-void vmx_wait_io(void)
-{
- struct vcpu *v = current;
- struct domain *d = v->domain;
- extern void do_block();
- int port = iopacket_port(d);
-
- do {
- if (!test_bit(port,
- &d->shared_info->evtchn_pending[0]))
- do_block();
-
- /* Unblocked when some event is coming. Clear pending indication
- * immediately if deciding to go for io assist
- */
- if (test_and_clear_bit(port,
- &d->shared_info->evtchn_pending[0])) {
- clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
- clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
- vmx_io_assist(v);
- }
-
-
- if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
- /*
- * Latest event is not I/O completion, so clear corresponding
- * selector and pending indication, to allow real event coming
- */
- clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
-
- /* A window is actually left here before the selector is cleared.
- * However, this window only delays the indication of the coming event;
- * nothing is lost. The next loop iteration will check the I/O channel
- * to close this window.
- */
- clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
- }
- else
- break;
- } while (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
-}
-
-/*
- * Only place to call vmx_io_assist is mmio/legacy_io emulation.
- * Since I/O emulation is synchronous, it shouldn't be called in
- * other places. This is not like x86, since IA-64 implements a
- * per-vp stack without continuation.
- */
-void vmx_io_assist(struct vcpu *v)
-{
- vcpu_iodata_t *vio;
- ioreq_t *p;
-
- /*
- * This shared page contains I/O request between emulation code
- * and device model.
- */
- vio = get_vio(v->domain, v->vcpu_id);
- if (!vio)
- panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
-
- p = &vio->vp_ioreq;
-
- if (p->state == STATE_IORESP_HOOK)
- panic("Not supported: No hook available for DM request\n");
-
- if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
- if (p->state != STATE_IORESP_READY) {
- /* Can't do_block here, for the same reason as other places to
- * use vmx_wait_io. Simple return is safe since vmx_wait_io will
- * try to block again
- */
- return;
- } else
- p->state = STATE_INVALID;
-
- clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
- } else
- return; /* Spurious event? */
-}
-
-/*
- * VMX domainN has two types of interrupt source: lsapic model within
- * HV, and device model within domain 0 (service OS). There're another
- * HV, and the device model within domain 0 (service OS). There is also a
- * pending array in the shared page, manipulated by the device model directly.
- * into VPD. This has to be done before checking pending interrupt at
- * resume to guest. For domain 0, all the interrupt sources come from
- * HV, which then doesn't require this assist.
- */
-void vmx_intr_assist(struct vcpu *v)
-{
- vcpu_iodata_t *vio;
- struct domain *d = v->domain;
- extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu,
- unsigned long *pend_irr);
- int port = iopacket_port(d);
-
- /* I/O emulation is atomic, so it's impossible to see execution flow
- * out of vmx_wait_io, when guest is still waiting for response.
- */
- if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
- panic("!!!Bad resume to guest before I/O emulation is done.\n");
-
- /* Clear indicator specific to interrupt delivered from DM */
- if (test_and_clear_bit(port,
- &d->shared_info->evtchn_pending[0])) {
- if (!d->shared_info->evtchn_pending[port >> 5])
- clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel);
-
- if (!v->vcpu_info->evtchn_pending_sel)
- clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
- }
-
- /* Even without event pending, we still need to sync pending bits
- * between DM and vlsapic. The reason is that interrupt delivery
- * shares same event channel as I/O emulation, with corresponding
- * indicator possibly cleared when vmx_wait_io().
- */
- vio = get_vio(v->domain, v->vcpu_id);
- if (!vio)
- panic("Corruption: bad shared page: %lx\n", (unsigned long)vio);
-
-#ifdef V_IOSAPIC_READY
- vlapic_update_ext_irq(v);
-#else
- panic("IOSAPIC model is missing in qemu\n");
-#endif
- return;
-}
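
vmx_wait_io() and vmx_intr_assist() above both walk the same two-level event-channel state: a per-port bit in shared_info->evtchn_pending[], a per-word summary bit in vcpu_info->evtchn_pending_sel selected by port>>5, and the single evtchn_upcall_pending flag. A simplified C sketch of that clear sequence, using stand-in structures rather than the real shared_info/vcpu_info layouts:

#include <stdint.h>

struct sketch_shared_info { uint32_t evtchn_pending[32]; };      /* per-port bits */
struct sketch_vcpu_info   {
    uint32_t evtchn_pending_sel;                                 /* per-word summary */
    uint8_t  evtchn_upcall_pending;                              /* single flag */
};

static int consume_port(struct sketch_shared_info *s, struct sketch_vcpu_info *v,
                        unsigned port)
{
    uint32_t *word = &s->evtchn_pending[port >> 5];
    uint32_t  bit  = 1u << (port & 31);

    if (!(*word & bit))
        return 0;                        /* nothing pending on this port */
    *word &= ~bit;                       /* test_and_clear_bit(port, ...) */
    if (!*word)                          /* drop the summary bit only once the word drains, */
        v->evtchn_pending_sel &= ~(1u << (port >> 5));   /* as vmx_intr_assist() does */
    if (!v->evtchn_pending_sel)
        v->evtchn_upcall_pending = 0;
    return 1;
}
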
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_utility.c
--- a/xen/arch/ia64/vmx_utility.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,659 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_utility.c:
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
- * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <xen/types.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/processor.h>
-#include <asm/vmx_mm_def.h>
-
-
-/*
- * Return:
- * 0: not a reserved indirect register
- * 1: a reserved indirect register
- */
-int
-is_reserved_indirect_register (
- int type,
- int index )
-{
- switch (type) {
- case IA64_CPUID:
- if ( index >= 5 ) {
- return 1;
- }
-
- case IA64_DBR:
- case IA64_IBR:
- //bugbugbug:check with pal about the max ibr/dbr!!!!
- break;
-
- case IA64_PMC:
- //bugbugbug:check with pal about the max ibr/dbr!!!!
- break;
-
- case IA64_PMD:
- //bugbugbug:check with pal about the max ibr/dbr!!!!
- break;
-
- case IA64_PKR:
- //bugbugbug:check with pal about the max pkr!!!!
- break;
-
- case IA64_RR:
- //bugbugbug:check with pal about the max rr!!!!
- break;
-
- default:
- panic ("Unsupported instruction!");
- }
-
- return 0;
-
-}
-
-/*
- * Return:
- * Set all ignored fields in value to 0 and return
- */
-u64
-indirect_reg_igfld_MASK (
- int type,
- int index,
- u64 value
- )
-{
- u64 nvalue;
-
- nvalue = value;
- switch ( type ) {
- case IA64_CPUID:
- if ( index == 2 ) {
- nvalue = 0;
- }
- break;
-
- case IA64_DBR:
- case IA64_IBR:
- /* Refer to SDM Vol2 Table 7-1,7-2 */
- if ( index % 2 != 0) {
- /* Ignore field: {61:60} */
- nvalue = value & (~MASK (60, 2));
- }
- break;
- case IA64_PMC:
- if ( index == 0 ) {
- /* Ignore field: 3:1 */
- nvalue = value & (~MASK (1, 3));
- }
- break;
- case IA64_PMD:
- if ( index >= 4 ) {
- /* Ignore field: 7:7 */
- /* bugbug: this code is correct for generic
- * PMD. However, for implementation specific
- * PMD, it's WRONG. need more info to judge
- * what's implementation specific PMD.
- */
- nvalue = value & (~MASK (7, 1));
- }
- break;
- case IA64_PKR:
- case IA64_RR:
- break;
- default:
- panic ("Unsupported instruction!");
- }
-
- return nvalue;
-}
-
-/*
- * Return:
- * Set all ignored fields in value to 0 and return
- */
-u64
-cr_igfld_mask (int index, u64 value)
-{
- u64 nvalue;
-
- nvalue = value;
-
- switch ( index ) {
- case IA64_REG_CR_IVA:
- /* Ignore filed: 14:0 */
- nvalue = value & (~MASK (0, 15));
- break;
-
- case IA64_REG_CR_IHA:
- /* Ignore filed: 1:0 */
- nvalue = value & (~MASK (0, 2));
- break;
-
- case IA64_REG_CR_LID:
- /* Ignore filed: 63:32 */
- nvalue = value & (~MASK (32, 32));
- break;
-
- case IA64_REG_CR_TPR:
- /* Ignore filed: 63:17,3:0 */
- nvalue = value & (~MASK (17, 47));
- nvalue = nvalue & (~MASK (0, 4));
- break;
-
- case IA64_REG_CR_EOI:
- /* Ignore filed: 63:0 */
- nvalue = 0;
- break;
-
- case IA64_REG_CR_ITV:
- case IA64_REG_CR_PMV:
- case IA64_REG_CR_CMCV:
- case IA64_REG_CR_LRR0:
- case IA64_REG_CR_LRR1:
- /* Ignore filed: 63:17,12:12 */
- nvalue = value & (~MASK (17, 47));
- nvalue = nvalue & (~MASK (12, 1));
- break;
- }
-
- return nvalue;
-}
-
-
-/*
- * Return:
- * 1: PSR reserved fields are not zero
- * 0: PSR reserved fields are all zero
- */
-int
-check_psr_rsv_fields (u64 value)
-{
- /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46
- * These reserved fields shall all be zero
- * Otherwise we will panic
- */
-
- if ( value & MASK (0, 1) ||
- value & MASK (6, 7) ||
- value & MASK (16, 1) ||
- value & MASK (28, 4) ||
- value & MASK (46, 18)
- ) {
- return 1;
- }
-
- return 0;
-}
-
-
-
-/*
- * Return:
- * 1: CR reserved fields are not zero
- * 0: CR reserved fields are all zero
- */
-int
-check_cr_rsv_fields (int index, u64 value)
-{
- switch (index) {
- case IA64_REG_CR_DCR:
- if ( (value & MASK ( 3, 5 )) ||
- (value & MASK (15, 49))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_ITM:
- case IA64_REG_CR_IVA:
- case IA64_REG_CR_IIP:
- case IA64_REG_CR_IFA:
- case IA64_REG_CR_IIPA:
- case IA64_REG_CR_IIM:
- case IA64_REG_CR_IHA:
- case IA64_REG_CR_EOI:
- return 0;
-
- case IA64_REG_CR_PTA:
- if ( (value & MASK ( 1, 1 )) ||
- (value & MASK (9, 6))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_IPSR:
- return check_psr_rsv_fields (value);
-
-
- case IA64_REG_CR_ISR:
- if ( (value & MASK ( 24, 8 )) ||
- (value & MASK (44, 20))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_ITIR:
- if ( (value & MASK ( 0, 2 )) ||
- (value & MASK (32, 32))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_IFS:
- if ( (value & MASK ( 38, 25 ))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_LID:
- if ( (value & MASK ( 0, 16 ))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_IVR:
- if ( (value & MASK ( 8, 56 ))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_TPR:
- if ( (value & MASK ( 8, 8 ))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_IRR0:
- if ( (value & MASK ( 1, 1 )) ||
- (value & MASK (3, 13))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_ITV:
- case IA64_REG_CR_PMV:
- case IA64_REG_CR_CMCV:
- if ( (value & MASK ( 8, 4 )) ||
- (value & MASK (13, 3))) {
- return 1;
- }
- return 0;
-
- case IA64_REG_CR_LRR0:
- case IA64_REG_CR_LRR1:
- if ( (value & MASK ( 11, 1 )) ||
- (value & MASK (14, 1))) {
- return 1;
- }
- return 0;
- }
-
-
- panic ("Unsupported CR");
-}
-
-
-
-/*
- * Return:
- * 0: Indirect Reg reserved fields are not zero
- * 1: Indirect Reg reserved fields are all zero
- */
-int
-check_indirect_reg_rsv_fields ( int type, int index, u64 value )
-{
-
- switch ( type ) {
- case IA64_CPUID:
- if ( index == 3 ) {
- if ( value & MASK (40, 24 )) {
- return 0;
- }
- } else if ( index == 4 ) {
- if ( value & MASK (2, 62 )) {
- return 0;
- }
- }
- break;
-
- case IA64_DBR:
- case IA64_IBR:
- case IA64_PMC:
- case IA64_PMD:
- break;
-
- case IA64_PKR:
- if ( value & MASK (4, 4) ||
- value & MASK (32, 32 )) {
- return 0;
- }
- break;
-
- case IA64_RR:
- if ( value & MASK (1, 1) ||
- value & MASK (32, 32 )) {
- return 0;
- }
- break;
-
- default:
- panic ("Unsupported instruction!");
- }
-
- return 1;
-}
-
-
-
-
-/* Return
- * Same format as isr_t
- * Only ei/ni bits are valid, all other bits are zero
- */
-u64
-set_isr_ei_ni (VCPU *vcpu)
-{
-
- IA64_PSR vpsr,ipsr;
- ISR visr;
- REGS *regs;
-
- regs=vcpu_regs(vcpu);
-
- visr.val = 0;
-
- vpsr.val = vmx_vcpu_get_psr (vcpu);
-
- if (!vpsr.ic == 1 ) {
- /* Set ISR.ni */
- visr.ni = 1;
- }
- ipsr.val = regs->cr_ipsr;
-
- visr.ei = ipsr.ri;
- return visr.val;
-}
-
-
-/* Set up ISR.na/code{3:0}/r/w for no-access instructions
- * Refer to SDM Vol Table 5-1
- * Parameter:
- * setr: if 1, indicates this function will set up ISR.r
- * setw: if 1, indicates this function will set up ISR.w
- * Return:
- * Same format as ISR. All fields are zero, except na/code{3:0}/r/w
- */
-u64
-set_isr_for_na_inst(VCPU *vcpu, int op)
-{
- ISR visr;
- visr.val = 0;
- switch (op) {
- case IA64_INST_TPA:
- visr.na = 1;
- visr.code = 0;
- break;
- case IA64_INST_TAK:
- visr.na = 1;
- visr.code = 3;
- break;
- }
- return visr.val;
-}
-
-
-
-/*
- * Set up ISR for registe Nat consumption fault
- * Parameters:
- * read: if 1, indicates this is a read access;
- * write: if 1, indicates this is a write access;
- */
-void
-set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write)
-{
- ISR visr;
- u64 value;
- /* Need set up ISR: code, ei, ni, na, r/w */
- visr.val = 0;
-
- /* ISR.code{7:4} =1,
- * Set up ISR.code{3:0}, ISR.na
- */
- visr.code = (1 << 4);
- if (inst) {
-
- value = set_isr_for_na_inst (vcpu,inst);
- visr.val = visr.val | value;
- }
-
- /* Set up ISR.r/w */
- visr.r = read;
- visr.w = write;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr (vcpu,visr.val);
-}
-
-
-
-/*
- * Set up ISR for break fault
- */
-void set_break_isr (VCPU *vcpu)
-{
- ISR visr;
- u64 value;
-
- /* Need set up ISR: ei, ni */
-
- visr.val = 0;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr(vcpu, visr.val);
-}
-
-
-
-
-
-
-/*
- * Set up ISR for Priviledged Operation fault
- */
-void set_privileged_operation_isr (VCPU *vcpu,int inst)
-{
- ISR visr;
- u64 value;
-
- /* Need set up ISR: code, ei, ni, na */
-
- visr.val = 0;
-
- /* Set up na, code{3:0} for no-access instruction */
- value = set_isr_for_na_inst (vcpu, inst);
- visr.val = visr.val | value;
-
-
- /* ISR.code{7:4} =1 */
- visr.code = (1 << 4) | visr.code;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr (vcpu, visr.val);
-}
-
-
-
-
-/*
- * Set up ISR for Priviledged Register fault
- */
-void set_privileged_reg_isr (VCPU *vcpu, int inst)
-{
- ISR visr;
- u64 value;
-
- /* Need set up ISR: code, ei, ni */
-
- visr.val = 0;
-
- /* ISR.code{7:4} =2 */
- visr.code = 2 << 4;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr (vcpu, visr.val);
-}
-
-
-
-
-
-/*
- * Set up ISR for Reserved Register/Field fault
- */
-void set_rsv_reg_field_isr (VCPU *vcpu)
-{
- ISR visr;
- u64 value;
-
- /* Need set up ISR: code, ei, ni */
-
- visr.val = 0;
-
- /* ISR.code{7:4} =4 */
- visr.code = (3 << 4) | visr.code;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr (vcpu, visr.val);
-}
-
-
-
-/*
- * Set up ISR for Illegal Operation fault
- */
-void set_illegal_op_isr (VCPU *vcpu)
-{
- ISR visr;
- u64 value;
-
- /* Need set up ISR: ei, ni */
-
- visr.val = 0;
-
- /* Set up ei/ni */
- value = set_isr_ei_ni (vcpu);
- visr.val = visr.val | value;
-
- vmx_vcpu_set_isr (vcpu, visr.val);
-}
-
-
-void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access)
-{
- ISR isr;
-
- isr.val = 0;
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag;
- isr.na = non_access;
- isr.r = 1;
- isr.w = 0;
- vmx_vcpu_set_isr(vcpu, isr.val);
- return;
-}
-
-void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access)
-{
- u64 value;
- ISR isr;
-
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_PRIV_OP_FAULT;
- isr.na = non_access;
- vmx_vcpu_set_isr(vcpu, isr.val);
-
- return;
-}
-
-
-IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index)
-{
- u64 sof;
- REGS *regs;
- regs=vcpu_regs(vcpu);
- sof = regs->cr_ifs & 0x7f;
- if(reg_index >= sof + 32)
- return IA64_FAULT;
- return IA64_NO_FAULT;;
-}
-
-
-int is_reserved_rr_register(VCPU* vcpu, int reg_index)
-{
- return (reg_index >= 8);
-}
-
-#define ITIR_RSV_MASK (0x3UL | (((1UL<<32)-1) << 32))
-int is_reserved_itir_field(VCPU* vcpu, u64 itir)
-{
- if ( itir & ITIR_RSV_MASK ) {
- return 1;
- }
- return 0;
-}
-
-int is_reserved_rr_field(VCPU* vcpu, u64 reg_value)
-{
- ia64_rr rr;
- rr.rrval = reg_value;
-
- if(rr.reserved0 != 0 || rr.reserved1 != 0){
- return 1;
- }
- if(rr.ps < 12 || rr.ps > 28){
- // page too big or small.
- return 1;
- }
- if(rr.ps > 15 && rr.ps % 2 != 0){
- // unsupported page size.
- return 1;
- }
- return 0;
-}
-
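Most of the reserved- and ignored-field checks in the vmx_utility.c hunk above are built on a MASK(bit, len) helper that comes from asm/vmx_mm_def.h and is not visible in this patch, so the definition below is an assumption about its shape. The sketch mirrors the IA64_REG_CR_TPR arm of cr_igfld_mask(), which forces the ignored fields 63:17 and 3:0 to zero before the value is stored.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Assumed shape of MASK(): a field of `len` contiguous bits starting at
 * bit position `bit`. */
#define MASK(bit, len)  ((((uint64_t)1 << (len)) - 1) << (bit))

/* Mirrors the TPR case of cr_igfld_mask() above: drop bits 63:17 and 3:0. */
static uint64_t tpr_igfld_mask(uint64_t value)
{
    value &= ~MASK(17, 47);
    value &= ~MASK(0, 4);
    return value;
}

int main(void)
{
    /* All-ones collapses to 0x1fff0, i.e. only bits 16:4 survive. */
    printf("%#" PRIx64 "\n", tpr_igfld_mask(~(uint64_t)0));
    return 0;
}

The same pattern, with different bit ranges, covers every case in cr_igfld_mask(), indirect_reg_igfld_MASK() and the check_*_rsv_fields() helpers.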
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_vcpu.c
--- a/xen/arch/ia64/vmx_vcpu.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,446 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_vcpu.c: handling all virtual cpu related thing.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Fred yang (fred.yang@xxxxxxxxx)
- * Arun Sharma (arun.sharma@xxxxxxxxx)
- * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <xen/sched.h>
-#include <public/arch-ia64.h>
-#include <asm/ia64_int.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/regionreg.h>
-#include <asm/tlb.h>
-#include <asm/processor.h>
-#include <asm/delay.h>
-#include <asm/regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/vmx.h>
-
-//u64 fire_itc;
-//u64 fire_itc2;
-//u64 fire_itm;
-//u64 fire_itm2;
-/*
- * Copyright (c) 2005 Intel Corporation.
- * Anthony Xu (anthony.xu@xxxxxxxxx)
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/**************************************************************************
- VCPU general register access routines
-**************************************************************************/
-#include <asm/hw_irq.h>
-#include <asm/vmx_pal_vsa.h>
-#include <asm/kregs.h>
-
-//unsigned long last_guest_rsm = 0x0;
-struct guest_psr_bundle{
- unsigned long ip;
- unsigned long psr;
-};
-
-struct guest_psr_bundle guest_psr_buf[100];
-unsigned long guest_psr_index = 0;
-
-void
-vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value)
-{
-
- UINT64 mask;
- REGS *regs;
- IA64_PSR old_psr, new_psr;
- old_psr.val=vmx_vcpu_get_psr(vcpu);
-
- regs=vcpu_regs(vcpu);
- /* We only support guest as:
- * vpsr.pk = 0
- * vpsr.is = 0
- * Otherwise panic
- */
- if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
- panic ("Setting unsupport guest psr!");
- }
-
- /*
- * For those IA64_PSR bits: id/da/dd/ss/ed/ia
- * Since these bits will become 0, after success execution of each
- * instruction, we will change set them to mIA64_PSR
- */
- VMX_VPD(vcpu,vpsr) = value &
- (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
- IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
- ));
-
- if ( !old_psr.i && (value & IA64_PSR_I) ) {
- // vpsr.i 0->1
- vcpu->arch.irq_new_condition = 1;
- }
- new_psr.val=vmx_vcpu_get_psr(vcpu);
- {
- struct pt_regs *regs = vcpu_regs(vcpu);
- guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
- guest_psr_buf[guest_psr_index].psr = new_psr.val;
- if (++guest_psr_index >= 100)
- guest_psr_index = 0;
- }
-#if 0
- if (old_psr.i != new_psr.i) {
- if (old_psr.i)
- last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
- else
- last_guest_rsm = 0;
- }
-#endif
-
- /*
- * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
- * , except for the following bits:
- * ic/i/dt/si/rt/mc/it/bn/vm
- */
- mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
- IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
- IA64_PSR_VM;
-
- regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
-
- check_mm_mode_switch(vcpu, old_psr, new_psr);
- return IA64_NO_FAULT;
-}
-
-/* Adjust slot both in pt_regs and vpd, upon vpsr.ri which
- * should have sync with ipsr in entry.
- *
- * Clear some bits due to successfully emulation.
- */
-IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu)
-{
- // TODO: trap_bounce?? Eddie
- REGS *regs = vcpu_regs(vcpu);
- IA64_PSR vpsr;
- IA64_PSR *ipsr = (IA64_PSR *)®s->cr_ipsr;
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- if (vpsr.ri == 2) {
- vpsr.ri = 0;
- regs->cr_iip += 16;
- } else {
- vpsr.ri++;
- }
-
- ipsr->ri = vpsr.ri;
- vpsr.val &=
- (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
- IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
- ));
-
- VMX_VPD(vcpu, vpsr) = vpsr.val;
-
- ipsr->val &=
- (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
- IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
- ));
-
- return (IA64_NO_FAULT);
-}
-
-
-IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
-{
- REGS *regs = vcpu_regs(vcpu);
- IA64_PSR vpsr;
- vpsr.val = vmx_vcpu_get_psr(vcpu);
-
- if(!vpsr.ic)
- VPD_CR(vcpu,ifs) = regs->cr_ifs;
- regs->cr_ifs = IA64_IFS_V;
- return (IA64_NO_FAULT);
-}
-
-
-thash_cb_t *
-vmx_vcpu_get_vtlb(VCPU *vcpu)
-{
- return vcpu->arch.vtlb;
-}
-
-
-struct virutal_platform_def *
-vmx_vcpu_get_plat(VCPU *vcpu)
-{
- return &(vcpu->domain->arch.vmx_platform);
-}
-
-
-ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr)
-{
- return (ia64_rr)VMX(vcpu,vrr[vadr>>61]);
-}
-
-
-IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- ia64_rr oldrr,newrr;
- thash_cb_t *hcb;
- oldrr=vmx_vcpu_rr(vcpu,reg);
- newrr.rrval=val;
-#if 1
- if(oldrr.ps!=newrr.ps){
- hcb = vmx_vcpu_get_vtlb(vcpu);
- thash_purge_all(hcb);
- }
-#endif
- VMX(vcpu,vrr[reg>>61]) = val;
- switch((u64)(reg>>61)) {
- case VRN5:
- VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val);
- break;
- case VRN6:
- VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val);
- break;
- case VRN7:
- VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
- /* Change double mapping for this domain */
-#ifdef XEN_DBL_MAPPING
- vmx_change_double_mapping(vcpu,
- vmx_vrrtomrr(vcpu,oldrr.rrval),
- vmx_vrrtomrr(vcpu,newrr.rrval));
-#endif
- break;
- default:
- ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
- break;
- }
-
- return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
- VCPU protection key register access routines
-**************************************************************************/
-
-IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
-{
- UINT64 val = (UINT64)ia64_get_pkr(reg);
- *pval = val;
- return (IA64_NO_FAULT);
-}
-
-IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
-{
- ia64_set_pkr(reg,val);
- return (IA64_NO_FAULT);
-}
-
-#if 0
-int tlb_debug=0;
-check_entry(u64 va, u64 ps, char *str)
-{
- va &= ~ (PSIZE(ps)-1);
- if ( va == 0x2000000002908000UL ||
- va == 0x600000000000C000UL ) {
- stop();
- }
- if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
-}
-#endif
-
-
-u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa)
-{
- ia64_rr rr,rr1;
- rr=vmx_vcpu_rr(vcpu,ifa);
- rr1.rrval=0;
- rr1.ps=rr.ps;
- rr1.rid=rr.rid;
- return (rr1.rrval);
-}
-
-
-
-
-IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
-{
- // TODO: Only allowed for current vcpu
- UINT64 ifs, psr;
- REGS *regs = vcpu_regs(vcpu);
- psr = VPD_CR(vcpu,ipsr);
- vmx_vcpu_set_psr(vcpu,psr);
- ifs=VPD_CR(vcpu,ifs);
- if((ifs>>63)&&(ifs<<1)){
- ifs=(regs->cr_ifs)&0x7f;
- regs->rfi_pfs = (ifs<<7)|ifs;
- regs->cr_ifs = VPD_CR(vcpu,ifs);
- }
- regs->cr_iip = VPD_CR(vcpu,iip);
- return (IA64_NO_FAULT);
-}
-
-
-UINT64
-vmx_vcpu_get_psr(VCPU *vcpu)
-{
- return VMX_VPD(vcpu,vpsr);
-}
-
-
-IA64FAULT
-vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val)
-{
- IA64_PSR vpsr;
-
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- if ( vpsr.bn ) {
- *val=VMX_VPD(vcpu,vgr[reg-16]);
- // Check NAT bit
- if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) {
- // TODO
- //panic ("NAT consumption fault\n");
- return IA64_FAULT;
- }
-
- }
- else {
- *val=VMX_VPD(vcpu,vbgr[reg-16]);
- if ( VMX_VPD(vcpu,vbnat) & (1UL<<reg) ) {
- //panic ("NAT consumption fault\n");
- return IA64_FAULT;
- }
-
- }
- return IA64_NO_FAULT;
-}
-
-IA64FAULT
-vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat)
-{
- IA64_PSR vpsr;
- vpsr.val = vmx_vcpu_get_psr(vcpu);
- if ( vpsr.bn ) {
- VMX_VPD(vcpu,vgr[reg-16]) = val;
- if(nat){
- VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) );
- }else{
- VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) );
- }
- }
- else {
- VMX_VPD(vcpu,vbgr[reg-16]) = val;
- if(nat){
- VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg) );
- }else{
- VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg) );
- }
- }
- return IA64_NO_FAULT;
-}
-
-
-
-IA64FAULT
-vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val)
-{
- REGS *regs=vcpu_regs(vcpu);
- int nat;
- //TODO, Eddie
- if (!regs) return 0;
- if (reg >= 16 && reg < 32) {
- return vmx_vcpu_get_bgr(vcpu,reg,val);
- }
- getreg(reg,val,&nat,regs); // FIXME: handle NATs later
- if(nat){
- return IA64_FAULT;
- }
- return IA64_NO_FAULT;
-}
-
-// returns:
-// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
-// IA64_NO_FAULT otherwise
-
-IA64FAULT
-vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat)
-{
- REGS *regs = vcpu_regs(vcpu);
- long sof = (regs->cr_ifs) & 0x7f;
- //TODO Eddie
-
- if (!regs) return IA64_ILLOP_FAULT;
- if (reg >= sof + 32) return IA64_ILLOP_FAULT;
- if ( reg >= 16 && reg < 32 ) {
- return vmx_vcpu_set_bgr(vcpu,reg, value, nat);
- }
- setreg(reg,value,nat,regs);
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
-{
- UINT64 vpsr;
- vpsr = vmx_vcpu_get_psr(vcpu);
- vpsr &= (~imm24);
- vmx_vcpu_set_psr(vcpu, vpsr);
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
-{
- UINT64 vpsr;
- vpsr = vmx_vcpu_get_psr(vcpu);
- vpsr |= imm24;
- vmx_vcpu_set_psr(vcpu, vpsr);
- return IA64_NO_FAULT;
-}
-
-
-IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
-{
- vmx_vcpu_set_psr(vcpu, val);
- return IA64_NO_FAULT;
-}
-
-IA64FAULT
-vmx_vcpu_set_tpr(VCPU *vcpu, u64 val)
-{
- VPD_CR(vcpu,tpr)=val;
- vcpu->arch.irq_new_condition = 1;
- return IA64_NO_FAULT;
-}
-
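vmx_vcpu_set_psr() in the hunk above ends by merging the guest-supplied PSR into the machine cr.ipsr: the bits collected in `mask` (ic/i/dt/si/rt/mc/it/bn/vm) keep their current machine value, everything else is taken from the guest. A minimal sketch of that merge follows; the bit positions used in main() are illustrative placeholders rather than the real IA64_PSR_* constants.

#include <stdio.h>

/* The merge at the end of vmx_vcpu_set_psr(): bits in `owned` keep the
 * machine value, all other bits come from the guest image. */
static unsigned long merge_psr(unsigned long machine_ipsr,
                               unsigned long guest_val,
                               unsigned long owned)
{
    return (machine_ipsr & owned) | (guest_val & ~owned);
}

int main(void)
{
    /* Placeholder positions: bits 13 and 14 standing in for psr.ic/psr.i. */
    unsigned long owned = (1UL << 13) | (1UL << 14);
    unsigned long ipsr  = (1UL << 13) | (1UL << 14);   /* machine has ic=i=1 */
    unsigned long guest = 0x21;                        /* arbitrary low bits */
    printf("%#lx\n", merge_psr(ipsr, guest, owned));   /* prints 0x6021 */
    return 0;
}

Keeping those bits hypervisor-owned is what lets the VMM preserve interrupt collection and address translation no matter what PSR image the guest writes.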
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_virt.c
--- a/xen/arch/ia64/vmx_virt.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1511 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_virt.c:
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Fred yang (fred.yang@xxxxxxxxx)
- * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-
-
-#include <asm/privop.h>
-#include <asm/vmx_vcpu.h>
-#include <asm/processor.h>
-#include <asm/delay.h> // Debug only
-#include <asm/vmmu.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/smp.h>
-
-#include <asm/virt_event.h>
-extern UINT64 privop_trace;
-
-void
-ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause)
-{
- *cause=0;
- switch (slot_type) {
- case M:
- if (inst.generic.major==0){
- if(inst.M28.x3==0){
- if(inst.M44.x4==6){
- *cause=EVENT_SSM;
- }else if(inst.M44.x4==7){
- *cause=EVENT_RSM;
- }else if(inst.M30.x4==8&&inst.M30.x2==2){
- *cause=EVENT_MOV_TO_AR_IMM;
- }
- }
- }
- else if(inst.generic.major==1){
- if(inst.M28.x3==0){
- if(inst.M32.x6==0x2c){
- *cause=EVENT_MOV_TO_CR;
- }else if(inst.M33.x6==0x24){
- *cause=EVENT_MOV_FROM_CR;
- }else if(inst.M35.x6==0x2d){
- *cause=EVENT_MOV_TO_PSR;
- }else if(inst.M36.x6==0x25){
- *cause=EVENT_MOV_FROM_PSR;
- }else if(inst.M29.x6==0x2A){
- *cause=EVENT_MOV_TO_AR;
- }else if(inst.M31.x6==0x22){
- *cause=EVENT_MOV_FROM_AR;
- }else if(inst.M45.x6==0x09){
- *cause=EVENT_PTC_L;
- }else if(inst.M45.x6==0x0A){
- *cause=EVENT_PTC_G;
- }else if(inst.M45.x6==0x0B){
- *cause=EVENT_PTC_GA;
- }else if(inst.M45.x6==0x0C){
- *cause=EVENT_PTR_D;
- }else if(inst.M45.x6==0x0D){
- *cause=EVENT_PTR_I;
- }else if(inst.M46.x6==0x1A){
- *cause=EVENT_THASH;
- }else if(inst.M46.x6==0x1B){
- *cause=EVENT_TTAG;
- }else if(inst.M46.x6==0x1E){
- *cause=EVENT_TPA;
- }else if(inst.M46.x6==0x1F){
- *cause=EVENT_TAK;
- }else if(inst.M47.x6==0x34){
- *cause=EVENT_PTC_E;
- }else if(inst.M41.x6==0x2E){
- *cause=EVENT_ITC_D;
- }else if(inst.M41.x6==0x2F){
- *cause=EVENT_ITC_I;
- }else if(inst.M42.x6==0x00){
- *cause=EVENT_MOV_TO_RR;
- }else if(inst.M42.x6==0x01){
- *cause=EVENT_MOV_TO_DBR;
- }else if(inst.M42.x6==0x02){
- *cause=EVENT_MOV_TO_IBR;
- }else if(inst.M42.x6==0x03){
- *cause=EVENT_MOV_TO_PKR;
- }else if(inst.M42.x6==0x04){
- *cause=EVENT_MOV_TO_PMC;
- }else if(inst.M42.x6==0x05){
- *cause=EVENT_MOV_TO_PMD;
- }else if(inst.M42.x6==0x0E){
- *cause=EVENT_ITR_D;
- }else if(inst.M42.x6==0x0F){
- *cause=EVENT_ITR_I;
- }else if(inst.M43.x6==0x10){
- *cause=EVENT_MOV_FROM_RR;
- }else if(inst.M43.x6==0x11){
- *cause=EVENT_MOV_FROM_DBR;
- }else if(inst.M43.x6==0x12){
- *cause=EVENT_MOV_FROM_IBR;
- }else if(inst.M43.x6==0x13){
- *cause=EVENT_MOV_FROM_PKR;
- }else if(inst.M43.x6==0x14){
- *cause=EVENT_MOV_FROM_PMC;
-/*
- }else if(inst.M43.x6==0x15){
- *cause=EVENT_MOV_FROM_PMD;
-*/
- }else if(inst.M43.x6==0x17){
- *cause=EVENT_MOV_FROM_CPUID;
- }
- }
- }
- break;
- case B:
- if(inst.generic.major==0){
- if(inst.B8.x6==0x02){
- *cause=EVENT_COVER;
- }else if(inst.B8.x6==0x08){
- *cause=EVENT_RFI;
- }else if(inst.B8.x6==0x0c){
- *cause=EVENT_BSW_0;
- }else if(inst.B8.x6==0x0d){
- *cause=EVENT_BSW_1;
- }
- }
- }
-}
-
-IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst)
-{
- UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
- return vmx_vcpu_reset_psr_sm(vcpu,imm24);
-}
-
-IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst)
-{
- UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm;
- return vmx_vcpu_set_psr_sm(vcpu,imm24);
-}
-
-unsigned long last_guest_psr = 0x0;
-IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst)
-{
- UINT64 tgt = inst.M33.r1;
- UINT64 val;
- IA64FAULT fault;
-
-/*
- if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT)
- return vmx_vcpu_set_gr(vcpu, tgt, val);
- else return fault;
- */
- val = vmx_vcpu_get_psr(vcpu);
- val = (val & MASK(0, 32)) | (val & MASK(35, 2));
- last_guest_psr = val;
- return vmx_vcpu_set_gr(vcpu, tgt, val, 0);
-}
-
-/**
- * @todo Check for reserved bits and return IA64_RSVDREG_FAULT.
- */
-IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst)
-{
- UINT64 val;
- IA64FAULT fault;
- if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT)
- panic(" get_psr nat bit fault\n");
-
- val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32));
-#if 0
- if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32))))
- while(1);
- else
- last_mov_from_psr = 0;
-#endif
- return vmx_vcpu_set_psr_l(vcpu,val);
-}
-
-
-/**************************************************************************
-Privileged operation emulation routines
-**************************************************************************/
-
-IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst)
-{
- IA64_PSR vpsr;
- REGS *regs;
-#ifdef CHECK_FAULT
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- regs=vcpu_regs(vcpu);
- vpsr.val=regs->cr_ipsr;
- if ( vpsr.is == 1 ) {
- panic ("We do not support IA32 instruction yet");
- }
-
- return vmx_vcpu_rfi(vcpu);
-}
-
-IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst)
-{
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- return vmx_vcpu_bsw0(vcpu);
-}
-
-IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst)
-{
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- return vmx_vcpu_bsw1(vcpu);
-}
-
-IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst)
-{
- return vmx_vcpu_cover(vcpu);
-}
-
-IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst)
-{
- u64 r2,r3;
- ISR isr;
- IA64_PSR vpsr;
-
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){
-#ifdef VMAL_NO_FAULT_CHECK
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif // VMAL_NO_FAULT_CHECK
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if (unimplemented_gva(vcpu,r3) ) {
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7));
-}
-
-IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst)
-{
- u64 r3;
- ISR isr;
- IA64_PSR vpsr;
-
- vpsr.val=vmx_vcpu_get_psr(vcpu);
-#ifdef VMAL_NO_FAULT_CHECK
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){
-#ifdef VMAL_NO_FAULT_CHECK
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif // VMAL_NO_FAULT_CHECK
- }
- return vmx_vcpu_ptc_e(vcpu,r3);
-}
-
-IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst)
-{
- return vmx_emul_ptc_l(vcpu, inst);
-}
-
-IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst)
-{
- return vmx_emul_ptc_l(vcpu, inst);
-}
-
-IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3)
-{
- ISR isr;
- IA64FAULT ret1, ret2;
-
-#ifdef VMAL_NO_FAULT_CHECK
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3);
- ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2);
-#ifdef VMAL_NO_FAULT_CHECK
- if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) {
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
- }
- if (unimplemented_gva(vcpu,r3) ) {
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst)
-{
- u64 r2,r3;
- if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT )
- return IA64_FAULT;
- return vmx_vcpu_ptr_d(vcpu,r3,bits(r2,2,7));
-}
-
-IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst)
-{
- u64 r2,r3;
- if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT )
- return IA64_FAULT;
- return vmx_vcpu_ptr_i(vcpu,r3,bits(r2,2,7));
-}
-
-
-IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst)
-{
- u64 r1,r3;
- ISR visr;
- IA64_PSR vpsr;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M46.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
-#ifdef CHECK_FAULT
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
- return IA64_NO_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(unimplemented_gva(vcpu, r3)){
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
- return IA64_NO_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_thash(vcpu, r3, &r1);
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
- return(IA64_NO_FAULT);
-}
-
-
-IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst)
-{
- u64 r1,r3;
- ISR visr;
- IA64_PSR vpsr;
- #ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M46.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
-#ifdef CHECK_FAULT
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
- return IA64_NO_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(unimplemented_gva(vcpu, r3)){
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1);
- return IA64_NO_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_ttag(vcpu, r3, &r1);
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
- return(IA64_NO_FAULT);
-}
-
-
-IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst)
-{
- u64 r1,r3;
- ISR visr;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M46.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if(vpsr.cpl!=0){
- visr.val=0;
- vcpu_set_isr(vcpu, visr.val);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,1);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if (unimplemented_gva(vcpu,r3) ) {
- // inject unimplemented_data_address_fault
- visr.val = set_isr_ei_ni(vcpu);
- visr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- // FAULT_UNIMPLEMENTED_DATA_ADDRESS.
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
-
- if(vmx_vcpu_tpa(vcpu, r3, &r1)){
- return IA64_FAULT;
- }
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
- return(IA64_NO_FAULT);
-}
-
-IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst)
-{
- u64 r1,r3;
- ISR visr;
- IA64_PSR vpsr;
- int fault=IA64_NO_FAULT;
-#ifdef CHECK_FAULT
- visr.val=0;
- if(check_target_register(vcpu, inst.M46.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if(vpsr.cpl!=0){
- vcpu_set_isr(vcpu, visr.val);
- return IA64_FAULT;
- }
-#endif
- if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,1);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif
- }
- if(vmx_vcpu_tak(vcpu, r3, &r1)){
- return IA64_FAULT;
- }
- vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
- return(IA64_NO_FAULT);
-}
-
-
-/************************************
- * Insert translation register/cache
-************************************/
-
-IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte, slot;
- ISR isr;
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.ic ) {
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
-#ifdef VMAL_NO_FAULT_CHECK
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif // VMAL_NO_FAULT_CHECK
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if(is_reserved_rr_register(vcpu, slot)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-
- if (vmx_vcpu_get_itir(vcpu,&itir)){
- return(IA64_FAULT);
- }
- if (vmx_vcpu_get_ifa(vcpu,&ifa)){
- return(IA64_FAULT);
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if (is_reserved_itir_field(vcpu, itir)) {
- // TODO
- return IA64_FAULT;
- }
- if (unimplemented_gva(vcpu,ifa) ) {
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-
- return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot));
-}
-
-IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst)
-{
- UINT64 fault, itir, ifa, pte, slot;
- ISR isr;
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.ic ) {
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-    if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){
-#ifdef VMAL_NO_FAULT_CHECK
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif // VMAL_NO_FAULT_CHECK
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if(is_reserved_rr_register(vcpu, slot)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-
- if (vmx_vcpu_get_itir(vcpu,&itir)){
- return(IA64_FAULT);
- }
- if (vmx_vcpu_get_ifa(vcpu,&ifa)){
- return(IA64_FAULT);
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if (is_reserved_itir_field(vcpu, itir)) {
- // TODO
- return IA64_FAULT;
- }
- if (unimplemented_gva(vcpu,ifa) ) {
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-
- return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot));
-}
-
-IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte)
-{
- UINT64 fault;
- ISR isr;
- IA64_PSR vpsr;
- IA64FAULT ret1;
-
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.ic ) {
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-
-#ifdef VMAL_NO_FAULT_CHECK
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte);
-#ifdef VMAL_NO_FAULT_CHECK
- if( ret1 != IA64_NO_FAULT ){
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
-
- if (vmx_vcpu_get_itir(vcpu,itir)){
- return(IA64_FAULT);
- }
- if (vmx_vcpu_get_ifa(vcpu,ifa)){
- return(IA64_FAULT);
- }
-#ifdef VMAL_NO_FAULT_CHECK
- if (unimplemented_gva(vcpu,ifa) ) {
- isr.val = set_isr_ei_ni(vcpu);
- isr.code = IA64_RESERVED_REG_FAULT;
- vcpu_set_isr(vcpu, isr.val);
- unimpl_daddr(vcpu);
- return IA64_FAULT;
- }
-#endif // VMAL_NO_FAULT_CHECK
- return IA64_NO_FAULT;
-}
-
-IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst)
-{
- UINT64 itir, ifa, pte;
-
- if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
- return IA64_FAULT;
- }
-
- return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa));
-}
-
-IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst)
-{
- UINT64 itir, ifa, pte;
-
- if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) {
- return IA64_FAULT;
- }
-
- return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa));
-
-}
-
-/*************************************
- * Moves to semi-privileged registers
-*************************************/
-
-IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst)
-{
- // I27 and M30 are identical for these fields
- if(inst.M30.ar3!=44){
- panic("Can't support ar register other than itc");
- }
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- UINT64 imm;
- if(inst.M30.s){
- imm = -inst.M30.imm;
- }else{
- imm = inst.M30.imm;
- }
- return (vmx_vcpu_set_itc(vcpu, imm));
-}
-
-IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst)
-{
- // I26 and M29 are identical for these fields
- u64 r2;
- if(inst.M29.ar3!=44){
- panic("Can't support ar register other than itc");
- }
- if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- return (vmx_vcpu_set_itc(vcpu, r2));
-}
-
-
-IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst)
-{
- // I27 and M30 are identical for these fields
- if(inst.M31.ar3!=44){
- panic("Can't support ar register other than itc");
- }
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu,inst.M31.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.si&& vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- u64 r1;
- vmx_vcpu_get_itc(vcpu,&r1);
- vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0);
- return IA64_NO_FAULT;
-}
-
-
-/********************************
- * Moves to privileged registers
-********************************/
-
-IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_pkr(vcpu,r3,r2));
-}
-
-IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_rr(vcpu,r3,r2));
-}
-
-IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_dbr(vcpu,r3,r2));
-}
-
-IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_ibr(vcpu,r3,r2));
-}
-
-IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_pmc(vcpu,r3,r2));
-}
-
-IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r2;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-    if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
- return (vmx_vcpu_set_pmd(vcpu,r3,r2));
-}
-
-
-/**********************************
- * Moves from privileged registers
- **********************************/
-
-IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_rr(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_indirect_register(vcpu,r3)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_pkr(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_indirect_register(vcpu,r3)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_dbr(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_indirect_register(vcpu,r3)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_ibr(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if (vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_indirect_register(vcpu,r3)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_pmc(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst)
-{
- u64 r3,r1;
-#ifdef CHECK_FAULT
- if(check_target_register(vcpu, inst.M43.r1)){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if(is_reserved_indirect_register(vcpu,r3)){
- set_rsv_reg_field_isr(vcpu);
- rsv_reg_field(vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- vmx_vcpu_get_cpuid(vcpu,r3,&r1);
- return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0);
-}
-
-IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst)
-{
- u64 r2,cr3;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
-    if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
- if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){
-#ifdef CHECK_FAULT
- set_isr_reg_nat_consumption(vcpu,0,0);
- rnat_comsumption(vcpu);
- return IA64_FAULT;
-#endif //CHECK_FAULT
- }
-#ifdef CHECK_FAULT
- if ( check_cr_rsv_fields (inst.M32.cr3, r2)) {
- /* Inject Reserved Register/Field fault
- * into guest */
- set_rsv_reg_field_isr (vcpu,0);
- rsv_reg_field (vcpu);
- return IA64_FAULT;
- }
-#endif //CHECK_FAULT
- extern u64 cr_igfld_mask(int index, u64 value);
- r2 = cr_igfld_mask(inst.M32.cr3,r2);
- VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2;
- switch (inst.M32.cr3) {
- case 0: return vmx_vcpu_set_dcr(vcpu,r2);
- case 1: return vmx_vcpu_set_itm(vcpu,r2);
- case 2: return vmx_vcpu_set_iva(vcpu,r2);
- case 8: return vmx_vcpu_set_pta(vcpu,r2);
- case 16:return vmx_vcpu_set_ipsr(vcpu,r2);
- case 17:return vmx_vcpu_set_isr(vcpu,r2);
- case 19:return vmx_vcpu_set_iip(vcpu,r2);
- case 20:return vmx_vcpu_set_ifa(vcpu,r2);
- case 21:return vmx_vcpu_set_itir(vcpu,r2);
- case 22:return vmx_vcpu_set_iipa(vcpu,r2);
- case 23:return vmx_vcpu_set_ifs(vcpu,r2);
- case 24:return vmx_vcpu_set_iim(vcpu,r2);
- case 25:return vmx_vcpu_set_iha(vcpu,r2);
- case 64:printk("SET LID to 0x%lx\n", r2);
- return vmx_vcpu_set_lid(vcpu,r2);
- case 65:return IA64_NO_FAULT;
- case 66:return vmx_vcpu_set_tpr(vcpu,r2);
- case 67:return vmx_vcpu_set_eoi(vcpu,r2);
- case 68:return IA64_NO_FAULT;
- case 69:return IA64_NO_FAULT;
- case 70:return IA64_NO_FAULT;
- case 71:return IA64_NO_FAULT;
- case 72:return vmx_vcpu_set_itv(vcpu,r2);
- case 73:return vmx_vcpu_set_pmv(vcpu,r2);
- case 74:return vmx_vcpu_set_cmcv(vcpu,r2);
- case 80:return vmx_vcpu_set_lrr0(vcpu,r2);
- case 81:return vmx_vcpu_set_lrr1(vcpu,r2);
- default: return IA64_NO_FAULT;
- }
-}
-
-
-#define cr_get(cr) \
- ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\
- vmx_vcpu_set_gr(vcpu, tgt, val,0):fault;
-
-
-IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst)
-{
- UINT64 tgt = inst.M33.r1;
- UINT64 val;
- IA64FAULT fault;
-#ifdef CHECK_FAULT
- IA64_PSR vpsr;
- vpsr.val=vmx_vcpu_get_psr(vcpu);
- if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3||
- (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3)))){
- set_illegal_op_isr(vcpu);
- illegal_op(vcpu);
- return IA64_FAULT;
- }
- if ( vpsr.cpl != 0) {
- /* Inject Privileged Operation fault into guest */
- set_privileged_operation_isr (vcpu, 0);
- privilege_op (vcpu);
- return IA64_FAULT;
- }
-#endif // CHECK_FAULT
-
-// from_cr_cnt[inst.M33.cr3]++;
- switch (inst.M33.cr3) {
- case 0: return cr_get(dcr);
- case 1: return cr_get(itm);
- case 2: return cr_get(iva);
- case 8: return cr_get(pta);
- case 16:return cr_get(ipsr);
- case 17:return cr_get(isr);
- case 19:return cr_get(iip);
- case 20:return cr_get(ifa);
- case 21:return cr_get(itir);
- case 22:return cr_get(iipa);
- case 23:return cr_get(ifs);
- case 24:return cr_get(iim);
- case 25:return cr_get(iha);
-// case 64:val = ia64_getreg(_IA64_REG_CR_LID);
-// return vmx_vcpu_set_gr(vcpu,tgt,val,0);
- case 64:return cr_get(lid);
- case 65:
- vmx_vcpu_get_ivr(vcpu,&val);
- return vmx_vcpu_set_gr(vcpu,tgt,val,0);
- case 66:return cr_get(tpr);
- case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0);
- case 68:return cr_get(irr0);
- case 69:return cr_get(irr1);
- case 70:return cr_get(irr2);
- case 71:return cr_get(irr3);
- case 72:return cr_get(itv);
- case 73:return cr_get(pmv);
- case 74:return cr_get(cmcv);
- case 80:return cr_get(lrr0);
- case 81:return cr_get(lrr1);
- default:
- panic("Read reserved cr register");
- }
-}
-
-
-static void post_emulation_action(VCPU *vcpu)
-{
- if ( vcpu->arch.irq_new_condition ) {
- vcpu->arch.irq_new_condition = 0;
- vhpi_detection(vcpu);
- }
-}
-
-//#define BYPASS_VMAL_OPCODE
-extern IA64_SLOT_TYPE slot_types[0x20][3];
-IA64_BUNDLE __vmx_get_domain_bundle(u64 iip)
-{
- IA64_BUNDLE bundle;
-
- fetch_code( current,iip, &bundle.i64[0]);
- fetch_code( current,iip+8, &bundle.i64[1]);
- return bundle;
-}
-
-/** Emulate a privileged operation.
- *
- *
- * @param vcpu virtual cpu
- * @cause the reason cause virtualization fault
- * @opcode the instruction code which cause virtualization fault
- */
-
-void
-vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode)
-{
- IA64_BUNDLE bundle;
- int slot;
- IA64_SLOT_TYPE slot_type;
- IA64FAULT status;
- INST64 inst;
- REGS * regs;
- UINT64 iip;
- regs = vcpu_regs(vcpu);
- iip = regs->cr_iip;
- IA64_PSR vpsr;
-/*
- if (privop_trace) {
- static long i = 400;
- //if (i > 0) printf("privop @%p\n",iip);
- if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n",
- iip,ia64_get_itc(),ia64_get_itm());
- i--;
- }
-*/
-#ifdef VTLB_DEBUG
- check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu));
- dump_vtlb(vmx_vcpu_get_vtlb(vcpu));
-#endif
-#if 0
-if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) {
- printf ("VMAL decode error: cause - %lx; op - %lx\n",
- cause, opcode );
- return;
-}
-#endif
-#ifdef BYPASS_VMAL_OPCODE
- // make a local copy of the bundle containing the privop
- bundle = __vmx_get_domain_bundle(iip);
- slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
- if (!slot) inst.inst = bundle.slot0;
- else if (slot == 1)
- inst.inst = bundle.slot1a + (bundle.slot1b<<18);
- else if (slot == 2) inst.inst = bundle.slot2;
- else printf("priv_handle_op: illegal slot: %d\n", slot);
- slot_type = slot_types[bundle.template][slot];
- ia64_priv_decoder(slot_type, inst, &cause);
- if(cause==0){
-        printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot);
- panic("123456\n");
- }
-#else
- inst.inst=opcode;
-#endif /* BYPASS_VMAL_OPCODE */
-
- /*
- * Switch to actual virtual rid in rr0 and rr4,
- * which is required by some tlb related instructions.
- */
- prepare_if_physical_mode(vcpu);
-
- switch(cause) {
- case EVENT_RSM:
- status=vmx_emul_rsm(vcpu, inst);
- break;
- case EVENT_SSM:
- status=vmx_emul_ssm(vcpu, inst);
- break;
- case EVENT_MOV_TO_PSR:
- status=vmx_emul_mov_to_psr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_PSR:
- status=vmx_emul_mov_from_psr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_CR:
- status=vmx_emul_mov_from_cr(vcpu, inst);
- break;
- case EVENT_MOV_TO_CR:
- status=vmx_emul_mov_to_cr(vcpu, inst);
- break;
- case EVENT_BSW_0:
- status=vmx_emul_bsw0(vcpu, inst);
- break;
- case EVENT_BSW_1:
- status=vmx_emul_bsw1(vcpu, inst);
- break;
- case EVENT_COVER:
- status=vmx_emul_cover(vcpu, inst);
- break;
- case EVENT_RFI:
- status=vmx_emul_rfi(vcpu, inst);
- break;
- case EVENT_ITR_D:
- status=vmx_emul_itr_d(vcpu, inst);
- break;
- case EVENT_ITR_I:
- status=vmx_emul_itr_i(vcpu, inst);
- break;
- case EVENT_PTR_D:
- status=vmx_emul_ptr_d(vcpu, inst);
- break;
- case EVENT_PTR_I:
- status=vmx_emul_ptr_i(vcpu, inst);
- break;
- case EVENT_ITC_D:
- status=vmx_emul_itc_d(vcpu, inst);
- break;
- case EVENT_ITC_I:
- status=vmx_emul_itc_i(vcpu, inst);
- break;
- case EVENT_PTC_L:
- status=vmx_emul_ptc_l(vcpu, inst);
- break;
- case EVENT_PTC_G:
- status=vmx_emul_ptc_g(vcpu, inst);
- break;
- case EVENT_PTC_GA:
- status=vmx_emul_ptc_ga(vcpu, inst);
- break;
- case EVENT_PTC_E:
- status=vmx_emul_ptc_e(vcpu, inst);
- break;
- case EVENT_MOV_TO_RR:
- status=vmx_emul_mov_to_rr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_RR:
- status=vmx_emul_mov_from_rr(vcpu, inst);
- break;
- case EVENT_THASH:
- status=vmx_emul_thash(vcpu, inst);
- break;
- case EVENT_TTAG:
- status=vmx_emul_ttag(vcpu, inst);
- break;
- case EVENT_TPA:
- status=vmx_emul_tpa(vcpu, inst);
- break;
- case EVENT_TAK:
- status=vmx_emul_tak(vcpu, inst);
- break;
- case EVENT_MOV_TO_AR_IMM:
- status=vmx_emul_mov_to_ar_imm(vcpu, inst);
- break;
- case EVENT_MOV_TO_AR:
- status=vmx_emul_mov_to_ar_reg(vcpu, inst);
- break;
- case EVENT_MOV_FROM_AR:
- status=vmx_emul_mov_from_ar_reg(vcpu, inst);
- break;
- case EVENT_MOV_TO_DBR:
- status=vmx_emul_mov_to_dbr(vcpu, inst);
- break;
- case EVENT_MOV_TO_IBR:
- status=vmx_emul_mov_to_ibr(vcpu, inst);
- break;
- case EVENT_MOV_TO_PMC:
- status=vmx_emul_mov_to_pmc(vcpu, inst);
- break;
- case EVENT_MOV_TO_PMD:
- status=vmx_emul_mov_to_pmd(vcpu, inst);
- break;
- case EVENT_MOV_TO_PKR:
- status=vmx_emul_mov_to_pkr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_DBR:
- status=vmx_emul_mov_from_dbr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_IBR:
- status=vmx_emul_mov_from_ibr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_PMC:
- status=vmx_emul_mov_from_pmc(vcpu, inst);
- break;
- case EVENT_MOV_FROM_PKR:
- status=vmx_emul_mov_from_pkr(vcpu, inst);
- break;
- case EVENT_MOV_FROM_CPUID:
- status=vmx_emul_mov_from_cpuid(vcpu, inst);
- break;
- case EVENT_VMSW:
- printf ("Unimplemented instruction %d\n", cause);
- status=IA64_FAULT;
- break;
- default:
- printf("unknown cause %d, iip: %lx, ipsr: %lx\n",
cause,regs->cr_iip,regs->cr_ipsr);
- while(1);
-        /* For an unknown cause, let the hardware re-execute */
- status=IA64_RETRY;
- break;
-// panic("unknown cause in virtualization intercept");
- };
-
-#if 0
- if (status == IA64_FAULT)
- panic("Emulation failed with cause %d:\n", cause);
-#endif
-
- if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) {
- vmx_vcpu_increment_iip(vcpu);
- }
-
- recover_if_physical_mode(vcpu);
- post_emulation_action (vcpu);
-//TODO set_irq_check(v);
- return;
-
-}
-
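
The emulation path removed above decodes the trapping bundle, switches rr0/rr4 to the guest rid when needed, dispatches on `cause` through a long switch to the vmx_emul_* handlers, and advances the guest IP for every successfully emulated instruction except rfi. The same dispatch could be expressed as a table of handlers; the sketch below only illustrates that pattern, and status_t, emul_handler_t, handler_table and dispatch_privop are made-up names, not part of the removed code.

#include <stddef.h>

/* Hypothetical stand-ins for the real types and handlers. */
typedef int status_t;                    /* e.g. IA64_NO_FAULT / IA64_FAULT / IA64_RETRY */
struct vcpu;                             /* opaque here */
typedef struct { unsigned long inst; } insn_t;
typedef status_t (*emul_handler_t)(struct vcpu *vcpu, insn_t inst);

#define NUM_EVENTS 64                    /* assumed upper bound on cause codes */

/* Indexed by cause, e.g. handler_table[EVENT_RSM] = vmx_emul_rsm. */
static emul_handler_t handler_table[NUM_EVENTS];

/*
 * Dispatch one intercepted privileged operation.  The caller still decides
 * whether to advance the guest IP (every event except EVENT_RFI) and how to
 * treat an unknown cause (retry or inject a fault).
 */
static status_t dispatch_privop(struct vcpu *vcpu, insn_t inst, unsigned int cause)
{
    emul_handler_t h = (cause < NUM_EVENTS) ? handler_table[cause] : NULL;

    if (h == NULL)
        return -1;                       /* unknown cause */
    return h(vcpu, inst);
}

A table keeps the cause-to-handler mapping data-driven, at the cost of losing the compiler's switch-coverage checks.
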
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_vsa.S
--- a/xen/arch/ia64/vmx_vsa.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,84 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vmx_vsa.c: Call PAL virtualization services.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Arun Sharma <arun.sharma@xxxxxxxxx>
- * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx)
- */
-
-#include <asm/asmmacro.h>
-
-
- .text
-
-/*
- * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2,
- * UINT64 arg3, UINT64 arg4, UINT64 arg5,
- * UINT64 arg6, UINT64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- * rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
- .regstk 4,4,0,0
-
-rpsave = loc0
-pfssave = loc1
-psrsave = loc2
-entry = loc3
-hostret = r24
-
- alloc pfssave=ar.pfs,4,4,0,0
- mov rpsave=rp
- movl entry=@gprel(__vsa_base)
-1: mov hostret=ip
- mov r25=in1 // copy arguments
- mov r26=in2
- mov r27=in3
- mov psrsave=psr
- ;;
- add entry=entry,gp
- tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
- tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
- ;;
- ld8 entry=[entry] // read entry point
- ;;
- add hostret=2f-1b,hostret // calculate return address
- add entry=entry,in0
- ;;
- rsm psr.i | psr.ic
- ;;
- srlz.d
- mov b6=entry
- br.cond.sptk b6 // call the service
-2:
- // Architectural sequence for enabling interrupts if necessary
-(p7) ssm psr.ic
- ;;
-(p7) srlz.d
- ;;
-(p6) ssm psr.i
- ;;
- mov rp=rpsave
- mov ar.pfs=pfssave
- mov r8=r31
- ;;
- srlz.d
- br.ret.sptk rp
-
-END(ia64_call_vsa)
-
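
ia64_call_vsa above loads the value stored at __vsa_base, adds the caller-supplied proc offset to obtain the service entry point, masks psr.i/psr.ic around the indirect branch, and conditionally restores them afterwards. The C outline below is only a rough sketch of that control flow: read_vsa_base, irq_save/irq_restore and the function-pointer cast are hypothetical stand-ins, not the real interface, and only proc plus three arguments are forwarded, matching what the stub actually copies.

#include <stdint.h>

typedef uint64_t u64;

/* Assumed helpers: the __vsa_base read and the psr.i/psr.ic masking are
 * abstracted behind these; they are not the real Xen interface. */
extern u64  read_vsa_base(void);         /* returns the value stored at __vsa_base */
extern u64  irq_save(void);              /* stands in for rsm psr.i | psr.ic */
extern void irq_restore(u64 flags);      /* stands in for the conditional ssm */

typedef u64 (*vsa_service_t)(u64 a1, u64 a2, u64 a3);

/* C-level outline of the ia64_call_vsa flow: proc selects the service by
 * offsetting the base address, and the call runs with interrupts masked. */
static u64 call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3)
{
    u64 flags = irq_save();
    vsa_service_t entry = (vsa_service_t)(read_vsa_base() + proc);
    u64 ret = entry(arg1, arg2, arg3);
    irq_restore(flags);
    return ret;
}
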
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vtlb.c
--- a/xen/arch/ia64/vtlb.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,1094 +0,0 @@
-
-/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx)
- * XiaoYan Feng (Fleming Feng) (Fleming.feng@xxxxxxxxx)
- */
-
-#include <linux/sched.h>
-#include <asm/tlb.h>
-#include <asm/mm.h>
-#include <asm/vmx_mm_def.h>
-#include <asm/gcc_intrin.h>
-#include <linux/interrupt.h>
-#include <asm/vmx_vcpu.h>
-#define MAX_CCH_LENGTH 40
-
-
-static void cch_mem_init(thash_cb_t *hcb)
-{
- thash_cch_mem_t *p, *q;
-
- hcb->cch_freelist = p = hcb->cch_buf;
-
- for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz;
- p++, q++ ) {
- p->next = q;
- }
- p->next = NULL;
-}
-
-static thash_data_t *cch_alloc(thash_cb_t *hcb)
-{
- thash_cch_mem_t *p;
-
- if ( (p = hcb->cch_freelist) != NULL ) {
- hcb->cch_freelist = p->next;
- }
- return &(p->data);
-}
-
-static void cch_free(thash_cb_t *hcb, thash_data_t *cch)
-{
- thash_cch_mem_t *p = (thash_cch_mem_t*)cch;
-
- p->next = hcb->cch_freelist;
- hcb->cch_freelist = p;
-}
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl)
-{
- u64 size1,sa1,ea1;
-
- if ( tlb->rid != rid || tlb->cl != cl )
- return 0;
- size1 = PSIZE(tlb->ps);
- sa1 = tlb->vadr & ~(size1-1); // mask the low address bits
- ea1 = sa1 + size1;
-
- if ( va >= sa1 && (va < ea1 || ea1 == 0) )
- return 1;
- else
- return 0;
-}
-
-/*
- * Only for TLB format.
- */
-static int
-__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva)
-{
- uint64_t size1,size2,sa1,ea1,ea2;
-
- if ( entry->invalid || entry->rid != rid || entry->cl != cl ) {
- return 0;
- }
- size1=PSIZE(entry->ps);
- sa1 = entry->vadr & ~(size1-1); // mask the low address bits
- ea1 = sa1 + size1;
- if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) )
- return 0;
- else
- return 1;
-
-}
-
-static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr)
-{
- if ( hcb->remove_notifier ) {
- (hcb->remove_notifier)(hcb,tr);
- }
- tr->invalid = 1;
-}
-
-static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx)
-{
- *tr = *data;
- tr->tr_idx = idx;
-}
-
-
-static void __init_tr(thash_cb_t *hcb)
-{
- int i;
- thash_data_t *tr;
-
- for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) {
- tr[i].invalid = 1;
- }
- for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) {
- tr[i].invalid = 1;
- }
-}
-
-/*
- * Replace TR entry.
- */
-static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx)
-{
- thash_data_t *tr;
-
- if ( insert->cl == ISIDE_TLB ) {
- tr = &ITR(hcb,idx);
- }
- else {
- tr = &DTR(hcb,idx);
- }
- if ( !INVALID_TLB(tr) ) {
- __rem_tr(hcb, tr);
- }
- __set_tr (tr, insert, idx);
-}
-
-/*
- * remove TR entry.
- */
-static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx)
-{
- thash_data_t *tr;
-
- if ( cl == ISIDE_TLB ) {
- tr = &ITR(hcb,idx);
- }
- else {
- tr = &DTR(hcb,idx);
- }
- if ( !INVALID_TLB(tr) ) {
- __rem_tr(hcb, tr);
- }
-}
-
-/*
- * Delete a thash entry from the collision chain.
- * prev: the previous entry.
- * rem: the removed entry.
- */
-static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem)
-{
- //prev->next = rem->next;
- if ( hcb->remove_notifier ) {
- (hcb->remove_notifier)(hcb,rem);
- }
- cch_free (hcb, rem);
-}
-
-/*
- * Delete a thash entry at the head of the collision chain.
- */
-static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash)
-{
- thash_data_t *next=hash->next;
-
- if ( hcb->remove_notifier ) {
- (hcb->remove_notifier)(hcb,hash);
- }
- if ( next != NULL ) {
- *hash = *next;
- cch_free (hcb, next);
- }
- else {
- INVALIDATE_HASH(hcb, hash);
- }
-}
-
-thash_data_t *__vtr_lookup(thash_cb_t *hcb,
- u64 rid, u64 va,
- CACHE_LINE_TYPE cl)
-{
- thash_data_t *tr;
- int num,i;
-
- if ( cl == ISIDE_TLB ) {
- tr = &ITR(hcb,0);
- num = NITRS;
- }
- else {
- tr = &DTR(hcb,0);
- num = NDTRS;
- }
- for ( i=0; i<num; i++ ) {
- if ( !INVALID_ENTRY(hcb,&tr[i]) &&
- __is_translated(&tr[i], rid, va, cl) )
- return &tr[i];
- }
- return NULL;
-}
-
-
-/*
- * Find an overlapping VHPT entry within the current collision chain,
- * based on internal priv info.
- */
-static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb)
-{
- thash_data_t *cch;
- thash_internal_t *priv = &hcb->priv;
-
-
- for (cch=priv->cur_cch; cch; cch = cch->next) {
- if ( priv->tag == cch->etag ) {
- return cch;
- }
- }
- return NULL;
-}
-
-/*
- * Find an overlapping TLB/VHPT entry within the current collision chain,
- * based on internal priv info.
- */
-static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb)
-{
- thash_data_t *cch;
- thash_internal_t *priv = &hcb->priv;
-
- /* Find overlap TLB entry */
- for (cch=priv->cur_cch; cch; cch = cch->next) {
- if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr ) &&
- __is_tlb_overlap(hcb, cch, priv->rid, priv->cl,
- priv->_curva, priv->_eva) ) {
- return cch;
- }
- }
- return NULL;
-}
-
-/*
- * Get the machine format of VHPT entry.
- * PARAS:
- *  1: tlb: the TLB-format hash entry being converted to VHPT.
- *  2: va: the guest virtual address that must be covered by
- *     the translated machine VHPT entry.
- *  3: vhpt: the machine-format VHPT entry converted from tlb.
- * NOTES:
- *  1: In case the machine address is discontiguous,
- *     "tlb" needs to be covered by several machine VHPT entries;
- *     va is used to choose one of them.
- *  2: Foreign map is supported in this API.
- * RETURN:
- *  0/1: failure or success.
- *
- */
-int __tlb_to_vhpt(thash_cb_t *hcb,
- thash_data_t *tlb, u64 va,
- thash_data_t *vhpt)
-{
- u64 pages,mfn;
- ia64_rr vrr;
-
- ASSERT ( hcb->ht == THASH_VHPT );
- vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
- pages = PSIZE(vrr.ps) >> PAGE_SHIFT;
- mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages);
- if ( mfn == INVALID_MFN ) return 0;
-
- // TODO with machine discontinuous address space issue.
- vhpt->etag = (hcb->vs->tag_func)( hcb->pta,
- tlb->vadr, tlb->rid, tlb->ps);
- //vhpt->ti = 0;
- vhpt->itir = tlb->itir & ~ITIR_RV_MASK;
- vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK;
- vhpt->ppn = mfn;
- vhpt->next = 0;
- return 1;
-}
-
-
-/*
- * Insert an entry into the hash table.
- * NOTES:
- *  1: A TLB entry may be TR, TC or Foreign Map. For a TR entry,
- *     itr[]/dtr[] need to be updated too.
- *  2: Inserting into the collision chain may trigger recycling if
- *     the free buffer for the collision chain is exhausted.
- *  3: The new entry is inserted into the hash table slot itself,
- *     i.e. at the head of the collision chain.
- *  4: The buffer holding the entry is allocated internally
- *     from cch_buf or is just the hash table slot.
- *  5: Return the entry in the hash table or collision chain.
- *  6: The input parameter, entry, should be in TLB format,
- *     i.e. it has va, rid, ps...
- *  7: This API is invoked when emulating ITC/ITR and on tlb_miss.
- *
- */
-
-void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx)
-{
- if ( hcb->ht != THASH_TLB || entry->tc ) {
- panic("wrong parameter\n");
- }
- entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
- entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
- rep_tr(hcb, entry, idx);
- return ;
-}
-
-thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry)
-{
- thash_data_t *cch;
-
- cch = cch_alloc(hcb);
- if(cch == NULL){
- // recycle
- if ( hcb->recycle_notifier ) {
- hcb->recycle_notifier(hcb,(u64)entry);
- }
- thash_purge_all(hcb);
- cch = cch_alloc(hcb);
- }
- return cch;
-}
-
-/*
- * Insert an entry into hash TLB or VHPT.
- * NOTES:
- *  1: When inserting a VHPT entry into the thash, "va" is an address
- *     that must be covered by the inserted machine VHPT entry.
- *  2: The format of the entry is always TLB.
- *  3: The caller needs to make sure the new entry will not overlap
- *     with any existing entry.
- */
-void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
-{
- thash_data_t *hash_table, *cch;
- int flag;
- ia64_rr vrr;
- u64 gppn;
- u64 ppns, ppne;
-
- hash_table = (hcb->hash_func)(hcb->pta,
- va, entry->rid, entry->ps);
- if( INVALID_ENTRY(hcb, hash_table) ) {
- *hash_table = *entry;
- hash_table->next = 0;
- }
- else {
- // TODO: Add collision chain length limitation.
- cch = __alloc_chain(hcb,entry);
-
- *cch = *hash_table;
- *hash_table = *entry;
- hash_table->next = cch;
- }
- if(hcb->vcpu->domain->domain_id==0){
- thash_insert(hcb->ts->vhpt, entry, va);
- return;
- }
- flag = 1;
-    gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT;
- ppns = PAGEALIGN((entry->ppn<<12),entry->ps);
- ppne = ppns + PSIZE(entry->ps);
- if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000)))
- flag = 0;
- if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag))
- thash_insert(hcb->ts->vhpt, entry, va);
- return ;
-}
-
-static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
-{
- thash_data_t *hash_table, *cch;
- ia64_rr vrr;
-
- hash_table = (hcb->hash_func)(hcb->pta,
- va, entry->rid, entry->ps);
- if( INVALID_ENTRY(hcb, hash_table) ) {
- if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
- panic("Can't convert to machine VHPT entry\n");
- }
- hash_table->next = 0;
- }
- else {
- // TODO: Add collision chain length limitation.
- cch = __alloc_chain(hcb,entry);
-
- *cch = *hash_table;
- if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) {
- panic("Can't convert to machine VHPT entry\n");
- }
- hash_table->next = cch;
- if(hash_table->tag==hash_table->next->tag)
- while(1);
- }
- return /*hash_table*/;
-}
-
-void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
-{
- thash_data_t *hash_table;
- ia64_rr vrr;
-
- vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr);
- if ( entry->ps != vrr.ps && entry->tc ) {
- panic("Not support for multiple page size now\n");
- }
- entry->vadr = PAGEALIGN(entry->vadr,entry->ps);
- entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12);
- (hcb->ins_hash)(hcb, entry, va);
-
-}
-
-static void rem_thash(thash_cb_t *hcb, thash_data_t *entry)
-{
- thash_data_t *hash_table, *p, *q;
- thash_internal_t *priv = &hcb->priv;
- int idx;
-
- hash_table = priv->hash_base;
- if ( hash_table == entry ) {
-// if ( PURGABLE_ENTRY(hcb, entry) ) {
- __rem_hash_head (hcb, entry);
-// }
- return ;
- }
- // remove from collision chain
- p = hash_table;
- for ( q=p->next; q; q = p->next ) {
- if ( q == entry ){
-// if ( PURGABLE_ENTRY(hcb,q ) ) {
- p->next = q->next;
- __rem_chain(hcb, entry);
-// }
- return ;
- }
- p = q;
- }
- panic("Entry not existed or bad sequence\n");
-}
-
-static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry)
-{
- thash_data_t *hash_table, *p, *q;
- thash_internal_t *priv = &hcb->priv;
- int idx;
-
- if ( !entry->tc ) {
- return rem_tr(hcb, entry->cl, entry->tr_idx);
- }
- rem_thash(hcb, entry);
-}
-
-int cch_depth=0;
-/*
- * Purge the collision chain starting from cch.
- * NOTE:
- *     For UN-purgable entries (FM), this function returns
- *     the head of the remaining collision chain.
- */
-static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch)
-{
- thash_data_t *next;
-
- if ( ++cch_depth > MAX_CCH_LENGTH ) {
- printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n");
- while(1);
- }
- if ( cch -> next ) {
- next = thash_rem_cch(hcb, cch->next);
- }
- else {
- next = NULL;
- }
- if ( PURGABLE_ENTRY(hcb, cch) ) {
- __rem_chain(hcb, cch);
- return next;
- }
- else {
- cch->next = next;
- return cch;
- }
-}
-
-/*
- * Purge one hash line (include the entry in hash table).
- * Can only be called by thash_purge_all.
- * Input:
- * hash: The head of collision chain (hash table)
- *
- */
-static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash)
-{
- if ( INVALID_ENTRY(hcb, hash) ) return;
-
- if ( hash->next ) {
- cch_depth = 0;
- hash->next = thash_rem_cch(hcb, hash->next);
- }
- // Then hash table itself.
- if ( PURGABLE_ENTRY(hcb, hash) ) {
- __rem_hash_head(hcb, hash);
- }
-}
-
-
-/*
- * Find an overlapping entry in the hash table and its collision chain.
- * Refer to SDM2 4.1.1.4 for the definition of overlap.
- * PARAS:
- *  1: in: TLB format entry; rid/ps must be the same as in vrr[].
- *     va & ps identify the address space for the overlap lookup.
- *  2: section can be a combination of TR, TC and FM. (THASH_SECTION_XX)
- *  3: cl means I side or D side.
- * RETURNS:
- *     NULL indicates the end of the findings.
- * NOTES:
- *
- */
-thash_data_t *thash_find_overlap(thash_cb_t *hcb,
- thash_data_t *in, search_section_t s_sect)
-{
- return (hcb->find_overlap)(hcb, in->vadr,
- PSIZE(in->ps), in->rid, in->cl, s_sect);
-}
-
-static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb,
- u64 va, u64 size, int rid, char cl, search_section_t s_sect)
-{
- thash_data_t *hash_table;
- thash_internal_t *priv = &hcb->priv;
- u64 tag;
- ia64_rr vrr;
-
- priv->_curva = va & ~(size-1);
- priv->_eva = priv->_curva + size;
- priv->rid = rid;
- vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
- priv->ps = vrr.ps;
- hash_table = (hcb->hash_func)(hcb->pta,
- priv->_curva, rid, priv->ps);
-
- priv->s_sect = s_sect;
- priv->cl = cl;
- priv->_tr_idx = 0;
- priv->hash_base = hash_table;
- priv->cur_cch = hash_table;
- return (hcb->next_overlap)(hcb);
-}
-
-static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb,
- u64 va, u64 size, int rid, char cl, search_section_t s_sect)
-{
- thash_data_t *hash_table;
- thash_internal_t *priv = &hcb->priv;
- u64 tag;
- ia64_rr vrr;
-
- priv->_curva = va & ~(size-1);
- priv->_eva = priv->_curva + size;
- priv->rid = rid;
- vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
- priv->ps = vrr.ps;
- hash_table = (hcb->hash_func)( hcb->pta,
- priv->_curva, rid, priv->ps);
- tag = (hcb->vs->tag_func)( hcb->pta,
- priv->_curva, rid, priv->ps);
-
- priv->tag = tag;
- priv->hash_base = hash_table;
- priv->cur_cch = hash_table;
- return (hcb->next_overlap)(hcb);
-}
-
-
-static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb)
-{
- thash_data_t *tr;
- thash_internal_t *priv = &hcb->priv;
- int num;
-
- if ( priv->cl == ISIDE_TLB ) {
- num = NITRS;
- tr = &ITR(hcb,0);
- }
- else {
- num = NDTRS;
- tr = &DTR(hcb,0);
- }
- for (; priv->_tr_idx < num; priv->_tr_idx ++ ) {
- if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx],
- priv->rid, priv->cl,
- priv->_curva, priv->_eva) ) {
- return &tr[priv->_tr_idx++];
- }
- }
- return NULL;
-}
-
-/*
- * Similar to vtlb_find_overlap, but finds the next overlapping entry.
- * NOTES:
- * Intermediate position information is stored in hcb->priv.
- */
-static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb)
-{
- thash_data_t *ovl;
- thash_internal_t *priv = &hcb->priv;
- u64 addr,rr_psize;
- ia64_rr vrr;
-
- if ( priv->s_sect.tr ) {
- ovl = vtr_find_next_overlap (hcb);
- if ( ovl ) return ovl;
- priv->s_sect.tr = 0;
- }
- if ( priv->s_sect.v == 0 ) return NULL;
- vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
- rr_psize = PSIZE(vrr.ps);
-
- while ( priv->_curva < priv->_eva ) {
- if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
- ovl = _vtlb_next_overlap_in_chain(hcb);
- if ( ovl ) {
- priv->cur_cch = ovl->next;
- return ovl;
- }
- }
- priv->_curva += rr_psize;
- priv->hash_base = (hcb->hash_func)( hcb->pta,
- priv->_curva, priv->rid, priv->ps);
- priv->cur_cch = priv->hash_base;
- }
- return NULL;
-}
-
-static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb)
-{
- thash_data_t *ovl;
- thash_internal_t *priv = &hcb->priv;
- u64 addr,rr_psize;
- ia64_rr vrr;
-
- vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
- rr_psize = PSIZE(vrr.ps);
-
- while ( priv->_curva < priv->_eva ) {
- if ( !INVALID_ENTRY(hcb, priv->hash_base) ) {
- ovl = _vhpt_next_overlap_in_chain(hcb);
- if ( ovl ) {
- priv->cur_cch = ovl->next;
- return ovl;
- }
- }
- priv->_curva += rr_psize;
- priv->hash_base = (hcb->hash_func)( hcb->pta,
- priv->_curva, priv->rid, priv->ps);
- priv->tag = (hcb->vs->tag_func)( hcb->pta,
- priv->_curva, priv->rid, priv->ps);
- priv->cur_cch = priv->hash_base;
- }
- return NULL;
-}
-
-
-/*
- * Find and purge overlapping entries in the hash table and its collision chain.
- * PARAS:
- *  1: in: TLB format entry; rid/ps must be the same as in vrr[].
- *     rid, va & ps identify the address space for the purge.
- *  2: section can be a combination of TR, TC and FM. (thash_SECTION_XX)
- * 3: cl means I side or D side.
- * NOTES:
- *
- */
-void thash_purge_entries(thash_cb_t *hcb,
- thash_data_t *in, search_section_t p_sect)
-{
- return thash_purge_entries_ex(hcb, in->rid, in->vadr,
- in->ps, p_sect, in->cl);
-}
-
-void thash_purge_entries_ex(thash_cb_t *hcb,
- u64 rid, u64 va, u64 ps,
- search_section_t p_sect,
- CACHE_LINE_TYPE cl)
-{
- thash_data_t *ovl;
-
- ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect);
- while ( ovl != NULL ) {
- (hcb->rem_hash)(hcb, ovl);
- ovl = (hcb->next_overlap)(hcb);
- };
-}
-
-/*
- * Purge overlapping TCs and then insert the new entry, to emulate itc ops.
- * Notes: Only TC entries can be purged and inserted this way.
- */
-void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in)
-{
- thash_data_t *ovl;
- search_section_t sections;
-
-#ifdef XEN_DEBUGGER
- vrr = (hcb->get_rr_fn)(hcb->vcpu,in->vadr);
- if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) {
- panic ("Oops, wrong call for purge_and_insert\n");
- return;
- }
-#endif
- in->vadr = PAGEALIGN(in->vadr,in->ps);
- in->ppn = PAGEALIGN(in->ppn, in->ps-12);
- sections.tr = 0;
- sections.tc = 1;
- ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps),
- in->rid, in->cl, sections);
- if(ovl)
- (hcb->rem_hash)(hcb, ovl);
-#ifdef XEN_DEBUGGER
- ovl = (hcb->next_overlap)(hcb);
- if ( ovl ) {
- panic ("Oops, 2+ overlaps for purge_and_insert\n");
- return;
- }
-#endif
- (hcb->ins_hash)(hcb, in, in->vadr);
-}
-
-/*
- * Purge all TCs or VHPT entries, including those in the hash table.
- *
- */
-
-// TODO: add sections.
-void thash_purge_all(thash_cb_t *hcb)
-{
- thash_data_t *hash_table;
-
-#ifdef VTLB_DEBUG
- extern u64 sanity_check;
- static u64 statistics_before_purge_all=0;
- if ( statistics_before_purge_all ) {
- sanity_check = 1;
- check_vtlb_sanity(hcb);
- }
-#endif
-
- hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
-
- for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
- thash_rem_line(hcb, hash_table);
- }
-}
-
-
-/*
- * Lookup the hash table and its collision chain to find an entry
- * covering this address rid:va or the entry.
- *
- * INPUT:
- * in: TLB format for both VHPT & TLB.
- */
-thash_data_t *vtlb_lookup(thash_cb_t *hcb,
- thash_data_t *in)
-{
- return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl);
-}
-
-thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb,
- u64 rid, u64 va,
- CACHE_LINE_TYPE cl)
-{
- thash_data_t *hash_table, *cch;
- u64 tag;
- ia64_rr vrr;
-
- ASSERT ( hcb->ht == THASH_VTLB );
-
- cch = __vtr_lookup(hcb, rid, va, cl);;
- if ( cch ) return cch;
-
- vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
- hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps);
-
- if ( INVALID_ENTRY(hcb, hash_table ) )
- return NULL;
-
-
- for (cch=hash_table; cch; cch = cch->next) {
- if ( __is_translated(cch, rid, va, cl) )
- return cch;
- }
- return NULL;
-}
-
-/*
- * Lock/Unlock TC if found.
- *     NOTES: Only the page in the preferred size can be handled.
- * return:
- * 1: failure
- * 0: success
- */
-int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock)
-{
- thash_data_t *ovl;
- search_section_t sections;
-
- sections.tr = 1;
- sections.tc = 1;
- ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections);
- if ( ovl ) {
- if ( !ovl->tc ) {
-// panic("Oops, TR for lock\n");
- return 0;
- }
- else if ( lock ) {
- if ( ovl->locked ) {
- DPRINTK("Oops, already locked entry\n");
- }
- ovl->locked = 1;
- }
- else if ( !lock ) {
- if ( !ovl->locked ) {
- DPRINTK("Oops, already unlocked entry\n");
- }
- ovl->locked = 0;
- }
- return 0;
- }
- return 1;
-}
-
-/*
- * Notifier invoked when a TLB entry is deleted from the hash table and its collision chain.
- * NOTES:
- *     The typical situation is that a TLB removal needs to inform
- *     the VHPT to remove the corresponding entry too.
- * PARAS:
- * 1: hcb is TLB object.
- * 2: The format of entry is always in TLB.
- *
- */
-void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry)
-{
- thash_cb_t *vhpt;
- search_section_t s_sect;
-
- s_sect.v = 0;
- thash_purge_entries(hcb->ts->vhpt, entry, s_sect);
- machine_tlb_purge(entry->rid, entry->vadr, entry->ps);
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(thash_cb_t *hcb, u64 sz)
-{
- thash_data_t *hash_table;
-
- cch_mem_init (hcb);
- hcb->magic = THASH_CB_MAGIC;
- hcb->pta.val = hcb->hash;
- hcb->pta.vf = 1;
- hcb->pta.ve = 1;
- hcb->pta.size = sz;
- hcb->get_rr_fn = vmmu_get_rr;
- ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 );
- if ( hcb->ht == THASH_TLB ) {
- hcb->remove_notifier = tlb_remove_notifier;
- hcb->find_overlap = vtlb_find_overlap;
- hcb->next_overlap = vtlb_next_overlap;
- hcb->rem_hash = rem_vtlb;
- hcb->ins_hash = vtlb_insert;
- __init_tr(hcb);
- }
- else {
- hcb->remove_notifier = NULL;
- hcb->find_overlap = vhpt_find_overlap;
- hcb->next_overlap = vhpt_next_overlap;
- hcb->rem_hash = rem_thash;
- hcb->ins_hash = vhpt_insert;
- }
- hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz);
-
- for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) {
- INVALIDATE_HASH(hcb,hash_table);
- }
-}
-
-#ifdef VTLB_DEBUG
-static u64 cch_length_statistics[MAX_CCH_LENGTH+1];
-u64 sanity_check=0;
-u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash)
-{
- thash_data_t *cch;
- thash_data_t *ovl;
- search_section_t s_sect;
- u64 num=0;
-
- s_sect.v = 0;
- for (cch=hash; cch; cch=cch->next) {
- ovl = thash_find_overlap(vhpt, cch, s_sect);
- while ( ovl != NULL ) {
- ovl->checked = 1;
- ovl = (vhpt->next_overlap)(vhpt);
- };
- num ++;
- }
- if ( num >= MAX_CCH_LENGTH ) {
- cch_length_statistics[MAX_CCH_LENGTH] ++;
- }
- else {
- cch_length_statistics[num] ++;
- }
- return num;
-}
-
-void check_vtlb_sanity(thash_cb_t *vtlb)
-{
-// struct pfn_info *page;
- u64 hash_num, i, psr;
- static u64 check_ok_num, check_fail_num,check_invalid;
-// void *vb1, *vb2;
- thash_data_t *hash, *cch;
- thash_data_t *ovl;
- search_section_t s_sect;
- thash_cb_t *vhpt = vtlb->ts->vhpt;
- u64 invalid_ratio;
-
- if ( sanity_check == 0 ) return;
- sanity_check --;
- s_sect.v = 0;
-// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0);
-// if ( page == NULL ) {
-// panic("No enough contiguous memory for init_domain_mm\n");
-// };
-// vb1 = page_to_virt(page);
-// printf("Allocated page=%lp vbase=%lp\n", page, vb1);
-// vb2 = vb1 + vtlb->hash_sz;
- hash_num = vhpt->hash_sz / sizeof(thash_data_t);
-// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num);
- printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n",
- vtlb, vtlb->hash,vtlb->hash_sz,
- vhpt, vhpt->hash, vhpt->hash_sz);
- //memcpy(vb1, vtlb->hash, vtlb->hash_sz);
- //memcpy(vb2, vhpt->hash, vhpt->hash_sz);
-    for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
- cch_length_statistics[i] = 0;
- }
-
- local_irq_save(psr);
-
- hash = vhpt->hash;
- for (i=0; i < hash_num; i++) {
- if ( !INVALID_ENTRY(vhpt, hash) ) {
- for ( cch= hash; cch; cch=cch->next) {
- cch->checked = 0;
- }
- }
- hash ++;
- }
- printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num);
- check_invalid = 0;
- check_ok_num=0;
- hash = vtlb->hash;
- for ( i=0; i< hash_num; i++ ) {
- if ( !INVALID_ENTRY(vtlb, hash) ) {
- check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash);
- }
- else {
- check_invalid++;
- }
- hash ++;
- }
- printf("Done vtlb entry check, hash=%lp\n", hash);
- printf("check_ok_num = 0x%lx check_invalid=0x%lx\n",
check_ok_num,check_invalid);
- invalid_ratio = 1000*check_invalid / hash_num;
- printf("%02ld.%01ld%% entries are invalid\n",
- invalid_ratio/10, invalid_ratio % 10 );
- for (i=0; i<NDTRS; i++) {
- ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect);
- while ( ovl != NULL ) {
- ovl->checked = 1;
- ovl = (vhpt->next_overlap)(vhpt);
- };
- }
- printf("Done dTR\n");
- for (i=0; i<NITRS; i++) {
- ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect);
- while ( ovl != NULL ) {
- ovl->checked = 1;
- ovl = (vhpt->next_overlap)(vhpt);
- };
- }
- printf("Done iTR\n");
- check_fail_num = 0;
- check_invalid = 0;
- check_ok_num=0;
- hash = vhpt->hash;
- for (i=0; i < hash_num; i++) {
- if ( !INVALID_ENTRY(vhpt, hash) ) {
- for ( cch= hash; cch; cch=cch->next) {
- if ( !cch->checked ) {
- printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash,
cch);
- check_fail_num ++;
- }
- else {
- check_ok_num++;
- }
- }
- }
- else {
- check_invalid ++;
- }
- hash ++;
- }
- local_irq_restore(psr);
- printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n",
- check_ok_num, check_fail_num, check_invalid);
- //memcpy(vtlb->hash, vb1, vtlb->hash_sz);
- //memcpy(vhpt->hash, vb2, vhpt->hash_sz);
- printf("The statistics of collision chain length is listed\n");
-    for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) {
- printf("CCH length=%02ld, chain number=%ld\n", i,
cch_length_statistics[i]);
- }
-// free_domheap_pages(page, VCPU_TLB_ORDER);
- printf("Done check_vtlb\n");
-}
-
-void dump_vtlb(thash_cb_t *vtlb)
-{
- static u64 dump_vtlb=0;
- thash_data_t *hash, *cch, *tr;
- u64 hash_num,i;
-
- if ( dump_vtlb == 0 ) return;
- dump_vtlb --;
- hash_num = vtlb->hash_sz / sizeof(thash_data_t);
- hash = vtlb->hash;
-
- printf("Dump vTC\n");
- for ( i = 0; i < hash_num; i++ ) {
- if ( !INVALID_ENTRY(vtlb, hash) ) {
- printf("VTLB at hash=%lp\n", hash);
- for (cch=hash; cch; cch=cch->next) {
- printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
- cch, cch->vadr, cch->ps, cch->rid);
- }
- }
- hash ++;
- }
- printf("Dump vDTR\n");
- for (i=0; i<NDTRS; i++) {
- tr = &DTR(vtlb,i);
- printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
- tr, tr->vadr, tr->ps, tr->rid);
- }
- printf("Dump vITR\n");
- for (i=0; i<NITRS; i++) {
- tr = &ITR(vtlb,i);
- printf("Entry %lp va=%lx ps=%lx rid=%lx\n",
- tr, tr->vadr, tr->ps, tr->rid);
- }
- printf("End of vTLB dump\n");
-}
-#endif
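
The removed vtlb.c manages its collision-chain nodes with a simple intrusive free list carved out of a preallocated buffer (cch_mem_init, cch_alloc, cch_free), falling back to thash_purge_all recycling when the pool runs dry. The sketch below shows that free-list pattern in isolation; node_t, pool_t and the function names are illustrative, not the original API, and unlike the original cch_alloc the allocator here returns NULL explicitly when the pool is exhausted so the caller can decide to recycle.

#include <stddef.h>
#include <stdint.h>

typedef struct node {
    struct node *next;       /* free-list link; reused as the chain link once allocated */
    uint64_t     payload;    /* stands in for the thash_data_t contents */
} node_t;

typedef struct {
    node_t *freelist;        /* head of the free list */
    node_t *buf;             /* preallocated backing buffer */
    size_t  nnodes;
} pool_t;

/* Thread every node of the backing buffer onto the free list. */
static void pool_init(pool_t *p, node_t *buf, size_t nnodes)
{
    p->buf = buf;
    p->nnodes = nnodes;
    p->freelist = (nnodes > 0) ? buf : NULL;
    for (size_t i = 0; i + 1 < nnodes; i++)
        buf[i].next = &buf[i + 1];
    if (nnodes > 0)
        buf[nnodes - 1].next = NULL;
}

/* Pop one node, or NULL when the pool is exhausted (the caller then recycles,
 * as __alloc_chain does via thash_purge_all). */
static node_t *pool_alloc(pool_t *p)
{
    node_t *n = p->freelist;
    if (n != NULL)
        p->freelist = n->next;
    return n;
}

/* Push a node back onto the free list. */
static void pool_free(pool_t *p, node_t *n)
{
    n->next = p->freelist;
    p->freelist = n;
}
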
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen.lds.S
--- a/xen/arch/ia64/xen.lds.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,251 +0,0 @@
-#include <linux/config.h>
-
-#include <asm/cache.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/pgtable.h>
-
-#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
-#include <asm-generic/vmlinux.lds.h>
-
-OUTPUT_FORMAT("elf64-ia64-little")
-OUTPUT_ARCH(ia64)
-ENTRY(phys_start)
-jiffies = jiffies_64;
-PHDRS {
- code PT_LOAD;
- percpu PT_LOAD;
- data PT_LOAD;
-}
-SECTIONS
-{
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.exit.text)
- *(.exit.data)
- *(.exitcall.exit)
- *(.IA_64.unwind.exit.text)
- *(.IA_64.unwind_info.exit.text)
- }
-
- v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
- phys_start = _start - LOAD_OFFSET;
-
- code : { } :code
- . = KERNEL_START;
-
- _text = .;
- _stext = .;
-
- .text : AT(ADDR(.text) - LOAD_OFFSET)
- {
- *(.text.ivt)
- *(.text)
- SCHED_TEXT
- LOCK_TEXT
- *(.gnu.linkonce.t*)
- }
- .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
- { *(.text2) }
-#ifdef CONFIG_SMP
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
- { *(.text.lock) }
-#endif
- _etext = .;
-
- /* Read-only data */
-
- /* Exception table */
- . = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
- {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- }
-
- .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
- {
- __start___vtop_patchlist = .;
- *(.data.patch.vtop)
- __end___vtop_patchlist = .;
- }
-
- .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
- {
- __start___mckinley_e9_bundles = .;
- *(.data.patch.mckinley_e9)
- __end___mckinley_e9_bundles = .;
- }
-
- /* Global data */
- _data = .;
-
-#if defined(CONFIG_IA64_GENERIC)
- /* Machine Vector */
- . = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
- {
- machvec_start = .;
- *(.machvec)
- machvec_end = .;
- }
-#endif
-
- /* Unwind info & table: */
- . = ALIGN(8);
- .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
- { *(.IA_64.unwind_info*) }
- .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
- {
- __start_unwind = .;
- *(.IA_64.unwind*)
- __end_unwind = .;
- }
-
- RODATA
-
- .opd : AT(ADDR(.opd) - LOAD_OFFSET)
- { *(.opd) }
-
- /* Initialization code and data: */
-
- . = ALIGN(PAGE_SIZE);
- __init_begin = .;
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
- {
- _sinittext = .;
- *(.init.text)
- _einittext = .;
- }
-
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
- { *(.init.data) }
-
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
- {
- __initramfs_start = .;
- *(.init.ramfs)
- __initramfs_end = .;
- }
-
- . = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
- {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
- }
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
- {
- __initcall_start = .;
- *(.initcall1.init)
- *(.initcall2.init)
- *(.initcall3.init)
- *(.initcall4.init)
- *(.initcall5.init)
- *(.initcall6.init)
- *(.initcall7.init)
- __initcall_end = .;
- }
- __con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
- { *(.con_initcall.init) }
- __con_initcall_end = .;
- __security_initcall_start = .;
- .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
- { *(.security_initcall.init) }
- __security_initcall_end = .;
- . = ALIGN(PAGE_SIZE);
- __init_end = .;
-
- /* The initial task and kernel stack */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
- { *(.data.init_task) }
-
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
- { *(__special_page_section)
- __start_gate_section = .;
- *(.data.gate)
- __stop_gate_section = .;
- }
-  . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose kernel data */
-
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
- { *(.data.cacheline_aligned) }
-
- /* Per-cpu data: */
- percpu : { } :percpu
- . = ALIGN(PERCPU_PAGE_SIZE);
- __phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
- {
- __per_cpu_start = .;
- *(.data.percpu)
- __per_cpu_end = .;
- }
-  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits into percpu page size */
-
- data : { } :data
- .data : AT(ADDR(.data) - LOAD_OFFSET)
- { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
-
- . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
- .got : AT(ADDR(.got) - LOAD_OFFSET)
- { *(.got.plt) *(.got) }
- __gp = ADDR(.got) + 0x200000;
- /* We want the small data sections together, so single-instruction offsets
- can access them all, and initialized data all before uninitialized, so
- we can shorten the on-disk segment size. */
- .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
- { *(.sdata) *(.sdata1) *(.srdata) }
- _edata = .;
- _bss = .;
- .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
- { *(.sbss) *(.scommon) }
- .bss : AT(ADDR(.bss) - LOAD_OFFSET)
- { *(.bss) *(COMMON) }
-
- _end = .;
-
- code : { } :code
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- /* DWARF debug sections.
- Symbols in the DWARF debugging sections are relative to the beginning
- of the section so we begin them at 0. */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* These must appear regardless of . */
- /* Discard them for now since Intel SoftSDV cannot handle them.
- .comment 0 : { *(.comment) }
- .note 0 : { *(.note) }
- */
- /DISCARD/ : { *(.comment) }
- /DISCARD/ : { *(.note) }
-}
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenasm.S
--- a/xen/arch/ia64/xenasm.S Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,501 +0,0 @@
-/*
- * Assembly support routines for Xen/ia64
- *
- * Copyright (C) 2004 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-
-#include <linux/config.h>
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/vhpt.h>
-
-#if 0
-// FIXME: there's gotta be a better way...
-// ski and spaski are different... moved to xenmisc.c
-#define RunningOnHpSki(rx,ry,pn) \
- addl rx = 2, r0; \
- addl ry = 3, r0; \
- ;; \
- mov rx = cpuid[rx]; \
- mov ry = cpuid[ry]; \
- ;; \
- cmp.eq pn,p0 = 0, rx; \
- ;; \
- (pn) movl rx = 0x7000004 ; \
- ;; \
- (pn) cmp.ge pn,p0 = ry, rx; \
- ;;
-
-//int platform_is_hp_ski(void)
-GLOBAL_ENTRY(platform_is_hp_ski)
- mov r8 = 0
- RunningOnHpSki(r3,r9,p8)
-(p8) mov r8 = 1
- br.ret.sptk.many b0
-END(platform_is_hp_ski)
-#endif
-
-// Change rr7 to the passed value while ensuring
-// Xen is mapped into the new region.
-// in0: new rr7 value
-// in1: Xen virtual address of shared info (to be pinned)
-#define PSR_BITS_TO_CLEAR \
- (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
- IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
- IA64_PSR_DFL | IA64_PSR_DFH)
-// FIXME? Note that this turns off the DB bit (debug)
-#define PSR_BITS_TO_SET IA64_PSR_BN
-
-//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info);
-GLOBAL_ENTRY(ia64_new_rr7)
- // not sure this unwind statement is correct...
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1)
- alloc loc1 = ar.pfs, 3, 8, 0, 0
-1: {
- mov r28 = in0 // copy procedure index
- mov r8 = ip // save ip to compute branch
- mov loc0 = rp // save rp
- };;
- .body
- movl loc2=PERCPU_ADDR
- ;;
- tpa loc2=loc2 // grab this BEFORE changing rr7
- ;;
-#if VHPT_ENABLED
- movl loc6=VHPT_ADDR
- ;;
- tpa loc6=loc6 // grab this BEFORE changing rr7
- ;;
-#endif
- mov loc5=in1
- ;;
- tpa loc5=loc5 // grab this BEFORE changing rr7
- ;;
- mov loc7=in2 // arch_vcpu_info_t
- ;;
- tpa loc7=loc7 // grab this BEFORE changing rr7
- ;;
- mov loc3 = psr // save psr
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- tpa r8=r8 // convert rp to physical
- ;;
- mov loc4=ar.rsc // save RSE configuration
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PSR_BITS_TO_CLEAR
- movl r17=PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
-1:
- // now in physical mode with psr.i/ic off so do rr7 switch
- dep r16=-1,r0,61,3
- ;;
- mov rr[r16]=in0
- srlz.d
- ;;
-
- // re-pin mappings for kernel text and data
- mov r18=KERNEL_TR_PAGE_SHIFT<<2
- movl r17=KERNEL_START
- ;;
- rsm psr.i | psr.ic
- ;;
- srlz.i
- ;;
- ptr.i r17,r18
- ptr.d r17,r18
- ;;
- mov cr.itir=r18
- mov cr.ifa=r17
- mov r16=IA64_TR_KERNEL
- //mov r3=ip
- movl r18=PAGE_KERNEL
- ;;
- dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
- ;;
- or r18=r2,r18
- ;;
- srlz.i
- ;;
- itr.i itr[r16]=r18
- ;;
- itr.d dtr[r16]=r18
- ;;
-
- // re-pin mappings for stack (current), per-cpu, vhpt, and shared info
-
- // unless overlaps with KERNEL_TR
- dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT
- ;;
- cmp.eq p7,p0=r17,r18
-(p7) br.cond.sptk .stack_overlaps
- ;;
- movl r25=PAGE_KERNEL
- dep r21=0,r13,60,4 // physical address of "current"
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r25=IA64_GRANULE_SHIFT<<2
- ;;
- ptr.d r13,r25
- ;;
- mov cr.itir=r25
- mov cr.ifa=r13 // VA of next task...
- ;;
- mov r25=IA64_TR_CURRENT_STACK
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-.stack_overlaps:
-
- movl r22=PERCPU_ADDR
- ;;
- movl r25=PAGE_KERNEL
- ;;
- mov r21=loc2 // saved percpu physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PERCPU_PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_PERCPU_DATA
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-
-#if VHPT_ENABLED
- movl r22=VHPT_ADDR
- ;;
- movl r25=PAGE_KERNEL
- ;;
- mov r21=loc6 // saved vhpt physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=VHPT_PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_VHPT
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-#endif
-
- movl r22=SHAREDINFO_ADDR
- ;;
- movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
- ;;
- mov r21=loc5 // saved sharedinfo physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_SHARED_INFO
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
- // Map for arch_vcpu_info_t
- movl r22=SHARED_ARCHINFO_ADDR
- ;;
- movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW)
- ;;
- mov r21=loc7 // saved sharedinfo physical address
- ;;
- or r23=r25,r21 // construct PA | page properties
- mov r24=PAGE_SHIFT<<2
- ;;
- ptr.d r22,r24
- ;;
- mov cr.itir=r24
- mov cr.ifa=r22
- ;;
- mov r25=IA64_TR_ARCH_INFO
- ;;
- itr.d dtr[r25]=r23 // wire in new mapping...
- ;;
-
- // done, switch back to virtual and return
- mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
- mov psr.l = loc3 // restore init PSR
-
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
-	srlz.d				// serialize restoration of psr.l
- br.ret.sptk.many rp
-END(ia64_new_rr7)
-
-#include "minstate.h"
-
-GLOBAL_ENTRY(ia64_prepare_handle_privop)
- .prologue
- /*
- * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
- */
- mov r16=r0
- DO_SAVE_SWITCH_STACK
-	br.call.sptk.many rp=ia64_handle_privop	// stack frame setup in ivt
-.ret22: .body
- DO_LOAD_SWITCH_STACK
-	br.cond.sptk.many rp			// goes to ia64_leave_kernel
-END(ia64_prepare_handle_privop)
-
-GLOBAL_ENTRY(ia64_prepare_handle_break)
- .prologue
- /*
- * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
- */
- mov r16=r0
- DO_SAVE_SWITCH_STACK
- br.call.sptk.many rp=ia64_handle_break // stack frame setup in ivt
-.ret23: .body
- DO_LOAD_SWITCH_STACK
- br.cond.sptk.many rp // goes to ia64_leave_kernel
-END(ia64_prepare_handle_break)
-
-GLOBAL_ENTRY(ia64_prepare_handle_reflection)
- .prologue
- /*
- * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
- */
- mov r16=r0
- DO_SAVE_SWITCH_STACK
-	br.call.sptk.many rp=ia64_handle_reflection	// stack frame setup in ivt
-.ret24: .body
- DO_LOAD_SWITCH_STACK
- br.cond.sptk.many rp // goes to ia64_leave_kernel
-END(ia64_prepare_handle_reflection)
-
-GLOBAL_ENTRY(__get_domain_bundle)
- EX(.failure_in_get_bundle,ld8 r8=[r32],8)
- ;;
- EX(.failure_in_get_bundle,ld8 r9=[r32])
- ;;
- br.ret.sptk.many rp
- ;;
-.failure_in_get_bundle:
- mov r8=0
- ;;
- mov r9=0
- ;;
- br.ret.sptk.many rp
- ;;
-END(__get_domain_bundle)
-
-GLOBAL_ENTRY(dorfirfi)
- movl r16 = XSI_IIP
- movl r17 = XSI_IPSR
- movl r18 = XSI_IFS
- ;;
- ld8 r16 = [r16]
- ld8 r17 = [r17]
- ld8 r18 = [r18]
- ;;
- mov cr.iip=r16
- mov cr.ipsr=r17
- mov cr.ifs=r18
- ;;
- // fall through
-END(dorfirfi)
-
-GLOBAL_ENTRY(dorfi)
- rfi
- ;;
-END(dorfirfi)
-
-//
-// Long's Peak UART Offsets
-//
-#define COM_TOP 0xff5e0000
-#define COM_BOT 0xff5e2000
-
-// UART offsets
-#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */
-#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */
-#define UART_INT_ID 2 /* Interrupt ID register */
-#define UART_LINE_CTL 3 /* Line control register */
-#define UART_MODEM_CTL 4 /* Modem Control Register */
-#define UART_LSR 5 /* In: Line Status Register */
-#define UART_MSR 6 /* Modem status register */
-#define UART_DLATCH_LOW UART_TX
-#define UART_DLATCH_HIGH UART_INT_ENB
-#define COM1 0x3f8
-#define COM2 0x2F8
-#define COM3 0x3E8
-
-/* interrupt enable bits (offset 1) */
-#define DATA_AVAIL_INT 1
-#define XMIT_HOLD_EMPTY_INT 2
-#define LINE_STAT_INT 4
-#define MODEM_STAT_INT 8
-
-/* line status bits (offset 5) */
-#define REC_DATA_READY 1
-#define OVERRUN 2
-#define PARITY_ERROR 4
-#define FRAMING_ERROR 8
-#define BREAK_INTERRUPT 0x10
-#define XMIT_HOLD_EMPTY 0x20
-#define XMIT_SHIFT_EMPTY 0x40
-
-// Write a single character
-// input: r32 = character to be written
-// output: none
-GLOBAL_ENTRY(longs_peak_putc)
- rsm psr.dt
- movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR
- ;;
- srlz.i
- ;;
-
-.Chk_THRE_p:
- ld1.acq r18=[r16]
- ;;
-
- and r18 = XMIT_HOLD_EMPTY, r18
- ;;
- cmp4.eq p6,p0=0,r18
- ;;
-
-(p6) br .Chk_THRE_p
- ;;
- movl r16 = 0x8000000000000000 + COM_TOP + UART_TX
- ;;
- st1.rel [r16]=r32
- ;;
- ssm psr.dt
- ;;
- srlz.i
- ;;
- br.ret.sptk.many b0
-END(longs_peak_putc)
-
-/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */
-GLOBAL_ENTRY(pal_emulator_static)
- mov r8=-1
- mov r9=256
- ;;
- cmp.gtu p7,p8=r9,r32 /* r32 <= 255? */
-(p7) br.cond.sptk.few static
- ;;
- mov r9=512
- ;;
- cmp.gtu p7,p8=r9,r32
-(p7) br.cond.sptk.few stacked
- ;;
-static: cmp.eq p7,p8=6,r32 /* PAL_PTCE_INFO */
-(p8) br.cond.sptk.few 1f
- ;;
- mov r8=0 /* status = 0 */
- movl r9=0x100000000 /* tc.base */
- movl r10=0x0000000200000003 /* count[0], count[1] */
- movl r11=0x1000000000002000 /* stride[0], stride[1] */
- br.ret.sptk.few rp
-1: cmp.eq p7,p8=14,r32 /* PAL_FREQ_RATIOS */
-(p8) br.cond.sptk.few 1f
- mov r8=0 /* status = 0 */
- movl r9 =0x900000002 /* proc_ratio (1/100) */
- movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
- movl r11=0x900000002 /* itc_ratio<<32 (1/100) */
- ;;
-1: cmp.eq p7,p8=19,r32 /* PAL_RSE_INFO */
-(p8) br.cond.sptk.few 1f
- mov r8=0 /* status = 0 */
- mov r9=96 /* num phys stacked */
- mov r10=0 /* hints */
- mov r11=0
- br.ret.sptk.few rp
-1: cmp.eq p7,p8=1,r32 /* PAL_CACHE_FLUSH */
-(p8) br.cond.sptk.few 1f
-#if 0
- mov r9=ar.lc
-	movl r8=524288		/* flush 512k million cache lines (16MB) */
- ;;
- mov ar.lc=r8
- movl r8=0xe000000000000000
- ;;
-.loop: fc r8
- add r8=32,r8
- br.cloop.sptk.few .loop
- sync.i
- ;;
- srlz.i
- ;;
- mov ar.lc=r9
- mov r8=r0
- ;;
-1: cmp.eq p7,p8=15,r32 /* PAL_PERF_MON_INFO */
-(p8) br.cond.sptk.few 1f
- mov r8=0 /* status = 0 */
-	movl r9 =0x08122f04	/* generic=4 width=47 retired=8 cycles=18 */
- mov r10=0 /* reserved */
- mov r11=0 /* reserved */
- mov r16=0xffff /* implemented PMC */
- mov r17=0x3ffff /* implemented PMD */
- add r18=8,r29 /* second index */
- ;;
- st8 [r29]=r16,16 /* store implemented PMC */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
- st8 [r29]=r0,16 /* clear remaining bits */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
- st8 [r29]=r17,16 /* store implemented PMD */
- st8 [r18]=r0,16 /* clear remaining bits */
- mov r16=0xf0 /* cycles count capable PMC */
- ;;
- st8 [r29]=r0,16 /* clear remaining bits */
- st8 [r18]=r0,16 /* clear remaining bits */
- mov r17=0xf0 /* retired bundles capable PMC */
- ;;
- st8 [r29]=r16,16 /* store cycles capable */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
- st8 [r29]=r0,16 /* clear remaining bits */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
- st8 [r29]=r17,16 /* store retired bundle capable */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
- st8 [r29]=r0,16 /* clear remaining bits */
- st8 [r18]=r0,16 /* clear remaining bits */
- ;;
-1: br.cond.sptk.few rp
-#else
-1:
-#endif
-stacked:
- br.ret.sptk.few rp
-END(pal_emulator_static)
-
-GLOBAL_ENTRY(vhpt_insert)
-// alloc loc0 = ar.pfs, 3, 1, 0, 0
- mov r16=r32
- mov r26=r33
- mov r27=r34
- ;;
- VHPT_INSERT()
-// VHPT_INSERT1() ... add collision chains later
-// mov ar.pfs = loc0
- br.ret.sptk.few rp
- ;;
-END(vhpt_insert)
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenirq.c
--- a/xen/arch/ia64/xenirq.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,77 +0,0 @@
-/*
- * Xen irq routines
- *
- * Copyright (C) 2005 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <asm/ptrace.h>
-#include <asm/hw_irq.h>
-
-
-void
-xen_debug_irq(ia64_vector vector, struct pt_regs *regs)
-{
-//FIXME: For debug only, can be removed
- static char firstirq = 1;
- static char firsttime[256];
- static char firstpend[256];
- if (firstirq) {
- int i;
- for (i=0;i<256;i++) firsttime[i] = 1;
- for (i=0;i<256;i++) firstpend[i] = 1;
- firstirq = 0;
- }
- if (firsttime[vector]) {
- printf("**** (entry) First received int on vector=%d,itc=%lx\n",
- (unsigned long) vector, ia64_get_itc());
- firsttime[vector] = 0;
- }
-}
-
-
-int
-xen_do_IRQ(ia64_vector vector)
-{
- if (vector != 0xef) {
- extern void vcpu_pend_interrupt(void *, int);
-#if 0
- if (firsttime[vector]) {
- printf("**** (iterate) First received int on
vector=%d,itc=%lx\n",
- (unsigned long) vector, ia64_get_itc());
- firsttime[vector] = 0;
- }
- if (firstpend[vector]) {
- printf("**** First pended int on vector=%d,itc=%lx\n",
- (unsigned long) vector,ia64_get_itc());
- firstpend[vector] = 0;
- }
-#endif
- //FIXME: TEMPORARY HACK!!!!
- vcpu_pend_interrupt(dom0->vcpu[0],vector);
- vcpu_wake(dom0->vcpu[0]);
- return(1);
- }
- return(0);
-}
-
-/* From linux/kernel/softirq.c */
-#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
-# define invoke_softirq() __do_softirq()
-#else
-# define invoke_softirq() do_softirq()
-#endif
-
-/*
- * Exit an interrupt context. Process softirqs if needed and possible:
- */
-void irq_exit(void)
-{
- //account_system_vtime(current);
- //sub_preempt_count(IRQ_EXIT_OFFSET);
- if (!in_interrupt() && local_softirq_pending())
- invoke_softirq();
- //preempt_enable_no_resched();
-}
-/* end from linux/kernel/softirq.c */
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenmem.c
--- a/xen/arch/ia64/xenmem.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,86 +0,0 @@
-/*
- * Xen memory allocator routines
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- * Copyright (C) 2005 Intel Corp.
- *
- * Routines used by ia64 machines with contiguous (or virtually contiguous)
- * memory.
- */
-
-#include <linux/config.h>
-#include <asm/pgtable.h>
-#include <xen/mm.h>
-
-extern struct page *zero_page_memmap_ptr;
-struct pfn_info *frame_table;
-unsigned long frame_table_size;
-unsigned long max_page;
-
-struct page *mem_map;
-#define MAX_DMA_ADDRESS ~0UL // FIXME???
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-static unsigned long num_dma_physpages;
-#endif
-
-/*
- * Set up the page tables.
- */
-#ifdef CONFIG_VTI
-unsigned long *mpt_table;
-unsigned long mpt_table_size;
-#endif // CONFIG_VTI
-
-void
-paging_init (void)
-{
- struct pfn_info *pg;
-
-#ifdef CONFIG_VTI
- unsigned int mpt_order;
- /* Create machine to physical mapping table
- * NOTE: similar to frame table, later we may need virtually
- * mapped mpt table if large hole exists. Also MAX_ORDER needs
- * to be changed in common code, which only support 16M by far
- */
- mpt_table_size = max_page * sizeof(unsigned long);
- mpt_order = get_order(mpt_table_size);
- ASSERT(mpt_order <= MAX_ORDER);
- if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
- panic("Not enough memory to bootstrap Xen.\n");
-
- printk("machine to physical table: 0x%lx\n", (u64)mpt_table);
- memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size);
-#endif // CONFIG_VTI
-
- /* Other mapping setup */
-
- zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
-}
-
-/* FIXME: postpone support for machines with big holes between physical memories.
- * The current hack allows only efi memdescs up to the 4G place. (See efi.c)
- */
-#ifndef CONFIG_VIRTUAL_MEM_MAP
-#define FT_ALIGN_SIZE (16UL << 20)
-void __init init_frametable(void)
-{
- unsigned long i, pfn;
- frame_table_size = max_page * sizeof(struct pfn_info);
- frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
-
- /* Request continuous trunk from boot allocator, since HV
- * address is identity mapped */
- pfn = alloc_boot_pages(
- frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT);
- if (pfn == 0)
- panic("Not enough memory for frame table.\n");
-
- frame_table = __va(pfn << PAGE_SHIFT);
- memset(frame_table, 0, frame_table_size);
- printk("size of frame_table: %lukB\n",
- frame_table_size >> 10);
-}
-#endif
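
init_frametable above sizes the frame table as max_page * sizeof(struct pfn_info) and rounds that up to a whole number of pages with the usual (size + PAGE_SIZE - 1) & PAGE_MASK idiom before asking the boot allocator for a contiguous region. A minimal, self-contained illustration of the round-up arithmetic, assuming 16KB pages purely for the example:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 14UL                  /* assumed 16KB pages, for illustration only */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Round a byte count up to a whole number of pages, as the removed
 * init_frametable() does for frame_table_size. */
static inline uint64_t page_align_up(uint64_t size)
{
    return (size + PAGE_SIZE - 1) & PAGE_MASK;
}

int main(void)
{
    assert(page_align_up(1) == PAGE_SIZE);
    assert(page_align_up(PAGE_SIZE) == PAGE_SIZE);
    assert(page_align_up(PAGE_SIZE + 1) == 2 * PAGE_SIZE);
    return 0;
}
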
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,391 +0,0 @@
-/*
- * Xen misc
- *
- * Functions/decls that are/may be needed to link with Xen because
- * of x86 dependencies
- *
- * Copyright (C) 2004 Hewlett-Packard Co.
- * Dan Magenheimer (dan.magenheimer@xxxxxx)
- *
- */
-
-#include <linux/config.h>
-#include <xen/sched.h>
-#include <linux/efi.h>
-#include <asm/processor.h>
-#include <xen/serial.h>
-#include <asm/io.h>
-#include <xen/softirq.h>
-
-efi_memory_desc_t ia64_efi_io_md;
-EXPORT_SYMBOL(ia64_efi_io_md);
-unsigned long wait_init_idle;
-int phys_proc_id[NR_CPUS];
-unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c
-
-void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); }
-void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); }
-void ia64_mca_cpu_init(void *x) { }
-void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { }
-void ia64_patch_vtop(unsigned long a, unsigned long b) { }
-void hpsim_setup(char **x)
-{
-#ifdef CONFIG_SMP
- init_smp_config();
-#endif
-}
-
-// called from mem_init... don't think s/w I/O tlb is needed in Xen
-//void swiotlb_init(void) { } ...looks like it IS needed
-
-long
-is_platform_hp_ski(void)
-{
- int i;
- long cpuid[6];
-
- for (i = 0; i < 5; ++i)
- cpuid[i] = ia64_get_cpuid(i);
- if ((cpuid[0] & 0xff) != 'H') return 0;
- if ((cpuid[3] & 0xff) != 0x4) return 0;
- if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0;
- if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0;
- if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0;
- return 1;
-}
-
-long
-platform_is_hp_ski(void)
-{
- extern long running_on_sim;
- return running_on_sim;
-}
-
-/* calls in xen/common code that are unused on ia64 */
-
-void sync_lazy_execstate_cpu(unsigned int cpu) {}
-
-#ifdef CONFIG_VTI
-int grant_table_create(struct domain *d) { return 0; }
-void grant_table_destroy(struct domain *d) { return; }
-#endif
-
-struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); }
-
-void raise_actimer_softirq(void)
-{
- raise_softirq(AC_TIMER_SOFTIRQ);
-}
-
-#ifndef CONFIG_VTI
-unsigned long
-__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
-{
- if (d == dom0)
- return(gpfn);
- else {
- unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
- if (!pte) {
-printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n");
-while(1);
- return 0;
- }
- return ((pte & _PFN_MASK) >> PAGE_SHIFT);
- }
-}
-
-u32
-__mfn_to_gpfn(struct domain *d, unsigned long frame)
-{
- // FIXME: is this right?
-if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) {
-printk("__mfn_to_gpfn: bad frame. spinning...\n");
-while(1);
-}
- return frame;
-}
-#endif
-
-#ifndef CONFIG_VTI
-unsigned long __hypercall_create_continuation(
- unsigned int op, unsigned int nr_args, ...)
-{
- printf("__hypercall_create_continuation: not implemented!!!\n");
-}
-#endif
-
-///////////////////////////////
-
-///////////////////////////////
-// from arch/x86/apic.c
-///////////////////////////////
-
-extern unsigned long domain0_ready;
-
-int reprogram_ac_timer(s_time_t timeout)
-{
- struct vcpu *v = current;
-
-#ifdef CONFIG_VTI
-// if(VMX_DOMAIN(v))
- return 1;
-#endif // CONFIG_VTI
- if (!domain0_ready) return 1;
- local_cpu_data->itm_next = timeout;
- if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout);
- else vcpu_set_next_timer(current);
- return 1;
-}
-
-///////////////////////////////
-// from arch/ia64/page_alloc.c
-///////////////////////////////
-DEFINE_PER_CPU(struct page_state, page_states) = {0};
-unsigned long totalram_pages;
-
-void __mod_page_state(unsigned offset, unsigned long delta)
-{
- unsigned long flags;
- void* ptr;
-
- local_irq_save(flags);
- ptr = &__get_cpu_var(page_states);
- *(unsigned long*)(ptr + offset) += delta;
- local_irq_restore(flags);
-}
-
-///////////////////////////////
-// from arch/x86/flushtlb.c
-///////////////////////////////
-
-u32 tlbflush_clock;
-u32 tlbflush_time[NR_CPUS];
-
-///////////////////////////////
-// from arch/x86/memory.c
-///////////////////////////////
-
-void init_percpu_info(void)
-{
- dummy();
- //memset(percpu_info, 0, sizeof(percpu_info));
-}
-
-void free_page_type(struct pfn_info *page, unsigned int type)
-{
- dummy();
-}
-
-///////////////////////////////
-//// misc memory stuff
-///////////////////////////////
-
-unsigned long __get_free_pages(unsigned int mask, unsigned int order)
-{
- void *p = alloc_xenheap_pages(order);
-
- memset(p,0,PAGE_SIZE<<order);
- return (unsigned long)p;
-}
-
-void __free_pages(struct page *page, unsigned int order)
-{
- if (order) BUG();
- free_xenheap_page(page);
-}
-
-void *pgtable_quicklist_alloc(void)
-{
- return alloc_xenheap_pages(0);
-}
-
-void pgtable_quicklist_free(void *pgtable_entry)
-{
- free_xenheap_page(pgtable_entry);
-}
-
-///////////////////////////////
-// from arch/ia64/traps.c
-///////////////////////////////
-
-void show_registers(struct pt_regs *regs)
-{
- printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n");
-}
-
-int is_kernel_text(unsigned long addr)
-{
- extern char _stext[], _etext[];
- if (addr >= (unsigned long) _stext &&
- addr <= (unsigned long) _etext)
- return 1;
-
- return 0;
-}
-
-unsigned long kernel_text_end(void)
-{
- extern char _etext[];
- return (unsigned long) _etext;
-}
-
-///////////////////////////////
-// from common/keyhandler.c
-///////////////////////////////
-void dump_pageframe_info(struct domain *d)
-{
- printk("dump_pageframe_info not implemented\n");
-}
-
-///////////////////////////////
-// called from arch/ia64/head.S
-///////////////////////////////
-
-void console_print(char *msg)
-{
- printk("console_print called, how did start_kernel return???\n");
-}
-
-void kernel_thread_helper(void)
-{
- printk("kernel_thread_helper not implemented\n");
- dummy();
-}
-
-void sys_exit(void)
-{
- printk("sys_exit not implemented\n");
- dummy();
-}
-
-////////////////////////////////////
-// called from unaligned.c
-////////////////////////////////////
-
-void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */
-{
- printk("die_if_kernel: called, not implemented\n");
-}
-
-long
-ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
- unsigned long user_rbs_end, unsigned long addr, long *val)
-{
- printk("ia64_peek: called, not implemented\n");
-}
-
-long
-ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
- unsigned long user_rbs_end, unsigned long addr, long val)
-{
- printk("ia64_poke: called, not implemented\n");
-}
-
-void
-ia64_sync_fph (struct task_struct *task)
-{
- printk("ia64_sync_fph: called, not implemented\n");
-}
-
-void
-ia64_flush_fph (struct task_struct *task)
-{
- printk("ia64_flush_fph: called, not implemented\n");
-}
-
-////////////////////////////////////
-// called from irq_ia64.c:init_IRQ()
-// (because CONFIG_IA64_HP_SIM is specified)
-////////////////////////////////////
-void hpsim_irq_init(void) { }
-
-
-// accomodate linux extable.c
-//const struct exception_table_entry *
-void *search_module_extables(unsigned long addr) { return NULL; }
-void *__module_text_address(unsigned long addr) { return NULL; }
-void *module_text_address(unsigned long addr) { return NULL; }
-
-void cs10foo(void) {}
-void cs01foo(void) {}
-
-unsigned long context_switch_count = 0;
-
-void context_switch(struct vcpu *prev, struct vcpu *next)
-{
-//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
-//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n",
-//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff);
-//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo();
-//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
-//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
-#ifdef CONFIG_VTI
- vtm_domain_out(prev);
-#endif
- context_switch_count++;
- switch_to(prev,next,prev);
-#ifdef CONFIG_VTI
- vtm_domain_in(current);
-#endif
-
-// leave this debug for now: it acts as a heartbeat when more than
-// one domain is active
-{
-static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50};
-static int i = 100;
-int id = ((struct vcpu *)current)->domain->domain_id & 0xf;
-if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
-if (!i--) { printk("+",id); i = 1000000; }
-}
-
-#ifdef CONFIG_VTI
- if (VMX_DOMAIN(current))
- vmx_load_all_rr(current);
-#else
- if (!is_idle_task(current->domain)) {
- load_region_regs(current);
- if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
- }
- if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif
-}
-
-void context_switch_finalise(struct vcpu *next)
-{
- /* nothing to do */
-}
-
-void continue_running(struct vcpu *same)
-{
- /* nothing to do */
-}
-
-void panic_domain(struct pt_regs *regs, const char *fmt, ...)
-{
- va_list args;
- char buf[128];
- struct vcpu *v = current;
- static volatile int test = 1; // so can continue easily in debug
- extern spinlock_t console_lock;
- unsigned long flags;
-
-loop:
- printf("$$$$$ PANIC in domain %d (k6=%p): ",
- v->domain->domain_id,
- __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]);
- va_start(args, fmt);
- (void)vsnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
- printf(buf);
- if (regs) show_registers(regs);
- domain_pause_by_systemcontroller(current->domain);
- v->domain->shutdown_code = SHUTDOWN_crash;
- set_bit(_DOMF_shutdown, v->domain->domain_flags);
- if (v->domain->domain_id == 0) {
- int i = 1000000000L;
- // if domain0 crashes, just periodically print out panic
- // message to make post-mortem easier
- while(i--);
- goto loop;
- }
-}
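
For reference, the gpfn-to-mfn path removed above boils down to masking the attribute bits out of a looked-up PTE and shifting the physical page number field down. Below is a minimal standalone C sketch of that extraction; the page shift and mask values are illustrative stand-ins for the PAGE_SHIFT/_PFN_MASK definitions in asm/pgtable.h, not taken from a real build, and a 64-bit unsigned long (as on ia64) is assumed.

#include <stdio.h>

#define SKETCH_PAGE_SHIFT    14                      /* 16KB pages (illustrative) */
#define SKETCH_MAX_PHYS_BITS 50                      /* physical address bits */
#define SKETCH_PFN_MASK (((1UL << SKETCH_MAX_PHYS_BITS) - 1) & ~0xfffUL)

/* mask off the attribute bits, then shift the physical page number down */
static unsigned long pte_to_mfn(unsigned long pte)
{
    return (pte & SKETCH_PFN_MASK) >> SKETCH_PAGE_SHIFT;
}

int main(void)
{
    /* a fake PTE: frame 0x12345 plus a couple of low attribute bits */
    unsigned long pte = (0x12345UL << SKETCH_PAGE_SHIFT) | 0x3UL;

    printf("mfn = 0x%lx\n", pte_to_mfn(pte));        /* prints mfn = 0x12345 */
    return 0;
}
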
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xensetup.c
--- a/xen/arch/ia64/xensetup.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,389 +0,0 @@
-/******************************************************************************
- * xensetup.c
- * Copyright (c) 2004-2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-//#include <xen/spinlock.h>
-#include <xen/multiboot.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-//#include <xen/delay.h>
-#include <xen/compile.h>
-//#include <xen/console.h>
-#include <xen/serial.h>
-#include <xen/trace.h>
-#include <asm/meminit.h>
-#include <asm/page.h>
-#include <asm/setup.h>
-#include <xen/string.h>
-
-unsigned long xenheap_phys_end;
-
-char saved_command_line[COMMAND_LINE_SIZE];
-
-struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu };
-
-cpumask_t cpu_present_map;
-
-#ifdef CLONE_DOMAIN0
-struct domain *clones[CLONE_DOMAIN0];
-#endif
-extern unsigned long domain0_ready;
-
-int find_max_pfn (unsigned long, unsigned long, void *);
-void start_of_day(void);
-
-/* opt_nosmp: If true, secondary processors are ignored. */
-static int opt_nosmp = 0;
-boolean_param("nosmp", opt_nosmp);
-
-/* maxcpus: maximum number of CPUs to activate. */
-static unsigned int max_cpus = NR_CPUS;
-integer_param("maxcpus", max_cpus);
-
-/*
- * opt_xenheap_megabytes: Size of Xen heap in megabytes, including:
- * xen image
- * bootmap bits
- * xen heap
- * Note: To allow xenheap size configurable, the prerequisite is
- * to configure elilo allowing relocation defaultly. Then since
- * elilo chooses 256M as alignment when relocating, alignment issue
- * on IPF can be addressed.
- */
-unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
-unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
-extern long running_on_sim;
-unsigned long xen_pstart;
-
-static int
-xen_count_pages(u64 start, u64 end, void *arg)
-{
- unsigned long *count = arg;
-
- /* FIXME: do we need consider difference between DMA-usable memory and
- * normal memory? Seems that HV has no requirement to operate DMA which
- * is owned by Dom0? */
- *count += (end - start) >> PAGE_SHIFT;
- return 0;
-}
-
-/* Find first hole after trunk for xen image */
-static int
-xen_find_first_hole(u64 start, u64 end, void *arg)
-{
- unsigned long *first_hole = arg;
-
- if ((*first_hole) == 0) {
- if ((start <= KERNEL_START) && (KERNEL_START < end))
- *first_hole = __pa(end);
- }
-
- return 0;
-}
-
-static void __init do_initcalls(void)
-{
- initcall_t *call;
- for ( call = &__initcall_start; call < &__initcall_end; call++ )
- (*call)();
-}
-
-/*
- * IPF loader only supports one commaind line currently, for
- * both xen and guest kernel. This function provides pre-parse
- * to mixed command line, to split it into two parts.
- *
- * User should split the parameters by "--", with strings after
- * spliter for guest kernel. Missing "--" means whole line belongs
- * to guest. Example:
- * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty root=/dev/sda3 ro"
- */
-static char null[4] = { 0 };
-
-void early_cmdline_parse(char **cmdline_p)
-{
- char *guest_cmd;
- char *split = "--";
-
- if (*cmdline_p == NULL) {
- *cmdline_p = &null[0];
- saved_command_line[0] = '\0';
- return;
- }
-
- guest_cmd = strstr(*cmdline_p, split);
- /* If no spliter, whole line is for guest */
- if (guest_cmd == NULL) {
- guest_cmd = *cmdline_p;
- *cmdline_p = &null[0];
- } else {
- *guest_cmd = '\0'; /* Split boot parameters for xen and guest */
- guest_cmd += strlen(split);
- while (*guest_cmd == ' ') guest_cmd++;
- }
-
- strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE);
- return;
-}
-
-struct ns16550_defaults ns16550_com1 = {
- .baud = BAUD_AUTO,
- .data_bits = 8,
- .parity = 'n',
- .stop_bits = 1
-};
-
-struct ns16550_defaults ns16550_com2 = {
- .baud = BAUD_AUTO,
- .data_bits = 8,
- .parity = 'n',
- .stop_bits = 1
-};
-
-void start_kernel(void)
-{
- unsigned char *cmdline;
- void *heap_start;
- int i;
- unsigned long max_mem, nr_pages, firsthole_start;
- unsigned long dom0_memory_start, dom0_memory_end;
- unsigned long initial_images_start, initial_images_end;
-
- running_on_sim = is_platform_hp_ski();
- /* Kernel may be relocated by EFI loader */
- xen_pstart = ia64_tpa(KERNEL_START);
-
- /* Must do this early -- e.g., spinlocks rely on get_current(). */
- //set_current(&idle0_vcpu);
- ia64_r13 = (void *)&idle0_vcpu;
- idle0_vcpu.domain = &idle0_domain;
-
- early_setup_arch(&cmdline);
-
- /* We initialise the serial devices very early so we can get debugging. */
- if (running_on_sim) hpsim_serial_init();
- else {
- ns16550_init(0, &ns16550_com1);
- /* Also init com2 for Tiger4. */
- ns16550_com2.io_base = 0x2f8;
- ns16550_com2.irq = 3;
- ns16550_init(1, &ns16550_com2);
- }
- serial_init_preirq();
-
- init_console();
- set_printk_prefix("(XEN) ");
-
- /* xenheap should be in same TR-covered range with xen image */
- xenheap_phys_end = xen_pstart + xenheap_size;
- printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
- xen_pstart, xenheap_phys_end);
-
- /* Find next hole */
- firsthole_start = 0;
- efi_memmap_walk(xen_find_first_hole, &firsthole_start);
-
- initial_images_start = xenheap_phys_end;
- initial_images_end = initial_images_start + ia64_boot_param->initrd_size;
-
- /* Later may find another memory trunk, even away from xen image... */
- if (initial_images_end > firsthole_start) {
- printk("Not enough memory to stash the DOM0 kernel image.\n");
- printk("First hole:0x%lx, relocation end: 0x%lx\n",
- firsthole_start, initial_images_end);
- for ( ; ; );
- }
-
- /* This copy is time consuming, but elilo may load Dom0 image
- * within xenheap range */
- printk("ready to move Dom0 to 0x%lx...", initial_images_start);
- memmove(__va(initial_images_start),
- __va(ia64_boot_param->initrd_start),
- ia64_boot_param->initrd_size);
- ia64_boot_param->initrd_start = initial_images_start;
- printk("Done\n");
-
- /* first find highest page frame number */
- max_page = 0;
- efi_memmap_walk(find_max_pfn, &max_page);
- printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
-
- heap_start = memguard_init(ia64_imva(&_end));
- printf("Before heap_start: 0x%lx\n", heap_start);
- heap_start = __va(init_boot_allocator(__pa(heap_start)));
- printf("After heap_start: 0x%lx\n", heap_start);
-
- reserve_memory();
-
- efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
- efi_memmap_walk(xen_count_pages, &nr_pages);
-
- printk("System RAM: %luMB (%lukB)\n",
- nr_pages >> (20 - PAGE_SHIFT),
- nr_pages << (PAGE_SHIFT - 10));
-
- init_frametable();
-
- ia64_fph_enable();
- __ia64_init_fpu();
-
- alloc_dom0();
-#ifdef DOMU_BUILD_STAGING
- alloc_domU_staging();
-#endif
-
- end_boot_allocator();
-
- init_xenheap_pages(__pa(heap_start), xenheap_phys_end);
- printk("Xen heap: %luMB (%lukB)\n",
- (xenheap_phys_end-__pa(heap_start)) >> 20,
- (xenheap_phys_end-__pa(heap_start)) >> 10);
-
- late_setup_arch(&cmdline);
- setup_per_cpu_areas();
- mem_init();
-
-printk("About to call scheduler_init()\n");
- scheduler_init();
- local_irq_disable();
-printk("About to call xen_time_init()\n");
- xen_time_init();
-#ifdef CONFIG_VTI
- init_xen_time(); /* initialise the time */
-#endif // CONFIG_VTI
-printk("About to call ac_timer_init()\n");
- ac_timer_init();
-// init_xen_time(); ???
-
-#ifdef CONFIG_SMP
- if ( opt_nosmp )
- {
- max_cpus = 0;
- smp_num_siblings = 1;
- //boot_cpu_data.x86_num_cores = 1;
- }
-
- smp_prepare_cpus(max_cpus);
-
- /* We aren't hotplug-capable yet. */
- //BUG_ON(!cpus_empty(cpu_present_map));
- for_each_cpu ( i )
- cpu_set(i, cpu_present_map);
-
- //BUG_ON(!local_irq_is_enabled());
-
-printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus);
- for_each_present_cpu ( i )
- {
- if ( num_online_cpus() >= max_cpus )
- break;
- if ( !cpu_online(i) ) {
-printk("About to call __cpu_up(%d)\n",i);
- __cpu_up(i);
- }
- }
-
- printk("Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(max_cpus);
-#endif
-
-
- // FIXME: Should the following be swapped and moved later?
- schedulers_start();
- do_initcalls();
-printk("About to call sort_main_extable()\n");
- sort_main_extable();
-
- /* surrender usage of kernel registers to domain, use percpu area instead */
- __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE);
- __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA);
- __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK);
- __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER);
- __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT);
- __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE);
-
- /* Create initial domain 0. */
-printk("About to call do_createdomain()\n");
- dom0 = do_createdomain(0, 0);
- init_task.domain = &idle0_domain;
- init_task.processor = 0;
-// init_task.mm = &init_mm;
- init_task.domain->arch.mm = &init_mm;
-// init_task.thread = INIT_THREAD;
- //arch_do_createdomain(current);
-#ifdef CLONE_DOMAIN0
- {
- int i;
- for (i = 0; i < CLONE_DOMAIN0; i++) {
- clones[i] = do_createdomain(i+1, 0);
- if ( clones[i] == NULL )
- panic("Error creating domain0 clone %d\n",i);
- }
- }
-#endif
- if ( dom0 == NULL )
- panic("Error creating domain 0\n");
-
- set_bit(_DOMF_privileged, &dom0->domain_flags);
-
- /*
- * We're going to setup domain0 using the module(s) that we stashed safely
- * above our heap. The second module, if present, is an initrd ramdisk.
- */
-printk("About to call construct_dom0()\n");
- dom0_memory_start = __va(ia64_boot_param->initrd_start);
- dom0_memory_end = ia64_boot_param->initrd_size;
- if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
- 0,
- 0,
- 0) != 0)
- panic("Could not set up DOM0 guest OS\n");
-#ifdef CLONE_DOMAIN0
- {
- int i;
- dom0_memory_start = __va(ia64_boot_param->initrd_start);
- dom0_memory_end = ia64_boot_param->initrd_size;
- for (i = 0; i < CLONE_DOMAIN0; i++) {
-printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1);
- if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end,
- 0,
- 0,
- 0) != 0)
- panic("Could not set up DOM0 clone %d\n",i);
- }
- }
-#endif
-
- /* The stash space for the initial kernel image can now be freed up. */
- init_domheap_pages(ia64_boot_param->initrd_start,
- ia64_boot_param->initrd_start + ia64_boot_param->initrd_size);
- if (!running_on_sim) // slow on ski and pages are pre-initialized to zero
- scrub_heap_pages();
-
-printk("About to call init_trace_bufs()\n");
- init_trace_bufs();
-
- /* Give up the VGA console if DOM0 is configured to grab it. */
-#ifndef IA64
- console_endboot(cmdline && strstr(cmdline, "tty0"));
-#endif
-
-#ifdef CLONE_DOMAIN0
- {
- int i;
- for (i = 0; i < CLONE_DOMAIN0; i++)
- domain_unpause_by_systemcontroller(clones[i]);
- }
-#endif
- domain_unpause_by_systemcontroller(dom0);
- domain0_ready = 1;
- local_irq_enable();
-printk("About to call startup_cpu_idle_loop()\n");
- startup_cpu_idle_loop();
-}
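
For reference, a minimal standalone sketch of the "--" split that early_cmdline_parse() above implements: everything before the separator is kept for Xen, everything after it becomes the guest kernel's command line, and a missing separator hands the whole line to the guest. The function and variable names below are illustrative only.

#include <stdio.h>
#include <string.h>

static void split_cmdline(char *line, char **xen_part, char **guest_part)
{
    char *sep = strstr(line, "--");

    if (sep == NULL) {                 /* no separator: whole line is the guest's */
        *xen_part = "";
        *guest_part = line;
        return;
    }
    *sep = '\0';                       /* terminate the Xen half */
    sep += 2;                          /* skip "--" */
    while (*sep == ' ')
        sep++;                         /* and any blanks after it */
    *xen_part = line;
    *guest_part = sep;
}

int main(void)
{
    char line[] = "com2=57600,8n1 console=com2 -- console=ttyS1 root=/dev/sda3 ro";
    char *xen, *guest;

    split_cmdline(line, &xen, &guest);
    printf("xen:   \"%s\"\n", xen);    /* "com2=57600,8n1 console=com2 " */
    printf("guest: \"%s\"\n", guest);  /* "console=ttyS1 root=/dev/sda3 ro" */
    return 0;
}
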
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xentime.c
--- a/xen/arch/ia64/xentime.c Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,382 +0,0 @@
-/*
- * xen/arch/ia64/time.c
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- * Dan Magenheimer <dan.magenheimer@xxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/profile.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/interrupt.h>
-#include <linux/efi.h>
-#include <linux/profile.h>
-#include <linux/timex.h>
-
-#include <asm/machvec.h>
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/ptrace.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-#ifdef XEN
-#include <asm/vcpu.h>
-#include <linux/jiffies.h> // not included by xen/sched.h
-#endif
-#include <xen/softirq.h>
-
-#ifdef XEN
-seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
-#endif
-
-#define TIME_KEEPER_ID 0
-extern unsigned long wall_jiffies;
-
-static s_time_t stime_irq; /* System time at last 'time update' */
-
-unsigned long domain0_ready = 0;
-
-#ifndef CONFIG_VTI
-static inline u64 get_time_delta(void)
-{
- return ia64_get_itc();
-}
-#else // CONFIG_VTI
-static s_time_t stime_irq = 0x0; /* System time at last 'time update' */
-unsigned long itc_scale;
-unsigned long itc_at_irq;
-static unsigned long wc_sec, wc_nsec; /* UTC time at last 'time update'. */
-//static rwlock_t time_lock = RW_LOCK_UNLOCKED;
-static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs);
-
-static inline u64 get_time_delta(void)
-{
- s64 delta_itc;
- u64 delta, cur_itc;
-
- cur_itc = ia64_get_itc();
-
- delta_itc = (s64)(cur_itc - itc_at_irq);
- if ( unlikely(delta_itc < 0) ) delta_itc = 0;
- delta = ((u64)delta_itc) * itc_scale;
- delta = delta >> 32;
-
- return delta;
-}
-
-u64 tick_to_ns(u64 tick)
-{
- return (tick * itc_scale) >> 32;
-}
-#endif // CONFIG_VTI
-
-s_time_t get_s_time(void)
-{
- s_time_t now;
- unsigned long flags;
-
- read_lock_irqsave(&xtime_lock, flags);
-
- now = stime_irq + get_time_delta();
-
- /* Ensure that the returned system time is monotonically increasing. */
- {
- static s_time_t prev_now = 0;
- if ( unlikely(now < prev_now) )
- now = prev_now;
- prev_now = now;
- }
-
- read_unlock_irqrestore(&xtime_lock, flags);
-
- return now;
-}
-
-void update_dom_time(struct vcpu *v)
-{
-// FIXME: implement this?
-// printf("update_dom_time: called, not implemented, skipping\n");
- return;
-}
-
-/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
-{
-#ifdef CONFIG_VTI
- u64 _nsecs;
-
- write_lock_irq(&xtime_lock);
-
- _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base);
- while ( _nsecs >= 1000000000 )
- {
- _nsecs -= 1000000000;
- secs++;
- }
-
- wc_sec = secs;
- wc_nsec = (unsigned long)_nsecs;
-
- write_unlock_irq(&xtime_lock);
-
- update_dom_time(current->domain);
-#else
-// FIXME: Should this be do_settimeofday (from linux)???
- printf("do_settime: called, not implemented, stopping\n");
- dummy();
-#endif
-}
-
-irqreturn_t
-xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
-{
- unsigned long new_itm;
-
-#define HEARTBEAT_FREQ 16 // period in seconds
-#ifdef HEARTBEAT_FREQ
- static long count = 0;
- if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) {
- printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n",
- regs->cr_iip,
- VCPU(current,interrupt_delivery_enabled),
- VCPU(current,pending_interruption));
- count = 0;
- }
-#endif
-#ifndef XEN
- if (unlikely(cpu_is_offline(smp_processor_id()))) {
- return IRQ_HANDLED;
- }
-#endif
-#ifdef XEN
- if (current->domain == dom0) {
- // FIXME: there's gotta be a better way of doing this...
- // We have to ensure that domain0 is launched before we
- // call vcpu_timer_expired on it
- //domain0_ready = 1; // moved to xensetup.c
- VCPU(current,pending_interruption) = 1;
- }
- if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
- vcpu_pend_timer(dom0->vcpu[0]);
- //vcpu_set_next_timer(dom0->vcpu[0]);
- vcpu_wake(dom0->vcpu[0]);
- }
- if (!is_idle_task(current->domain) && current->domain != dom0) {
- if (vcpu_timer_expired(current)) {
- vcpu_pend_timer(current);
- // ensure another timer interrupt happens even if domain doesn't
- vcpu_set_next_timer(current);
- vcpu_wake(current);
- }
- }
- raise_actimer_softirq();
-#endif
-
-#ifndef XEN
- platform_timer_interrupt(irq, dev_id, regs);
-#endif
-
- new_itm = local_cpu_data->itm_next;
-
- if (!time_after(ia64_get_itc(), new_itm))
-#ifdef XEN
- return;
-#else
- printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
- ia64_get_itc(), new_itm);
-#endif
-
-#ifdef XEN
-// printf("GOT TO HERE!!!!!!!!!!!\n");
- //while(1);
-#else
- profile_tick(CPU_PROFILING, regs);
-#endif
-
- while (1) {
-#ifndef XEN
- update_process_times(user_mode(regs));
-#endif
-
- new_itm += local_cpu_data->itm_delta;
-
- if (smp_processor_id() == TIME_KEEPER_ID) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
-#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_seqlock(&xtime_lock);
-#endif
-#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- do_timer(regs);
-#endif
- local_cpu_data->itm_next = new_itm;
-#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_sequnlock(&xtime_lock);
-#endif
- } else
- local_cpu_data->itm_next = new_itm;
-
- if (time_after(new_itm, ia64_get_itc()))
- break;
- }
-
- do {
- /*
- * If we're too close to the next clock tick for
- * comfort, we increase the safety margin by
- * intentionally dropping the next tick(s). We do NOT
- * update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
- * too fast (with the potentially devastating effect
- * of losing monotony of time).
- */
- while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
- new_itm += local_cpu_data->itm_delta;
-//#ifdef XEN
-// vcpu_set_next_timer(current);
-//#else
-//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
- ia64_set_itm(new_itm);
-//#endif
- /* double check, in case we got hit by a (slow) PMI: */
- } while (time_after_eq(ia64_get_itc(), new_itm));
- return IRQ_HANDLED;
-}
-
-static struct irqaction xen_timer_irqaction = {
-#ifdef CONFIG_VTI
- .handler = vmx_timer_interrupt,
-#else // CONFIG_VTI
- .handler = xen_timer_interrupt,
-#endif // CONFIG_VTI
-#ifndef XEN
- .flags = SA_INTERRUPT,
-#endif
- .name = "timer"
-};
-
-void __init
-xen_time_init (void)
-{
- register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction);
- ia64_init_itm();
-}
-
-
-#ifdef CONFIG_VTI
-
-/* Late init function (after all CPUs are booted). */
-int __init init_xen_time()
-{
- struct timespec tm;
-
- itc_scale = 1000000000UL << 32 ;
- itc_scale /= local_cpu_data->itc_freq;
-
- /* System time ticks from zero. */
- stime_irq = (s_time_t)0;
- itc_at_irq = ia64_get_itc();
-
- /* Wallclock time starts as the initial RTC time. */
- efi_gettimeofday(&tm);
- wc_sec = tm.tv_sec;
- wc_nsec = tm.tv_nsec;
-
-
- printk("Time init:\n");
- printk(".... System Time: %ldns\n", NOW());
- printk(".... scale: %16lX\n", itc_scale);
- printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_nsec/1000);
-
- return 0;
-}
-
-static irqreturn_t
-vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
-{
- unsigned long new_itm;
- struct vcpu *v = current;
-
-
- new_itm = local_cpu_data->itm_next;
-
- if (!time_after(ia64_get_itc(), new_itm))
- return;
-
- while (1) {
-#ifdef CONFIG_SMP
- /*
- * For UP, this is done in do_timer(). Weird, but
- * fixing that would require updates to all
- * platforms.
- */
- update_process_times(user_mode(v, regs));
-#endif
- new_itm += local_cpu_data->itm_delta;
-
- if (smp_processor_id() == TIME_KEEPER_ID) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- local_cpu_data->itm_next = new_itm;
-
- write_lock_irq(&xtime_lock);
- /* Update jiffies counter. */
- (*(unsigned long *)&jiffies_64)++;
-
- /* Update wall time. */
- wc_nsec += 1000000000/HZ;
- if ( wc_nsec >= 1000000000 )
- {
- wc_nsec -= 1000000000;
- wc_sec++;
- }
-
- /* Updates system time (nanoseconds since boot). */
- stime_irq += MILLISECS(1000/HZ);
- itc_at_irq = ia64_get_itc();
-
- write_unlock_irq(&xtime_lock);
-
- } else
- local_cpu_data->itm_next = new_itm;
-
- if (time_after(new_itm, ia64_get_itc()))
- break;
- }
-
- do {
- /*
- * If we're too close to the next clock tick for
- * comfort, we increase the safety margin by
- * intentionally dropping the next tick(s). We do NOT
- * update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
- * too fast (with the potentially devastating effect
- * of losing monotony of time).
- */
- while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
- new_itm += local_cpu_data->itm_delta;
- ia64_set_itm(new_itm);
- /* double check, in case we got hit by a (slow) PMI: */
- } while (time_after_eq(ia64_get_itc(), new_itm));
- raise_softirq(AC_TIMER_SOFTIRQ);
-
- return IRQ_HANDLED;
-}
-#endif // CONFIG_VTI
-
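
For reference, the CONFIG_VTI time code above converts ITC ticks to nanoseconds with 32.32 fixed-point arithmetic: init_xen_time() precomputes itc_scale = (10^9 << 32) / itc_freq and tick_to_ns() returns (ticks * itc_scale) >> 32. A minimal standalone sketch follows; the 1 GHz ITC frequency is just an example value.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t itc_freq  = 1000000000ULL;              /* example: 1 GHz ITC */
    uint64_t itc_scale = (1000000000ULL << 32) / itc_freq;
    uint64_t ticks     = 123456789ULL;

    uint64_t ns = (ticks * itc_scale) >> 32;         /* same math as tick_to_ns() */
    printf("%llu ticks -> %llu ns\n",
           (unsigned long long)ticks, (unsigned long long)ns);
    /* at 1 GHz one tick is one nanosecond, so this prints 123456789 ns */
    return 0;
}
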
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/pgtable.h
--- a/xen/include/asm-ia64/linux/asm/pgtable.h Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,567 +0,0 @@
-#ifndef _ASM_IA64_PGTABLE_H
-#define _ASM_IA64_PGTABLE_H
-
-/*
- * This file contains the functions and defines necessary to modify and use
- * the IA-64 page table tree.
- *
- * This hopefully works with any (fixed) IA-64 page-size, as defined
- * in <asm/page.h>.
- *
- * Copyright (C) 1998-2004 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-
-#include <linux/config.h>
-
-#include <asm/mman.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/types.h>
-
-#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */
-
-/*
- * First, define the various bits in a PTE. Note that the PTE format
- * matches the VHPT short format, the firt doubleword of the VHPD long
- * format, and the first doubleword of the TLB insertion format.
- */
-#define _PAGE_P_BIT 0
-#define _PAGE_A_BIT 5
-#define _PAGE_D_BIT 6
-
-#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */
-#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */
-#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */
-#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */
-#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */
-#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */
-#define _PAGE_MA_MASK (0x7 << 2)
-#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */
-#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */
-#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */
-#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */
-#define _PAGE_PL_MASK (3 << 7)
-#define _PAGE_AR_R (0 << 9) /* read only */
-#define _PAGE_AR_RX (1 << 9) /* read & execute */
-#define _PAGE_AR_RW (2 << 9) /* read & write */
-#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */
-#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */
-#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */
-#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */
-#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */
-#define _PAGE_AR_MASK (7 << 9)
-#define _PAGE_AR_SHIFT 9
-#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */
-#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */
-#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL)
-#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */
-#define _PAGE_PROTNONE (__IA64_UL(1) << 63)
-
-/* Valid only for a PTE with the present bit cleared: */
-#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */
-
-#define _PFN_MASK _PAGE_PPN_MASK
-/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
-#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
-
-#define _PAGE_SIZE_4K 12
-#define _PAGE_SIZE_8K 13
-#define _PAGE_SIZE_16K 14
-#define _PAGE_SIZE_64K 16
-#define _PAGE_SIZE_256K 18
-#define _PAGE_SIZE_1M 20
-#define _PAGE_SIZE_4M 22
-#define _PAGE_SIZE_16M 24
-#define _PAGE_SIZE_64M 26
-#define _PAGE_SIZE_256M 28
-#define _PAGE_SIZE_1G 30
-#define _PAGE_SIZE_4G 32
-
-#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB
-#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB
-#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED
-
-/*
- * Definitions for first level:
- *
- * PGDIR_SHIFT determines what a first-level page table entry can map.
- */
-#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3))
-#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3))
-#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */
-#define FIRST_USER_PGD_NR 0
-
-/*
- * Definitions for second level:
- *
- * PMD_SHIFT determines the size of the area a second-level page table
- * can map.
- */
-#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3))
-
-/*
- * Definitions for third level:
- */
-#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3))
-
-/*
- * All the normal masks have the "page accessed" bits on, as any time
- * they are used, the page is accessed. They are cleared only by the
- * page-out routines.
- */
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A)
-#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW)
-#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
-#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R)
-#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
-#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
-#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
-#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
-
-# ifndef __ASSEMBLY__
-
-#include <asm/bitops.h>
-#include <asm/cacheflush.h>
-#include <asm/mmu_context.h>
-#include <asm/processor.h>
-
-/*
- * Next come the mappings that determine how mmap() protection bits
- * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The
- * _P version gets used for a private shared memory segment, the _S
- * version gets used for a shared memory segment with MAP_SHARED on.
- * In a private shared memory segment, we do a copy-on-write if a task
- * attempts to write to the page.
- */
- /* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_READONLY /* write to priv pg -> copy & make writable */
-#define __P011 PAGE_READONLY /* ditto */
-#define __P100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
-#define __P101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED /* we don't have (and don't need) write-only */
-#define __S011 PAGE_SHARED
-#define __S100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX)
-#define __S101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX)
-#define __S110 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
-#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX)
-
-#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e))
-#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
-#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
-
-
-/*
- * Some definitions to translate between mem_map, PTEs, and page addresses:
- */
-
-
-/* Quick test to see if ADDR is a (potentially) valid physical address. */
-static inline long
-ia64_phys_addr_valid (unsigned long addr)
-{
- return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
-}
-
-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory. For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error. Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for IA-64.
- */
-#define kern_addr_valid(addr) (1)
-
-
-/*
- * Now come the defines and routines to manage and access the three-level
- * page table.
- */
-
-/*
- * On some architectures, special things need to be done when setting
- * the PTE in a page table. Nothing special needs to be on IA-64.
- */
-#define set_pte(ptep, pteval) (*(ptep) = (pteval))
-
-#define RGN_SIZE (1UL << 61)
-#define RGN_KERNEL 7
-
-#define VMALLOC_START 0xa000000200000000UL
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-# define VMALLOC_END_INIT (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9)))
-# define VMALLOC_END vmalloc_end
- extern unsigned long vmalloc_end;
-#else
-# define VMALLOC_END (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9)))
-#endif
-
-/* fs/proc/kcore.c */
-#define kc_vaddr_to_offset(v) ((v) - 0xa000000000000000UL)
-#define kc_offset_to_vaddr(o) ((o) + 0xa000000000000000UL)
-
-/*
- * Conversion functions: convert page frame number (pfn) and a protection value to a page
- * table entry (pte).
- */
-#define pfn_pte(pfn, pgprot) \
-({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; })
-
-/* Extract pfn from pte. */
-#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT)
-
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-
-/* This takes a physical page address that is used by the remapping functions */
-#define mk_pte_phys(physpage, pgprot) \
-({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; })
-
-#define pte_modify(_pte, newprot) \
- (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK)))
-
-#define page_pte_prot(page,prot) mk_pte(page, prot)
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
-#define pte_none(pte) (!pte_val(pte))
-#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE))
-#define pte_clear(pte) (pte_val(*(pte)) = 0UL)
-/* pte_page() returns the "struct page *" corresponding to the PTE: */
-#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET))
-
-#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd)))
-#define pmd_present(pmd) (pmd_val(pmd) != 0UL)
-#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK))
-#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET))
-
-#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud)))
-#define pud_present(pud) (pud_val(pud) != 0UL)
-#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
-
-#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK))
-
-/*
- * The following have defined behavior only work if pte_present() is true.
- */
-#define pte_user(pte) ((pte_val(pte) & _PAGE_PL_MASK) == _PAGE_PL_3)
-#define pte_read(pte) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) < 6)
-#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4)
-#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0)
-#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0)
-#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0)
-#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0)
-/*
- * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the
- * access rights:
- */
-#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW))
-#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW))
-#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX))
-#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A))
-#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
-#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
-#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D))
-
-/*
- * Macro to a page protection value as "uncacheable". Note that "protection" is really a
- * misnomer here as the protection value contains the memory attribute bits, dirty bits,
- * and various other bits as well.
- */
-#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC)
-
-/*
- * Macro to make mark a page protection value as "write-combining".
- * Note that "protection" is really a misnomer here as the protection
- * value contains the memory attribute bits, dirty bits, and various
- * other bits as well. Accesses through a write-combining translation
- * works bypasses the caches, but does allow for consecutive writes to
- * be combined into single (but larger) write transactions.
- */
-#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC)
-
-static inline unsigned long
-pgd_index (unsigned long address)
-{
- unsigned long region = address >> 61;
- unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
-
- return (region << (PAGE_SHIFT - 6)) | l1index;
-}
-
-/* The offset in the 1-level directory is given by the 3 region bits
- (61..63) and the level-1 bits. */
-static inline pgd_t*
-pgd_offset (struct mm_struct *mm, unsigned long address)
-{
- return mm->pgd + pgd_index(address);
-}
-
-/* In the kernel's mapped region we completely ignore the region number
- (since we know it's in region number 5). */
-#define pgd_offset_k(addr) \
- (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)))
-
-/* Look up a pgd entry in the gate area. On IA-64, the gate-area
- resides in the kernel-mapped segment, hence we use pgd_offset_k()
- here. */
-#define pgd_offset_gate(mm, addr) pgd_offset_k(addr)
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir,addr) \
- ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
-
-/*
- * Find an entry in the third-level page table. This looks more complicated than it
- * should be because some platforms place page tables in high memory.
- */
-#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr))
-#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr)
-#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr)
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
-
-/* atomic versions of the some PTE manipulations: */
-
-static inline int
-ptep_test_and_clear_young (pte_t *ptep)
-{
-#ifdef CONFIG_SMP
- if (!pte_young(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_A_BIT, ptep);
-#else
- pte_t pte = *ptep;
- if (!pte_young(pte))
- return 0;
- set_pte(ptep, pte_mkold(pte));
- return 1;
-#endif
-}
-
-static inline int
-ptep_test_and_clear_dirty (pte_t *ptep)
-{
-#ifdef CONFIG_SMP
- if (!pte_dirty(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_D_BIT, ptep);
-#else
- pte_t pte = *ptep;
- if (!pte_dirty(pte))
- return 0;
- set_pte(ptep, pte_mkclean(pte));
- return 1;
-#endif
-}
-
-static inline pte_t
-ptep_get_and_clear (pte_t *ptep)
-{
-#ifdef CONFIG_SMP
- return __pte(xchg((long *) ptep, 0));
-#else
- pte_t pte = *ptep;
- pte_clear(ptep);
- return pte;
-#endif
-}
-
-static inline void
-ptep_set_wrprotect (pte_t *ptep)
-{
-#ifdef CONFIG_SMP
- unsigned long new, old;
-
- do {
- old = pte_val(*ptep);
- new = pte_val(pte_wrprotect(__pte (old)));
- } while (cmpxchg((unsigned long *) ptep, old, new) != old);
-#else
- pte_t old_pte = *ptep;
- set_pte(ptep, pte_wrprotect(old_pte));
-#endif
-}
-
-static inline void
-ptep_mkdirty (pte_t *ptep)
-{
-#ifdef CONFIG_SMP
- set_bit(_PAGE_D_BIT, ptep);
-#else
- pte_t old_pte = *ptep;
- set_pte(ptep, pte_mkdirty(old_pte));
-#endif
-}
-
-static inline int
-pte_same (pte_t a, pte_t b)
-{
- return pte_val(a) == pte_val(b);
-}
-
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern void paging_init (void);
-
-/*
- * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of
- * bits in the swap-type field of the swap pte. It would be nice to
- * enforce that, but we can't easily include <linux/swap.h> here.
- * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...).
- *
- * Format of swap pte:
- * bit 0 : present bit (must be zero)
- * bit 1 : _PAGE_FILE (must be zero)
- * bits 2- 8: swap-type
- * bits 9-62: swap offset
- * bit 63 : _PAGE_PROTNONE bit
- *
- * Format of file pte:
- * bit 0 : present bit (must be zero)
- * bit 1 : _PAGE_FILE (must be one)
- * bits 2-62: file_offset/PAGE_SIZE
- * bit 63 : _PAGE_PROTNONE bit
- */
-#define __swp_type(entry) (((entry).val >> 2) & 0x7f)
-#define __swp_offset(entry) (((entry).val << 1) >> 10)
-#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-
-#define PTE_FILE_MAX_BITS 61
-#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE })
-
-/* XXX is this right? */
-#define io_remap_page_range(vma, vaddr, paddr, size, prot) \
- remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
-
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-extern struct page *zero_page_memmap_ptr;
-#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
-
-/* We provide our own get_unmapped_area to cope with VA holes for userland */
-#define HAVE_ARCH_UNMAPPED_AREA
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
-#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
-#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1))
-struct mmu_gather;
-extern void hugetlb_free_pgtables(struct mmu_gather *tlb,
- struct vm_area_struct * prev, unsigned long start, unsigned long end);
-#endif
-
-/*
- * IA-64 doesn't have any external MMU info: the page tables contain all the necessary
- * information. However, we use this routine to take care of any (delayed) i-cache
- * flushing that may be necessary.
- */
-extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte);
-
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-/*
- * Update PTEP with ENTRY, which is guaranteed to be a less
- * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and
- * WRITABLE bits turned on, when the value at PTEP did not. The
- * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE.
- *
- * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without
- * having to worry about races. On SMP machines, there are only two
- * cases where this is true:
- *
- * (1) *PTEP has the PRESENT bit turned OFF
- * (2) ENTRY has the DIRTY bit turned ON
- *
- * On ia64, we could implement this routine with a cmpxchg()-loop
- * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY.
- * However, like on x86, we can get a more streamlined version by
- * observing that it is OK to drop ACCESSED bit updates when
- * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is
- * result in an extra Access-bit fault, which would then turn on the
- * ACCESSED bit in the low-level fault handler (iaccess_bit or
- * daccess_bit in ivt.S).
- */
-#ifdef CONFIG_SMP
-# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
-do { \
- if (__safely_writable) { \
- set_pte(__ptep, __entry); \
- flush_tlb_page(__vma, __addr); \
- } \
-} while (0)
-#else
-# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \
- ptep_establish(__vma, __addr, __ptep, __entry)
-#endif
-
-# ifdef CONFIG_VIRTUAL_MEM_MAP
- /* arch mem_map init routine is needed due to holes in a virtual mem_map */
-# define __HAVE_ARCH_MEMMAP_INIT
- extern void memmap_init (unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn);
-# endif /* CONFIG_VIRTUAL_MEM_MAP */
-# endif /* !__ASSEMBLY__ */
-
-/*
- * Identity-mapped regions use a large page size. We'll call such large pages
- * "granules". If you can think of a better name that's unambiguous, let me
- * know...
- */
-#if defined(CONFIG_IA64_GRANULE_64MB)
-# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M
-#elif defined(CONFIG_IA64_GRANULE_16MB)
-# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M
-#endif
-#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT)
-/*
- * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL):
- */
-#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
-#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
-
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-/* These tell get_user_pages() that the first gate page is accessible from user-level. */
-#define FIXADDR_USER_START GATE_ADDR
-#define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE)
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
-#define __HAVE_ARCH_PTE_SAME
-#define __HAVE_ARCH_PGD_OFFSET_GATE
-#include <asm-generic/pgtable.h>
-#include <asm-generic/pgtable-nopud.h>
-
-#endif /* _ASM_IA64_PGTABLE_H */
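
For reference, a minimal standalone sketch of the pfn_pte()/pte_pfn() round trip defined in the header above: a PTE is just the pfn shifted up by the page shift OR'd with the protection/attribute bits, and the pfn comes back by masking with _PFN_MASK and shifting down. The page shift and mask widths used here are illustrative values, not tied to a particular configuration.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT_SK 14
#define PHYS_BITS_SK  50
#define PFN_MASK_SK   (((1ULL << PHYS_BITS_SK) - 1) & ~0xfffULL)

/* build a PTE from a pfn and protection bits, and extract the pfn again */
static uint64_t pfn_pte_sk(uint64_t pfn, uint64_t prot) { return (pfn << PAGE_SHIFT_SK) | prot; }
static uint64_t pte_pfn_sk(uint64_t pte)                { return (pte & PFN_MASK_SK) >> PAGE_SHIFT_SK; }

int main(void)
{
    uint64_t prot = 0x1ULL | (2ULL << 9);   /* present bit plus AR_RW-style access rights */
    uint64_t pte  = pfn_pte_sk(0xabcdeULL, prot);

    printf("pte = 0x%llx, pfn back = 0x%llx\n",
           (unsigned long long)pte, (unsigned long long)pte_pfn_sk(pte));
    return 0;                               /* pfn back = 0xabcde */
}
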
diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/linuxtime.h
--- a/xen/include/asm-ia64/linux/linuxtime.h Thu Sep 1 17:09:27 2005
+++ /dev/null Thu Sep 1 18:46:28 2005
@@ -1,181 +0,0 @@
-#ifndef _LINUX_TIME_H
-#define _LINUX_TIME_H
-
-#include <linux/types.h>
-
-#ifdef __KERNEL__
-#include <linux/seqlock.h>
-#endif
-
-#ifndef _STRUCT_TIMESPEC
-#define _STRUCT_TIMESPEC
-struct timespec {
- time_t tv_sec; /* seconds */
- long tv_nsec; /* nanoseconds */
-};
-#endif /* _STRUCT_TIMESPEC */
-
-struct timeval {
- time_t tv_sec; /* seconds */
- suseconds_t tv_usec; /* microseconds */
-};
-
-struct timezone {
- int tz_minuteswest; /* minutes west of Greenwich */
- int tz_dsttime; /* type of dst correction */
-};
-
-#ifdef __KERNEL__
-
-/* Parameters used to convert the timespec values */
-#ifndef USEC_PER_SEC
-#define USEC_PER_SEC (1000000L)
-#endif
-
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000L)
-#endif
-
-#ifndef NSEC_PER_USEC
-#define NSEC_PER_USEC (1000L)
-#endif
-
-static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
-{
- return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
-}
-
-/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
- * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
- * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
- *
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
- * WARNING: this function will overflow on 2106-02-07 06:28:16 on
- * machines were long is 32-bit! (However, as time_t is signed, we
- * will already get problems at other places on 2038-01-19 03:14:08)
- */
-static inline unsigned long
-mktime (unsigned int year, unsigned int mon,
- unsigned int day, unsigned int hour,
- unsigned int min, unsigned int sec)
-{
- if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
- mon += 12; /* Puts Feb last since it has leap day */
- year -= 1;
- }
-
- return (((
- (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) +
- year*365 - 719499
- )*24 + hour /* now have hours */
- )*60 + min /* now have minutes */
- )*60 + sec; /* finally seconds */
-}
-
-extern struct timespec xtime;
-extern struct timespec wall_to_monotonic;
-extern seqlock_t xtime_lock;
-
-static inline unsigned long get_seconds(void)
-{
- return xtime.tv_sec;
-}
-
-struct timespec current_kernel_time(void);
-
-#define CURRENT_TIME (current_kernel_time())
-#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 })
-
-extern void do_gettimeofday(struct timeval *tv);
-extern int do_settimeofday(struct timespec *tv);
-extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
-extern void clock_was_set(void); // call when ever the clock is set
-extern int do_posix_clock_monotonic_gettime(struct timespec *tp);
-extern long do_nanosleep(struct timespec *t);
-extern long do_utimes(char __user * filename, struct timeval * times);
-struct itimerval;
-extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue);
-extern int do_getitimer(int which, struct itimerval *value);
-extern void getnstimeofday (struct timespec *tv);
-
-extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
-
-static inline void
-set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
-{
- while (nsec > NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- ++sec;
- }
- while (nsec < 0) {
- nsec += NSEC_PER_SEC;
- --sec;
- }
- ts->tv_sec = sec;
- ts->tv_nsec = nsec;
-}
-
-#endif /* __KERNEL__ */
-
-#define NFDBITS __NFDBITS
-
-#define FD_SETSIZE __FD_SETSIZE
-#define FD_SET(fd,fdsetp) __FD_SET(fd,fdsetp)
-#define FD_CLR(fd,fdsetp) __FD_CLR(fd,fdsetp)
-#define FD_ISSET(fd,fdsetp) __FD_ISSET(fd,fdsetp)
-#define FD_ZERO(fdsetp) __FD_ZERO(fdsetp)
-
-/*
- * Names of the interval timers, and structure
- * defining a timer setting.
- */
-#define ITIMER_REAL 0
-#define ITIMER_VIRTUAL 1
-#define ITIMER_PROF 2
-
-struct itimerspec {
- struct timespec it_interval; /* timer period */
- struct timespec it_value; /* timer expiration */
-};
-
-struct itimerval {
- struct timeval it_interval; /* timer interval */
- struct timeval it_value; /* current value */
-};
-
-
-/*
- * The IDs of the various system clocks (for POSIX.1b interval timers).
- */
-#define CLOCK_REALTIME 0
-#define CLOCK_MONOTONIC 1
-#define CLOCK_PROCESS_CPUTIME_ID 2
-#define CLOCK_THREAD_CPUTIME_ID 3
-#define CLOCK_REALTIME_HR 4
-#define CLOCK_MONOTONIC_HR 5
-
-/*
- * The IDs of various hardware clocks
- */
-
-
-#define CLOCK_SGI_CYCLE 10
-#define MAX_CLOCKS 16
-#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \
- CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR)
-#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR)
-
-/*
- * The various flags for setting POSIX.1b interval timers.
- */
-
-#define TIMER_ABSTIME 0x01
-
-
-#endif
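
For reference, a quick standalone check of the Gauss-style date formula used by mktime() in the header above: the Unix epoch must map to zero seconds and the following day to 86400. The _sk suffix marks this as a sketch reimplementation, not the header's own code.

#include <stdio.h>

static unsigned long mktime_sk(unsigned int year, unsigned int mon,
                               unsigned int day, unsigned int hour,
                               unsigned int min, unsigned int sec)
{
    if (0 >= (int)(mon -= 2)) {    /* 1..12 -> 11,12,1..10: put Feb last */
        mon += 12;
        year -= 1;
    }
    return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)
              + year*365 - 719499) * 24 + hour) * 60 + min) * 60 + sec;
}

int main(void)
{
    printf("%lu\n", mktime_sk(1970, 1, 1, 0, 0, 0));   /* 0     */
    printf("%lu\n", mktime_sk(1970, 1, 2, 0, 0, 0));   /* 86400 */
    return 0;
}
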
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog