# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID dfaf788ab18cdd92f626380ddd97a64fa92abbcd
# Parent de3576a1c62cea31cad7333af2426eaf65884926
# Parent 3bbc9384be3f408a96baf6db4666bc21cebfb955
Merge.
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/include/lib.h Fri Aug 26 20:47:16 2005
@@ -79,36 +79,4 @@
char *strstr(const char *s1, const char *s2);
-/* dlmalloc functions */
-struct mallinfo {
- int arena; /* non-mmapped space allocated from system */
- int ordblks; /* number of free chunks */
- int smblks; /* number of fastbin blocks */
- int hblks; /* number of mmapped regions */
- int hblkhd; /* space in mmapped regions */
- int usmblks; /* maximum total allocated space */
- int fsmblks; /* space available in freed fastbin blocks */
- int uordblks; /* total allocated space */
- int fordblks; /* total free space */
- int keepcost; /* top-most, releasable (via malloc_trim) space */
-};
-
-void *malloc(size_t n);
-void *calloc(size_t n_elements, size_t element_size);
-void free(void* p);
-void *realloc(void* p, size_t n);
-void *memalign(size_t alignment, size_t n);
-void *valloc(size_t n);
-struct mallinfo mallinfo(void);
-int mallopt(int parameter_number, int parameter_value);
-
-void **independent_calloc(size_t n_elements, size_t size, void* chunks[]);
-void **independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
-void *pvalloc(size_t n);
-void cfree(void* p);
-int malloc_trim(size_t pad);
-size_t malloc_usable_size(void* p);
-void malloc_stats(void);
-
-
#endif /* _LIB_H_ */
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/include/mm.h Fri Aug 26 20:47:16 2005
@@ -126,6 +126,18 @@
void init_mm(void);
unsigned long alloc_pages(int order);
-int is_mfn_mapped(unsigned long mfn);
+#define alloc_page() alloc_pages(0);
+void free_pages(void *pointer, int order);
+//int is_mfn_mapped(unsigned long mfn);
+
+static __inline__ int get_order(unsigned long size)
+{
+ int order;
+ size = (size-1) >> PAGE_SHIFT;
+ for ( order = 0; size; order++ )
+ size >>= 1;
+ return order;
+}
+
#endif /* _MM_H_ */
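
The get_order() helper added above computes the smallest buddy-allocator order (power-of-two page count) that covers a byte size. A minimal user-space sketch of the same logic, assuming 4 KiB pages (PAGE_SHIFT == 12 is an assumption here, not something this patch defines):

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumed 4 KiB pages, purely for illustration */

    /* same loop as the inline helper added to mm.h above */
    static int get_order(unsigned long size)
    {
        int order;
        size = (size - 1) >> PAGE_SHIFT;
        for (order = 0; size; order++)
            size >>= 1;
        return order;
    }

    int main(void)
    {
        /* 4096 bytes -> order 0, 8192 -> order 1, 12288 (3 pages) -> order 2 */
        printf("%d %d %d\n", get_order(4096), get_order(8192), get_order(12288));
        return 0;
    }
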
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/include/types.h Fri Aug 26 20:47:16 2005
@@ -49,4 +49,6 @@
typedef unsigned long u_quad_t;
typedef unsigned long uintptr_t;
#endif
+
+#define UINT_MAX (~0U)
#endif /* _TYPES_H_ */
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/mm.c
--- a/extras/mini-os/mm.c Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/mm.c Fri Aug 26 20:47:16 2005
@@ -1,6 +1,7 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+/*
****************************************************************************
* (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
****************************************************************************
*
* File: mm.c
@@ -13,8 +14,6 @@
* Description: memory management related functions
* contains buddy page allocator from Xen.
*
- ****************************************************************************
- * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
****************************************************************************
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
@@ -40,7 +39,7 @@
#include <mm.h>
#include <types.h>
#include <lib.h>
-
+#include <xmalloc.h>
#ifdef MM_DEBUG
#define DEBUG(_f, _a...) \
@@ -505,6 +504,6 @@
(u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
#endif
-
+
printk("MM: done\n");
}
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 26 20:47:16 2005
@@ -853,7 +853,7 @@
atomic_set(&init_deasserted, 0);
#if 1
- cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+ cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
printk("GDT: copying %d bytes from %lx to %lx\n",
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c Fri Aug 26 20:47:16 2005
@@ -105,13 +105,18 @@
struct mm_struct * old_mm;
int retval = 0;
+ memset(&mm->context, 0, sizeof(mm->context));
init_MUTEX(&mm->context.sem);
- mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
down(&old_mm->context.sem);
retval = copy_ldt(&mm->context, &old_mm->context);
up(&old_mm->context.sem);
+ }
+ if (retval == 0) {
+ spin_lock(&mm_unpinned_lock);
+ list_add(&mm->context.unpinned, &mm_unpinned);
+ spin_unlock(&mm_unpinned_lock);
}
return retval;
}
@@ -133,6 +138,11 @@
else
kfree(mm->context.ldt);
mm->context.size = 0;
+ }
+ if (!mm->context.pinned) {
+ spin_lock(&mm_unpinned_lock);
+ list_del(&mm->context.unpinned);
+ spin_unlock(&mm_unpinned_lock);
}
}
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Aug 26 20:47:16 2005
@@ -739,7 +739,7 @@
atomic_set(&init_deasserted, 0);
#ifdef CONFIG_XEN
- cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
+ cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
memcpy((void *)cpu_gdt_descr[cpu].address,
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Aug 26 20:47:16 2005
@@ -712,6 +712,7 @@
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
memset(empty_zero_page, 0, sizeof(empty_zero_page));
+ init_mm.context.pinned = 1;
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
{
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c Fri Aug 26 20:47:16 2005
@@ -12,19 +12,145 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_XEN
#include <asm/pgalloc.h>
-#include <asm/io.h>
+#include <asm/mmu_context.h>
+
+LIST_HEAD(mm_unpinned);
+DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+{
+ struct page *page = virt_to_page(pt);
+ unsigned long pfn = page_to_pfn(page);
+
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte(pfn, flags), 0));
+}
+
+static void mm_walk(struct mm_struct *mm, pgprot_t flags)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int g,u,m;
+
+ pgd = mm->pgd;
+ for (g = 0; g <= USER_PTRS_PER_PGD; g++, pgd++) {
+ if (pgd_none(*pgd))
+ continue;
+ pud = pud_offset(pgd, 0);
+ if (PTRS_PER_PUD > 1) /* not folded */
+ mm_walk_set_prot(pud,flags);
+ for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, 0);
+ if (PTRS_PER_PMD > 1) /* not folded */
+ mm_walk_set_prot(pmd,flags);
+ for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+ if (pmd_none(*pmd))
+ continue;
+ pte = pte_offset_kernel(pmd,0);
+ mm_walk_set_prot(pte,flags);
+ }
+ }
+ }
+}
+
+void mm_pin(struct mm_struct *mm)
+{
+ spin_lock(&mm->page_table_lock);
+
+ mm_walk(mm, PAGE_KERNEL_RO);
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+ UVMF_TLB_FLUSH));
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
+ UVMF_TLB_FLUSH));
+ xen_pgd_pin(__pa(mm->pgd)); /* kernel */
+ xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
+ mm->context.pinned = 1;
+ spin_lock(&mm_unpinned_lock);
+ list_del(&mm->context.unpinned);
+ spin_unlock(&mm_unpinned_lock);
+
+ spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+ spin_lock(&mm->page_table_lock);
+
+ xen_pgd_unpin(__pa(mm->pgd));
+ xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
+ mm_walk(mm, PAGE_KERNEL);
+ xen_tlb_flush();
+ mm->context.pinned = 0;
+ spin_lock(&mm_unpinned_lock);
+ list_add(&mm->context.unpinned, &mm_unpinned);
+ spin_unlock(&mm_unpinned_lock);
+
+ spin_unlock(&mm->page_table_lock);
+}
+
+void mm_pin_all(void)
+{
+ while (!list_empty(&mm_unpinned))
+ mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
+ context.unpinned));
+}
+
+void _arch_exit_mmap(struct mm_struct *mm)
+{
+ struct task_struct *tsk = current;
+
+ task_lock(tsk);
+
+ /*
+ * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+ * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+ */
+ if ( tsk->active_mm == mm )
+ {
+ tsk->active_mm = &init_mm;
+ atomic_inc(&init_mm.mm_count);
+
+ switch_mm(mm, &init_mm, tsk);
+
+ atomic_dec(&mm->mm_count);
+ BUG_ON(atomic_read(&mm->mm_count) == 0);
+ }
+
+ task_unlock(tsk);
+
+ if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) )
+ mm_unpin(mm);
+}
void pte_free(struct page *pte)
{
- pte_t *ptep;
-
- ptep = pfn_to_kaddr(page_to_pfn(pte));
-
- xen_pte_unpin(__pa(ptep));
- make_page_writable(ptep);
- __free_page(pte);
-}
+ unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+ if (!pte_write(*virt_to_ptep(va)))
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+ __free_page(pte);
+}
+#endif /* CONFIG_XEN */
static inline pte_t *lookup_address(unsigned long address)
{
@@ -78,7 +204,7 @@
} else
asm volatile("wbinvd":::"memory");
if (address)
- __flush_tlb_one((unsigned long) address);
+ __flush_tlb_one(address);
else
__flush_tlb_all();
}
@@ -166,14 +292,17 @@
BUG();
/* on x86-64 the direct mapping set at boot is not using 4k pages */
-// BUG_ON(PageReserved(kpte_page));
/*
* ..., but the XEN guest kernels (currently) do:
* If the pte was reserved, it means it was created at boot
* time (not via split_large_page) and in turn we must not
* replace it with a large page.
*/
- if (!PageReserved(kpte_page)) {
+#ifndef CONFIG_XEN
+ BUG_ON(PageReserved(kpte_page));
+#else
+ if (!PageReserved(kpte_page))
+#endif
switch (page_count(kpte_page)) {
case 1:
save_page(address, kpte_page);
@@ -182,7 +311,6 @@
case 0:
BUG(); /* memleak and failed 2M page regeneration */
}
- }
return 0;
}
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 26 20:47:16 2005
@@ -124,6 +124,7 @@
if (blkif->blk_ring.sring) {
unmap_frontend_page(blkif);
vfree(blkif->blk_ring.sring);
+ blkif->blk_ring.sring = NULL;
}
kmem_cache_free(blkif_cachep, blkif);
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 26 20:47:16 2005
@@ -1258,6 +1258,7 @@
err = talk_to_backend(dev, info);
if (err) {
kfree(info);
+ dev->data = NULL;
return err;
}
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 26 20:47:16 2005
@@ -939,18 +939,12 @@
static int destroy_netdev(struct net_device *netdev)
{
- struct net_private *np = NULL;
#ifdef CONFIG_PROC_FS
xennet_proc_delif(netdev);
#endif
unregister_netdev(netdev);
-
- np = netdev_priv(netdev);
- list_del(&np->list);
-
- kfree(netdev);
return 0;
}
@@ -1244,11 +1238,16 @@
}
info = netdev_priv(netdev);
+ dev->data = info;
+
err = talk_to_backend(dev, info);
if (err) {
destroy_netdev(netdev);
+ kfree(netdev);
+ dev->data = NULL;
return err;
}
+
/* Call once in case entries already there. */
watch_for_status(&info->watch, info->watch.node);
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Aug 26 20:47:16 2005
@@ -63,16 +63,19 @@
"popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
: "=a" (ret) : "0" (&hypercall) : "memory" );
#elif defined (__x86_64__)
- __asm__ __volatile__ (
- "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
- : "=a" (ret)
- : "a" ((unsigned long)hypercall.op),
- "D" ((unsigned long)hypercall.arg[0]),
- "S" ((unsigned long)hypercall.arg[1]),
- "d" ((unsigned long)hypercall.arg[2]),
- "g" ((unsigned long)hypercall.arg[3]),
- "g" ((unsigned long)hypercall.arg[4])
- : "r11","rcx","r8","r10","memory");
+ {
+ long ign1, ign2, ign3;
+ __asm__ __volatile__ (
+ "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
+ : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3)
+ : "0" ((unsigned long)hypercall.op),
+ "1" ((unsigned long)hypercall.arg[0]),
+ "2" ((unsigned long)hypercall.arg[1]),
+ "3" ((unsigned long)hypercall.arg[2]),
+ "g" ((unsigned long)hypercall.arg[3]),
+ "g" ((unsigned long)hypercall.arg[4])
+ : "r11","rcx","r8","r10","memory");
+ }
#endif
}
break;
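
The rewritten x86-64 stub above follows the standard GCC inline-asm convention: argument registers that the trap may clobber are declared as dummy outputs, and the inputs are tied to them with matching constraints ("0", "1", ...), so the compiler no longer assumes those registers still hold the inputs afterwards. A standalone x86-64 sketch of the same constraint pattern (illustrative only; it uses an add instruction, not a hypercall):

    #include <stdio.h>

    /* rax and rdi are inputs that the instruction modifies; tying the
     * inputs ("0", "1") to dummy outputs ("=a", "=D") tells GCC both
     * registers are clobbered. */
    static long add_clobbering_inputs(long a, long b)
    {
        long res, ign1;
        __asm__ __volatile__ (
            "addq %%rdi, %%rax"
            : "=a" (res), "=D" (ign1)
            : "0" (a), "1" (b)
            : "cc");
        return res;
    }

    int main(void)
    {
        printf("%ld\n", add_clobbering_inputs(40, 2));   /* prints 42 */
        return 0;
    }
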
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Fri Aug 26 20:47:16 2005
@@ -29,551 +29,362 @@
#ifndef __HYPERCALL_H__
#define __HYPERCALL_H__
+
#include <asm-xen/xen-public/xen.h>
-/*
- * Assembler stubs for hyper-calls.
- */
+#define _hypercall0(type, name) \
+({ \
+ long __res; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res) \
+ : "0" (__HYPERVISOR_##name) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall1(type, name, a1) \
+({ \
+ long __res, __ign1; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=b" (__ign1) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall2(type, name, a1, a2) \
+({ \
+ long __res, __ign1, __ign2; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall3(type, name, a1, a2, a3) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4) \
+({ \
+ long __res, __ign1, __ign2, __ign3, __ign4; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3), "=S" (__ign4) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)), \
+ "4" ((long)(a4)) \
+ : "memory" ); \
+ (type)__res; \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+({ \
+ long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
+ "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)), \
+ "4" ((long)(a4)), "5" ((long)(a5)) \
+ : "memory" ); \
+ (type)__res; \
+})
static inline int
HYPERVISOR_set_trap_table(
- trap_info_t *table)
-{
- int ret;
- unsigned long ignore;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_set_trap_table), "1" (table)
- : "memory" );
-
- return ret;
+ trap_info_t *table)
+{
+ return _hypercall1(int, set_trap_table, table);
}
static inline int
HYPERVISOR_mmu_update(
- mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
- "3" (success_count), "4" (domid)
- : "memory" );
-
- return ret;
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmu_update, req, count, success_count, domid);
}
static inline int
HYPERVISOR_mmuext_op(
- struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count),
- "3" (success_count), "4" (domid)
- : "memory" );
-
- return ret;
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmuext_op, op, count, success_count, domid);
}
static inline int
HYPERVISOR_set_gdt(
- unsigned long *frame_list, int entries)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
- : "memory" );
-
-
- return ret;
+ unsigned long *frame_list, int entries)
+{
+ return _hypercall2(int, set_gdt, frame_list, entries);
}
static inline int
HYPERVISOR_stack_switch(
- unsigned long ss, unsigned long esp)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
- : "memory" );
-
- return ret;
+ unsigned long ss, unsigned long esp)
+{
+ return _hypercall2(int, stack_switch, ss, esp);
}
static inline int
HYPERVISOR_set_callbacks(
- unsigned long event_selector, unsigned long event_address,
- unsigned long failsafe_selector, unsigned long failsafe_address)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
- "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
- : "memory" );
-
- return ret;
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+ return _hypercall4(int, set_callbacks,
+ event_selector, event_address,
+ failsafe_selector, failsafe_address);
}
static inline int
HYPERVISOR_fpu_taskswitch(
- int set)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set)
- : "memory" );
-
- return ret;
+ int set)
+{
+ return _hypercall1(int, fpu_taskswitch, set);
}
static inline int
HYPERVISOR_yield(
- void)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
- : "memory", "ecx" );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_yield, 0);
}
static inline int
HYPERVISOR_block(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
- : "memory", "ecx" );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_block, 0);
}
static inline int
HYPERVISOR_shutdown(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory", "ecx" );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0);
}
static inline int
HYPERVISOR_reboot(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory", "ecx" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
- unsigned long srec)
-{
- int ret;
- unsigned long ign1, ign2;
-
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=S" (ign2)
- : "0" (__HYPERVISOR_sched_op),
- "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory", "ecx");
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0);
}
static inline int
HYPERVISOR_crash(
- void)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
- : "memory", "ecx" );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_crash << SCHEDOP_reasonshift), 0);
}
static inline long
HYPERVISOR_set_timer_op(
- u64 timeout)
-{
- int ret;
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi)
- : "memory");
-
- return ret;
+ u64 timeout)
+{
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
}
static inline int
HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- int ret;
- unsigned long ign1;
-
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
- : "memory");
-
- return ret;
+ dom0_op_t *dom0_op)
+{
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ return _hypercall1(int, dom0_op, dom0_op);
}
static inline int
HYPERVISOR_set_debugreg(
- int reg, unsigned long value)
-{
- int ret;
- unsigned long ign1, ign2;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
- : "memory" );
-
- return ret;
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
}
static inline unsigned long
HYPERVISOR_get_debugreg(
- int reg)
-{
- unsigned long ret;
- unsigned long ign;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
- : "memory" );
-
- return ret;
+ int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
}
static inline int
HYPERVISOR_update_descriptor(
- u64 ma, u64 desc)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_update_descriptor),
- "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)),
- "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32))
- : "memory" );
-
- return ret;
+ u64 ma, u64 desc)
+{
+ return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
}
static inline int
HYPERVISOR_dom_mem_op(
- unsigned int op, unsigned long *extent_list,
- unsigned long nr_extents, unsigned int extent_order)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4, ign5;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
- "=D" (ign5)
- : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
- "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
- : "memory" );
-
- return ret;
+ unsigned int op, unsigned long *extent_list,
+ unsigned long nr_extents, unsigned int extent_order)
+{
+ return _hypercall5(int, dom_mem_op, op, extent_list,
+ nr_extents, extent_order, DOMID_SELF);
}
static inline int
HYPERVISOR_multicall(
- void *call_list, int nr_calls)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
- : "memory" );
-
- return ret;
+ void *call_list, int nr_calls)
+{
+ return _hypercall2(int, multicall, call_list, nr_calls);
}
static inline int
HYPERVISOR_update_va_mapping(
- unsigned long va, pte_t new_val, unsigned long flags)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
- : "0" (__HYPERVISOR_update_va_mapping),
- "1" (va), "2" ((new_val).pte_low),
+ unsigned long va, pte_t new_val, unsigned long flags)
+{
+ unsigned long pte_hi = 0;
#ifdef CONFIG_X86_PAE
- "3" ((new_val).pte_high),
-#else
- "3" (0),
+ pte_hi = new_val.pte_high;
#endif
- "4" (flags)
- : "memory" );
-
- return ret;
+ return _hypercall4(int, update_va_mapping, va,
+ new_val.pte_low, pte_hi, flags);
}
static inline int
HYPERVISOR_event_channel_op(
- void *op)
-{
- int ret;
- unsigned long ignore;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_event_channel_op), "1" (op)
- : "memory" );
-
- return ret;
+ void *op)
+{
+ return _hypercall1(int, event_channel_op, op);
}
static inline int
HYPERVISOR_xen_version(
- int cmd)
-{
- int ret;
- unsigned long ignore;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ignore)
- : "0" (__HYPERVISOR_xen_version), "1" (cmd)
- : "memory" );
-
- return ret;
+ int cmd)
+{
+ return _hypercall1(int, xen_version, cmd);
}
static inline int
HYPERVISOR_console_io(
- int cmd, int count, char *str)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
- : "memory" );
-
- return ret;
+ int cmd, int count, char *str)
+{
+ return _hypercall3(int, console_io, cmd, count, str);
}
static inline int
HYPERVISOR_physdev_op(
- void *physdev_op)
-{
- int ret;
- unsigned long ign;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign)
- : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
- : "memory" );
-
- return ret;
+ void *physdev_op)
+{
+ return _hypercall1(int, physdev_op, physdev_op);
}
static inline int
HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- int ret;
- unsigned long ign1, ign2, ign3;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
- : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
- : "memory" );
-
- return ret;
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
}
static inline int
HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
-{
- int ret;
- unsigned long ign1, ign2, ign3, ign4, ign5;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3),
- "=S" (ign4), "=D" (ign5)
- : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
- "1" (va), "2" ((new_val).pte_low),
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ unsigned long pte_hi = 0;
#ifdef CONFIG_X86_PAE
- "3" ((new_val).pte_high),
-#else
- "3" (0),
+ pte_hi = new_val.pte_high;
#endif
- "4" (flags), "5" (domid) :
- "memory" );
-
- return ret;
+ return _hypercall5(int, update_va_mapping_otherdomain, va,
+ new_val.pte_low, pte_hi, flags, domid);
}
static inline int
HYPERVISOR_vm_assist(
- unsigned int cmd, unsigned int type)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
- : "memory" );
-
- return ret;
+ unsigned int cmd, unsigned int type)
+{
+ return _hypercall2(int, vm_assist, cmd, type);
}
static inline int
HYPERVISOR_boot_vcpu(
- unsigned long vcpu, vcpu_guest_context_t *ctxt)
-{
- int ret;
- unsigned long ign1, ign2;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
- : "memory");
-
- return ret;
+ unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+ return _hypercall2(int, boot_vcpu, vcpu, ctxt);
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+ int vcpu)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_vcpu_up |
+ (vcpu << SCHEDOP_vcpushift), 0);
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+ int vcpu, vcpu_guest_context_t *ctxt)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle |
+ (vcpu << SCHEDOP_vcpushift), ctxt);
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ /* On suspend, control software expects a suspend record in %esi. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=S" (ign2)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_suspend <<
+ SCHEDOP_reasonshift)),
+ "2" (srec) : "memory", "ecx");
+
+ return ret;
}
static inline int
HYPERVISOR_vcpu_down(
- int vcpu)
-{
- int ret;
- unsigned long ign1;
- /* Yes, I really do want to clobber edx here: when we resume a
- vcpu after unpickling a multi-processor domain, it returns
- here, but clobbers all of the call clobbered registers. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
- : "memory", "ecx", "edx" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_up(
- int vcpu)
-{
- int ret;
- unsigned long ign1;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
- : "memory", "ecx" );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_pickle(
- int vcpu, vcpu_guest_context_t *ctxt)
-{
- int ret;
- unsigned long ign1, ign2;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret), "=b" (ign1), "=c" (ign2)
- : "0" (__HYPERVISOR_sched_op),
- "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
- "2" (ctxt)
- : "memory" );
-
- return ret;
+ int vcpu)
+{
+ int ret;
+ unsigned long ign1;
+ /* Yes, I really do want to clobber edx here: when we resume a
+ vcpu after unpickling a multi-processor domain, it returns
+ here, but clobbers all of the call clobbered registers. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
+ : "memory", "ecx", "edx" );
+ return ret;
}
#endif /* __HYPERCALL_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
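
The _hypercallN macros above replace the hand-written stubs: the name is pasted onto __HYPERVISOR_ to select the hypercall number, the arguments are loaded into the registers the 32-bit ABI uses (ebx, ecx, edx, esi, edi), and the eax result is cast to the requested return type. A user-space mock of the token-pasting and result cast (the trap is replaced by a printf stub and the hypercall number is a placeholder, so this does not perform a real hypercall):

    #include <stdio.h>

    #define __HYPERVISOR_xen_version 17     /* placeholder value, not the real ABI number */

    /* mock of _hypercall1: records the call instead of issuing TRAP_INSTR */
    #define _hypercall1(type, name, a1)                                 \
    ({                                                                  \
        long __res = printf("hypercall %d(%ld)\n",                      \
                            __HYPERVISOR_##name, (long)(a1));           \
        (type)__res;                                                    \
    })

    static inline int HYPERVISOR_xen_version(int cmd)
    {
        return _hypercall1(int, xen_version, cmd);
    }

    int main(void)
    {
        HYPERVISOR_xen_version(0);
        return 0;
    }
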
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Fri Aug 26 20:47:16 2005
@@ -4,6 +4,10 @@
* Linux-specific hypervisor handling.
*
* Copyright (c) 2002-2004, K A Fraser
+ *
+ * 64-bit updates:
+ * Benjamin Liu <benjamin.liu@xxxxxxxxx>
+ * Jun Nakajima <jun.nakajima@xxxxxxxxx>
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
@@ -26,497 +30,331 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-/*
- * Benjamin Liu <benjamin.liu@xxxxxxxxx>
- * Jun Nakajima <jun.nakajima@xxxxxxxxx>
- * Ported to x86-64.
- *
- */
#ifndef __HYPERCALL_H__
#define __HYPERCALL_H__
+
#include <asm-xen/xen-public/xen.h>
#define __syscall_clobber "r11","rcx","memory"
-/*
- * Assembler stubs for hyper-calls.
- */
+#define _hypercall0(type, name) \
+({ \
+ long __res; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res) \
+ : "0" (__HYPERVISOR_##name) \
+ : __syscall_clobber ); \
+ (type)__res; \
+})
+
+#define _hypercall1(type, name, a1) \
+({ \
+ long __res, __ign1; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=D" (__ign1) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \
+ : __syscall_clobber ); \
+ (type)__res; \
+})
+
+#define _hypercall2(type, name, a1, a2) \
+({ \
+ long __res, __ign1, __ign2; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)) \
+ : __syscall_clobber ); \
+ (type)__res; \
+})
+
+#define _hypercall3(type, name, a1, a2, a3) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ TRAP_INSTR \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)) \
+ : __syscall_clobber ); \
+ (type)__res; \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "movq %8,%%r10; " TRAP_INSTR \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)), \
+ "g" ((long)(a4)) \
+ : __syscall_clobber, "r10" ); \
+ (type)__res; \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
+({ \
+ long __res, __ign1, __ign2, __ign3; \
+ asm volatile ( \
+ "movq %8,%%r10; movq %9,%%r8; " TRAP_INSTR \
+ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
+ "=d" (__ign3) \
+ : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \
+ "2" ((long)(a2)), "3" ((long)(a3)), \
+ "g" ((long)(a4)), "g" ((long)(a5)) \
+ : __syscall_clobber, "r10", "r8" ); \
+ (type)__res; \
+})
+
static inline int
HYPERVISOR_set_trap_table(
- trap_info_t *table)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table)
- : __syscall_clobber );
-
- return ret;
+ trap_info_t *table)
+{
+ return _hypercall1(int, set_trap_table, table);
}
static inline int
HYPERVISOR_mmu_update(
- mmu_update_t *req, int count, int *success_count, domid_t domid)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5, %%r10;" TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count),
- "d" (success_count), "g" ((unsigned long)domid)
- : __syscall_clobber, "r10" );
-
- return ret;
+ mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmu_update, req, count, success_count, domid);
}
static inline int
HYPERVISOR_mmuext_op(
- struct mmuext_op *op, int count, int *success_count, domid_t domid)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5, %%r10;" TRAP_INSTR
- : "=a" (ret)
- : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count),
- "d" (success_count), "g" ((unsigned long)domid)
- : __syscall_clobber, "r10" );
-
- return ret;
+ struct mmuext_op *op, int count, int *success_count, domid_t domid)
+{
+ return _hypercall4(int, mmuext_op, op, count, success_count, domid);
}
static inline int
HYPERVISOR_set_gdt(
- unsigned long *frame_list, int entries)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries)
- : __syscall_clobber );
-
-
- return ret;
-}
+ unsigned long *frame_list, int entries)
+{
+ return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
static inline int
HYPERVISOR_stack_switch(
- unsigned long ss, unsigned long esp)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp)
- : __syscall_clobber );
-
- return ret;
+ unsigned long ss, unsigned long esp)
+{
+ return _hypercall2(int, stack_switch, ss, esp);
}
static inline int
HYPERVISOR_set_callbacks(
- unsigned long event_address, unsigned long failsafe_address,
- unsigned long syscall_address)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address),
- "S" (failsafe_address), "d" (syscall_address)
- : __syscall_clobber );
-
- return ret;
+ unsigned long event_address, unsigned long failsafe_address,
+ unsigned long syscall_address)
+{
+ return _hypercall3(int, set_callbacks,
+ event_address, failsafe_address, syscall_address);
}
static inline int
HYPERVISOR_fpu_taskswitch(
- int set)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch),
- "D" ((unsigned long) set) : __syscall_clobber );
-
- return ret;
+ int set)
+{
+ return _hypercall1(int, fpu_taskswitch, set);
}
static inline int
HYPERVISOR_yield(
- void)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield)
- : __syscall_clobber );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_yield, 0);
}
static inline int
HYPERVISOR_block(
- void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block)
- : __syscall_clobber );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_block, 0);
}
static inline int
HYPERVISOR_shutdown(
- void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
-	"D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)))
- : __syscall_clobber );
-
- return ret;
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0);
}
static inline int
HYPERVISOR_reboot(
- void)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
-	"D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)))
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_suspend(
- unsigned long srec)
-{
- int ret;
-
- /* NB. On suspend, control software expects a suspend record in %esi. */
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
-	"D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))),
- "S" (srec)
- : __syscall_clobber );
-
- return ret;
-}
-
-/*
- * We can have the timeout value in a single argument for the hypercall, but
- * that will break the common code.
- */
+ void)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0);
+}
+
static inline long
HYPERVISOR_set_timer_op(
- u64 timeout)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_set_timer_op),
- "D" (timeout)
- : __syscall_clobber );
-
- return ret;
+ u64 timeout)
+{
+ return _hypercall1(long, set_timer_op, timeout);
}
static inline int
HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
-{
- int ret;
-
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_dom0_op), "D" (dom0_op)
- : __syscall_clobber );
-
- return ret;
+ dom0_op_t *dom0_op)
+{
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ return _hypercall1(int, dom0_op, dom0_op);
}
static inline int
HYPERVISOR_set_debugreg(
- int reg, unsigned long value)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_set_debugreg), "D" ((unsigned long)reg), "S" (value)
- : __syscall_clobber );
-
- return ret;
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_debugreg, reg, value);
}
static inline unsigned long
HYPERVISOR_get_debugreg(
- int reg)
-{
- unsigned long ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_get_debugreg), "D" ((unsigned long)reg)
- : __syscall_clobber );
-
- return ret;
+ int reg)
+{
+ return _hypercall1(unsigned long, get_debugreg, reg);
}
static inline int
HYPERVISOR_update_descriptor(
- unsigned long ma, unsigned long word)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_update_descriptor), "D" (ma),
- "S" (word)
- : __syscall_clobber );
-
- return ret;
+ unsigned long ma, unsigned long word)
+{
+ return _hypercall2(int, update_descriptor, ma, word);
}
static inline int
HYPERVISOR_dom_mem_op(
- unsigned int op, unsigned long *extent_list,
- unsigned long nr_extents, unsigned int extent_order)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_dom_mem_op), "D" ((unsigned long)op), "S" (extent_list),
-	"d" (nr_extents), "g" ((unsigned long) extent_order), "g" ((unsigned long) DOMID_SELF)
- : __syscall_clobber,"r8","r10");
-
- return ret;
+ unsigned int op, unsigned long *extent_list,
+ unsigned long nr_extents, unsigned int extent_order)
+{
+ return _hypercall5(int, dom_mem_op, op, extent_list,
+ nr_extents, extent_order, DOMID_SELF);
}
static inline int
HYPERVISOR_multicall(
- void *call_list, int nr_calls)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_multicall), "D" (call_list), "S" ((unsigned long)nr_calls)
- : __syscall_clobber);
-
- return ret;
+ void *call_list, int nr_calls)
+{
+ return _hypercall2(int, multicall, call_list, nr_calls);
}
static inline int
HYPERVISOR_update_va_mapping(
- unsigned long page_nr, pte_t new_val, unsigned long flags)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_update_va_mapping),
- "D" (page_nr), "S" (new_val.pte), "d" (flags)
- : __syscall_clobber);
-
- return ret;
+ unsigned long va, pte_t new_val, unsigned long flags)
+{
+ return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
}
static inline int
HYPERVISOR_event_channel_op(
- void *op)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_event_channel_op), "D" (op)
- : __syscall_clobber);
-
- return ret;
+ void *op)
+{
+ return _hypercall1(int, event_channel_op, op);
}
static inline int
HYPERVISOR_xen_version(
- int cmd)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_xen_version), "D" ((unsigned long)cmd)
- : __syscall_clobber);
-
- return ret;
+ int cmd)
+{
+ return _hypercall1(int, xen_version, cmd);
}
static inline int
HYPERVISOR_console_io(
- int cmd, int count, char *str)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_console_io), "D" ((unsigned long)cmd), "S" ((unsigned long)count), "d" (str)
- : __syscall_clobber);
-
- return ret;
+ int cmd, int count, char *str)
+{
+ return _hypercall3(int, console_io, cmd, count, str);
}
static inline int
HYPERVISOR_physdev_op(
- void *physdev_op)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_physdev_op), "D" (physdev_op)
- : __syscall_clobber);
-
- return ret;
+ void *physdev_op)
+{
+ return _hypercall1(int, physdev_op, physdev_op);
}
static inline int
HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_grant_table_op), "D" ((unsigned long)cmd), "S" ((unsigned long)uop), "d" (count)
- : __syscall_clobber);
-
- return ret;
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
}
static inline int
HYPERVISOR_update_va_mapping_otherdomain(
- unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
-{
- int ret;
-
- __asm__ __volatile__ (
- "movq %5, %%r10;" TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_update_va_mapping_otherdomain),
-	"D" (page_nr), "S" (new_val.pte), "d" (flags), "g" ((unsigned long)domid)
- : __syscall_clobber,"r10");
-
- return ret;
+ unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ return _hypercall4(int, update_va_mapping_otherdomain, va,
+ new_val.pte, flags, domid);
}
static inline int
HYPERVISOR_vm_assist(
- unsigned int cmd, unsigned int type)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_vm_assist), "D" ((unsigned long)cmd), "S" ((unsigned long)type)
- : __syscall_clobber);
-
- return ret;
+ unsigned int cmd, unsigned int type)
+{
+ return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int
+HYPERVISOR_boot_vcpu(
+ unsigned long vcpu, vcpu_guest_context_t *ctxt)
+{
+ return _hypercall2(int, boot_vcpu, vcpu, ctxt);
+}
+
+static inline int
+HYPERVISOR_vcpu_up(
+ int vcpu)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_vcpu_up |
+ (vcpu << SCHEDOP_vcpushift), 0);
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+ int vcpu, vcpu_guest_context_t *ctxt)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle |
+ (vcpu << SCHEDOP_vcpushift), ctxt);
}
static inline int
HYPERVISOR_switch_to_user(void)
{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
-	: "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_switch_to_user) : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_boot_vcpu(
- unsigned long vcpu, vcpu_guest_context_t *ctxt)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" (__HYPERVISOR_boot_vcpu), "D" (vcpu), "S" (ctxt)
- : __syscall_clobber);
-
- return ret;
+ return _hypercall0(int, switch_to_user);
}
static inline int
HYPERVISOR_set_segment_base(
- int reg, unsigned long value)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
-	: "0" ((unsigned long)__HYPERVISOR_set_segment_base), "D" ((unsigned long)reg), "S" (value)
- : __syscall_clobber );
-
- return ret;
-}
-
-static inline int
-HYPERVISOR_vcpu_pickle(
- int vcpu, vcpu_guest_context_t *ctxt)
-{
- int ret;
-
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret)
- : "0" ((unsigned long)__HYPERVISOR_sched_op),
- "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
- "S" ((unsigned long)ctxt)
- : __syscall_clobber );
-
- return ret;
+ int reg, unsigned long value)
+{
+ return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ return _hypercall2(int, sched_op, SCHEDOP_shutdown |
+ (SHUTDOWN_suspend << SCHEDOP_reasonshift), srec);
}
#endif /* __HYPERCALL_H__ */
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h Fri Aug 26 20:47:16 2005
@@ -58,6 +58,9 @@
}
}
+extern void mm_pin(struct mm_struct *mm);
+extern void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
@@ -66,6 +69,9 @@
struct mmuext_op _op[3], *op = _op;
if (likely(prev != next)) {
+ if (!next->context.pinned)
+ mm_pin(next);
+
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
#if 0 /* XEN: no lazy tlb */
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h Fri Aug 26 20:47:16 2005
@@ -21,12 +21,27 @@
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
- set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+ if (unlikely((mm)->context.pinned)) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
+ pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
+		set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+ } else {
+ *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
+ }
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+ if (unlikely((mm)->context.pinned)) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)pmd,
+ pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
+ PAGE_KERNEL_RO), 0));
+ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+ } else {
+ *(pud) = __pud(_PAGE_TABLE | __pa(pmd));
+ }
}
/*
@@ -35,53 +50,54 @@
*/
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
- set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
- set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
-}
-
-extern __inline__ pmd_t *get_pmd(void)
-{
- pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
- if (!pmd)
- return NULL;
- make_page_readonly(pmd);
- xen_pmd_pin(__pa(pmd));
- return pmd;
+ if (unlikely((mm)->context.pinned)) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)pud,
+ pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
+ PAGE_KERNEL_RO), 0));
+ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+ set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+ } else {
+ *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
+ *(__user_pgd(pgd)) = *(pgd);
+ }
}
extern __inline__ void pmd_free(pmd_t *pmd)
{
- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
- xen_pmd_unpin(__pa(pmd));
- make_page_writable(pmd);
+ pte_t *ptep = virt_to_ptep(pmd);
+
+ if (!pte_write(*ptep)) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)pmd,
+ pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL),
+ 0));
+ }
free_page((unsigned long)pmd);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- if (!pmd)
- return NULL;
- make_page_readonly(pmd);
- xen_pmd_pin(__pa(pmd));
return pmd;
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- if (!pud)
- return NULL;
- make_page_readonly(pud);
- xen_pud_pin(__pa(pud));
return pud;
}
static inline void pud_free(pud_t *pud)
{
- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
- xen_pud_unpin(__pa(pud));
- make_page_writable(pud);
+ pte_t *ptep = virt_to_ptep(pud);
+
+ if (!pte_write(*ptep)) {
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)pud,
+ pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL),
+ 0));
+ }
free_page((unsigned long)pud);
}
@@ -107,10 +123,6 @@
(PTRS_PER_PGD - boundary) * sizeof(pgd_t));
memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
- make_pages_readonly(pgd, 2);
-
- xen_pgd_pin(__pa(pgd)); /* kernel */
- xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
/*
* Set level3_user_pgt for vsyscall area
*/
@@ -121,31 +133,45 @@
static inline void pgd_free(pgd_t *pgd)
{
- BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
- xen_pgd_unpin(__pa(pgd));
- xen_pgd_unpin(__pa(__user_pgd(pgd)));
- make_pages_writable(pgd, 2);
+ pte_t *ptep = virt_to_ptep(pgd);
+
+ if (!pte_write(*ptep)) {
+ xen_pgd_unpin(__pa(pgd));
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)pgd,
+			pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+ 0));
+ }
+
+ ptep = virt_to_ptep(__user_pgd(pgd));
+
+ if (!pte_write(*ptep)) {
+ xen_pgd_unpin(__pa(__user_pgd(pgd)));
+ BUG_ON(HYPERVISOR_update_va_mapping(
+ (unsigned long)__user_pgd(pgd),
+			pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT,
+ PAGE_KERNEL),
+ 0));
+ }
+
free_pages((unsigned long)pgd, 1);
}
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- if (!pte)
- return NULL;
- make_page_readonly(pte);
- xen_pte_pin(__pa(pte));
+ if (pte)
+ make_page_readonly(pte);
+
return pte;
}
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
- if (!pte)
- return NULL;
- make_page_readonly(pte);
- xen_pte_pin(__pa(pte));
- return virt_to_page((unsigned long)pte);
+ struct page *pte;
+
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ return pte;
}
/* Should really implement gc for free page table pages. This could be
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h Fri Aug 26 20:47:16 2005
@@ -18,7 +18,7 @@
#define __flush_tlb_all() __flush_tlb_global()
-#define __flush_tlb_one(addr) xen_invlpg(addr)
+#define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
/*
diff -r de3576a1c62c -r dfaf788ab18c tools/python/xen/util/Brctl.py
--- a/tools/python/xen/util/Brctl.py Thu Aug 25 20:52:38 2005
+++ b/tools/python/xen/util/Brctl.py Fri Aug 26 20:47:16 2005
@@ -76,6 +76,7 @@
def bridge_del(bridge):
"""Delete a bridge.
"""
+ cmd(CMD_IFCONFIG, '%s down' % bridge)
cmd(CMD_BRCTL, 'delbr %s' % bridge)
def routes():
diff -r de3576a1c62c -r dfaf788ab18c tools/python/xen/xend/XendVnet.py
--- a/tools/python/xen/xend/XendVnet.py Thu Aug 25 20:52:38 2005
+++ b/tools/python/xen/xend/XendVnet.py Fri Aug 26 20:47:16 2005
@@ -22,7 +22,7 @@
from xen.xend import sxp
from xen.xend.XendError import XendError
from xen.xend.XendLogging import log
-from xen.xend.xenstore import XenNode, DBMap
+from xen.xend.xenstore import XenNode, DBMap, DBVar
def vnet_cmd(cmd):
out = None
@@ -38,17 +38,40 @@
class XendVnetInfo:
vifctl_ops = {'up': 'vif.add', 'down': 'vif.del'}
+
+ __exports__ = [
+ DBVar('id', ty='str'),
+ DBVar('dbid', ty='str'),
+ DBVar('config', ty='sxpr'),
+ ]
- def __init__(self, config):
- self.config = config
- self.id = sxp.child_value(config, 'id')
- self.id = str(self.id)
+ def __init__(self, db, config=None):
+ if config:
+ self.id = sxp.child_value(config, 'id')
+ self.id = str(self.id)
+ self.dbid = self.id.replace(':', '-')
+ self.db = db.addChild(self.dbid)
+ self.config = config
+ else:
+ self.db = db
+ self.importFromDB()
+ config = self.config
+
self.bridge = sxp.child_value(config, 'bridge')
if not self.bridge:
self.bridge = "vnet%s" % self.id
self.vnetif = sxp.child_value(config, 'vnetif')
if not self.vnetif:
- self.vnetif = "vnetif%s" % self.id
+ self.vnetif = "vnif%s" % self.id
+
+ def saveToDB(self, save=False, sync=False):
+ self.db.saveDB(save=save, sync=sync)
+
+ def exportToDB(self, save=False, sync=False):
+ self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
+
+ def importFromDB(self):
+ self.db.importFromDB(self, fields=self.__exports__)
def sxpr(self):
return self.config
@@ -64,7 +87,9 @@
log.info("Deleting vnet %s", self.id)
Brctl.vif_bridge_rem({'bridge': self.bridge, 'vif': self.vnetif})
Brctl.bridge_del(self.bridge)
- return vnet_cmd(['vnet.del', self.id])
+ val = vnet_cmd(['vnet.del', self.id])
+ self.db.delete()
+ return val
def vifctl(self, op, vif, vmac):
try:
@@ -82,16 +107,18 @@
def __init__(self):
# Table of vnet info indexed by vnet id.
self.vnet = {}
- self.dbmap = DBMap(db=XenNode(self.dbpath))
- self.dbmap.readDB()
- for vnetdb in self.dbmap.values():
- config = vnetdb.config
- info = XendVnetInfo(config)
- self.vnet[info.id] = info
+ self.db = DBMap(db=XenNode(self.dbpath))
+ self.db.readDB()
+ for vnetdb in self.db.values():
try:
+ info = XendVnetInfo(vnetdb)
+ self.vnet[info.id] = info
info.configure()
except XendError, ex:
                 log.warning("Failed to configure vnet %s: %s", str(info.id), str(ex))
+ except Exception, ex:
+ log.exception("Vnet error")
+ vnetdb.delete()
def vnet_of_bridge(self, bridge):
"""Get the vnet for a bridge (if any).
@@ -128,9 +155,9 @@
@param config: config
"""
- info = XendVnetInfo(config)
+ info = XendVnetInfo(self.db, config=config)
self.vnet[info.id] = info
- self.dbmap["%s/config" % info.id] = info.sxpr()
+ info.saveToDB()
info.configure()
def vnet_delete(self, id):
@@ -141,7 +168,6 @@
info = self.vnet_get(id)
if info:
del self.vnet[id]
- self.dbmap.delete(id)
info.delete()
def instance():
diff -r de3576a1c62c -r dfaf788ab18c tools/python/xen/xend/server/SrvVnetDir.py
--- a/tools/python/xen/xend/server/SrvVnetDir.py Thu Aug 25 20:52:38 2005
+++ b/tools/python/xen/xend/server/SrvVnetDir.py Fri Aug 26 20:47:16 2005
@@ -19,6 +19,7 @@
from xen.xend.Args import FormFn
from xen.xend import PrettyPrint
from xen.xend import XendVnet
+from xen.xend.XendError import XendError
from xen.web.SrvDir import SrvDir
diff -r de3576a1c62c -r dfaf788ab18c tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Thu Aug 25 20:52:38 2005
+++ b/tools/python/xen/xm/main.py Fri Aug 26 20:47:16 2005
@@ -104,6 +104,11 @@
network-limit <DomId> <Vif> <Credit> <Period>
Limit the transmission rate of a virtual network interface
network-list <DomId> List virtual network interfaces for a domain
+
+ Vnet commands:
+ vnet-list [-l|--long] list vnets
+ vnet-create <config> create a vnet from a config file
+ vnet-delete <vnetid> delete a vnet
For a short list of subcommands run 'xm help'
For more help on xm see the xm(1) man page
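
A minimal usage sketch for the new subcommands, using the vnet97.sxp example config
shipped under tools/vnet/examples:

    xm vnet-create vnet97.sxp      # define vnet 97 from the config file
    xm vnet-list -l                # show full vnet info
    xm vnet-delete 97              # remove it again
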
@@ -546,6 +551,47 @@
from xen.xend.XendClient import server
server.xend_domain_device_destroy(dom, 'vbd', dev)
+
+def xm_vnet_list(args):
+ from xen.xend.XendClient import server
+ try:
+ (options, params) = getopt(args, 'l', ['long'])
+ except GetoptError, opterr:
+ err(opterr)
+ sys.exit(1)
+
+ use_long = 0
+ for (k, v) in options:
+ if k in ['-l', '--long']:
+ use_long = 1
+
+ if params:
+ use_long = 1
+ vnets = params
+ else:
+ vnets = server.xend_vnets()
+
+ for vnet in vnets:
+ try:
+ if use_long:
+ info = server.xend_vnet(vnet)
+ PrettyPrint.prettyprint(info)
+ else:
+ print vnet
+ except Exception, ex:
+ print vnet, ex
+
+def xm_vnet_create(args):
+ arg_check(args, 1, "vnet-create")
+ conf = args[0]
+ from xen.xend.XendClient import server
+ server.xend_vnet_create(conf)
+
+def xm_vnet_delete(args):
+ arg_check(args, 1, "vnet-delete")
+ vnet = args[0]
+ from xen.xend.XendClient import server
+ server.xend_vnet_delete(vnet)
commands = {
# console commands
@@ -592,7 +638,11 @@
"block-refresh": xm_block_refresh,
# network
"network-limit": xm_network_limit,
- "network-list": xm_network_list
+ "network-list": xm_network_list,
+ # vnet
+ "vnet-list": xm_vnet_list,
+ "vnet-create": xm_vnet_create,
+ "vnet-delete": xm_vnet_delete,
}
aliases = {
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/00INSTALL
--- a/tools/vnet/00INSTALL Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/00INSTALL Fri Aug 26 20:47:16 2005
@@ -1,14 +1,34 @@
-To compile and install run "make install"; if it fails or you need to reinstall
-run "make clean" first or the build will fail, at least that is what I have
-found under 2.6.10.
+make
+ - compile in local dirs. The module is in vnet-module/vnet_module.ko.
-Other important items:
+make dist
+ - compile and install into $(XEN_ROOT)/dist/install,
+ - where XEN_ROOT is the root of the xen tree.
+
+make install
+ - compile and install into system.
+
+The xen0 kernel must have been compiled before building the vnet module.
+The vnet module installs to
+ /lib/modules/<kernel version>-xen0/kernel/xen/vnet_module.ko
+
+The vnet module should be loaded before starting xend, or
+xend will fail to create any persistent vnets it has in its configuration.
+The script network-vnet is a modified version of the xen network script
+that loads the module if it's not already loaded.
+
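+A typical startup sequence is therefore (a sketch, assuming the default script
+install location described below):
+
+    /etc/xen/scripts/vnet-insert   # load vnet_module if not already loaded
+    lsmod | grep vnet_module       # confirm it is present
+    xend start
+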
+The module uses kernel crypto functions, and these need to be
+enabled in the xen0 kernel config. They should be on by default -
+if they're not you will get compile or insmod errors (see below).
+
+Kernel config options:
+
1) You will need to have your xen0 kernel compiled with HMAC_SUPPORT
2.6.x = (MAIN MENU: Cryptographic Options -> HMAC Support)
BEFORE running "make install".
-2) You will want at least some of the other alogorithms listed under
+2) You will want at least some of the other algorithms listed under
"Cryptographic Options" for the kernel compiled as modules.
3) You will want the networking IPsec/VLAN options compiled in as modules
@@ -23,9 +43,5 @@
802.1Q VLAN Support
-4) The module (vnet_module) will not properly load from the command line
- with a "modprobe vnet_module". Use network-vnet to properly configure
- your system and load the module for you.
-
Please refer to the additional documentation found in tools/vnet/doc for
proper syntax and config file parameters.
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/Makefile
--- a/tools/vnet/Makefile Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/Makefile Fri Aug 26 20:47:16 2005
@@ -1,18 +1,21 @@
+# -*- mode: Makefile; -*-
-export LINUX_SERIES ?=2.6
+ifndef VNET_ROOT
+export VNET_ROOT = $(shell pwd)
+include $(VNET_ROOT)/Make.env
+endif
-# Root path to install in.
-# Set to '/' to install relative to filesystem root.
-export prefix?=$(shell cd ../../dist/install && pwd)
+.PHONY: all compile install dist clean pristine
+.PHONY: gc-all gc-install gc-clean
-.PHONY: all compile
-.PHONY: gc-install gc-clean gc-prstine
-.PHONY: libxutil vnetd vnet-module install dist clean pristine
+SUBDIRS:=
+SUBDIRS+= examples
+SUBDIRS+= gc
+SUBDIRS+= libxutil
+SUBDIRS+= vnetd
+SUBDIRS+= vnet-module
all: compile
-
-compile: libxutil vnetd vnet-module
-#compile: vnet-module
gc.tar.gz:
wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@
@@ -21,36 +24,39 @@
tar xfz gc.tar.gz
ln -sf gc?.? gc
-gc-install: gc
- (cd gc && make test && ./configure --prefix=`pwd`/install)
+$(GC_LIB_A): gc
+ (cd gc && ./configure --prefix=$(GC_DIR) )
make -C gc
- make -C gc install
+ DESTDIR="" make -C gc install
+
+gc-all: $(GC_LIB_A)
+
+gc-install:
gc-clean:
- -$(MAKE) -C gc clean
+ -@$(RM) -r gc?.? gc
-gc-pristine:
- -rm -rf gc?.? gc
+submak = $(MAKE) -C $(patsubst %-$(1),%,$(@)) $(1)
+subtgt = $(patsubst %,%-$(1),$(SUBDIRS))
-libxutil:
- $(MAKE) -C libxutil
+%-all:
+ $(call submak,all)
-vnetd: gc-install
- $(MAKE) -C vnetd
+%-clean:
+ -$(call submak,clean)
-vnet-module:
- $(MAKE) -C vnet-module
+%-install:
+ $(call submak,install)
-install: compile
- $(MAKE) -C libxutil install
- $(MAKE) -C vnetd install
- $(MAKE) -C vnet-module install
- $(MAKE) -C examples install
+compile: $(call subtgt,all)
-clean:
- -$(MAKE) -C libxutil clean
- -$(MAKE) -C vnetd clean
- -$(MAKE) -C vnet-module clean
- -rm -rf gc?.? gc
+install: DESTDIR=
+install: dist
-pristine: clean gc-pristine
+dist: compile $(call subtgt,install)
+
+clean: $(call subtgt,clean)
+ -@$(RM) -r build
+
+pristine: clean
+ -@$(RM) gc.tar.gz
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/doc/vnet-module.txt
--- a/tools/vnet/doc/vnet-module.txt Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/doc/vnet-module.txt Fri Aug 26 20:47:16 2005
@@ -1,20 +1,33 @@
Vnet Module Command Interface
Mike Wray <mike.wray@xxxxxx>
-2004/09/17
+2005/08/25
When insmod the vnet-module creates /proc/vnet/policy which
can be used to control the module by writing commands into it.
The return code from the command should be returned by close.
+Xend uses these commands to implement its vnet interface.
The commands are:
-(vnet.add (id <id>) [(security { none | auth | conf } )] )
+(vnet.add (id <id>) [(vnetif <ifname>)] [(security { none | auth | conf } )] )
Create the vnet with id <id> and the given security level (default none).
+Vnet ids are 128-bit and can be specified as 8 fields of 1 to 4 hex digits
+separated by colons. A vnet id with no colons is treated as one with the first
+7 fields zero. Examples:
+
+1500 - equivalent to 0:0:0:0:0:0:0:1500
+aaff:0:0:0:0:0:77:88
+
Security levels:
- none: no security
- auth: message authentication (IPSEC hmac)
- conf: message confidentiality (IPSEC hmac and encryption)
+
+The <ifname> is the name of the network device created for the vnet.
+If not given it defaults to vnif<N>, where <N> is the hex for the
+8-th field in the id. Note that network device names can have a
+maximum of 14 characters.
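+
+For example, to pick the interface name explicitly (the name vnet88 here is
+arbitrary, within the 14-character limit):
+
+echo '(vnet.add (id aaff:0:0:0:0:0:77:88) (vnetif vnet88))' > /proc/vnet/policy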
(vnet.del (id <id>))
@@ -31,11 +44,17 @@
Remove the vif with MAC address <macaddr> from the vnet with id <vnetid>.
The vnet module will stop responding to VARP for the vif.
+(vif.print)
+
+Print the known vnets, vifs and varp cache on the console.
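+It is driven through the same /proc file as the other commands, e.g.:
+
+echo '(vif.print)' > /proc/vnet/policy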
+
Examples:
To create vnet 10 with no security:
echo '(vnet.add (id 10))' > /proc/vnet/policy
+
+This creates a device vnif0010.
To create vnet 11 with message authentication:
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/doc/vnet-xend.txt
--- a/tools/vnet/doc/vnet-xend.txt Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/doc/vnet-xend.txt Fri Aug 26 20:47:16 2005
@@ -3,11 +3,13 @@
Mike Wray <mike.wray@xxxxxx>
+2005/08/25
+
0) Introduction
---------------
Vnets provide virtual private LANs for virtual machines.
-This is done using bridging and tunneling. A virtual interface
+This is done using bridging and multipoint tunneling. A virtual interface
on a vnet can only see other interfaces on the same vnet - it cannot
see the real network, and the real network cannot see it either.
@@ -32,13 +34,16 @@
Restart xend.
+Alternatively insert the vnet module using vnet-insert,
+preferably before xend starts.
+
2) Creating vnets
-----------------
Xend already implements commands to add/remove vnets and
bridge to them. To add a vnet use
-xm call vnet_add <vnet config file>
+xm vnet-create <vnet config file>
For example, if vnet97.sxp contains:
@@ -46,7 +51,7 @@
do
-xm call vnet_add vnet97.sxp
+xm vnet-create vnet97.sxp
This will define a vnet with id 97 and no security. The bridge for the
vnet is called vnet97 and the virtual interface for it is vnetif97.
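
For reference, the vnet97.sxp shipped under tools/vnet/examples contains:

(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))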
@@ -64,31 +69,35 @@
Once configured, vnets are persistent in the xend database.
To remove a vnet use
-xm call vnet_delete <vnet id>
+xm vnet-delete <vnet id>
To list vnets use
-xm call vnets
+xm vnet-list
To get information on a vnet id use
-xm call vnet <vnet id>
+xm vnet-list <vnet id>
3) Troubleshooting
------------------
The vnet module should appear in 'lsmod'.
-If a vnet has been configured it should appear in the output of 'xm call vnets'.
+If a vnet has been configured it should appear in the output of 'xm vnet-list'.
Its bridge and interface should appear in 'ifconfig'.
It should also show in 'brctl show', with its attached interfaces.
-You can 'see into' a vnet from dom0 if you put an IP address on the bridge.
+You can 'see into' a vnet from dom0 if you put an IP address on the bridge
+and configure its MAC address as a vif.
For example, if you have vnet97 with a vm with ip addr 10.0.0.12 on it,
-then
+and <mac> is the MAC address of vnet97 (use ifconfig), then
+echo '(vif.add (vnet 97) (vmac <mac>))' >/proc/vnet/policy
ifconfig vnet97 10.0.0.20 up
should let you ping 10.0.0.12 via the vnet97 bridge.
+This works even if the vm with vif 10.0.0.12 is on another
+machine (it only works locally if you don't use vif.add).
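+
+Putting these checks together, a quick sanity pass for the vnet97 example might
+look like this (a sketch):
+
+lsmod | grep vnet_module
+xm vnet-list
+brctl show
+ifconfig vnet97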
4) Examples
-----------
@@ -104,11 +113,11 @@
(linux
(kernel /boot/vmlinuz-2.6-xenU)
(ip 10.0.0.12:1.2.3.4::::eth0:off)
- (root /dev/hda1)
+ (root /dev/sda1)
(args 'rw fastboot 4')
)
)
- (device (vbd (uname phy:hda2) (dev hda1) (mode w)))
+ (device (vbd (uname phy:hda2) (dev sda1) (mode w)))
(device (vif (mac aa:00:00:11:00:12) (bridge vnet97)))
)
@@ -123,11 +132,11 @@
(linux
(kernel /boot/vmlinuz-2.6-xenU)
(ip 10.0.0.11:1.2.3.4::::eth0:off)
- (root /dev/hda1)
+ (root /dev/sda1)
(args 'rw fastboot 4')
)
)
- (device (vbd (uname phy:hda3) (dev hda1) (mode w)))
+ (device (vbd (uname phy:hda3) (dev sda1) (mode w)))
(device (vif (mac aa:00:00:11:00:11) (bridge vnet97)))
)
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/examples/Makefile
--- a/tools/vnet/examples/Makefile Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/examples/Makefile Fri Aug 26 20:47:16 2005
@@ -2,11 +2,14 @@
#============================================================================
XEN_SCRIPT_DIR:=/etc/xen/scripts
+
+.PHONY: all install clean
all:
install:
install -m 0755 -d $(DESTDIR)$(XEN_SCRIPT_DIR)
install -m 0554 network-vnet $(DESTDIR)$(XEN_SCRIPT_DIR)
+ install -m 0554 vnet-insert $(DESTDIR)$(XEN_SCRIPT_DIR)
clean:
\ No newline at end of file
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/examples/network-vnet
--- a/tools/vnet/examples/network-vnet Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/examples/network-vnet Fri Aug 26 20:47:16 2005
@@ -1,218 +1,10 @@
#!/bin/sh
-#============================================================================
-# Default Xen network start/stop script.
-# Xend calls a network script when it starts.
-# The script name to use is defined in /etc/xen/xend-config.sxp
-# in the network-script field.
-#
-# This script creates a bridge (default xen-br0), adds a device
-# (default eth0) to it, copies the IP addresses from the device
-# to the bridge and adjusts the routes accordingly.
-#
-# If all goes well, this should ensure that networking stays up.
-# However, some configurations are upset by this, especially
-# NFS roots. If the bridged setup does not meet your needs,
-# configure a different script, for example using routing instead.
-#
-# Usage:
-#
-# network (start|stop|status) {VAR=VAL}*
-#
-# Vars:
-#
-# bridge The bridge to use (default xen-br0).
-# netdev The interface to add to the bridge (default eth0).
-# antispoof Whether to use iptables to prevent spoofing (default yes).
-#
-# start:
-# Creates the bridge and enslaves netdev to it.
-# Copies the IP addresses from netdev to the bridge.
-# Deletes the routes to netdev and adds them on bridge.
-#
-# stop:
-# Removes netdev from the bridge.
-# Deletes the routes to bridge and adds them to netdev.
-#
-# status:
-# Print ifconfig for netdev and bridge.
-# Print routes.
-#
-#============================================================================
+scriptdir=/etc/xen/scripts/
-# Exit if anything goes wrong.
-set -e
+case ${1} in
+ start)
+ ${scriptdir}/vnet-insert
+ ;;
+esac
-# First arg is the operation.
-OP=$1
-shift
-
-# Pull variables in args in to environment.
-for arg ; do export "${arg}" ; done
-
-bridge=${bridge:-xen-br0}
-netdev=${netdev:-eth0}
-antispoof=${antispoof:-yes}
-
-echo "network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof"
-
-# Usage: transfer_addrs src dst
-# Copy all IP addresses (including aliases) from device $src to device $dst.
-transfer_addrs () {
- local src=$1
- local dst=$2
- # Don't bother if $dst already has IP addresses.
- if ip addr show dev ${dst} | egrep -q '^ *inet' ; then
- return
- fi
- # Address lines start with 'inet' and have the device in them.
- # Replace 'inet' with 'ip addr add' and change the device name $src
- # to 'dev $src'. Remove netmask as we'll add routes later.
- ip addr show dev ${src} | egrep '^ *inet' | sed -e "
-s/inet/ip addr add/
-s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@
-s/${src}/dev ${dst}/
-" | sh -e
-}
-
-# Usage: transfer_routes src dst
-# Get all IP routes to device $src, delete them, and
-# add the same routes to device $dst.
-# The original routes have to be deleted, otherwise adding them
-# for $dst fails (duplicate routes).
-transfer_routes () {
- local src=$1
- local dst=$2
- # List all routes and grep the ones with $src in.
- # Stick 'ip route del' on the front to delete.
- # Change $src to $dst and use 'ip route add' to add.
- ip route list | grep ${src} | sed -e "
-h
-s/^/ip route del /
-P
-g
-s/${src}/${dst}/
-s/^/ip route add /
-P
-d
-" | sh -e
-}
-
-# Usage: create_bridge dev bridge
-# Create bridge $bridge and add device $dev to it.
-create_bridge () {
- local dev=$1
- local bridge=$2
-
- # Don't create the bridge if it already exists.
- if ! brctl show | grep -q ${bridge} ; then
- brctl addbr ${bridge}
- brctl stp ${bridge} off
- brctl setfd ${bridge} 0
- fi
- ifconfig ${bridge} up
-}
-
-# Usage: antispoofing dev bridge
-# Set the default forwarding policy for $dev to drop.
-# Allow forwarding to the bridge.
-antispoofing () {
- local dev=$1
- local bridge=$2
-
- iptables -P FORWARD DROP
- iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT
-}
-
-# Usage: show_status dev bridge
-# Print ifconfig and routes.
-show_status () {
- local dev=$1
- local bridge=$2
-
- echo '============================================================'
- ifconfig ${dev}
- ifconfig ${bridge}
- echo ' '
- ip route list
- echo ' '
- route -n
- echo '============================================================'
-}
-
-# Insert the vnet module if it can be found and
-# it's not already there.
-vnet_insert () {
- local module="vnet_module"
- local mod_dir=/lib/modules/$(uname -r)/kernel
- local mod_path="${mod_dir}/${module}"
- local mod_obj=""
-
- for ext in ".o" ".ko" ; do
- f=${mod_path}${ext}
- if [ -f ${f} ] ; then
- mod_obj=$f
- break
- fi
- done
- if [ "${mod_obj}" == "" ] ; then
- return
- fi
- if lsmod | grep -q ${module} ; then
- echo "VNET: ${module} loaded"
- else
- echo "VNET: Loading ${module}..."
- insmod ${mod_obj}
- fi
-}
-
-op_start () {
- if [ "${bridge}" == "null" ] ; then
- return
- fi
- # Create the bridge and give it the interface IP addresses.
- # Move the interface routes onto the bridge.
- create_bridge ${netdev} ${bridge}
- transfer_addrs ${netdev} ${bridge}
- transfer_routes ${netdev} ${bridge}
- # Don't add $dev to $bridge if it's already on a bridge.
- if ! brctl show | grep -q ${netdev} ; then
- brctl addif ${bridge} ${netdev}
- fi
-
- if [ ${antispoof} == 'yes' ] ; then
- antispoofing ${netdev} ${bridge}
- fi
-
- vnet_insert
-}
-
-op_stop () {
- if [ "${bridge}" == "null" ] ; then
- return
- fi
- # Remove the interface from the bridge.
- # Move the routes back to the interface.
- brctl delif ${bridge} ${netdev}
- transfer_routes ${bridge} ${netdev}
-
- # It's not our place to be enabling forwarding...
-}
-
-case ${OP} in
- start)
- op_start
- ;;
-
- stop)
- op_stop
- ;;
-
- status)
- show_status ${netdev} ${bridge}
- ;;
-
- *)
- echo 'Unknown command: ' ${OP}
- echo 'Valid commands are: start, stop, status'
- exit 1
-esac
+${scriptdir}/network-bridge "$@"
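
The replacement script only loads the vnet module and then defers to the standard
network-bridge script, so it can be selected via the network-script field in
/etc/xen/xend-config.sxp in the usual way, e.g. (illustrative):

(network-script network-vnet)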
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/examples/vnet97.sxp
--- a/tools/vnet/examples/vnet97.sxp Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/examples/vnet97.sxp Fri Aug 26 20:47:16 2005
@@ -1,3 +1,2 @@
# Vnet configuration for a vnet with id 97 and no security.
-# Configure using 'xm call vnet_add vnet97.sxp'.
(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none))
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/examples/vnet98.sxp
--- a/tools/vnet/examples/vnet98.sxp Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/examples/vnet98.sxp Fri Aug 26 20:47:16 2005
@@ -1,3 +1,2 @@
# Vnet configuration for a vnet with id 98 and message authentication.
-# Configure using 'xm call vnet_add vnet98.sxp'.
(vnet (id 98) (bridge vnet98) (vnetif vnetif98) (security auth))
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/examples/vnet99.sxp
--- a/tools/vnet/examples/vnet99.sxp Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/examples/vnet99.sxp Fri Aug 26 20:47:16 2005
@@ -1,3 +1,2 @@
# Vnet configuration for a vnet with id 99 and message confidentiality.
-# Configure using 'xm call vnet_add vnet99.sxp'.
(vnet (id 99) (bridge vnet99) (vnetif vnetif99) (security conf))
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/Makefile
--- a/tools/vnet/libxutil/Makefile Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/Makefile Fri Aug 26 20:47:16 2005
@@ -1,5 +1,8 @@
+ifndef VNET_ROOT
+export VNET_ROOT = $(shell cd .. && pwd)
+include $(VNET_ROOT)/Make.env
+endif
-XEN_ROOT = ../../..
INSTALL = install
INSTALL_DATA = $(INSTALL) -m0644
INSTALL_PROG = $(INSTALL) -m0755
@@ -15,6 +18,7 @@
LIB_SRCS += hash_table.c
LIB_SRCS += iostream.c
LIB_SRCS += lexis.c
+LIB_SRCS += mem_stream.c
LIB_SRCS += string_stream.c
LIB_SRCS += sxpr.c
LIB_SRCS += sxpr_parser.c
@@ -26,6 +30,7 @@
PIC_OBJS := $(LIB_SRCS:.c=.opic)
CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing
+CFLAGS += -g
# Get gcc to generate the dependencies for us.
CFLAGS += -Wp,-MD,.$(@F).d
@@ -39,6 +44,7 @@
LIB += libxutil.a
all: build
+
build: check-for-zlib
$(MAKE) $(LIB)
@@ -70,8 +76,8 @@
ln -sf libxutil.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so
clean:
- $(RM) *.a *.so* *.o *.opic *.rpm
- $(RM) *~
- $(RM) $(DEPS)
+ -@$(RM) *.a *.so* *.o *.opic *.rpm
+ -@$(RM) *~
+ -@$(RM) $(DEPS)
-include $(DEPS)
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/debug.h
--- a/tools/vnet/libxutil/debug.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/debug.h Fri Aug 26 20:47:16 2005
@@ -49,9 +49,9 @@
#ifdef DEBUG
#define dprintf(fmt, args...) fprintf(stdout, "%d [DBG] " MODULE_NAME ">%s" fmt, getpid(), __FUNCTION__, ##args)
-#define wprintf(fmt, args...) fprintf(stderr, "%d [WRN] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
-#define iprintf(fmt, args...) fprintf(stderr, "%d [INF] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
-#define eprintf(fmt, args...) fprintf(stderr, "%d [ERR] " MODULE_NAME ">%s" fmt, getpid(),__FUNCTION__, ##args)
+#define wprintf(fmt, args...) fprintf(stderr, "%d [WRN] " MODULE_NAME ">%s" fmt, getpid(), __FUNCTION__, ##args)
+#define iprintf(fmt, args...) fprintf(stderr, "%d [INF] " MODULE_NAME ">%s" fmt, getpid(), __FUNCTION__, ##args)
+#define eprintf(fmt, args...) fprintf(stderr, "%d [ERR] " MODULE_NAME ">%s" fmt, getpid(), __FUNCTION__, ##args)
#else
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sxpr.c
--- a/tools/vnet/libxutil/sxpr.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sxpr.c Fri Aug 26 20:47:16 2005
@@ -405,7 +405,6 @@
#endif /* USE_GC */
/** Create a new atom with the given name.
- * Makes an integer sxpr if the name can be parsed as an int.
*
* @param name the name
* @return new atom
@@ -414,7 +413,8 @@
Sxpr n, obj = ONOMEM;
long v;
- if(convert_atol(name, &v) == 0){
+ // Don't always want to do this.
+ if(0 && convert_atol(name, &v) == 0){
obj = OINT(v);
} else {
n = string_new(name);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sxpr.h
--- a/tools/vnet/libxutil/sxpr.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sxpr.h Fri Aug 26 20:47:16 2005
@@ -228,7 +228,9 @@
*
* @param val pointer
*/
-#define PTR(val) OBJP(T_UINT, (void*)(val))
+static inline Sxpr PTR(void *val){
+ return OBJP(T_UINT, (void*)(val));
+}
/** Allocate some memory and return an sxpr containing it.
* Returns ONOMEM if allocation failed.
@@ -237,7 +239,9 @@
* @param ty typecode
* @return sxpr
*/
-#define halloc(_n, _ty) OBJP(_ty, allocate(_n))
+static inline Sxpr halloc(int n, int ty){
+ return OBJP(ty, allocate(n));
+}
/** Allocate an sxpr containing a pointer to the given type.
*
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sxpr_parser.c
--- a/tools/vnet/libxutil/sxpr_parser.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sxpr_parser.c Fri Aug 26 20:47:16 2005
@@ -472,7 +472,14 @@
}
int Parser_atom(Parser *p){
- Sxpr obj = atom_new(peek_token(p));
+ Sxpr obj;
+ long v;
+ if(Parser_flags(p, PARSE_INT) &&
+ convert_atol(peek_token(p), &v) == 0){
+ obj = OINT(v);
+ } else {
+ obj = atom_new(peek_token(p));
+ }
return Parser_set_value(p, obj);
}
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sxpr_parser.h
--- a/tools/vnet/libxutil/sxpr_parser.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sxpr_parser.h Fri Aug 26 20:47:16 2005
@@ -89,15 +89,17 @@
/** Parser flags. */
-//enum {
-//};
+enum {
+ /** Convert integer atoms to ints. */
+ PARSE_INT=1,
+};
/** Raise some parser flags.
*
* @param in parser
* @param flags flags mask
*/
-inline static void Parser_flags_raise(Parser *in, int flags){
+static inline void Parser_flags_raise(Parser *in, int flags){
in->flags |= flags;
}
@@ -106,7 +108,7 @@
* @param in parser
* @param flags flags mask
*/
-inline static void Parser_flags_lower(Parser *in, int flags){
+static inline void Parser_flags_lower(Parser *in, int flags){
in->flags &= ~flags;
}
@@ -114,8 +116,12 @@
*
* @param in parser
*/
-inline static void Parser_flags_clear(Parser *in){
+static inline void Parser_flags_clear(Parser *in){
in->flags = 0;
+}
+
+static inline int Parser_flags(Parser *in, int flags){
+ return in->flags & flags;
}
extern void Parser_free(Parser *z);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sys_string.c
--- a/tools/vnet/libxutil/sys_string.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sys_string.c Fri Aug 26 20:47:16 2005
@@ -27,6 +27,31 @@
#include "allocate.h"
#include "sys_string.h"
+
+#ifdef __KERNEL__
+
+#define deferr(_err) case _err: return #_err
+
+extern char *strerror(int err)
+{
+ switch(err){
+ deferr(EPERM);
+ deferr(ENOENT);
+ deferr(ESRCH);
+ deferr(EINTR);
+ deferr(EIO);
+ deferr(EINVAL);
+ deferr(ENOMEM);
+ deferr(EACCES);
+ deferr(EFAULT);
+ deferr(EBUSY);
+
+ default:
+ return "ERROR";
+ }
+}
+
+#endif
/** Set the base to use for converting a string to a number. Base is
* hex if starts with 0x, otherwise decimal.
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/libxutil/sys_string.h
--- a/tools/vnet/libxutil/sys_string.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/libxutil/sys_string.h Fri Aug 26 20:47:16 2005
@@ -31,6 +31,8 @@
#include <linux/types.h>
#include <stdarg.h>
#include "allocate.h"
+
+extern char *strerror(int err);
#if 0
static inline int tolower(int c){
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/00README
--- a/tools/vnet/vnet-module/00README Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/00README Fri Aug 26 20:47:16 2005
@@ -6,11 +6,11 @@
The makefiles use the following variables, which
can be set in your env or on the make command line:
-LINUX_SERIES: linux release to compile for, 2.4 (default), or 2.6.
-XEN_ROOT: root of the xen tree containing kernel source.
+LINUX_SERIES: linux release to compile for: 2.4, or 2.6 (default).
+XEN_ROOT: root of the xen tree containing kernel source.
KERNEL_VERSION: kernel version, default got from XEN_ROOT.
-KERNEL_MINOR: kernel minor version, default -xen0.
-KERNEL_SRC: path to kernel source, default linux-<VERSION> under XEN_ROOT.
+KERNEL_MINOR: kernel minor version, default -xen0.
+KERNEL_SRC: path to kernel source, default linux-<VERSION> under XEN_ROOT.
*) For 2.4 kernel
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/Makefile
--- a/tools/vnet/vnet-module/Makefile Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/Makefile Fri Aug 26 20:47:16 2005
@@ -18,9 +18,13 @@
# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
#============================================================================
+ifndef VNET_ROOT
+export VNET_ROOT = $(shell cd .. && pwd)
+include $(VNET_ROOT)/Make.env
+endif
+
#============================================================================
ifeq ($(src),)
-LINUX_SERIES ?=2.6
include Makefile-$(LINUX_SERIES)
@@ -45,7 +49,7 @@
# Setup explicit rules for them using the kbuild C compile rule.
# File names in the lib dir.
-remote_srcs = $(foreach file,$(VNET_LIB_SRC),$(LIB_DIR)/$(file))
+remote_srcs = $(foreach file,$(VNET_LIB_SRC),$(LIBXUTIL_DIR)/$(file))
# Equivalent file names here.
local_srcs = $(foreach file,$(VNET_LIB_SRC),$(src)/$(file))
@@ -54,12 +58,12 @@
local_objs = $(local_srcs:.c=.o)
# Make the local objects depend on compiling the remote sources.
-$(local_objs): $(src)/%.o: $(LIB_DIR)/%.c
+$(local_objs): $(src)/%.o: $(LIBXUTIL_DIR)/%.c
$(call if_changed_rule,cc_o_c)
#----------------------------------------------------------------------------
-vpath %.h $(LIB_DIR)
-EXTRA_CFLAGS += -I $(LIB_DIR)
+vpath %.h $(LIBXUTIL_DIR)
+EXTRA_CFLAGS += -I $(LIBXUTIL_DIR)
EXTRA_CFLAGS += -I $(src)
endif
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/Makefile-2.4
--- a/tools/vnet/vnet-module/Makefile-2.4 Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/Makefile-2.4 Fri Aug 26 20:47:16 2005
@@ -21,7 +21,7 @@
#============================================================================
# Vnet module makefile for 2.4 series kernels.
-LINUX_SERIES ?=2.4
+LINUX_SERIES =2.4
include Makefile.ver
KERNEL_MODULE := vnet_module.o
@@ -37,9 +37,9 @@
vpath %.h $(KERNEL_SRC)/include
INCLUDES+= -I $(KERNEL_SRC)/include
-vpath %.h $(LIB_DIR)
-vpath %.c $(LIB_DIR)
-INCLUDES += -I $(LIB_DIR)
+vpath %.h $(LIBXUTIL_DIR)
+vpath %.c $(LIBXUTIL_DIR)
+INCLUDES += -I $(LIBXUTIL_DIR)
INCLUDES+= -I .
@@ -61,6 +61,7 @@
CFLAGS += -Wno-unused-function
CFLAGS += -Wno-unused-parameter
+CFLAGS += -g
CFLAGS += -O2
CFLAGS += -fno-strict-aliasing
CFLAGS += -fno-common
@@ -90,8 +91,8 @@
.PHONY: clean
clean:
- @rm -f *.a *.o *.ko *~
- @rm -f $(VNET_DEP) .*.cmd *.mod.?
- @rm -rf .tmp_versions
+ -@$(RM) *.a *.o *.ko *~
+ -@$(RM) $(VNET_DEP) .*.cmd *.mod.?
+ -@$(RM) -r .tmp_versions
-include $(VNET_DEP)
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/Makefile-2.6
--- a/tools/vnet/vnet-module/Makefile-2.6 Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/Makefile-2.6 Fri Aug 26 20:47:16 2005
@@ -21,7 +21,7 @@
#============================================================================
# Vnet module makefile for 2.6 series kernels.
-LINUX_SERIES ?=2.6
+LINUX_SERIES =2.6
include Makefile.ver
KERNEL_MODULE = vnet_module.ko
@@ -38,13 +38,14 @@
.PHONY: install install-module modules_install
install install-module modules_install: module
- install -m 0755 -d $(DESTDIR)$(KERNEL_MODULE_DIR)/xen
- install -m 0554 $(KERNEL_MODULE) $(DESTDIR)$(KERNEL_MODULE_DIR)/xen
+ install -m 0755 -d $(DESTDIR)$(KERNEL_MODULE_DIR)
+ install -m 0554 $(KERNEL_MODULE) $(DESTDIR)$(KERNEL_MODULE_DIR)
.PHONY: clean
clean:
- @$(MAKE) -C $(KERNEL_SRC) M=$(PWD) clean
- @rm -f *.a *.o *.ko *~ .*.d .*.cmd *.mod.?
+ -@$(MAKE) -C $(KERNEL_SRC) M=$(PWD) clean
+ -@$(RM) *.a *.o *.ko *~ .*.d .*.cmd *.mod.?
+ -@$(RM) -r .tmp_versions
TAGS:
etags *.c *.h
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/Makefile.ver
--- a/tools/vnet/vnet-module/Makefile.ver Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/Makefile.ver Fri Aug 26 20:47:16 2005
@@ -18,22 +18,11 @@
# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
#============================================================================
-#----------------------------------------------------------------------------
-# Xeno/xen.
-
-# Root of xen tree.
-XEN_ROOT ?=../../..
-
-# Path to relativize the install. Set to /
-# to install relative to filesystem root.
-prefix ?=$(XEN_ROOT)/install/
-#----------------------------------------------------------------------------
-
LINUX_SERIES ?=2.6
KERNEL_MINOR ?=-xen0
-LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_SERIES).*-xen-sparse ) 2>/dev/null | \
- sed -e 's!^.*linux-\(.\+\)-xen-sparse!\1!' )
+LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_SERIES).*-xen0 ) 2>/dev/null | \
+ sed -e 's!^.*linux-\(.\+\)-xen0!\1!' )
ifeq ($(LINUX_VERSION),)
$(error Kernel source for linux $(LINUX_SERIES) not found)
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/Makefile.vnet
--- a/tools/vnet/vnet-module/Makefile.vnet Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/Makefile.vnet Fri Aug 26 20:47:16 2005
@@ -23,8 +23,6 @@
else
SRC_DIR=$(src)/
endif
-
-LIB_DIR := $(SRC_DIR)../libxutil
VNET_SRC :=
VNET_SRC += esp.c
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/etherip.c
--- a/tools/vnet/vnet-module/etherip.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/etherip.c Fri Aug 26 20:47:16 2005
@@ -42,6 +42,7 @@
#include <vnet.h>
#include <varp.h>
#include <if_varp.h>
+#include <varp.h>
#include <skb_util.h>
#define MODULE_NAME "VNET"
@@ -53,22 +54,18 @@
* The etherip protocol is used to transport Ethernet frames in IP packets.
*/
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw)
+/** Get the vnet label from an etherip header.
+ *
+ * @param hdr header
+ * @param vnet vnet id (in net order)
+ */
+void etheriphdr_get_vnet(struct etheriphdr *hdr, VnetId *vnet){
+#ifdef CONFIG_ETHERIP_EXT
+ *vnet = *(VnetId*)hdr->vnet;
#else
-#define MAC_ETH(_skb) ((_skb)->mac.ethernet)
-#endif
-
-/** Get the vnet label from an etherip header.
- *
- * @param hdr header
- * @return vnet (in host order)
- */
-int etheriphdr_get_vnet(struct etheriphdr *hdr){
-#ifdef CONFIG_ETHERIP_EXT
- return ntohl(hdr->vnet);
-#else
- return hdr->reserved;
+ *vnet = (VnetId){};
+ vnet->u.vnet16[7] = (unsigned short)hdr->reserved;
+
#endif
}
@@ -76,15 +73,15 @@
* Also sets the etherip version.
*
* @param hdr header
- * @param vnet vnet label (in host order)
- */
-void etheriphdr_set_vnet(struct etheriphdr *hdr, int vnet){
+ * @param vnet vnet label (in net order)
+ */
+void etheriphdr_set_vnet(struct etheriphdr *hdr, VnetId *vnet){
#ifdef CONFIG_ETHERIP_EXT
- hdr->version = 4;
- hdr->vnet = htonl(vnet);
+ hdr->version = ETHERIP_VERSION;
+ *(VnetId*)hdr->vnet = *vnet;
#else
- hdr->version = 3;
- hdr->reserved = vnet & 0x0fff;
+ hdr->version = ETHERIP_VERSION;
+ hdr->reserved = (vnet->u.vnet16[7] & 0x0fff);
#endif
}
@@ -119,12 +116,12 @@
const int ip_n = sizeof(struct iphdr);
const int eth_n = ETH_HLEN;
int head_n = 0;
- int vnet = tunnel->key.vnet;
+ VnetId *vnet = &tunnel->key.vnet;
struct etheriphdr *etheriph;
struct ethhdr *ethh;
u32 saddr = 0;
- dprintf("> skb=%p vnet=%d\n", skb, vnet);
+ //dprintf("> skb=%p vnet=%d\n", skb, vnet);
head_n = etherip_n + ip_n + eth_n;
err = skb_make_room(&skb, skb, head_n, 0);
if(err) goto exit;
@@ -133,7 +130,7 @@
//if(err) goto exit;
// The original ethernet header.
- ethh = MAC_ETH(skb);
+ ethh = eth_hdr(skb);
//print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len);
// Null the pointer as we are pushing a new IP header.
skb->mac.raw = NULL;
@@ -155,7 +152,7 @@
skb->nh.iph->ttl = 64; // Linux default
time-to-live.
skb->nh.iph->protocol = IPPROTO_ETHERIP; // IP protocol number.
skb->nh.iph->saddr = saddr; // Source address.
- skb->nh.iph->daddr = tunnel->key.addr; // Destination address.
+ skb->nh.iph->daddr = tunnel->key.addr.u.ip4.s_addr; // Destination address.
skb->nh.iph->check = 0;
// Ethernet header will be filled-in by device.
@@ -213,15 +210,18 @@
struct etheriphdr *etheriph;
struct ethhdr *ethhdr;
Vnet *vinfo = NULL;
- u32 vnet;
-
- ethhdr = MAC_ETH(skb);
- if(MULTICAST(skb->nh.iph->daddr) &&
- (skb->nh.iph->daddr != varp_mcast_addr)){
+ VnetId vnet = {};
+ u32 saddr, daddr;
+ char vnetbuf[VNET_ID_BUF];
+
+ saddr = skb->nh.iph->saddr;
+ daddr = skb->nh.iph->daddr;
+ ethhdr = eth_hdr(skb);
+ if(MULTICAST(daddr) && (daddr != varp_mcast_addr)){
// Ignore multicast packets not addressed to us.
- dprintf("> dst=%u.%u.%u.%u varp_mcast_addr=%u.%u.%u.%u\n",
- NIPQUAD(skb->nh.iph->daddr),
- NIPQUAD(varp_mcast_addr));
+ dprintf("> Ignoring mcast skb: src=%u.%u.%u.%u dst=%u.%u.%u.%u"
+ " varp_mcast_addr=%u.%u.%u.%u\n",
+ NIPQUAD(saddr), NIPQUAD(daddr), NIPQUAD(varp_mcast_addr));
goto exit;
}
ip_n = (skb->nh.iph->ihl << 2);
@@ -229,7 +229,8 @@
// skb->data points at ethernet header.
//dprintf("> len=%d\n", skb->len);
if (!pskb_may_pull(skb, eth_n + ip_n)){
- wprintf("> Malformed skb\n");
+ wprintf("> Malformed skb (eth+ip) src=%u.%u.%u.%u\n",
+ NIPQUAD(saddr));
err = -EINVAL;
goto exit;
}
@@ -237,18 +238,30 @@
}
// Assume skb->data points at etherip header.
etheriph = (void*)skb->data;
- if(!pskb_may_pull(skb, etherip_n)){
- wprintf("> Malformed skb\n");
+ if(etheriph->version != ETHERIP_VERSION){
+ wprintf("> Bad etherip version=%d src=%u.%u.%u.%u\n",
+ etheriph->version,
+ NIPQUAD(saddr));
err = -EINVAL;
goto exit;
}
- vnet = etheriphdr_get_vnet(etheriph);
- dprintf("> Rcvd skb=%p vnet=%d\n", skb, vnet);
+ if(!pskb_may_pull(skb, etherip_n)){
+ wprintf("> Malformed skb (etherip) src=%u.%u.%u.%u\n",
+ NIPQUAD(saddr));
+ err = -EINVAL;
+ goto exit;
+ }
+ etheriphdr_get_vnet(etheriph, &vnet);
+ dprintf("> Rcvd skb vnet=%s src=%u.%u.%u.%u\n",
+ VnetId_ntoa(&vnet, vnetbuf),
+ NIPQUAD(saddr));
// If vnet is secure, context must include IPSEC ESP.
- err = vnet_check_context(vnet, SKB_CONTEXT(skb), &vinfo);
+ err = vnet_check_context(&vnet, SKB_CONTEXT(skb), &vinfo);
Vnet_decref(vinfo);
if(err){
- wprintf("> Failed security check\n");
+ wprintf("> Failed security check vnet=%s src=%u.%u.%u.%u\n",
+ VnetId_ntoa(&vnet, vnetbuf),
+ NIPQUAD(saddr));
goto exit;
}
mine = 1;
@@ -258,19 +271,29 @@
// Know source ip, vnet, vmac, so could update varp cache.
// But if traffic comes to us over a vnetd tunnel this points the coa
// at the vnetd rather than the endpoint. So don't do it.
- //varp_update(htonl(vnet), MAC_ETH(skb)->h_source, skb->nh.iph->saddr);
+ //varp_update(vnet, eth_hdr(skb)->h_source, skb->nh.iph->saddr);
// Assuming a standard Ethernet frame.
+ // Should check for protocol? Support ETH_P_8021Q too.
skb->nh.raw = skb_pull(skb, ETH_HLEN);
+
+ dprintf("> Unpacked vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
+ VnetId_ntoa(&vnet, vnetbuf),
+ MAC6TUPLE(eth_hdr(skb)->h_source),
+ MAC6TUPLE(eth_hdr(skb)->h_dest));
#ifdef CONFIG_NETFILTER
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
// This stops our new pkt header being clobbered by a subsequent
- // call to nf_bridge_maybe_copy_header. Just replicate the
- // corresponding nf_bridge_save_header.
+ // call to nf_bridge_maybe_copy_header.
+ // Code from nf_bridge_save_header() modified to use h_proto
+ // instead of skb->protocol.
if(skb->nf_bridge){
+ // Hmm. Standard ethernet header is ETH_HLEN (14),
+ // VLAN header (802.1q) is VLAN_ETH_HLEN (18).
+ // Where does 16 come from?
int header_size = 16;
- if(MAC_ETH(skb)->h_proto == __constant_htons(ETH_P_8021Q)) {
+ if(eth_hdr(skb)->h_proto == __constant_htons(ETH_P_8021Q)) {
header_size = 18;
}
memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
@@ -279,7 +302,7 @@
#endif
if(1){
- struct ethhdr *eth = MAC_ETH(skb);
+ struct ethhdr *eth = eth_hdr(skb);
// Devices use eth_type_trans() to set skb->pkt_type and skb->protocol.
// Set them from contained ethhdr, or leave as received?
// 'Ware use of hard_header_len in eth_type_trans().
@@ -310,6 +333,7 @@
}
dst_release(skb->dst);
skb->dst = NULL;
+
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
@@ -321,7 +345,7 @@
//print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len + ETH_HLEN);
- err = vnet_skb_recv(skb, vnet, (Vmac*)MAC_ETH(skb)->h_dest);
+ err = vnet_skb_recv(skb, &vnet, (Vmac*)eth_hdr(skb)->h_dest);
exit:
if(mine) err = 1;
dprintf("< skb=%p err=%d\n", skb, err);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/if_etherip.h
--- a/tools/vnet/vnet-module/if_etherip.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/if_etherip.h Fri Aug 26 20:47:16 2005
@@ -18,15 +18,30 @@
*/
#ifndef _VNET_IF_ETHERIP_H_
#define _VNET_IF_ETHERIP_H_
-/*----------------------------------------------------------------------------*/
+
+#define CONFIG_ETHERIP_EXT
+
#ifdef CONFIG_ETHERIP_EXT
+
+#define ETHERIP_VERSION 4
+
struct etheriphdr {
- __u8 version;
- __u32 vnet;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u16 reserved:12,
+ version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u16 version:4,
+ reserved:12;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 vnet[16];
} __attribute__ ((packed));
-/*----------------------------------------------------------------------------*/
#else
+
+#define ETHERIP_VERSION 3
+
struct etheriphdr
{
#if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -42,10 +57,9 @@
};
#endif
+
#ifndef IPPROTO_ETHERIP
#define IPPROTO_ETHERIP 97
#endif
-/*----------------------------------------------------------------------------*/
-
#endif /* ! _VNET_IF_ETHERIP_H_ */
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/if_varp.h
--- a/tools/vnet/vnet-module/if_varp.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/if_varp.h Fri Aug 26 20:47:16 2005
@@ -20,6 +20,14 @@
#ifndef _VNET_IF_VARP_H
#define _VNET_IF_VARP_H
+/* Need struct in_addr, struct in6_addr. */
+#ifdef __KERNEL__
+#include <linux/in.h>
+#include <linux/in6.h>
+#else
+#include <netinet/in.h>
+#endif
+
typedef struct Vmac {
unsigned char mac[ETH_ALEN];
} Vmac;
@@ -30,17 +38,35 @@
VARP_OP_ANNOUNCE = 2,
};
+typedef struct VnetId {
+ union {
+ uint8_t vnet8[16];
+ uint16_t vnet16[8];
+ uint32_t vnet32[4];
+ } u;
+} __attribute__((packed)) VnetId;
+
+typedef struct VarpAddr {
+ uint8_t family; // AF_INET or AF_INET6.
+ union {
+ uint8_t raw[16];
+ struct in_addr ip4;
+ struct in6_addr ip6;
+ } u;
+} __attribute__((packed)) VarpAddr;
+
typedef struct VnetMsgHdr {
uint16_t id;
uint16_t opcode;
} __attribute__((packed)) VnetMsgHdr;
typedef struct VarpHdr {
- VnetMsgHdr vnetmsghdr;
- uint32_t vnet;
- Vmac vmac;
- uint32_t addr;
+ VnetMsgHdr hdr;
+ VnetId vnet;
+ Vmac vmac;
+ VarpAddr addr;
} __attribute__((packed)) VarpHdr;
+
/** Default address for varp/vnet broadcasts: 224.10.0.1 */
#define VARP_MCAST_ADDR 0xe00a0001
@@ -48,6 +74,4 @@
/** UDP port to use for varp protocol. */
#define VARP_PORT 1798
-
-
-#endif /* ! _VNET_IF_VARP_H */
+#endif /* ! _VNET_IF_VARP_H */
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/skb_util.h
--- a/tools/vnet/vnet-module/skb_util.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/skb_util.h Fri Aug 26 20:47:16 2005
@@ -19,7 +19,9 @@
#ifndef _VNET_SKB_UTIL_H_
#define _VNET_SKB_UTIL_H_
-struct sk_buff;
+#include <net/route.h>
+#include <linux/skbuff.h>
+
struct scatterlist;
extern int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n);
@@ -40,4 +42,53 @@
extern void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len);
+/* The mac.ethernet field went away in 2.6 in favour of eth_hdr().
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#else
+static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
+{
+ return (struct ethhdr *)skb->mac.raw;
+}
#endif
+
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+static inline int skb_route(struct sk_buff *skb, struct rtable **prt){
+ int err = 0;
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = skb->nh.iph->daddr,
+ .saddr = skb->nh.iph->saddr,
+ .tos = skb->nh.iph->tos,
+ }
+ }
+ };
+
+ if(skb->dev){
+ fl.oif = skb->dev->ifindex;
+ }
+ err = ip_route_output_key(prt, &fl);
+ return err;
+}
+
+#else
+
+static inline int skb_route(struct sk_buff *skb, struct rtable **prt){
+ int err = 0;
+ struct rt_key key = { };
+ key.dst = skb->nh.iph->daddr;
+ key.src = skb->nh.iph->saddr;
+ key.tos = skb->nh.iph->tos;
+ if(skb->dev){
+ key.oif = skb->dev->ifindex;
+ }
+ err = ip_route_output_key(prt, &key);
+ return err;
+}
+
+#endif
+
+#endif
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/tunnel.c
--- a/tools/vnet/vnet-module/tunnel.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/tunnel.c Fri Aug 26 20:47:16 2005
@@ -36,7 +36,7 @@
#include "hash_table.h"
#define MODULE_NAME "VNET"
-//#define DEBUG 1
+#define DEBUG 1
#undef DEBUG
#include "debug.h"
@@ -56,11 +56,9 @@
}
}
-int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **val){
+int Tunnel_create(TunnelType *type, VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **val){
int err = 0;
Tunnel *tunnel = NULL;
- dprintf("> type=%s vnet=%d addr=" IPFMT " base=%s\n",
- type->name, vnet, NIPQUAD(addr), (base ? base->type->name : "ip"));
if(!type || !type->open || !type->send || !type->close){
err = -EINVAL;
goto exit;
@@ -71,8 +69,8 @@
goto exit;
}
atomic_set(&tunnel->refcount, 1);
- tunnel->key.vnet = vnet;
- tunnel->key.addr = addr;
+ tunnel->key.vnet = *vnet;
+ tunnel->key.addr = *addr;
tunnel->type = type;
tunnel->data = NULL;
tunnel->send_stats = (TunnelStats){};
@@ -89,7 +87,7 @@
return err;
}
-int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnel){
+int Tunnel_open(TunnelType *type, VnetId *vnet, VarpAddr *addr, Tunnel *base, Tunnel **tunnel){
int err = 0;
dprintf(">\n");
@@ -123,15 +121,16 @@
static inline Hashcode tunnel_table_key_hash_fn(void *k){
TunnelKey *key = k;
Hashcode h = 0;
- h = hash_2ul(key->vnet, key->addr);
+ h = VnetId_hash(h, &key->vnet);
+ h = VarpAddr_hash(h, &key->addr);
return h;
}
static int tunnel_table_key_equal_fn(void *k1, void *k2){
TunnelKey *key1 = k1;
TunnelKey *key2 = k2;
- return (key1->vnet == key2->vnet)
- && (key1->addr == key2->addr);
+ return VnetId_eq(&key1->vnet, &key2->vnet) &&
+ VarpAddr_eq(&key1->addr, &key2->addr);
}
static void tunnel_table_entry_free_fn(HashTable *table, HTEntry *entry){
@@ -165,9 +164,9 @@
* @param addr destination address
* @return tunnel state or NULL
*/
-Tunnel * Tunnel_lookup(u32 vnet, u32 addr){
+Tunnel * Tunnel_lookup(VnetId *vnet, VarpAddr *addr){
Tunnel *tunnel = NULL;
- TunnelKey key = {.vnet = vnet, .addr = addr };
+ TunnelKey key = {.vnet = *vnet, .addr = *addr };
dprintf(">\n");
tunnel = HashTable_get(tunnel_table, &key);
Tunnel_incref(tunnel);
@@ -199,23 +198,16 @@
*/
int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
int err = 0;
- int len;
dprintf("> tunnel=%p skb=%p\n", tunnel, skb);
- len = skb->len;
if(tunnel){
+ int len = skb->len;
dprintf("> type=%s type->send...\n", tunnel->type->name);
+ // Must not refer to skb after sending - might have been freed.
err = tunnel->type->send(tunnel, skb);
- // Must not refer to skb after sending - might have been freed.
TunnelStats_update(&tunnel->send_stats, len, err);
} else {
- struct net_device *dev = NULL;
- err = vnet_get_device(DEVICE, &dev);
- if(err) goto exit;
- skb->dev = dev;
err = skb_xmit(skb);
- dev_put(dev);
- }
- exit:
+ }
dprintf("< err=%d\n", err);
return err;
}
@@ -225,4 +217,8 @@
}
void __exit tunnel_module_exit(void){
-}
+ if(tunnel_table){
+ HashTable_free(tunnel_table);
+ tunnel_table = NULL;
+ }
+}
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/tunnel.h
--- a/tools/vnet/vnet-module/tunnel.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/tunnel.h Fri Aug 26 20:47:16 2005
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/atomic.h>
+#include <if_varp.h>
struct sk_buff;
struct Tunnel;
@@ -41,8 +42,8 @@
} TunnelStats;
typedef struct TunnelKey {
- u32 vnet;
- u32 addr;
+ VnetId vnet;
+ VarpAddr addr;
} TunnelKey;
typedef struct Tunnel {
@@ -87,13 +88,15 @@
}
extern int Tunnel_init(void);
-extern Tunnel * Tunnel_lookup(u32 vnet, u32 addr);
+extern Tunnel * Tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr);
extern int Tunnel_add(Tunnel *tunnel);
extern int Tunnel_del(Tunnel *tunnel);
extern int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb);
-extern int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp);
-extern int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp);
+extern int Tunnel_create(TunnelType *type, struct VnetId *vnet, struct VarpAddr *addr,
+ Tunnel *base, Tunnel **tunnelp);
+extern int Tunnel_open(TunnelType *type, struct VnetId *vnet, struct VarpAddr *addr,
+ Tunnel *base, Tunnel **tunnelp);
extern int tunnel_module_init(void);
extern void tunnel_module_exit(void);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/varp.c
--- a/tools/vnet/vnet-module/varp.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/varp.c Fri Aug 26 20:47:16 2005
@@ -40,26 +40,20 @@
#include <tunnel.h>
#include <vnet.h>
#include <vif.h>
+#include <if_varp.h>
#include <varp.h>
-#include <if_varp.h>
+#include <vnet.h>
#include "allocate.h"
#include "hash_table.h"
#include "sys_net.h"
#include "sys_string.h"
+#include "skb_util.h"
#define MODULE_NAME "VARP"
-//#define DEBUG 1
+#define DEBUG 1
#undef DEBUG
#include "debug.h"
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-// The 'ethernet' field in the skb->mac union went away.
-#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw)
-#else
-#define MAC_ETH(_skb) ((_skb)->mac.ethernet)
-#endif
/** @file VARP: Virtual ARP.
*
@@ -121,8 +115,8 @@
/** Key for varp entries. */
typedef struct VarpKey {
- /** Vnet id (host order). */
- u32 vnet;
+ /** Vnet id (network order). */
+ VnetId vnet;
/** Virtual MAC address. */
Vmac vmac;
} VarpKey;
@@ -132,7 +126,7 @@
/** Key for the entry. */
VarpKey key;
/** Care-of address for the key. */
- u32 addr;
+ VarpAddr addr;
/** Last-updated timestamp. */
unsigned long timestamp;
/** State. */
@@ -152,8 +146,6 @@
struct sk_buff_head queue;
/** Maximum size of the queue. */
int queue_max;
-
- int locks;
} VarpEntry;
/** The varp cache. Varp entries indexed by VarpKey. */
@@ -181,14 +173,10 @@
/** Multicast address (network order). */
u32 varp_mcast_addr = 0;
-/** Unicast address (network order). */
-u32 varp_ucast_addr = 0;
-
/** UDP port (network order). */
u16 varp_port = 0;
-/** Network device to use. */
-char *varp_device = DEVICE;
+char *varp_device = "xen-br0";
#define VarpTable_read_lock(z, flags) do{ (flags) = 0; down(&(z)->lock); } while(0)
#define VarpTable_read_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0)
@@ -199,7 +187,10 @@
#define VarpEntry_unlock(ventry, flags) write_unlock_irqrestore(&(ventry)->lock, (flags))
void VarpTable_sweep(VarpTable *z, int all);
+void VarpTable_flush(VarpTable *z);
void VarpTable_print(VarpTable *z);
+
+#include "./varp_util.c"
/** Print the varp cache (if debug on).
*/
@@ -209,14 +200,53 @@
#endif
}
+/** Flush the varp cache.
+ */
+void varp_flush(void){
+ VarpTable_flush(varp_table);
+}
+
+static int device_ucast_addr(const char *device, uint32_t *addr)
+{
+ int err;
+ struct net_device *dev = NULL;
+
+ err = vnet_get_device(device, &dev);
+ if(err) goto exit;
+ err = vnet_get_device_address(dev, addr);
+ exit:
+ if(err){
+ *addr = 0;
+ }
+ return err;
+}
+
+/** Get the unicast address of the varp device.
+ */
+int varp_ucast_addr(uint32_t *addr)
+{
+ int err = -ENODEV;
+ const char *devices[] = { varp_device, "eth0", "eth1", "eth2", NULL };
+ const char **p;
+ for(p = devices; err && *p; p++){
+ err = device_ucast_addr(*p, addr);
+ }
+ return err;
+}
+
/** Print varp info and the varp cache.
*/
void varp_print(void){
+ uint32_t addr = 0;
+ varp_ucast_addr(&addr);
+
printk(KERN_INFO "=== VARP
===============================================================\n");
printk(KERN_INFO "varp_device %s\n", varp_device);
printk(KERN_INFO "varp_mcast_addr " IPFMT "\n", NIPQUAD(varp_mcast_addr));
- printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(varp_ucast_addr));
+ printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(addr));
printk(KERN_INFO "varp_port %d\n", ntohs(varp_port));
+ vnet_print();
+ vif_print();
VarpTable_print(varp_table);
printk(KERN_INFO "========================================================================\n");
}
@@ -246,18 +276,43 @@
int err = 0;
struct in_device *in_dev;
- //printk("%s>\n", __FUNCTION__);
in_dev = in_dev_get(dev);
if(!in_dev){
- err = -EIO;
+ err = -ENODEV;
goto exit;
}
*addr = in_dev->ifa_list->ifa_address;
in_dev_put(in_dev);
exit:
- //printk("%s< err=%d\n", __FUNCTION__, err);
- return err;
-}
+ return err;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+static inline int addr_route(u32 daddr, struct rtable **prt){
+ int err = 0;
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = daddr,
+ }
+ }
+ };
+
+ err = ip_route_output_key(prt, &fl);
+ return err;
+}
+
+#else
+
+static inline int addr_route(u32 daddr, struct rtable **prt){
+ int err = 0;
+ struct rt_key key = { .dst = daddr };
+ err = ip_route_output_key(prt, &key);
+ return err;
+}
+
+#endif
#ifndef LL_RESERVED_SPACE
#define HH_DATA_MOD 16
@@ -270,12 +325,12 @@
* @param opcode varp opcode (host order)
* @param dev device (may be null)
* @param skb skb being replied to (may be null)
- * @param vnet vnet id (in host order)
+ * @param vnet vnet id (in network order)
* @param vmac vmac (in network order)
* @return 0 on success, error code otherwise
*/
int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
- u32 vnet, Vmac *vmac){
+ VnetId *vnet, Vmac *vmac){
int err = 0;
int link_n = 0;
int ip_n = sizeof(struct iphdr);
@@ -285,45 +340,53 @@
struct in_device *in_dev = NULL;
VarpHdr *varph = NULL;
u8 macbuf[6] = {};
- u8 *smac, *dmac;
+ u8 *smac, *dmac = macbuf;
u32 saddr, daddr;
u16 sport, dport;
-
- dmac = macbuf;
- dprintf("> opcode=%d vnet=%d vmac=" MACFMT "\n",
- opcode, ntohl(vnet), MAC6TUPLE(vmac->mac));
- if(!dev){
- //todo: should use routing for daddr to get device.
- err = vnet_get_device(varp_device, &dev);
- if(err) goto exit;
- }
- link_n = LL_RESERVED_SPACE(dev);
- in_dev = in_dev_get(dev);
- if(!in_dev) goto exit;
-
- smac = dev->dev_addr;
- saddr = in_dev->ifa_list->ifa_address;
-
+#if defined(DEBUG)
+ char vnetbuf[VNET_ID_BUF];
+#endif
+
+ dprintf("> opcode=%d vnet= %s vmac=" MACFMT "\n",
+ opcode, VnetId_ntoa(vnet, vnetbuf), MAC6TUPLE(vmac->mac));
+
+ dport = varp_port;
if(skbin){
- dmac = MAC_ETH(skbin)->h_source;
+ daddr = skbin->nh.iph->saddr;
+ dmac = eth_hdr(skbin)->h_source;
sport = skbin->h.uh->dest;
- daddr = skbin->nh.iph->saddr;
- //dport = skbin->h.uh->source;
- dport = varp_port;
} else {
- if(!in_dev) goto exit;
if(MULTICAST(varp_mcast_addr)){
daddr = varp_mcast_addr;
ip_eth_mc_map(daddr, dmac);
} else {
- daddr = in_dev->ifa_list->ifa_broadcast;
- dmac = dev->broadcast;
+ daddr = INADDR_BROADCAST;
}
sport = varp_port;
- dport = varp_port;
+ }
+
+ if(!dev){
+ struct rtable *rt = NULL;
+ err = addr_route(daddr, &rt);
+ if(err) goto exit;
+ dev = rt->u.dst.dev;
+ }
+
+ in_dev = in_dev_get(dev);
+ if(!in_dev){
+ err = -ENODEV;
+ goto exit;
+ }
+ link_n = LL_RESERVED_SPACE(dev);
+ saddr = in_dev->ifa_list->ifa_address;
+ smac = dev->dev_addr;
+ if(daddr == INADDR_BROADCAST){
+ daddr = in_dev->ifa_list->ifa_broadcast;
+ dmac = dev->broadcast;
}
in_dev_put(in_dev);
+ dprintf("> dev=%s\n", dev->name);
dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac),
MAC6TUPLE(dmac));
dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr),
NIPQUAD(daddr));
dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
@@ -368,11 +431,12 @@
// Varp header.
varph = (void*)skb_put(skbout, varp_n);
*varph = (VarpHdr){};
- varph->vnetmsghdr.id = htons(VARP_ID);
- varph->vnetmsghdr.opcode = htons(opcode);
- varph->vnet = htonl(vnet);
+ varph->hdr.id = htons(VARP_ID);
+ varph->hdr.opcode = htons(opcode);
+ varph->vnet = *vnet;
varph->vmac = *vmac;
- varph->addr = saddr;
+ varph->addr.family = AF_INET;
+ varph->addr.u.ip4.s_addr = saddr;
err = skb_xmit(skbout);
@@ -385,16 +449,13 @@
/** Send a varp request for the vnet and destination mac of a packet.
*
* @param skb packet
- * @param vnet vnet (in host order)
+ * @param vnet vnet (in network order)
* @return 0 on success, error code otherwise
*/
-int varp_solicit(struct sk_buff *skb, int vnet){
- int err = 0;
- dprintf("> skb=%p\n", skb);
- varp_dprint();
+int varp_solicit(struct sk_buff *skb, VnetId *vnet){
+ int err = 0;
err = varp_send(VARP_OP_REQUEST, NULL, NULL,
- vnet, (Vmac*)MAC_ETH(skb)->h_dest);
- dprintf("< err=%d\n", err);
+ vnet, (Vmac*)eth_hdr(skb)->h_dest);
return err;
}
@@ -430,22 +491,26 @@
*/
void VarpEntry_print(VarpEntry *ventry){
if(ventry){
- char *c, *d;
+ char *state, *flags;
+ char vnetbuf[VNET_ID_BUF];
+ char addrbuf[VARP_ADDR_BUF];
+
switch(ventry->state){
- case VARP_STATE_INCOMPLETE: c = "INC"; break;
- case VARP_STATE_REACHABLE: c = "RCH"; break;
- case VARP_STATE_FAILED: c = "FLD"; break;
- default: c = "UNK"; break;
+ case VARP_STATE_INCOMPLETE: state = "INC"; break;
+ case VARP_STATE_REACHABLE: state = "RCH"; break;
+ case VARP_STATE_FAILED: state = "FLD"; break;
+ default: state = "UNK"; break;
}
- d = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " ");
-
- printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%d vmac=" MACFMT "
addr=" IPFMT " q=%d t=%lu)\n",
+ flags = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " ");
+
+ printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%s vmac=" MACFMT
+ " addr=%s q=%3d t=%lu)\n",
ventry,
atomic_read(&ventry->refcount),
- c, d,
- ventry->key.vnet,
+ state, flags,
+ VnetId_ntoa(&ventry->key.vnet, vnetbuf),
MAC6TUPLE(ventry->key.vmac.mac),
- NIPQUAD(ventry->addr),
+ VarpAddr_ntoa(&ventry->addr, addrbuf),
skb_queue_len(&ventry->queue),
ventry->timestamp);
} else {
@@ -469,7 +534,6 @@
void VarpEntry_incref(VarpEntry *z){
if(!z) return;
atomic_inc(&z->refcount);
- //dprintf("> "); VarpEntry_print(z);
}
/** Decrement reference count, freeing if zero.
@@ -478,9 +542,7 @@
*/
void VarpEntry_decref(VarpEntry *z){
if(!z) return;
- //dprintf("> "); VarpEntry_print(z);
if(atomic_dec_and_test(&z->refcount)){
- //dprintf("> freeing %p...\n", z);
VarpEntry_free(z);
}
}
@@ -499,7 +561,7 @@
/** Schedule the varp entry timer.
* Must increment the reference count before doing
- * this the first time, so the ventry won' be freed
+ * this the first time, so the ventry won't be freed
* before the timer goes off.
*
* @param ventry varp entry
@@ -538,7 +600,7 @@
atomic_inc(&ventry->probes);
VarpEntry_unlock(ventry, flags);
locked = 0;
- varp_solicit(skb, ventry->key.vnet);
+ varp_solicit(skb, &ventry->key.vnet);
} else {
dprintf("> empty queue.\n");
}
@@ -568,7 +630,7 @@
* @param vmac virtual MAC address (copied)
* @return ventry or null
*/
-VarpEntry * VarpEntry_new(u32 vnet, Vmac *vmac){
+VarpEntry * VarpEntry_new(VnetId *vnet, Vmac *vmac){
VarpEntry *z = ALLOCATE(VarpEntry);
if(z){
unsigned long now = jiffies;
@@ -584,7 +646,7 @@
z->timestamp = now;
z->error = varp_error_fn;
- z->key.vnet = vnet;
+ z->key.vnet = *vnet;
z->key.vmac = *vmac;
}
return z;
@@ -598,15 +660,9 @@
*/
Hashcode varp_key_hash_fn(void *k){
VarpKey *key = k;
- Hashcode h;
- h = hash_2ul(key->vnet,
- (key->vmac.mac[0] << 24) |
- (key->vmac.mac[1] << 16) |
- (key->vmac.mac[2] << 8) |
- (key->vmac.mac[3] ));
- h = hash_hul(h,
- (key->vmac.mac[4] << 8) |
- (key->vmac.mac[5] ));
+ Hashcode h = 0;
+ h = VnetId_hash(h, &key->vnet);
+ h = Vmac_hash(h, &key->vmac);
return h;
}
@@ -620,8 +676,8 @@
int varp_key_equal_fn(void *k1, void *k2){
VarpKey *key1 = k1;
VarpKey *key2 = k2;
- return (key1->vnet == key2->vnet)
- && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0);
+ return (VnetId_eq(&key1->vnet, &key2->vnet) &&
+ Vmac_eq(&key1->vmac, &key2->vmac));
}
/** Free an entry in the varp cache.
@@ -670,12 +726,10 @@
*/
static void varp_table_timer_fn(unsigned long arg){
VarpTable *z = (VarpTable *)arg;
- //dprintf("> z=%p\n", z);
if(z){
VarpTable_sweep(z, 0);
VarpTable_schedule(z);
}
- //dprintf("<\n");
}
/** Print a varp table.
@@ -687,7 +741,6 @@
VarpEntry *ventry;
unsigned long flags, vflags;
- //dprintf(">\n");
VarpTable_read_lock(z, flags);
HashTable_for_each(entry, varp_table->table){
ventry = entry->value;
@@ -696,7 +749,6 @@
VarpEntry_unlock(ventry, vflags);
}
VarpTable_read_unlock(z, flags);
- //dprintf("<\n");
}
/** Create a varp table.
@@ -735,7 +787,7 @@
* @param vmac virtual MAC address (copied)
* @return new entry or null
*/
-VarpEntry * VarpTable_add(VarpTable *z, u32 vnet, Vmac *vmac){
+VarpEntry * VarpTable_add(VarpTable *z, VnetId *vnet, Vmac *vmac){
int err = -ENOMEM;
VarpEntry *ventry;
HTEntry *entry;
@@ -743,7 +795,6 @@
ventry = VarpEntry_new(vnet, vmac);
if(!ventry) goto exit;
- //dprintf("> "); VarpEntry_print(ventry);
VarpTable_write_lock(z, flags);
entry = HashTable_add(z->table, ventry, ventry);
VarpTable_write_unlock(z, flags);
@@ -775,19 +826,20 @@
* @param vmac virtual MAC address
* @return entry found or null
*/
-VarpEntry * VarpTable_lookup(VarpTable *z, u32 vnet, Vmac *vmac){
+VarpEntry * VarpTable_lookup(VarpTable *z, VnetId *vnet, Vmac *vmac){
unsigned long flags;
- VarpKey key = { .vnet = vnet, .vmac = *vmac };
+ VarpKey key = { .vnet = *vnet, .vmac = *vmac };
VarpEntry *ventry;
VarpTable_read_lock(z, flags);
ventry = HashTable_get(z->table, &key);
+ if(ventry) VarpEntry_incref(ventry);
VarpTable_read_unlock(z, flags);
- if(ventry) VarpEntry_incref(ventry);
return ventry;
}
/** Handle output for a reachable ventry.
* Send the skb using the tunnel to the care-of address.
+ * Assumes the ventry lock is held.
*
* @param ventry varp entry
* @param skb skb to send
@@ -796,12 +848,12 @@
int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){
int err = 0;
unsigned long flags = 0;
- u32 addr;
+ VarpAddr addr;
dprintf("> skb=%p\n", skb);
addr = ventry->addr;
VarpEntry_unlock(ventry, flags);
- err = vnet_tunnel_send(ventry->key.vnet, addr, skb);
+ err = vnet_tunnel_send(&ventry->key.vnet, &addr, skb);
VarpEntry_lock(ventry, flags);
dprintf("< err=%d\n", err);
return err;
@@ -811,6 +863,7 @@
* If the entry is still incomplete, queue the skb, otherwise
* send it. If the queue is full, dequeue and free an old skb to
* make room for the new one.
+ * Assumes the ventry lock is held.
*
* @param ventry varp entry
* @param skb skb to send
@@ -820,7 +873,7 @@
int err = 0;
unsigned long flags = 0;
- dprintf("> skb=%p\n", skb); //VarpEntry_print(ventry);
+ dprintf("> skb=%p\n", skb);
ventry->state = VARP_STATE_INCOMPLETE;
atomic_set(&ventry->probes, 1);
if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
@@ -829,7 +882,7 @@
VarpEntry_schedule(ventry);
}
VarpEntry_unlock(ventry, flags);
- varp_solicit(skb, ventry->key.vnet);
+ varp_solicit(skb, &ventry->key.vnet);
VarpEntry_lock(ventry, flags);
if(ventry->state == VARP_STATE_INCOMPLETE){
@@ -837,7 +890,7 @@
struct sk_buff *oldskb;
oldskb = ventry->queue.next;
__skb_unlink(oldskb, &ventry->queue);
- dprintf("> purging skb=%p\n", oldskb);
+ dprintf("> dropping skb=%p\n", oldskb);
kfree_skb(oldskb);
}
__skb_queue_tail(&ventry->queue, skb);
@@ -893,33 +946,39 @@
* @param state state
* @return 0 on success, error code otherwise
*/
-int VarpEntry_update(VarpEntry *ventry, u32 addr, int state){
+int VarpEntry_update(VarpEntry *ventry, VarpAddr *addr, int state){
int err = 0;
unsigned long now = jiffies;
unsigned long flags;
dprintf("> addr=" IPFMT " state=%d\n", NIPQUAD(addr), state);
- //VarpEntry_print(ventry);
VarpEntry_lock(ventry, flags);
if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
- ventry->addr = addr;
+ ventry->addr = *addr;
ventry->timestamp = now;
ventry->state = state;
VarpEntry_process_queue(ventry);
exit:
- //dprintf("> "); VarpEntry_print(ventry);
VarpEntry_unlock(ventry, flags);
dprintf("< err=%d\n", err);
return err;
}
-int VarpTable_update(VarpTable *z, int vnet, Vmac *vmac, u32 addr,
+int VarpTable_update(VarpTable *z, VnetId *vnet, Vmac *vmac, VarpAddr *addr,
int state, int force){
int err = 0;
VarpEntry *ventry;
+#ifdef DEBUG
+ char vnetbuf[VNET_ID_BUF];
+ char addrbuf[VARP_ADDR_BUF];
+#endif
- dprintf("> vnet=%d mac=" MACFMT " addr=" IPFMT " state=%d force=%d\n",
- vnet, MAC6TUPLE(vmac->mac), NIPQUAD(addr), state, force);
+ dprintf("> vnet=%s mac=" MACFMT " addr=%s state=%d force=%d\n",
+ VnetId_ntoa(vnet, vnetbuf),
+ MAC6TUPLE(vmac->mac),
+ VarpAddr_ntoa(addr, addrbuf),
+ state,
+ force);
ventry = VarpTable_lookup(z, vnet, vmac);
if(force && !ventry){
dprintf("> No entry, adding\n");
@@ -945,10 +1004,10 @@
* @return 0 on success, -ENOENT if no entry found
*/
int VarpTable_update_entry(VarpTable *z, VarpHdr *varph, int state){
- return VarpTable_update(z, ntohl(varph->vnet), &varph->vmac, varph->addr, state, 0);
-}
-
-int varp_update(int vnet, unsigned char *vmac, u32 addr){
+ return VarpTable_update(z, &varph->vnet, &varph->vmac, &varph->addr, state, 0);
+}
+
+int varp_update(VnetId *vnet, unsigned char *vmac, VarpAddr *addr){
if(!varp_table){
return -ENOSYS;
}
@@ -971,7 +1030,6 @@
unsigned long old = now - VARP_ENTRY_TTL;
unsigned long flags, vflags;
- //dprintf(">\n");
VarpTable_read_lock(z, flags);
HashTable_for_each(entry, varp_table->table){
ventry = entry->value;
@@ -984,7 +1042,36 @@
VarpEntry_unlock(ventry, vflags);
}
VarpTable_read_unlock(z, flags);
- //dprintf("<\n");
+}
+
+/** Flush the varp table.
+ * Remove old unreachable varp entries with empty queues.
+ * Permanent entries are not removed.
+ *
+ * @param z table
+ */
+void VarpTable_flush(VarpTable *z){
+ HashTable_for_decl(entry);
+ VarpEntry *ventry;
+ unsigned long now = jiffies;
+ unsigned long old = now - VARP_ENTRY_TTL;
+ unsigned long flags, vflags;
+ int flush;
+
+ VarpTable_write_lock(z, flags);
+ HashTable_for_each(entry, varp_table->table){
+ ventry = entry->value;
+ VarpEntry_lock(ventry, vflags);
+ flush = (!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
+ (ventry->timestamp < old) &&
+ (ventry->state != VARP_STATE_REACHABLE) &&
+ (skb_queue_len(&ventry->queue) == 0));
+ VarpEntry_unlock(ventry, vflags);
+ if(flush){
+ VarpTable_remove(z, ventry);
+ }
+ }
+ VarpTable_write_unlock(z, flags);
}
/** Handle a varp request. Look for a vif with the requested
@@ -997,14 +1084,13 @@
*/
int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){
int err = -ENOENT;
- u32 vnet;
+ VnetId *vnet;
Vmac *vmac;
Vif *vif = NULL;
dprintf(">\n");
- vnet = ntohl(varph->vnet);
+ vnet = &varph->vnet;
vmac = &varph->vmac;
- dprintf("> vnet=%d vmac=" MACFMT "\n", vnet, MAC6TUPLE(vmac->mac));
if(vif_lookup(vnet, vmac, &vif)) goto exit;
varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac);
vif_decref(vif);
@@ -1026,7 +1112,7 @@
err = -ENOSYS;
goto exit;
}
- err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, vif->vnet, &vif->vmac);
+ err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, &vif->vnet, &vif->vmac);
exit:
dprintf("< err=%d\n", err);
return err;
@@ -1067,7 +1153,7 @@
(skb->nh.iph->daddr != varp_mcast_addr)){
// Ignore multicast packets not addressed to us.
err = 0;
- dprintf("> daddr=" IPFMT " mcaddr=" IPFMT "\n",
+ dprintf("> Ignoring daddr=" IPFMT " mcaddr=" IPFMT "\n",
NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
goto exit;
}
@@ -1076,23 +1162,29 @@
goto exit;
}
mine = 1;
- if(varph->vnetmsghdr.id != htons(VARP_ID)){
+ if(varph->hdr.id != htons(VARP_ID)){
// It's not varp at all - ignore it.
- wprintf("> Unknown id: %d \n", ntohs(varph->vnetmsghdr.id));
+ wprintf("> Invalid varp id: %d, expected %d \n",
+ ntohs(varph->hdr.id),
+ VARP_ID);
goto exit;
}
- if(1){
+#ifdef DEBUG
+ {
+ char vnetbuf[VNET_ID_BUF];
+ char addrbuf[VARP_ADDR_BUF];
dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n",
NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source),
ntohs(skb->h.uh->dest));
- dprintf("> opcode=%d vnet=%u vmac=" MACFMT " addr=" IPFMT "\n",
- ntohs(varph->vnetmsghdr.opcode),
- ntohl(varph->vnet),
+ dprintf("> opcode=%d vnet=%s vmac=" MACFMT " addr=%s\n",
+ ntohs(varph->hdr.opcode),
+ VnetId_ntoa(&varph->vnet, vnetbuf),
MAC6TUPLE(varph->vmac.mac),
- NIPQUAD(varph->addr));
+ VarpAddr_ntoa(&varph->addr, addrbuf));
varp_dprint();
}
- switch(ntohs(varph->vnetmsghdr.opcode)){
+#endif
+ switch(ntohs(varph->hdr.opcode)){
case VARP_OP_REQUEST:
err = varp_handle_request(skb, varph);
break;
@@ -1100,8 +1192,8 @@
err = varp_handle_announce(skb, varph);
break;
default:
- wprintf("> Unknown opcode: %d \n", ntohs(varph->vnetmsghdr.opcode));
- break;
+ wprintf("> Unknown opcode: %d \n", ntohs(varph->hdr.opcode));
+ break;
}
exit:
if(mine) err = 1;
@@ -1112,30 +1204,32 @@
/** Send an outgoing packet on the appropriate vnet tunnel.
*
* @param skb outgoing message
- * @param vnet vnet (host order)
+ * @param vnet vnet (network order)
* @return 0 on success, error code otherwise
*/
-int varp_output(struct sk_buff *skb, u32 vnet){
+int varp_output(struct sk_buff *skb, VnetId *vnet){
int err = 0;
unsigned char *mac = NULL;
Vmac *vmac = NULL;
VarpEntry *ventry = NULL;
- dprintf("> skb=%p vnet=%u\n", skb, vnet);
+ dprintf(">\n");
if(!varp_table){
err = -ENOSYS;
goto exit;
}
- dprintf("> skb.mac=%p\n", skb->mac.raw);
if(!skb->mac.raw){
wprintf("> No ethhdr in skb!\n");
err = -EINVAL;
goto exit;
}
- mac = MAC_ETH(skb)->h_dest;
+ mac = eth_hdr(skb)->h_dest;
vmac = (Vmac*)mac;
if(mac_is_multicast(mac)){
- err = vnet_tunnel_send(vnet, varp_mcast_addr, skb);
+ VarpAddr addr = {};
+ addr.family = AF_INET;
+ addr.u.ip4.s_addr = varp_mcast_addr;
+ err = vnet_tunnel_send(vnet, &addr, skb);
} else {
ventry = VarpTable_lookup(varp_table, vnet, vmac);
if(!ventry){
@@ -1165,7 +1259,7 @@
int err = 0;
varp_close();
varp_mcast_addr = addr;
- err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
+ err = varp_open(varp_mcast_addr, varp_port);
return err;
}
@@ -1191,7 +1285,6 @@
*/
int varp_init(void){
int err = 0;
- struct net_device *dev = NULL;
dprintf(">\n");
varp_table = VarpTable_new();
@@ -1200,18 +1293,10 @@
goto exit;
}
varp_init_mcast_addr(varp_mcaddr);
- err = vnet_get_device(varp_device, &dev);
- dprintf("> vnet_get_device(%s)=%d\n", varp_device, err);
- if(err) goto exit;
- err = vnet_get_device_address(dev, &varp_ucast_addr);
- dprintf("> vnet_get_device_address()=%d\n", err);
- if(err) goto exit;
varp_port = htons(VARP_PORT);
- err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
- dprintf("> varp_open()=%d\n", err);
+ err = varp_open(varp_mcast_addr, varp_port);
exit:
- if(dev) dev_put(dev);
dprintf("< err=%d\n", err);
return err;
}
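
Note on the types used throughout the varp.c changes above: vnet identifiers and care-of addresses are now passed by pointer as VnetId and VarpAddr structures instead of 32-bit integers. Their real definitions live in if_varp.h and varp_util.h, which these hunks do not show; the sketch below is only a plausible reconstruction inferred from the accessors used here (vnet.u.vnet16[7], addr.family, addr.u.ip4.s_addr), not the authoritative layout.

    /* Hypothetical sketch, inferred from usage in this changeset. */
    typedef struct VnetId {
        union {
            uint8_t  vnet8[16];    /* wide (apparently 128-bit) id, network order */
            uint16_t vnet16[8];
            uint32_t vnet32[4];
        } u;
    } VnetId;

    typedef struct VarpAddr {
        int family;                /* AF_INET in these hunks */
        union {
            struct in_addr  ip4;   /* used when family == AF_INET */
            struct in6_addr ip6;   /* assumed, not shown in these hunks */
        } u;
    } VarpAddr;
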
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/varp.h
--- a/tools/vnet/vnet-module/varp.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/varp.h Fri Aug 26 20:47:16 2005
@@ -19,6 +19,10 @@
#ifndef _VNET_VARP_H
#define _VNET_VARP_H
+#include "hash_table.h"
+#include "if_varp.h"
+#include "varp_util.h"
+
#define CONFIG_VARP_GRATUITOUS 1
@@ -26,29 +30,26 @@
struct sk_buff;
struct Vif;
-#define DEVICE "xen-br0"
-
extern int vnet_get_device(const char *name, struct net_device **dev);
extern int vnet_get_device_address(struct net_device *dev, u32 *addr);
extern int varp_handle_message(struct sk_buff *skb);
-extern int varp_output(struct sk_buff *skb, u32 vnet);
-extern int varp_update(int vnet, unsigned char *vmac, u32 addr);
+extern int varp_output(struct sk_buff *skb, struct VnetId *vnet);
+extern int varp_update(struct VnetId *vnet, unsigned char *vmac, struct VarpAddr *addr);
extern int varp_init(void);
extern void varp_exit(void);
-extern int varp_open(u32 mcaddr, u32 addr, u16 port);
+extern int varp_open(u32 mcaddr, u16 port);
extern void varp_close(void);
extern int varp_set_mcast_addr(u32 addr);
extern void varp_print(void);
+extern void varp_flush(void);
extern int varp_announce_vif(struct net_device *dev, struct Vif *vif);
-//extern int varp_announce_vifs(struct net_device *dev, struct task_struct *domain);
extern u32 varp_mcast_addr;
-
/* MAC broadcast addr is ff-ff-ff-ff-ff-ff (all 1's).
* MAC multicast addr has low bit 1, i.e. 01-00-00-00-00-00.
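
The comment above is what varp_output() relies on when choosing between the multicast tunnel and a unicast varp lookup. A minimal sketch of such a test (illustrative only; the module's actual mac_is_multicast() helper is defined elsewhere):

    static inline int mac_is_multicast(const unsigned char mac[ETH_ALEN])
    {
        /* The group bit is the least-significant bit of the first octet;
         * it is set for multicast and for the all-ones broadcast address. */
        return mac[0] & 0x01;
    }
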
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/varp_socket.c
--- a/tools/vnet/vnet-module/varp_socket.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/varp_socket.c Fri Aug 26 20:47:16 2005
@@ -177,7 +177,7 @@
/*============================================================================*/
/** Socket flags. */
-enum {
+enum VsockFlag {
VSOCK_REUSE = 1,
VSOCK_BIND = 2,
VSOCK_CONNECT = 4,
@@ -256,28 +256,13 @@
*/
int setsock_multicast(int sock, uint32_t saddr){
int err = 0;
- struct net_device *dev = NULL;
- u32 addr = 0;
struct ip_mreqn mreq = {};
int mloop = 0;
- err = vnet_get_device(DEVICE, &dev);
- if(err){
- eprintf("> error getting device: %d %d\n", err, errno);
- goto exit;
- }
- err = vnet_get_device_address(dev, &addr);
- if(err){
- eprintf("> error getting device address: %d %d\n", err, errno);
- goto exit;
- }
// See 'man 7 ip' for these options.
mreq.imr_multiaddr.s_addr = saddr; // IP multicast address.
- //mreq.imr_address.s_addr = addr; // Interface IP address.
mreq.imr_address.s_addr = INADDR_ANY; // Interface IP address.
mreq.imr_ifindex = 0; // Interface index (0 means any).
- dprintf("> saddr=%u.%u.%u.%u addr=%u.%u.%u.%u ifindex=%d\n",
- NIPQUAD(saddr), NIPQUAD(addr), mreq.imr_ifindex);
err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
if(err < 0){
eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno);
@@ -305,7 +290,7 @@
}
/** Create a socket.
- * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT.
+ * The flags can include values from enum VsockFlag.
*
* @param socktype socket type
* @param saddr address
@@ -368,19 +353,15 @@
/** Open the varp multicast socket.
*
* @param mcaddr multicast address
- * @param saddr address
* @param port port
* @param val return parameter for the socket
* @return 0 on success, error code otherwise
*/
-int varp_mcast_open(uint32_t mcaddr, uint32_t saddr, uint16_t port, int *val){
+int varp_mcast_open(uint32_t mcaddr, uint16_t port, int *val){
int err = 0;
int flags = VSOCK_REUSE;
int multicast = MULTICAST(mcaddr);
int sock = 0;
- struct sockaddr_in addr_in;
- struct sockaddr *addr = (struct sockaddr *)&addr_in;
- int addr_n = sizeof(addr_in);
dprintf(">\n");
flags |= VSOCK_MULTICAST;
@@ -392,23 +373,6 @@
err = setsock_multicast_ttl(sock, 1);
if(err < 0) goto exit;
}
- if(0){
- addr_in.sin_family = AF_INET;
- addr_in.sin_addr.s_addr = saddr;
- addr_in.sin_port = port;
- err = bind(sock, addr, addr_n);
- if(err < 0){
- eprintf("> bind: %d %d\n", err, errno);
- goto exit;
- }
- }
- if(0){
- struct sockaddr_in self = {};
- int self_n;
- getsockname(sock, (struct sockaddr *)&self, &self_n);
- dprintf("> sockname sock=%d addr=%u.%u.%u.%u port=%d\n",
- sock, NIPQUAD(saddr), ntohs(port));
- }
exit:
if(err){
shutdown(sock, 2);
@@ -427,7 +391,7 @@
*/
int varp_ucast_open(uint32_t addr, u16 port, int *val){
int err = 0;
- int flags = VSOCK_BIND | VSOCK_REUSE;
+ int flags = (VSOCK_BIND | VSOCK_REUSE);
dprintf(">\n");
err = create_socket(SOCK_DGRAM, addr, port, flags, val);
dprintf("< err=%d val=%d\n", err, *val);
@@ -536,7 +500,6 @@
err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait);
err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait);
for(n = 1; atomic_read(&varp_run) == 1; n++){
- //dprintf("> n=%d\n", n);
count = 0;
count += handle_sock_skb(varp_mcast_sock);
count += handle_sock_skb(varp_ucast_sock);
@@ -609,20 +572,18 @@
/** Open the varp sockets and start the thread handling them.
*
* @param mcaddr multicast address
- * @param addr unicast address
* @param port port
* @return 0 on success, error code otherwise
*/
-int varp_open(u32 mcaddr, u32 addr, u16 port){
+int varp_open(u32 mcaddr, u16 port){
int err = 0;
mm_segment_t oldfs;
//MOD_INC_USE_COUNT;
- dprintf("> mcaddr=%u.%u.%u.%u addr=%u.%u.%u.%u port=%u\n",
- NIPQUAD(mcaddr), NIPQUAD(addr), ntohs(port));
- //MOD_INC_USE_COUNT;
+ dprintf("> mcaddr=%u.%u.%u.%u port=%u\n",
+ NIPQUAD(mcaddr), ntohs(port));
oldfs = change_fs(KERNEL_DS);
- err = varp_mcast_open(mcaddr, addr, port, &varp_mcast_sock);
+ err = varp_mcast_open(mcaddr, port, &varp_mcast_sock);
if(err < 0 ) goto exit;
err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock);
if(err < 0 ) goto exit;
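
The setsock_multicast() change above stops tying the group membership to a particular device address: imr_address is INADDR_ANY and imr_ifindex is 0, so the kernel picks the interface. For reference, a user-space sketch of the equivalent join (the module itself does this through in-kernel socket calls, so this is illustrative only):

    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    /* Join multicast group 'mcaddr' (network byte order) on any interface. */
    static int join_varp_group(int sock, uint32_t mcaddr)
    {
        struct ip_mreqn mreq;
        memset(&mreq, 0, sizeof(mreq));
        mreq.imr_multiaddr.s_addr = mcaddr;      /* group address */
        mreq.imr_address.s_addr   = INADDR_ANY;  /* no fixed local address */
        mreq.imr_ifindex          = 0;           /* 0 means any interface */
        return setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
    }
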
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vif.c
--- a/tools/vnet/vnet-module/vif.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vif.c Fri Aug 26 20:47:16 2005
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/string.h>
+#include <linux/version.h>
#include <linux/net.h>
#include <linux/in.h>
@@ -33,11 +34,14 @@
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
+#include <linux/spinlock.h>
#include <etherip.h>
#include <if_varp.h>
#include <vnet_dev.h>
#include <vif.h>
+#include <varp.h>
+
#include "allocate.h"
#include "hash_table.h"
#include "sys_net.h"
@@ -50,6 +54,27 @@
/** Table of vifs indexed by VifKey. */
HashTable *vif_table = NULL;
+rwlock_t vif_table_lock = RW_LOCK_UNLOCKED;
+
+#define vif_read_lock(flags) read_lock_irqsave(&vif_table_lock, (flags))
+#define vif_read_unlock(flags) read_unlock_irqrestore(&vif_table_lock, (flags))
+#define vif_write_lock(flags) write_lock_irqsave(&vif_table_lock, (flags))
+#define vif_write_unlock(flags) write_unlock_irqrestore(&vif_table_lock, (flags))
+
+void vif_print(void){
+ HashTable_for_decl(entry);
+ Vif *vif;
+ unsigned long flags;
+ char vnetbuf[VNET_ID_BUF];
+
+ vif_read_lock(flags);
+ HashTable_for_each(entry, vif_table){
+ vif = entry->value;
+ printk(KERN_INFO "VIF(vnet=%s vmac=" MACFMT ")\n",
+ VnetId_ntoa(&vif->vnet, vnetbuf), MAC6TUPLE(vif->vmac.mac));
+ }
+ vif_read_unlock(flags);
+}
void vif_decref(Vif *vif){
if(!vif) return;
@@ -71,18 +96,11 @@
*/
Hashcode vif_key_hash_fn(void *k){
VifKey *key = k;
- Hashcode h;
- h = hash_2ul(key->vnet,
- (key->vmac.mac[0] << 24) |
- (key->vmac.mac[1] << 16) |
- (key->vmac.mac[2] << 8) |
- (key->vmac.mac[3] ));
- h = hash_hul(h,
- (key->vmac.mac[4] << 8) |
- (key->vmac.mac[5] ));
+ Hashcode h = 0;
+ h = VnetId_hash(h, &key->vnet);
+ h = Vmac_hash(h, &key->vmac);
return h;
}
-
/** Test equality for keys in the vif table.
* Compares vnet and mac.
@@ -94,7 +112,8 @@
int vif_key_equal_fn(void *k1, void *k2){
VifKey *key1 = k1;
VifKey *key2 = k2;
- return (key1->vnet == key2->vnet) && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0);
+ return (VnetId_eq(&key1->vnet , &key2->vnet) &&
+ Vmac_eq(&key1->vmac, &key2->vmac));
}
/** Free an entry in the vif table.
@@ -118,13 +137,13 @@
* @param mac MAC address
* @return 0 on success, -ENOENT otherwise
*/
-int vif_lookup(int vnet, Vmac *vmac, Vif **vif){
- int err = 0;
- VifKey key = {};
+int vif_lookup(VnetId *vnet, Vmac *vmac, Vif **vif){
+ int err = 0;
+ VifKey key = { .vnet = *vnet, .vmac = *vmac };
HTEntry *entry = NULL;
+ unsigned long flags;
- key.vnet = vnet;
- key.vmac = *vmac;
+ vif_read_lock(flags);
entry = HashTable_get_entry(vif_table, &key);
if(entry){
*vif = entry->value;
@@ -133,7 +152,7 @@
*vif = NULL;
err = -ENOENT;
}
- //dprintf("< err=%d addr=" IPFMT "\n", err, NIPQUAD(*coaddr));
+ vif_read_unlock(flags);
return err;
}
@@ -143,10 +162,12 @@
* @param mac MAC address
* @return 0 on success, negative error code otherwise
*/
-int vif_add(int vnet, Vmac *vmac, Vif **val){
+int vif_add(VnetId *vnet, Vmac *vmac, Vif **val){
int err = 0;
Vif *vif = NULL;
HTEntry *entry;
+ unsigned long flags;
+
dprintf("> vnet=%d\n", vnet);
vif = ALLOCATE(Vif);
if(!vif){
@@ -154,9 +175,11 @@
goto exit;
}
atomic_set(&vif->refcount, 1);
- vif->vnet = vnet;
+ vif->vnet = *vnet;
vif->vmac = *vmac;
+ vif_write_lock(flags);
entry = HashTable_add(vif_table, vif, vif);
+ vif_write_unlock(flags);
if(!entry){
err = -ENOMEM;
deallocate(vif);
@@ -177,22 +200,14 @@
* @param coaddr return parameter for care-of address
* @return number of entries deleted, or negative error code
*/
-int vif_remove(int vnet, Vmac *vmac){
- int err = 0;
- VifKey key = { .vnet = vnet, .vmac = *vmac };
- //dprintf("> vnet=%d addr=%u.%u.%u.%u\n", vnet, NIPQUAD(coaddr));
+int vif_remove(VnetId *vnet, Vmac *vmac){
+ int err = 0;
+ VifKey key = { .vnet = *vnet, .vmac = *vmac };
+ unsigned long flags;
+
+ vif_write_lock(flags);
err = HashTable_remove(vif_table, &key);
- //dprintf("< err=%d\n", err);
- return err;
-}
-
-int vif_find(int vnet, Vmac *vmac, int create, Vif **vif){
- int err = 0;
-
- err = vif_lookup(vnet, vmac, vif);
- if(err && create){
- err = vif_add(vnet, vmac, vif);
- }
+ vif_write_unlock(flags);
return err;
}
@@ -200,15 +215,15 @@
HashTable_clear(vif_table);
}
-int vif_create(int vnet, Vmac *vmac, Vif **vif){
+int vif_create(VnetId *vnet, Vmac *vmac, Vif **vif){
int err = 0;
dprintf(">\n");
- if(!vif_lookup(vnet, vmac, vif)){
+ if(vif_lookup(vnet, vmac, vif) == 0){
+ vif_decref(*vif);
err = -EEXIST;
goto exit;
}
- dprintf("> vif_add...\n");
err = vif_add(vnet, vmac, vif);
exit:
if(err){
@@ -218,25 +233,6 @@
return err;
}
-/** Create a vif.
- *
- * @param vnet vnet id
- * @param mac mac address (as a string)
- * @return 0 on success, error code otherwise
- */
-int mkvif(int vnet, char *mac){
- int err = 0;
- Vmac vmac = {};
- Vif *vif = NULL;
- dprintf("> vnet=%d mac=%s\n", vnet, mac);
- err = mac_aton(mac, vmac.mac);
- if(err) goto exit;
- err = vif_create(vnet, &vmac, &vif);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
/** Initialize the vif table.
*
* @return 0 on success, error code otherwise
@@ -250,12 +246,9 @@
goto exit;
}
vif_table->entry_free_fn = vif_entry_free_fn;
- vif_table->key_hash_fn = vif_key_hash_fn;
- vif_table->key_equal_fn = vif_key_equal_fn;
-
- // Some vifs for testing.
- //mkvif(1, "aa:00:00:00:20:11");
- //mkvif(2, "aa:00:00:00:20:12");
+ vif_table->key_hash_fn = vif_key_hash_fn;
+ vif_table->key_equal_fn = vif_key_equal_fn;
+
exit:
if(err < 0) wprintf("< err=%d\n", err);
dprintf("< err=%d\n", err);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vif.h
--- a/tools/vnet/vnet-module/vif.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vif.h Fri Aug 26 20:47:16 2005
@@ -24,12 +24,12 @@
/** Key for entries in the vif table. */
typedef struct VifKey {
- int vnet;
+ VnetId vnet;
Vmac vmac;
} VifKey;
typedef struct Vif {
- int vnet;
+ VnetId vnet;
Vmac vmac;
struct net_device *dev;
atomic_t refcount;
@@ -38,15 +38,17 @@
struct HashTable;
extern struct HashTable *vif_table;
+extern void vif_print(void);
+
extern void vif_decref(Vif *vif);
extern void vif_incref(Vif *vif);
-extern int vif_create(int vnet, Vmac *vmac, Vif **vif);
+extern int vif_create(struct VnetId *vnet, Vmac *vmac, Vif **vif);
-extern int vif_add(int vnet, Vmac *vmac, Vif **vif);
-extern int vif_lookup(int vnet, Vmac *vmac, Vif **vif);
-extern int vif_remove(int vnet, Vmac *vmac);
-extern int vif_find(int vnet, Vmac *vmac, int create, Vif **vif);
+extern int vif_create(VnetId *vnet, Vmac *vmac, Vif **vif);
+extern int vif_add(struct VnetId *vnet, Vmac *vmac, Vif **vif);
+extern int vif_lookup(struct VnetId *vnet, Vmac *vmac, Vif **vif);
+extern int vif_remove(struct VnetId *vnet, Vmac *vmac);
extern void vif_purge(void);
extern int vif_init(void);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vnet.c
--- a/tools/vnet/vnet-module/vnet.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vnet.c Fri Aug 26 20:47:16 2005
@@ -47,6 +47,7 @@
#include <random.h>
#include <tunnel.h>
+#include <skb_util.h>
#include <vnet_dev.h>
#include <vnet.h>
#include <vif.h>
@@ -70,7 +71,7 @@
/** Key for entries in the vnet address table. */
typedef struct VnetAddrKey {
/** Vnet id. */
- int vnet;
+ VnetId vnet;
/** MAC address. */
unsigned char mac[ETH_ALEN];
} VnetAddrKey;
@@ -88,7 +89,6 @@
void Vnet_decref(Vnet *info){
if(!info) return;
if(atomic_dec_and_test(&info->refcount)){
- dprintf("> free vnet=%u\n", info->vnet);
vnet_dev_remove(info);
deallocate(info);
}
@@ -101,6 +101,28 @@
void Vnet_incref(Vnet *info){
if(!info) return;
atomic_inc(&info->refcount);
+}
+
+void Vnet_print(Vnet *info)
+{
+ char vnetbuf[VNET_ID_BUF];
+
+ printk(KERN_INFO "VNET(vnet=%s device=%s security=%c%c)\n",
+ VnetId_ntoa(&info->vnet, vnetbuf),
+ info->device,
+ ((info->security & SA_AUTH) ? 'a' : '-'),
+ ((info->security & SA_CONF) ? 'c' : '-'));
+}
+
+void vnet_print(void)
+{
+ HashTable_for_decl(entry);
+ Vnet *info;
+
+ HashTable_for_each(entry, vnet_table){
+ info = entry->value;
+ Vnet_print(info);
+ }
}
/** Allocate a vnet, setting reference count to 1.
@@ -129,7 +151,7 @@
HTEntry *entry = NULL;
// Vnet_del(info->vnet); //todo: Delete existing vnet info?
Vnet_incref(info);
- entry = HashTable_add(vnet_table, HKEY(info->vnet), info);
+ entry = HashTable_add(vnet_table, &info->vnet, info);
if(!entry){
err = -ENOMEM;
Vnet_decref(info);
@@ -142,8 +164,8 @@
* @param vnet id of vnet to remove
* @return number of vnets removed
*/
-int Vnet_del(vnetid_t vnet){
- return HashTable_remove(vnet_table, HKEY(vnet));
+int Vnet_del(VnetId *vnet){
+ return HashTable_remove(vnet_table, vnet);
}
/** Lookup a vnet by id.
@@ -153,17 +175,14 @@
* @param info return parameter for vnet
* @return 0 on success, -ENOENT if no vnet found
*/
-int Vnet_lookup(vnetid_t vnet, Vnet **info){
- int err = 0;
- dprintf("> vnet=%u info=%p\n", vnet, info);
- dprintf("> vnet_table=%p\n",vnet_table);
- *info = HashTable_get(vnet_table, HKEY(vnet));
+int Vnet_lookup(VnetId *vnet, Vnet **info){
+ int err = 0;
+ *info = HashTable_get(vnet_table, vnet);
if(*info){
Vnet_incref(*info);
} else {
err = -ENOENT;
}
- dprintf("< err=%d\n", err);
return err;
}
@@ -191,22 +210,33 @@
*/
static int vnet_setup(void){
int err = 0;
- int i, n = 5; //20;
+ int i, n = 3;
int security = vnet_security_default;
+ uint32_t vnetid;
Vnet *vnet;
- dprintf(">\n");
for(i=0; i<n; i++){
err = Vnet_alloc(&vnet);
if(err) break;
- vnet->vnet = VNET_VIF + i;
- vnet->security = (vnet->vnet > 10 ? security : 0);
- //err = Vnet_add(vnet);
+ vnetid = VNET_VIF + i;
+ vnet->vnet = toVnetId(vnetid);
+ sprintf(vnet->device, "vnif%04x", vnetid);
+ vnet->security = (vnetid > 10 ? security : 0);
err = Vnet_create(vnet);
if(err) break;
}
- dprintf("< err=%d\n", err);
- return err;
+ return err;
+}
+
+int vnet_key_equal_fn(void *k1, void *k2){
+ VnetId *key1 = k1;
+ VnetId *key2 = k2;
+ return VnetId_eq(key1, key2);
+}
+
+Hashcode vnet_key_hash_fn(void *k){
+ VnetId *key = k;
+ return VnetId_hash(0, key);
}
/** Initialize the vnet table and the physical vnet.
@@ -216,18 +246,18 @@
int vnet_init(void){
int err = 0;
- dprintf(">\n");
vnet_table = HashTable_new(0);
- dprintf("> vnet_table=%p\n", vnet_table);
if(!vnet_table){
err = -ENOMEM;
goto exit;
}
+ vnet_table->key_equal_fn = vnet_key_equal_fn;
+ vnet_table->key_hash_fn = vnet_key_hash_fn;
vnet_table->entry_free_fn = vnet_entry_free_fn;
err = Vnet_alloc(&vnet_physical);
if(err) goto exit;
- vnet_physical->vnet = VNET_PHYS;
+ vnet_physical->vnet = toVnetId(VNET_PHYS);
vnet_physical->security = 0;
err = Vnet_add(vnet_physical);
if(err) goto exit;
@@ -237,7 +267,6 @@
if(err) goto exit;
err = vif_init();
exit:
- if(err < 0) wprintf("< err=%d\n", err);
return err;
}
@@ -248,50 +277,28 @@
vnet_table = NULL;
}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-
-static inline int skb_route(struct sk_buff *skb, struct rtable **prt){
- int err = 0;
- struct flowi fl = {
- .oif = skb->dev->ifindex,
- .nl_u = {
- .ip4_u = {
- .daddr = skb->nh.iph->daddr,
- .saddr = skb->nh.iph->saddr,
- .tos = skb->nh.iph->tos,
- }
- }
- };
-
- err = ip_route_output_key(prt, &fl);
- return err;
-}
-
-#else
-
-static inline int skb_route(struct sk_buff *skb, struct rtable **prt){
- int err = 0;
- struct rt_key key = { };
- key.dst = skb->nh.iph->daddr;
- key.src = skb->nh.iph->saddr;
- key.tos = skb->nh.iph->tos;
- key.oif = skb->dev->ifindex;
- err = ip_route_output_key(prt, &key);
- return err;
-}
-
-#endif
-
inline int skb_xmit(struct sk_buff *skb){
int err = 0;
struct rtable *rt = NULL;
- dprintf("> skb=%p dev=%s\n", skb, skb->dev->name);
-
+ dprintf(">\n");
skb->protocol = htons(ETH_P_IP);
err = skb_route(skb, &rt);
- if(err) goto exit;
+ if(err){
+ wprintf("> skb_route=%d\n", err);
+ wprintf("> dev=%s idx=%d src=%u.%u.%u.%u dst=%u.%u.%u.%u tos=%d\n",
+ (skb->dev ? skb->dev->name : "???"),
+ (skb->dev ? skb->dev->ifindex : -1),
+ NIPQUAD(skb->nh.iph->saddr),
+ NIPQUAD(skb->nh.iph->daddr),
+ skb->nh.iph->tos);
+
+ goto exit;
+ }
skb->dst = &rt->u.dst;
+ if(!skb->dev){
+ skb->dev = rt->u.dst.dev;
+ }
ip_select_ident(skb->nh.iph, &rt->u.dst, NULL);
@@ -317,39 +324,27 @@
*
* @todo fixme
*/
-int vnet_skb_send(struct sk_buff *skb, u32 vnet){
- int err = 0;
- Vif *vif = NULL;
-
- dprintf("> skb=%p vnet=%u\n", skb, vnet);
- if(vnet == VNET_PHYS || !vnet){
- // For completeness, send direct to the network.
- if(skb->dev){
- err = skb_xmit(skb);
- } else {
- // Can't assume eth0 - might be nbe-br or other. Need to route.
- struct net_device *dev = NULL;
- err = vnet_get_device(DEVICE, &dev);
- if(err) goto exit;
- skb->dev = dev;
- err = skb_xmit(skb);
- dev_put(dev);
- }
+int vnet_skb_send(struct sk_buff *skb, VnetId *vnet){
+ int err = 0;
+ VnetId vnet_phys = toVnetId(VNET_PHYS);
+
+ dprintf(">\n");
+ skb->dev = NULL;
+ if(!vnet || VnetId_eq(vnet, &vnet_phys)){
+ // No vnet or physical vnet, send direct to the network.
+ skb_xmit(skb);
} else {
- dprintf("> varp_output\n");
err = varp_output(skb, vnet);
}
- //dprintf("< err=%d\n", err);
- exit:
- if(vif) vif_decref(vif);
dprintf("< err=%d\n", err);
return err;
}
/** Receive an skb for a vnet.
+ * We make the skb come out of the vif for the vnet, and
+ * let ethernet bridging forward it to related interfaces.
* If the dest is broadcast, goes to all vifs on the vnet.
- * If the dest is unicast, goes to addressed vif on vnet.
- * For each vif we set the packet dev and receive the packet.
+ * If the dest is unicast, goes to the addressed vif on the vnet.
*
* The packet must have skb->mac.raw set and skb->data must point
* after the device (ethernet) header.
@@ -359,139 +354,19 @@
* @param vmac packet vmac
* @return 0 on success, error code otherwise
*/
-#if 1
-int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){
- // Receive the skb for a vnet.
- // We make the skb come out of the vif for the vnet, and
- // let ethernet bridging forward it to related interfaces.
+int vnet_skb_recv(struct sk_buff *skb, VnetId *vnet, Vmac *vmac){
int err = 0;
Vnet *info = NULL;
- dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac));
err = Vnet_lookup(vnet, &info);
if(err) goto exit;
skb->dev = info->dev;
- dprintf("> netif_rx dev=%s\n", skb->dev->name);
netif_rx(skb);
exit:
if(info) Vnet_decref(info);
if(err){
- kfree_skb(skb);
- }
- dprintf("< err=%d\n", err);
- return err;
-}
-
-#else
-int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){
- int err = 0;
- Vif *vif = NULL;
-
- dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac));
- if(mac_is_multicast(vmac->mac)){
- HashTable_for_decl(entry);
- int count = 0;
- struct sk_buff *new_skb;
-
- HashTable_for_each(entry, vif_table){
- vif = entry->value;
- if(vif->vnet != vnet) continue;
- count++;
- new_skb = skb_copy(skb, GFP_ATOMIC);
- if(!new_skb) break;
- new_skb->dev = vif->dev;
- dprintf("> %d] netif_rx dev=%s\n", count, new_skb->dev->name);
- netif_rx(new_skb);
- }
kfree_skb(skb);
- } else {
- err = vif_lookup(vnet, vmac, &vif);
- if(err){
- kfree_skb(skb);
- goto exit;
- }
- skb->dev = vif->dev;
- dprintf("> netif_rx dev=%s\n", skb->dev->name);
- netif_rx(skb);
- }
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-#endif
-
-/** Check validity of an incoming IP frame.
- *
- * @param skb frame
- * @return 0 if ok, error code otherwise
- *
- * @todo fixme Can prob skip most of this because linux will have done it.
- * @todo Only need the vnet skb context check.
- */
-int check_ip_frame(struct sk_buff *skb){
- int err = -EINVAL;
- struct iphdr* iph;
- struct net_device *dev;
- __u32 len;
- __u16 check;
-
-#if 0
- if(skb->context){
- // Todo: After ESP want to skip most checks (including checksum),
- // Todo: but in general may not want to skip all checks on detunnel.
- //dprintf("> Skip check, has context\n");
- err = 0;
- goto exit;
- }
-#endif
- // Check we have enough for an ip header - the skb passed should
- // have data pointing at the eth header and skb->len should include
- // that. skb->nh should already have been set. Let the indvidual
- // protocol handlers worry about the exact ip header len
- // (i.e. whether any ip options are set).
- dev = skb->dev;
-
- if(skb->len < ETH_HLEN + sizeof(struct iphdr)){
- wprintf("> packet too short for ip header\n");
- goto exit;
- }
-
- iph = skb->nh.iph;
- /*
- * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
- *
- * Is the datagram acceptable?
- *
- * 1. Length at least the size of an ip header
- * 2. Version of 4
- * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
- * 4. Doesn't have a bogus length
- */
- if (iph->ihl < 5 || iph->version != 4){
- wprintf("> len and version check failed\n");
- goto exit;
- }
- if(skb->len < ETH_HLEN + (iph->ihl << 2)){
- wprintf("> packet too short for given ihl\n");
- goto exit;
- }
-
- check = iph->check;
- //iph->check = 0;
- //iph->check = compute_cksum((__u16 *)iph, (iph->ihl << 1));
- if(iph->check != check){
- wprintf("> invalid checksum\n");
- goto exit;
- }
-
- len = ntohs(iph->tot_len);
- if (skb->len < len + ETH_HLEN || len < (iph->ihl << 2)){
- wprintf("> packet too short for tot_len\n");
- goto exit;
- }
- skb->h.raw = skb->nh.raw + (iph->ihl << 2);
- err = 0;
- exit:
+ }
return err;
}
@@ -539,14 +414,13 @@
*
* @todo Need to check that the sa provides the correct security level.
*/
-int vnet_check_context(int vnet, SkbContext *context, Vnet **val){
+int vnet_check_context(VnetId *vnet, SkbContext *context, Vnet **val){
int err = 0;
Vnet *info = NULL;
SAState *sa = NULL;
err = Vnet_lookup(vnet, &info);
if(err){
- wprintf("> No vnet %d\n", vnet);
goto exit;
}
if(!info->security) goto exit;
@@ -556,7 +430,8 @@
goto exit;
}
if(context->protocol != IPPROTO_ESP){
- wprintf("> Invalid protocol: wanted %d, got %d\n", IPPROTO_ESP,
context->protocol);
+ wprintf("> Invalid protocol: wanted %d, got %d\n",
+ IPPROTO_ESP, context->protocol);
goto exit;
}
sa = context->data;
@@ -586,13 +461,11 @@
*/
static void sa_tunnel_close(Tunnel *tunnel){
SAState *sa;
- dprintf(">\n");
if(!tunnel) return;
sa = tunnel->data;
if(!sa) return;
SAState_decref(sa);
tunnel->data = NULL;
- dprintf("<\n");
}
/** Packet send function for SA tunnels.
@@ -604,7 +477,6 @@
static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){
int err = -EINVAL;
SAState *sa;
- //dprintf("> tunnel=%p\n", tunnel);
if(!tunnel){
wprintf("> Null tunnel!\n");
goto exit;
@@ -616,7 +488,6 @@
}
err = SAState_send(sa, skb, tunnel->base);
exit:
- //dprintf("< err=%d\n", err);
return err;
}
@@ -638,7 +509,7 @@
* @param tunnel return parameter
* @return 0 on success, error code otherwise
*/
-int vnet_tunnel_open(u32 vnet, u32 addr, Tunnel **tunnel){
+int vnet_tunnel_open(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
extern TunnelType *etherip_tunnel_type;
int err = 0;
Vnet *info = NULL;
@@ -646,20 +517,17 @@
Tunnel *sa_tunnel = NULL;
Tunnel *etherip_tunnel = NULL;
- dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr));
err = Vnet_lookup(vnet, &info);
- dprintf("> Vnet_lookup=%d\n", err);
if(err) goto exit;
if(info->security){
SAState *sa = NULL;
- dprintf("> security=%d\n", info->security);
+ //FIXME: Assuming IPv4 for now.
+ u32 ipaddr = addr->u.ip4.s_addr;
err = Tunnel_create(sa_tunnel_type, vnet, addr, base_tunnel,
&sa_tunnel);
if(err) goto exit;
- dprintf("> sa_tunnel=%p\n", sa_tunnel);
- err = sa_create(info->security, 0, IPPROTO_ESP, addr, &sa);
+ err = sa_create(info->security, 0, IPPROTO_ESP, ipaddr, &sa);
if(err) goto exit;
sa_tunnel->data = sa;
- dprintf("> sa=%p\n", sa);
base_tunnel = sa_tunnel;
}
err = Tunnel_create(etherip_tunnel_type, vnet, addr, base_tunnel,
&etherip_tunnel);
@@ -673,7 +541,6 @@
} else {
*tunnel = etherip_tunnel;
}
- dprintf("< err=%d\n", err);
return err;
}
@@ -685,14 +552,12 @@
* @param tunnel return parameter
* @return 0 on success, error code otherwise
*/
-int vnet_tunnel_lookup(u32 vnet, u32 addr, Tunnel **tunnel){
- int err = 0;
- dprintf("> vnet=%d addr=" IPFMT "\n", vnet, NIPQUAD(addr));
+int vnet_tunnel_lookup(VnetId *vnet, VarpAddr *addr, Tunnel **tunnel){
+ int err = 0;
*tunnel = Tunnel_lookup(vnet, addr);
if(!*tunnel){
err = vnet_tunnel_open(vnet, addr, tunnel);
}
- dprintf("< err=%d\n", err);
return err;
}
@@ -703,16 +568,14 @@
* @param skb packet
* @return 0 on success, error code otherwise
*/
-int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb){
+int vnet_tunnel_send(VnetId *vnet, VarpAddr *addr, struct sk_buff *skb){
int err = 0;
Tunnel *tunnel = NULL;
- dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr));
err = vnet_tunnel_lookup(vnet, addr, &tunnel);
if(err) goto exit;
err = Tunnel_send(tunnel, skb);
Tunnel_decref(tunnel);
exit:
- dprintf("< err=%d\n", err);
return err;
}
@@ -722,7 +585,7 @@
vnet_exit();
esp_module_exit();
etherip_module_exit();
- tunnel_module_init();
+ tunnel_module_exit();
random_module_exit();
}
@@ -753,12 +616,13 @@
sa_algorithm_probe_all();
err = sa_table_init();
if(err) wprintf("> sa_table_init err=%d\n", err);
+ if(err) goto exit;
ProcFS_init();
exit:
if(err < 0){
vnet_module_exit();
- }
- if(err < 0) wprintf("< err=%d\n", err);
+ wprintf("< err=%d\n", err);
+ }
return err;
}
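
vnet_setup() and vnet_init() above wrap small integer ids with toVnetId(), whose definition is not part of these hunks. Judging by the later use of ntohs(vnet.u.vnet16[7]) to recover the id for the device name, a plausible sketch (an assumption, not the module's code) is:

    /* Hypothetical: store a 32-bit host-order id in the low-order bytes of a
     * wide VnetId kept in network byte order. */
    static inline VnetId toVnetId(uint32_t vnetid)
    {
        VnetId vnet = {};
        vnet.u.vnet32[3] = htonl(vnetid);
        return vnet;
    }
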
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vnet.h
--- a/tools/vnet/vnet-module/vnet.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vnet.h Fri Aug 26 20:47:16 2005
@@ -29,17 +29,15 @@
struct Vif;
struct net_device;
-typedef uint32_t vnetid_t;
-typedef uint32_t vnetaddr_t;
-
/** Vnet property record. */
typedef struct Vnet {
/** Reference count. */
atomic_t refcount;
/** Vnet id. */
- vnetid_t vnet;
+ struct VnetId vnet;
/** Security flag. If true the vnet requires ESP. */
int security;
+ char device[IFNAMSIZ];
struct net_device *dev;
struct net_device *bridge;
@@ -51,30 +49,28 @@
int recursion;
} Vnet;
-extern int Vnet_lookup(vnetid_t id, Vnet **vnet);
-extern int Vnet_add(Vnet *vnet);
-extern int Vnet_del(vnetid_t vnet);
-extern void Vnet_incref(Vnet *);
-extern void Vnet_decref(Vnet *);
-extern int Vnet_alloc(Vnet **vnet);
+extern void vnet_print(void);
+extern void Vnet_print(Vnet *info);
+
+extern int Vnet_lookup(struct VnetId *vnet, struct Vnet **info);
+extern int Vnet_add(struct Vnet *info);
+extern int Vnet_del(struct VnetId *vnet);
+extern void Vnet_incref(struct Vnet *info);
+extern void Vnet_decref(struct Vnet *info);
+extern int Vnet_alloc(struct Vnet **info);
extern Vnet *vnet_physical;
extern int skb_xmit(struct sk_buff *skb);
-extern int vnet_skb_send(struct sk_buff *skb, u32 vnet);
-extern int vnet_skb_recv(struct sk_buff *skb, u32 vnet, struct Vmac *vmac);
+extern int vnet_skb_send(struct sk_buff *skb, struct VnetId *vnet);
+extern int vnet_skb_recv(struct sk_buff *skb, struct VnetId *vnet, struct Vmac *vmac);
-extern int vnet_check_context(int vnet, SkbContext *context, Vnet **vinfo);
+extern int vnet_check_context(struct VnetId *vnet, SkbContext *context, Vnet **vinfo);
-extern int vnet_tunnel_open(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel);
-extern int vnet_tunnel_lookup(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel);
-extern int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb);
+extern int vnet_tunnel_open(struct VnetId *vnet, struct VarpAddr *addr, Tunnel **tunnel);
+extern int vnet_tunnel_lookup(struct VnetId *vnet, struct VarpAddr *addr, Tunnel **tunnel);
+extern int vnet_tunnel_send(struct VnetId *vnet, struct VarpAddr *addr, struct sk_buff *skb);
extern int vnet_init(void);
-
-enum {
- HANDLE_OK = 1,
- HANDLE_NO = 0,
-};
extern int vnet_sa_security(u32 spi, int protocol, u32 addr);
struct SAState;
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vnet_dev.c
--- a/tools/vnet/vnet-module/vnet_dev.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vnet_dev.c Fri Aug 26 20:47:16 2005
@@ -48,15 +48,9 @@
#undef DEBUG
#include "debug.h"
-#define VNETIF_FMT "vnetif%u"
-#define VNETBR_FMT "vnet%u"
-
#ifndef CONFIG_BRIDGE
#error Must configure ethernet bridging in Network Options
#endif
-
-#include <linux/../../net/bridge/br_private.h>
-#define dev_bridge(_dev) ((struct net_bridge *)(_dev)->priv)
static void vnet_dev_destructor(struct net_device *dev){
dprintf(">\n");
@@ -113,135 +107,16 @@
Vnet *vnet = (void*)dev->priv;
dprintf(">\n");
- dprintf("> vnet=%d\n", vnet->vnet);
- snprintf(dev->name, IFNAMSIZ - 1, VNETIF_FMT, vnet->vnet);
- if(__dev_get_by_name(dev->name)){
+ if(__dev_get_by_name(vnet->device)){
err = -ENOMEM;
- }
+ wprintf("> vnet device name in use: %s\n", vnet->device);
+ }
+ strcpy(dev->name, vnet->device);
dprintf("< err=%d\n", err);
return err;
}
-//============================================================================
-#ifdef CONFIG_VNET_BRIDGE
-
-#define BRIDGE DEVICE
-
-void vnet_bridge_fini(Vnet *vnet){
- if(!vnet) return;
- if(vnet->bridge){
- br_del_bridge(vnet->bridge->name);
- vnet->bridge = NULL;
- }
-}
-
-/** Create the bridge for a vnet, and add the
- * vnet interface to it.
- *
- * @param vnet vnet
- * @return 0 on success, error code otherwise
- */
-int vnet_bridge_init(Vnet *vnet){
- int err = 0;
- char bridge[IFNAMSIZ] = {};
- struct net_bridge *br;
- vnet->bridge = NULL;
- snprintf(bridge, IFNAMSIZ - 1, VNETBR_FMT, vnet->vnet);
- rtnl_lock();
- err = br_add_bridge(bridge);
- rtnl_unlock();
- if(err){
- dprintf("> Error creating vnet bridge %s: err=%d\n", bridge, err);
- goto exit;
- }
- vnet->bridge = __dev_get_by_name(bridge);
- if(!vnet->bridge){
- wprintf("> Vnet bridge %s is null!\n", bridge);
- err = -EINVAL;
- goto exit;
- }
- br = dev_bridge(vnet->bridge);
- br->stp_enabled = 0;
- br->bridge_hello_time = 0;
- br->hello_time = 0;
- br->bridge_forward_delay = 0;
- br->forward_delay = 0;
- rtnl_lock();
- err = br_add_if(br, vnet->dev);
- rtnl_unlock();
- if(err){
- dprintf("> Error adding vif %s to vnet bridge %s: err=%d\n",
- vnet->dev->name, bridge, err);
- goto exit;
- }
- rtnl_lock();
- dev_open(vnet->dev);
- dev_open(vnet->bridge);
- rtnl_unlock();
- exit:
- if(err){
- if(vnet->bridge){
- rtnl_lock();
- br_del_bridge(bridge);
- rtnl_unlock();
- vnet->bridge = NULL;
- }
- }
- return err;
-}
-
-
-/** Add an interface to the bridge for a vnet.
- *
- * @param vnet vnet
- * @param dev interface
- * @return 0 on success, error code otherwise
- */
-int vnet_add_if(Vnet *vnet, struct net_device *dev){
- int err = 0;
- struct net_device *brdev;
-
- dprintf(">\n");
- if(!vnet->bridge){
- err = -EINVAL;
- goto exit;
- }
- // Delete the interface from the default bridge.
- // todo: Really want to delete it from any bridge it's in.
- if(!vnet_get_device(BRIDGE, &brdev)){
- rtnl_lock();
- br_del_if(dev_bridge(brdev), dev);
- rtnl_unlock();
- }
- dprintf("> br_add_if %s %s\n", vnet->bridge->name, dev->name);
- rtnl_lock();
- dev_open(dev);
- dev_open(vnet->bridge);
- err = br_add_if(dev_bridge(vnet->bridge), dev);
- rtnl_unlock();
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int vnet_del_if(Vnet *vnet, struct net_device *dev){
- int err = 0;
-
- dprintf(">\n");
- if(!vnet->bridge){
- err = -EINVAL;
- goto exit;
- }
- rtnl_lock();
- br_del_if(dev_bridge(vnet->bridge), dev);
- rtnl_unlock();
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-
-/** Create the bridge and virtual interface for a vnet.
+/** Create the virtual interface for a vnet.
*
* @param info vnet
* @return 0 on success, error code otherwise
@@ -249,25 +124,13 @@
int Vnet_create(Vnet *info){
int err = 0;
- dprintf("> %u\n", info->vnet);
err = vnet_dev_add(info);
if(err) goto exit;
- dprintf("> vnet_bridge_init\n");
- err = vnet_bridge_init(info);
- if(err) goto exit;
- dprintf("> Vnet_add...\n");
err = Vnet_add(info);
exit:
- if(err){
- dprintf("> vnet_bridge_fini...\n");
- vnet_bridge_fini(info);
- }
- dprintf("< err=%d\n", err);
return err;
}
-
-
/** Remove the net device for a vnet.
* Clears the dev field of the vnet.
* Safe to call if the vnet or its dev are null.
@@ -276,75 +139,13 @@
*/
void vnet_dev_remove(Vnet *vnet){
if(!vnet) return;
- dprintf("> vnet=%u\n", vnet->vnet);
- if(vnet->bridge){
- dprintf("> br_del_bridge(%s)\n", vnet->bridge->name);
- rtnl_lock();
- br_del_bridge(vnet->bridge->name);
- rtnl_unlock();
- vnet->bridge = NULL;
- }
if(vnet->dev){
//dev_put(vnet->dev);
dprintf("> unregister_netdev(%s)\n", vnet->dev->name);
unregister_netdev(vnet->dev);
vnet->dev = NULL;
}
- dprintf("<\n");
-}
-
-//============================================================================
-#else
-//============================================================================
-
-/** Create the virtual interface for a vnet.
- *
- * @param info vnet
- * @return 0 on success, error code otherwise
- */
-int Vnet_create(Vnet *info){
- int err = 0;
-
- dprintf("> %u\n", info->vnet);
- err = vnet_dev_add(info);
- if(err) goto exit;
- dprintf("> Vnet_add...\n");
- err = Vnet_add(info);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-int vnet_add_if(Vnet *vnet, struct net_device *dev){
- int err = -ENOSYS;
- return err;
-}
-
-
-int vnet_del_if(Vnet *vnet, struct net_device *dev){
- int err = 0;
- return err;
-}
-
-/** Remove the net device for a vnet.
- * Clears the dev field of the vnet.
- * Safe to call if the vnet or its dev are null.
- *
- * @param vnet vnet
- */
-void vnet_dev_remove(Vnet *vnet){
- if(!vnet) return;
- dprintf("> vnet=%u\n", vnet->vnet);
- if(vnet->dev){
- //dev_put(vnet->dev);
- dprintf("> unregister_netdev(%s)\n", vnet->dev->name);
- unregister_netdev(vnet->dev);
- vnet->dev = NULL;
- }
- dprintf("<\n");
-}
-#endif
-//============================================================================
+}
static int vnet_dev_open(struct net_device *dev){
int err = 0;
@@ -365,6 +166,7 @@
static int vnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev){
int err = 0;
Vnet *vnet = dev->priv;
+ int len = 0;
dprintf("> skb=%p\n", skb);
if(vnet->recursion++) {
@@ -385,12 +187,14 @@
skb->mac.raw = skb->data;
}
//dev->trans_start = jiffies;
- err = vnet_skb_send(skb, vnet->vnet);
+ len = skb->len;
+ // Must not use skb pointer after vnet_skb_send().
+ err = vnet_skb_send(skb, &vnet->vnet);
if(err < 0){
vnet->stats.tx_errors++;
} else {
vnet->stats.tx_packets++;
- vnet->stats.tx_bytes += skb->len;
+ vnet->stats.tx_bytes += len;
}
exit:
vnet->recursion--;
@@ -416,43 +220,48 @@
struct net_device *dev, unsigned short type,
void *daddr, void *saddr, unsigned len){
int err = 0;
- dprintf("> skb=%p ethhdr=%p dev=%s len=%u\n",
- skb, skb->mac.raw, dev->name, len);
- if(saddr){
- dprintf("> saddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)saddr));
- } else {
- dprintf("> saddr=NULL\n");
- }
- if(daddr){
- dprintf("> daddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)daddr));
- } else {
- dprintf("> daddr=NULL\n");
- }
+
err = eth_hard_header(skb, dev, type, daddr, saddr, len);
- dprintf("> eth_hard_header=%d\n", err);
+ if(err) goto exit;
skb->mac.raw = skb->data;
- dprintf("> src=" MACFMT " dst=" MACFMT "\n",
- MAC6TUPLE(skb->mac.ethernet->h_source),
- MAC6TUPLE(skb->mac.ethernet->h_dest));
- dprintf("< err=%d\n", err);
+ exit:
+ return err;
+}
+
+void vnet_default_mac(unsigned char *mac)
+{
+ static unsigned val = 1;
+ mac[0] = 0xAA;
+ mac[1] = 0xFF;
+ mac[2] = (unsigned char)((val >> 24) & 0xff);
+ mac[3] = (unsigned char)((val >> 16) & 0xff);
+ mac[4] = (unsigned char)((val >> 8) & 0xff);
+ mac[5] = (unsigned char)((val ) & 0xff);
+ val++;
+}
+
+int vnet_device_mac(const char *device, unsigned char *mac){
+ int err;
+ struct net_device *dev;
+
+ err = vnet_get_device(device, &dev);
+ if(err) goto exit;
+ memcpy(mac, dev->dev_addr, ETH_ALEN);
+ dev_put(dev);
+ exit:
return err;
}
void vnet_dev_mac(unsigned char *mac){
- static unsigned val = 1;
- struct net_device *dev;
-
- if(vnet_get_device(DEVICE, &dev)){
- mac[0] = 0xAA;
- mac[1] = 0xFF;
- mac[2] = (unsigned char)((val >> 24) & 0xff);
- mac[3] = (unsigned char)((val >> 16) & 0xff);
- mac[4] = (unsigned char)((val >> 8) & 0xff);
- mac[5] = (unsigned char)((val ) & 0xff);
- val++;
- } else {
- memcpy(mac, dev->dev_addr, ETH_ALEN);
- dev_put(dev);
+ const char *devices[] = { "eth0", "eth1", "eth2", NULL };
+ const char **pdev;
+ int err = -ENODEV;
+
+ for(pdev = devices; err && *pdev; pdev++){
+ err = vnet_device_mac(*pdev, mac);
+ }
+ if(err){
+ vnet_default_mac(mac);
}
}
@@ -463,7 +272,9 @@
dprintf(">\n");
ether_setup(dev);
- if(!eth_hard_header) eth_hard_header = dev->hard_header;
+ if(!eth_hard_header){
+ eth_hard_header = dev->hard_header;
+ }
dev->hard_header = vnet_dev_hard_header;
dev->open = vnet_dev_open;
@@ -507,7 +318,10 @@
if(vnet->dev) goto exit;
vnet->header_n = sizeof(struct iphdr) + sizeof(struct etheriphdr);
dev = kmalloc(sizeof(struct net_device), GFP_ATOMIC);
- if(!dev){ err = -ENOMEM; goto exit; }
+ if(!dev){
+ err = -ENOMEM;
+ goto exit;
+ }
*dev = (struct net_device){};
dev->priv = vnet;
vnet->dev = dev;
@@ -515,9 +329,10 @@
err = vnet_dev_set_name(dev);
if(err) goto exit;
vnet_dev_init(dev);
- dprintf("> name=%s, register_netdev...\n", dev->name);
err = register_netdev(dev);
- dprintf("> register_netdev=%d\n", err);
+ if(err){
+ wprintf("> register_netdev(%s) = %d\n", dev->name, err);
+ }
if(err) goto exit;
rtnl_lock();
dev_open(dev);
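
vnet_default_mac() above synthesises an AA:FF:... address when none of eth0..eth2 can be queried. That prefix is safe to put on a bridge because octet 0xAA has the group bit clear (unicast) and the locally-administered bit set, so it cannot clash with vendor-assigned hardware addresses. A small illustration of the relevant bit tests (not module code):

    /* 0xAA == 1010 1010b */
    static inline int mac_is_unicast(const unsigned char *mac)
    {
        return (mac[0] & 0x01) == 0;   /* group bit clear */
    }
    static inline int mac_is_locally_administered(const unsigned char *mac)
    {
        return (mac[0] & 0x02) != 0;   /* U/L bit set */
    }
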
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vnet_dev.h
--- a/tools/vnet/vnet-module/vnet_dev.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vnet_dev.h Fri Aug 26 20:47:16 2005
@@ -20,12 +20,9 @@
#define _VNET_VNET_DEV_H_
struct Vnet;
-struct net_device;
extern int vnet_dev_add(struct Vnet *vnet);
extern void vnet_dev_remove(struct Vnet *vnet);
extern int Vnet_create(struct Vnet *info);
-extern int vnet_add_if(struct Vnet *vnet, struct net_device *dev);
-extern int vnet_del_if(struct Vnet *vnet, struct net_device *dev);
#endif
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnet-module/vnet_ioctl.c
--- a/tools/vnet/vnet-module/vnet_ioctl.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnet-module/vnet_ioctl.c Fri Aug 26 20:47:16 2005
@@ -59,7 +59,7 @@
on the kernel interface being available to us (it's not exported @!$"%!).
Create a vnet N:
-- create the vnet device vnetifN: using commands to /proc, kernel api
+- create the vnet device vnifN: using commands to /proc, kernel api
- create the vnet bridge vnetN: using brctl in user-space
- for best results something should keep track of the mapping vnet id <-> bridge name
@@ -312,7 +312,6 @@
err = Parser_input(parser, NULL, 0);
if(err) goto exit;
obj = parser->val;
- objprint(iostdout, obj, 0); IOStream_print(iostdout, "\n");
for(l = obj; CONSP(l); l = CDR(l)){
err = eval(CAR(l));
if(err) break;
@@ -451,6 +450,7 @@
return err;
}
+#if 0
static int intof(Sxpr exp, int *v){
int err = 0;
char *s;
@@ -473,6 +473,24 @@
err = intof(val, v);
return err;
}
+#endif
+
+static int vnetof(Sxpr exp, VnetId *v){
+ int err = 0;
+ char *s;
+ err = stringof(exp, &s);
+ if(err) goto exit;
+ err = VnetId_aton(s, v);
+ exit:
+ return err;
+}
+
+static int child_vnet(Sxpr exp, Sxpr key, VnetId *v){
+ int err = 0;
+ Sxpr val = sxpr_child_value(exp, key, ONONE);
+ err = vnetof(val, v);
+ return err;
+}
static int macof(Sxpr exp, unsigned char *v){
int err = 0;
@@ -515,20 +533,27 @@
* It is an error if a vnet with the same id exists.
*
* @param vnet vnet id
+ * @param device vnet device name
* @param security security level
* @return 0 on success, error code otherwise
*/
-static int ctrl_vnet_add(int vnet, int security){
+static int ctrl_vnet_add(VnetId *vnet, char *device, int security){
int err = 0;
Vnet *vnetinfo = NULL;
+
+ if(strlen(device) >= IFNAMSIZ){
+ err = -EINVAL;
+ goto exit;
+ }
if(Vnet_lookup(vnet, &vnetinfo) == 0){
err = -EEXIST;
goto exit;
}
err = Vnet_alloc(&vnetinfo);
if(err) goto exit;
- vnetinfo->vnet = vnet;
+ vnetinfo->vnet = *vnet;
vnetinfo->security = security;
+ strcpy(vnetinfo->device, device);
err = Vnet_create(vnetinfo);
exit:
if(vnetinfo) Vnet_decref(vnetinfo);
@@ -540,9 +565,15 @@
* @param vnet vnet id
* @return 0 on success, error code otherwise
*/
-static int ctrl_vnet_del(int vnet){
+static int ctrl_vnet_del(VnetId *vnet){
int err = -ENOSYS;
// Can't delete if there are any vifs on the vnet.
+
+ // Need to flush vif entries for the deleted vnet.
+ // Need to flush varp entries for the deleted vnet.
+ // Note that (un)register_netdev() holds rtnl_lock() around
+ // (un)register_netdevice().
+
//Vnet_del(vnet);
return err;
}
@@ -553,7 +584,7 @@
* @param vmac mac address
* @return 0 on success, error code otherwise
*/
-static int ctrl_vif_add(int vnet, Vmac *vmac){
+static int ctrl_vif_add(VnetId *vnet, Vmac *vmac){
int err = 0;
Vnet *vnetinfo = NULL;
Vif *vif = NULL;
@@ -561,7 +592,7 @@
dprintf(">\n");
err = Vnet_lookup(vnet, &vnetinfo);
if(err) goto exit;
- err = vif_add(vnet, vmac, &vif);
+ err = vif_create(vnet, vmac, &vif);
exit:
if(vnetinfo) Vnet_decref(vnetinfo);
if(vif) vif_decref(vif);
@@ -569,46 +600,13 @@
return err;
}
-/** Add net device 'vifname' to the bridge for 'vnet' and
- * create an entry for a vif with the given vnet and vmac.
- * This is used when device 'vifname' is a virtual device
- * connected to a vif in a vm.
- *
- * @param vifname name of device to bridge
+/** Delete a vif.
+ *
* @param vnet vnet id
* @param vmac mac address
* @return 0 on success, error code otherwise
*/
-static int ctrl_vif_conn(char *vifname, int vnet, Vmac *vmac){
- int err = 0;
- Vnet *vnetinfo = NULL;
- struct net_device *vifdev = NULL;
- Vif *vif = NULL;
-
- dprintf("> %s\n", vifname);
- err = Vnet_lookup(vnet, &vnetinfo);
- if(err) goto exit;
- err = vif_add(vnet, vmac, &vif);
- if(err) goto exit;
- err = vnet_get_device(vifname, &vifdev);
- if(err) goto exit;
- vif->dev = vifdev;
- err = vnet_add_if(vnetinfo, vifdev);
- exit:
- if(vnetinfo) Vnet_decref(vnetinfo);
- if(vif) vif_decref(vif);
- if(vifdev) dev_put(vifdev);
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** Delete a vif.
- *
- * @param vnet vnet id
- * @param vmac mac address
- * @return 0 on success, error code otherwise
- */
-static int ctrl_vif_del(int vnet, Vmac *vmac){
+static int ctrl_vif_del(VnetId *vnet, Vmac *vmac){
int err = 0;
Vnet *vnetinfo = NULL;
Vif *vif = NULL;
@@ -618,10 +616,6 @@
if(err) goto exit;
err = vif_lookup(vnet, vmac, &vif);
if(err) goto exit;
- if(vif->dev){
- vnet_del_if(vnetinfo, vif->dev);
- vif->dev = NULL;
- }
vif_remove(vnet, vmac);
exit:
if(vnetinfo) Vnet_decref(vnetinfo);
@@ -652,21 +646,37 @@
return err;
}
-/** (vnet.add (id <id>) [(security { none | auth | conf } )] )
+/** (varp.flush)
+ */
+static int eval_varp_flush(Sxpr exp){
+ int err = 0;
+ varp_flush();
+ return err;
+}
+
+/** (vnet.add (id <id>)
+ * [(vnetif <name>)]
+ * [(security { none | auth | conf } )]
+ * )
*/
static int eval_vnet_add(Sxpr exp){
int err = 0;
Sxpr oid = intern("id");
Sxpr osecurity = intern("security");
+ Sxpr ovnetif = intern("vnetif");
Sxpr csecurity;
- int id;
- char *security;
+ VnetId vnet = {};
+ char *device = NULL;
+ char dev[IFNAMSIZ] = {};
+ char *security = NULL;
int sec;
- err = child_int(exp, oid, &id);
- if(err) goto exit;
- if(id < VNET_VIF){
- err = -EINVAL;
- goto exit;
+
+ err = child_vnet(exp, oid, &vnet);
+ if(err) goto exit;
+ child_string(exp, ovnetif, &device);
+ if(!device){
+ snprintf(dev, IFNAMSIZ-1, "vnif%04x", ntohs(vnet.u.vnet16[7]));
+ device = dev;
}
csecurity = sxpr_child_value(exp, osecurity, intern("none"));
err = stringof(csecurity, &security);
@@ -681,8 +691,7 @@
err = -EINVAL;
goto exit;
}
- dprintf("> vnet id=%d\n", id);
- err = ctrl_vnet_add(id, sec);
+ err = ctrl_vnet_add(&vnet, device, sec);
exit:
dprintf("< err=%d\n", err);
return err;
@@ -698,11 +707,11 @@
static int eval_vnet_del(Sxpr exp){
int err = 0;
Sxpr oid = intern("id");
- int id;
-
- err = child_int(exp, oid, &id);
- if(err) goto exit;
- err = ctrl_vnet_del(id);
+ VnetId vnet = {};
+
+ err = child_vnet(exp, oid, &vnet);
+ if(err) goto exit;
+ err = ctrl_vnet_del(&vnet);
exit:
return err;
}
@@ -713,55 +722,32 @@
int err = 0;
Sxpr ovnet = intern("vnet");
Sxpr ovmac = intern("vmac");
- int vnet;
+ VnetId vnet = {};
Vmac vmac = {};
- err = child_int(exp, ovnet, &vnet);
+ err = child_vnet(exp, ovnet, &vnet);
if(err) goto exit;
err = child_mac(exp, ovmac, vmac.mac);
if(err) goto exit;
- err = ctrl_vif_add(vnet, &vmac);
- exit:
- return err;
-}
-
-/** (vif.conn (vif <name>) (vnet <id>) (vmac <mac>))
- */
-static int eval_vif_conn(Sxpr exp){
- int err = 0;
- Sxpr ovif = intern("vif");
+ err = ctrl_vif_add(&vnet, &vmac);
+ exit:
+ return err;
+}
+
+/** (vif.del (vnet <vnet>) (vmac <macaddr>))
+ */
+static int eval_vif_del(Sxpr exp){
+ int err = 0;
Sxpr ovnet = intern("vnet");
Sxpr ovmac = intern("vmac");
- char *vif = NULL;
- int vnet = 0;
+ VnetId vnet = {};
Vmac vmac = {};
- err = child_string(exp, ovif, &vif);
- if(err) goto exit;
- err = child_int(exp, ovnet, &vnet);
+ err = child_vnet(exp, ovnet, &vnet);
if(err) goto exit;
err = child_mac(exp, ovmac, vmac.mac);
- dprintf("> connect vif=%s vnet=%d\n", vif, vnet);
- err = ctrl_vif_conn(vif, vnet, &vmac);
- exit:
- dprintf("< err=%d\n", err);
- return err;
-}
-
-/** (vif.del (vnet <vnet>) (vmac <macaddr>))
- */
-static int eval_vif_del(Sxpr exp){
- int err = 0;
- Sxpr ovnet = intern("vnet");
- Sxpr ovmac = intern("vmac");
- int vnet;
- Vmac vmac = {};
-
- err = child_int(exp, ovnet, &vnet);
- if(err) goto exit;
- err = child_mac(exp, ovmac, vmac.mac);
- if(err) goto exit;
- err = ctrl_vif_del(vnet, &vmac);
+ if(err) goto exit;
+ err = ctrl_vif_del(&vnet, &vmac);
exit:
return err;
}
@@ -776,23 +762,23 @@
SxprEval defs[] = {
{ intern("varp.print"), eval_varp_print },
{ intern("varp.mcaddr"), eval_varp_mcaddr },
+ { intern("varp.flush"), eval_varp_flush },
{ intern("vif.add"), eval_vif_add },
- { intern("vif.conn"), eval_vif_conn },
{ intern("vif.del"), eval_vif_del },
{ intern("vnet.add"), eval_vnet_add },
{ intern("vnet.del"), eval_vnet_del },
{ ONONE, NULL } };
SxprEval *def;
- dprintf(">\n");
- err = -EINVAL;
+ iprintf("> "); objprint(iostdout, exp, 0); IOStream_print(iostdout, "\n");
+ err = -ENOSYS;
for(def = defs; !NONEP(def->elt); def++){
if(sxpr_elementp(exp, def->elt)){
err = def->fn(exp);
break;
}
}
- dprintf("< err=%d\n", err);
+ iprintf("< err=%d\n", err);
return err;
}
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnetd/Makefile
--- a/tools/vnet/vnetd/Makefile Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnetd/Makefile Fri Aug 26 20:47:16 2005
@@ -16,32 +16,29 @@
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#----------------------------------------------------------------------------
+VNET_ROOT = $(shell cd .. && pwd)
+include $(VNET_ROOT)/Make.env
+
all: vnetd
#----------------------------------------------------------------------------
-XEN_ROOT = ../../..
include $(XEN_ROOT)/tools/Rules.mk
VNETD_INSTALL_DIR = /usr/sbin
-LIB_DIR = ../libxutil
-VNET_DIR = ../vnet-module
-
-INCLUDES += -I$(LIB_DIR)
-INCLUDES += -I$(VNET_DIR)
+INCLUDES += -I$(LIBXUTIL_DIR)
+INCLUDES += -I$(VNET_MODULE_DIR)
#----------------------------------------------------------------------------
# GC.
-GC_DIR:=../gc/install
-GC_INCLUDE:= $(GC_DIR)/include
-GC_LIB_DIR:=$(GC_DIR)/lib
INCLUDES += -I$(GC_INCLUDE)
#LIBS += -L$(GC_LIB_DIR)
CPPFLAGS += -D USE_GC
#----------------------------------------------------------------------------
+CFLAGS += -g
CFLAGS += -Wall
CFLAGS += $(INCLUDES) $(LIBS)
@@ -51,7 +48,7 @@
CFLAGS += -Wp,-MD,.$(@F).d
PROG_DEP = .*.d
-vpath %.c $(LIB_DIR)
+vpath %.c $(LIBXUTIL_DIR)
IPATHS:=$(INCLUDES:-I=)
vpath %.h $(IPATHS)
@@ -83,9 +80,9 @@
VNETD_OBJ := $(VNETD_SRC:.c=.o)
-#VNETD_LIBS:= $(GC_LIB_DIR)/libgc.so.1.0.2
+#VNETD_LIBS:= $(GC_LIB_SO)
#VNETD_LIBS:= -lgc
-VNETD_LIBS:= $(GC_LIB_DIR)/libgc.a
+VNETD_LIBS:= $(GC_LIB_A)
vnetd: $(VNETD_OBJ)
$(CC) $(CFLAGS) -o $@ $^ $(VNETD_LIBS) -ldl -lpthread
@@ -95,8 +92,8 @@
install -m 0755 vnetd $(DESTDIR)$(VNETD_INSTALL_DIR)
clean:
- -rm -f *.a *.o *~
- -rm -f vnetd
- -rm -f $(PROG_DEP)
+ -@$(RM) *.a *.o *~
+ -@$(RM) vnetd
+ -@$(RM) $(PROG_DEP)
-include $(PROG_DEP)
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnetd/vcache.c
--- a/tools/vnet/vnetd/vcache.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnetd/vcache.c Fri Aug 26 20:47:16 2005
@@ -44,6 +44,8 @@
#undef DEBUG
#include "debug.h"
+#include "varp_util.c"
+
static VarpCache *vcache = NULL;
void IPMessageQueue_init(IPMessageQueue *queue, int maxlen){
@@ -97,16 +99,20 @@
* @param vmac vmac (in network order)
* @return 0 on success, error code otherwise
*/
-int varp_send(Conn *conn, uint16_t opcode, uint32_t vnet, Vmac *vmac, uint32_t addr){
+int varp_send(Conn *conn, uint16_t opcode, VnetId *vnet, Vmac *vmac, VarpAddr *addr){
int err = 0;
int varp_n = sizeof(VarpHdr);
VarpHdr varph = {};
-
- varph.vnetmsghdr.id = htons(VARP_ID);
- varph.vnetmsghdr.opcode = htons(opcode);
- varph.vnet = vnet;
- varph.vmac = *vmac;
- varph.addr = addr;
+#ifdef DEBUG
+ char vnetbuf[VNET_ID_BUF];
+ char addrbuf[VARP_ADDR_BUF];
+#endif
+
+ varph.hdr.id = htons(VARP_ID);
+ varph.hdr.opcode = htons(opcode);
+ varph.vnet = *vnet;
+ varph.vmac = *vmac;
+ varph.addr = *addr;
if(0){
struct sockaddr_in self;
@@ -117,8 +123,10 @@
}
dprintf("> addr=%s opcode=%d\n",
inet_ntoa(conn->addr.sin_addr), opcode);
- dprintf("> vnet=%d vmac=" MACFMT " addr=" IPFMT "\n",
- ntohl(vnet), MAC6TUPLE(vmac->mac), NIPQUAD(addr));
+ dprintf("> vnet=%s vmac=" MACFMT " addr=%s\n",
+ VnetId_ntoa(vnet, vnetbuf),
+ MAC6TUPLE(vmac->mac),
+ VarpAddr_ntoa(addr, addrbuf));
err = marshal_bytes(conn->out, &varph, varp_n);
marshal_flush(conn->out);
dprintf("< err=%d\n", err);
@@ -157,21 +165,24 @@
*/
void VCEntry_print(VCEntry *ventry){
if(ventry){
- char *c, *d;
+ char *state, *flags;
+ char vnetbuf[VNET_ID_BUF];
+ char addrbuf[VARP_ADDR_BUF];
+
switch(ventry->state){
- case VCACHE_STATE_INCOMPLETE: c = "INC"; break;
- case VCACHE_STATE_REACHABLE: c = "RCH"; break;
- case VCACHE_STATE_FAILED: c = "FLD"; break;
- default: c = "UNK"; break;
- }
- d = (VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING) ? "P" : " ");
-
- printf("VENTRY(%p %s %s vnet=%d vmac=" MACFMT " addr=" IPFMT "
time=%g)\n",
+ case VCACHE_STATE_INCOMPLETE: state = "INC"; break;
+ case VCACHE_STATE_REACHABLE: state = "RCH"; break;
+ case VCACHE_STATE_FAILED: state = "FLD"; break;
+ default: state = "UNK"; break;
+ }
+ flags = (VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING) ? "P" : " ");
+
+ printf("VENTRY(%p %s %s vnet=%s vmac=" MACFMT " addr=%s time=%g)\n",
ventry,
- c, d,
- ntohl(ventry->key.vnet),
+ state, flags,
+ VnetId_ntoa(&ventry->key.vnet, vnetbuf),
MAC6TUPLE(ventry->key.vmac.mac),
- NIPQUAD(ventry->addr),
+ VarpAddr_ntoa(&ventry->addr, addrbuf),
ventry->timestamp);
} else {
printf("VENTRY: Null!\n");
@@ -239,11 +250,11 @@
* @param vmac virtual MAC address (copied)
* @return ventry or null
*/
-VCEntry * VCEntry_new(uint32_t vnet, Vmac *vmac){
+VCEntry * VCEntry_new(VnetId *vnet, Vmac *vmac){
VCEntry *z = ALLOCATE(VCEntry);
z->state = VCACHE_STATE_INCOMPLETE;
z->timestamp = time_now();
- z->key.vnet = vnet;
+ z->key.vnet = *vnet;
z->key.vmac = *vmac;
return z;
}
@@ -256,15 +267,9 @@
*/
Hashcode vcache_key_hash_fn(void *k){
VCKey *key = k;
- Hashcode h;
- h = hash_2ul(key->vnet,
- (key->vmac.mac[0] << 24) |
- (key->vmac.mac[1] << 16) |
- (key->vmac.mac[2] << 8) |
- (key->vmac.mac[3] ));
- h = hash_hul(h,
- (key->vmac.mac[4] << 8) |
- (key->vmac.mac[5] ));
+ Hashcode h = 0;
+ h = VnetId_hash(h, &key->vnet);
+ h = Vmac_hash(h, &key->vmac);
return h;
}
@@ -278,8 +283,8 @@
int vcache_key_equal_fn(void *k1, void *k2){
VCKey *key1 = k1;
VCKey *key2 = k2;
- return (key1->vnet == key2->vnet)
- && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0);
+ return (VnetId_eq(&key1->vnet , &key2->vnet) &&
+ Vmac_eq(&key1->vmac, &key2->vmac));
}
void VarpCache_schedule(VarpCache *z);
@@ -351,7 +356,7 @@
* @param vmac virtual MAC address (copied)
* @return new entry or null
*/
-VCEntry * VarpCache_add(VarpCache *z, uint32_t vnet, Vmac *vmac){
+VCEntry * VarpCache_add(VarpCache *z, VnetId *vnet, Vmac *vmac){
VCEntry *ventry;
HTEntry *entry;
@@ -378,8 +383,8 @@
* @param vmac virtual MAC addres
* @return entry found or null
*/
-VCEntry * VarpCache_lookup(VarpCache *z, uint32_t vnet, Vmac *vmac){
- VCKey key = { .vnet = vnet, .vmac = *vmac };
+VCEntry * VarpCache_lookup(VarpCache *z, VnetId *vnet, Vmac *vmac){
+ VCKey key = { .vnet = *vnet, .vmac = *vmac };
VCEntry *ventry;
ventry = HashTable_get(z->table, &key);
return ventry;
@@ -389,13 +394,15 @@
dprintf(">\n");
if(VCEntry_get_flags(ventry, VCACHE_FLAG_LOCAL_PROBE)){
dprintf("> local probe\n");
- varp_send(vnetd->bcast_conn, VARP_OP_REQUEST, ventry->key.vnet, &ventry->key.vmac, ventry->addr);
+ varp_send(vnetd->bcast_conn, VARP_OP_REQUEST,
+ &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
}
if(VCEntry_get_flags(ventry, VCACHE_FLAG_REMOTE_PROBE)){
ConnList *l;
dprintf("> remote probe\n");
for(l = vnetd->connections; l; l = l->next){
- varp_send(l->conn, VARP_OP_REQUEST, ventry->key.vnet, &ventry->key.vmac, ventry->addr);
+ varp_send(l->conn, VARP_OP_REQUEST,
+ &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
}
}
@@ -440,7 +447,8 @@
IPMessage *msg;
while((msg = IPMessageQueue_pop(&ventry->queue))){
dprintf("> announce\n");
- varp_send(msg->conn, VARP_OP_ANNOUNCE, ventry->key.vnet, &ventry->key.vmac, ventry->addr);
+ varp_send(msg->conn, VARP_OP_ANNOUNCE,
+ &ventry->key.vnet, &ventry->key.vmac, &ventry->addr);
}
}
exit:
@@ -459,7 +467,7 @@
VCEntry *ventry;
dprintf(">\n");
- ventry = VarpCache_lookup(z, varph->vnet, &varph->vmac);
+ ventry = VarpCache_lookup(z, &varph->vnet, &varph->vmac);
if(ventry){
err = VCEntry_update(ventry, msg, varph, state);
} else {
@@ -503,14 +511,14 @@
* @param local whether it's local or not
*/
void vcache_forward_varp(VarpHdr *varph, int local){
- uint16_t opcode = ntohs(varph->vnetmsghdr.opcode);
+ uint16_t opcode = ntohs(varph->hdr.opcode);
if(local){
ConnList *l;
for(l = vnetd->connections; l; l = l->next){
- varp_send(l->conn, opcode, varph->vnet, &varph->vmac, varph->addr);
+ varp_send(l->conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
}
} else {
- varp_send(vnetd->bcast_conn, opcode, varph->vnet, &varph->vmac, varph->addr);
+ varp_send(vnetd->bcast_conn, opcode, &varph->vnet, &varph->vmac, &varph->addr);
}
}
@@ -531,13 +539,13 @@
#else
int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){
int err = -ENOENT;
- uint32_t vnet;
+ VnetId *vnet;
Vmac *vmac;
VCEntry *ventry = NULL;
int reply = 0;
dprintf(">\n");
- vnet = htonl(varph->vnet);
+ vnet = &varph->vnet;
vmac = &varph->vmac;
ventry = VarpCache_lookup(vcache, vnet, vmac);
if(!ventry){
@@ -605,13 +613,18 @@
VarpHdr *varph = &vmsg->varp.varph;
dprintf(">\n");
- if(1){
+#ifdef DEBUG
+ {
+ char vnetbuf[VNET_ID_BUF];
dprintf("> src=%s:%d\n", inet_ntoa(msg->saddr.sin_addr),
ntohs(msg->saddr.sin_port));
dprintf("> dst=%s:%d\n", inet_ntoa(msg->daddr.sin_addr),
ntohs(msg->daddr.sin_port));
- dprintf("> opcode=%d vnet=%u vmac=" MACFMT "\n",
- ntohs(varph->opcode), ntohl(varph->vnet), MAC6TUPLE(varph->vmac.mac));
- }
- switch(ntohs(varph->vnetmsghdr.opcode)){
+ dprintf("> opcode=%d vnet=%s vmac=" MACFMT "\n",
+ ntohs(varph->opcode),
+ VnetId_ntoa(&varph->vnet, vnetbuf),
+ MAC6TUPLE(varph->vmac.mac));
+ }
+#endif
+ switch(ntohs(varph->hdr.opcode)){
case VARP_OP_REQUEST:
err = vcache_handle_request(msg, varph, local);
break;
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnetd/vcache.h
--- a/tools/vnet/vnetd/vcache.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnetd/vcache.h Fri Aug 26 20:47:16 2005
@@ -93,7 +93,7 @@
/** Key for varp cache entries. */
typedef struct VCKey {
/** Vnet id (network order). */
- uint32_t vnet;
+ VnetId vnet;
/** Virtual MAC address. */
Vmac vmac;
} VCKey;
@@ -103,7 +103,7 @@
VCKey key;
/** Care-of address for the key. */
- uint32_t addr;
+ VarpAddr addr;
/** Alias coa if we are a gateway. */
//uint32_t gateway;
@@ -111,7 +111,7 @@
//uint32_t encaps;
/** Where this entry came from. */
- uint32_t source;
+ VarpAddr source;
/** Last-updated timestamp. */
double timestamp;
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnetd/vnetd.c
--- a/tools/vnet/vnetd/vnetd.c Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnetd/vnetd.c Fri Aug 26 20:47:16 2005
@@ -112,7 +112,6 @@
#include <sys/wait.h>
#include <sys/select.h>
-//#include </usr/include/linux/ip.h> // For struct iphdr;
#include <linux/ip.h> // For struct iphdr;
#include <linux/if_ether.h>
@@ -492,22 +491,16 @@
dprintf("> addr=%s protocol=%d n=%d\n",
inet_ntoa(conn->addr.sin_addr), protocol, data_n);
string_stream_init(io, &sdata, buf, sizeof(buf));
- dprintf("> 10\n");
err = marshal_uint16(io, VNET_FWD_ID);
if(err < 0) goto exit;
- dprintf("> 20\n");
err = marshal_uint16(io, 0);
if(err < 0) goto exit;
- dprintf("> 30\n");
err = marshal_uint16(io, protocol);
if(err < 0) goto exit;
- dprintf("> 40\n");
err = marshal_uint16(io, data_n);
if(err < 0) goto exit;
- dprintf("> 50\n");
err = marshal_bytes(io, data, data_n);
if(err < 0) goto exit;
- dprintf("> 60 bytes=%d\n", IOStream_get_written(io));
err = IOStream_write(conn->out, buf, IOStream_get_written(io));
IOStream_flush(conn->out);
exit:
@@ -978,7 +971,7 @@
int err = 0;
uint32_t addr = INADDR_ANY;
uint16_t port = vnetd->port;
- int flags = VSOCK_BIND | VSOCK_REUSE;
+ int flags = (VSOCK_BIND | VSOCK_REUSE);
err = create_socket(SOCK_DGRAM, addr, port, flags, val);
return err;
}
@@ -1162,7 +1155,7 @@
err = vnetd_broadcast_conn(vnetd, &vnetd->bcast_conn);
if(err < 0) goto exit;
{
- int flags = VSOCK_BROADCAST | VSOCK_MULTICAST;
+ int flags = (VSOCK_BROADCAST | VSOCK_MULTICAST);
uint32_t mcaddr = vnetd->mcast_addr.sin_addr.s_addr;
err = vnetd_raw_socket(IPPROTO_ETHERIP, flags, mcaddr,
&vnetd->etherip_sock);
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/vnetd/vnetd.h
--- a/tools/vnet/vnetd/vnetd.h Thu Aug 25 20:52:38 2005
+++ b/tools/vnet/vnetd/vnetd.h Fri Aug 26 20:47:16 2005
@@ -20,6 +20,7 @@
#include <asm/types.h>
#include <linux/if_ether.h>
#include "if_varp.h"
+#include "varp_util.h"
#include "connection.h"
#include "sxpr.h"
diff -r de3576a1c62c -r dfaf788ab18c tools/xenstat/libxenstat/src/xen-interface.c
--- a/tools/xenstat/libxenstat/src/xen-interface.c Thu Aug 25 20:52:38 2005
+++ b/tools/xenstat/libxenstat/src/xen-interface.c Fri Aug 26 20:47:16 2005
@@ -59,14 +59,15 @@
}
/* Make simple xen version hypervisor calls */
-static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+static int xi_make_xen_version_hypercall(xi_handle *handle, long *vnum,
+ xen_extraversion_t *ver)
{
privcmd_hypercall_t privcmd;
multicall_entry_t multicall[2];
int ret = 0;
/* set up for doing hypercall */
- privcmd.op = __HYPERVISOR_multicall;
+ privcmd.op = __HYPERVISOR_multicall;
privcmd.arg[0] = (unsigned long)multicall;
privcmd.arg[1] = 2;
@@ -75,7 +76,7 @@
multicall[0].args[0] = (unsigned long)XENVER_version;
/* second to get xen version flag */
- multicall[1].op = __HYPERVISOR_xen_version;
+ multicall[1].op = __HYPERVISOR_xen_version;
multicall[1].args[0] = (unsigned long)XENVER_extraversion;
multicall[1].args[1] = (unsigned long)ver;
@@ -104,7 +105,8 @@
}
/* Make Xen Dom0 op hypervisor call */
-static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op, int dom_opcode)
+static int xi_make_dom0_op(xi_handle *handle, dom0_op_t *dom_op,
+ int dom_opcode)
{
privcmd_hypercall_t privcmd;
int ret = 0;
@@ -191,11 +193,10 @@
}
/* gets xen version information from hypervisor */
-int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
-{
-
- /* gets the XENVER_version and XENVER_extraversion */
- if (xi_make_xen_version_hypercall( handle, vnum, ver) < 0) {;
+int xi_get_xen_version(xi_handle *handle, long *vnum, xen_extraversion_t *ver)
+{
+ /* gets the XENVER_version and XENVER_extraversion */
+ if (xi_make_xen_version_hypercall( handle, vnum, ver) < 0) {
perror("XEN VERSION Hypercall failed");
return -1;
}
diff -r de3576a1c62c -r dfaf788ab18c tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c Thu Aug 25 20:52:38 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.c Fri Aug 26 20:47:16 2005
@@ -27,23 +27,24 @@
/*
* Types
*/
+#define SHORT_ASC_LEN 5 /* length of 65535 */
+#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1)
+
struct xenstat_handle {
xi_handle *xihandle;
int page_size;
FILE *procnetdev;
+ char xen_version[VERSION_SIZE]; /* xen version running on this node */
};
-#define SHORT_ASC_LEN 5 /* length of 65535 */
-#define VERSION_SIZE (2 * SHORT_ASC_LEN + 1 + sizeof(xen_extraversion_t) + 1)
-
struct xenstat_node {
+ xenstat_handle *handle;
unsigned int flags;
unsigned long long cpu_hz;
unsigned int num_cpus;
unsigned long long tot_mem;
unsigned long long free_mem;
unsigned int num_domains;
- char xen_version[VERSION_SIZE]; /* xen version running on this node */
xenstat_domain *domains; /* Array of length num_domains */
};
@@ -83,8 +84,7 @@
*/
/* Called to collect the information for the node and all the domains on
* it. When called, the domain information has already been collected. */
-typedef int (*xenstat_collect_func)(xenstat_handle * handle,
- xenstat_node * node);
+typedef int (*xenstat_collect_func)(xenstat_node * node);
/* Called to free the information collected by the collect function. The free
* function will only be called on a xenstat_node if that node includes
* information collected by the corresponding collector. */
@@ -101,20 +101,23 @@
xenstat_uninit_func uninit;
} xenstat_collector;
-static int xenstat_collect_vcpus(xenstat_handle * handle,
- xenstat_node * node);
-static int xenstat_collect_networks(xenstat_handle * handle,
- xenstat_node * node);
+static int xenstat_collect_vcpus(xenstat_node * node);
+static int xenstat_collect_networks(xenstat_node * node);
+static int xenstat_collect_xen_version(xenstat_node * node);
static void xenstat_free_vcpus(xenstat_node * node);
static void xenstat_free_networks(xenstat_node * node);
+static void xenstat_free_xen_version(xenstat_node * node);
static void xenstat_uninit_vcpus(xenstat_handle * handle);
static void xenstat_uninit_networks(xenstat_handle * handle);
+static void xenstat_uninit_xen_version(xenstat_handle * handle);
static xenstat_collector collectors[] = {
{ XENSTAT_VCPU, xenstat_collect_vcpus,
xenstat_free_vcpus, xenstat_uninit_vcpus },
{ XENSTAT_NETWORK, xenstat_collect_networks,
- xenstat_free_networks, xenstat_uninit_networks }
+ xenstat_free_networks, xenstat_uninit_networks },
+ { XENSTAT_XEN_VERSION, xenstat_collect_xen_version,
+ xenstat_free_xen_version, xenstat_uninit_xen_version }
};
#define NUM_COLLECTORS (sizeof(collectors)/sizeof(xenstat_collector))
@@ -169,8 +172,6 @@
#define DOMAIN_CHUNK_SIZE 256
xenstat_node *node;
dom0_physinfo_t physinfo;
- xen_extraversion_t version;
- long vnum = 0;
dom0_getdomaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
unsigned int num_domains, new_domains;
unsigned int i;
@@ -180,19 +181,14 @@
if (node == NULL)
return NULL;
+ /* Store the handle in the node for later access */
+ node->handle = handle;
+
/* Get information about the physical system */
if (xi_get_physinfo(handle->xihandle, &physinfo) < 0) {
free(node);
return NULL;
}
-
- /* Get the xen version number and xen version tag */
- if (xi_get_xen_version(handle->xihandle, &vnum, &version) < 0) {
- free(node);
- return NULL;
- }
- snprintf(node->xen_version, VERSION_SIZE,
- "%ld.%ld%s\n", ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF, (char
*)version);
node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
node->num_cpus =
@@ -259,7 +255,7 @@
for (i = 0; i < NUM_COLLECTORS; i++) {
if ((flags & collectors[i].flag) == collectors[i].flag) {
node->flags |= collectors[i].flag;
- if(collectors[i].collect(handle, node) == 0) {
+ if(collectors[i].collect(node) == 0) {
xenstat_free_node(node);
return NULL;
}
@@ -306,9 +302,9 @@
return NULL;
}
-const char *xenstat_node_xen_ver(xenstat_node * node)
-{
- return node->xen_version;
+const char *xenstat_node_xen_version(xenstat_node * node)
+{
+ return node->handle->xen_version;
}
unsigned long long xenstat_node_tot_mem(xenstat_node * node)
@@ -434,7 +430,7 @@
* VCPU functions
*/
/* Collect information about VCPUs */
-static int xenstat_collect_vcpus(xenstat_handle * handle, xenstat_node * node)
+static int xenstat_collect_vcpus(xenstat_node * node)
{
unsigned int i, vcpu;
/* Fill in VCPU information */
@@ -447,10 +443,9 @@
for (vcpu = 0; vcpu < node->domains[i].num_vcpus; vcpu++) {
/* FIXME: need to be using a more efficient mechanism*/
long long vcpu_time;
- vcpu_time =
- xi_get_vcpu_usage(handle->xihandle,
- node->domains[i].id,
- vcpu);
+ vcpu_time = xi_get_vcpu_usage(node->handle->xihandle,
+ node->domains[i].id,
+ vcpu);
if (vcpu_time < 0)
return 0;
node->domains[i].vcpus[vcpu].ns = vcpu_time;
@@ -490,40 +485,40 @@
"bytes packets errs drop fifo colls carrier compressed\n";
/* Collect information about networks */
-static int xenstat_collect_networks(xenstat_handle * handle,
- xenstat_node * node)
+static int xenstat_collect_networks(xenstat_node * node)
{
/* Open and validate /proc/net/dev if we haven't already */
- if (handle->procnetdev == NULL) {
+ if (node->handle->procnetdev == NULL) {
char header[sizeof(PROCNETDEV_HEADER)];
- handle->procnetdev = fopen("/proc/net/dev", "r");
- if (handle->procnetdev == NULL) {
+ node->handle->procnetdev = fopen("/proc/net/dev", "r");
+ if (node->handle->procnetdev == NULL) {
perror("Error opening /proc/net/dev");
- return 1;
+ return 0;
}
/* Validate the format of /proc/net/dev */
if (fread(header, sizeof(PROCNETDEV_HEADER) - 1, 1,
- handle->procnetdev) != 1) {
+ node->handle->procnetdev) != 1) {
perror("Error reading /proc/net/dev header");
- return 1;
+ return 0;
}
header[sizeof(PROCNETDEV_HEADER) - 1] = '\0';
if (strcmp(header, PROCNETDEV_HEADER) != 0) {
fprintf(stderr,
"Unexpected /proc/net/dev format\n");
- return 1;
+ return 0;
}
}
/* Fill in networks */
/* FIXME: optimize this */
- fseek(handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1, SEEK_SET);
+ fseek(node->handle->procnetdev, sizeof(PROCNETDEV_HEADER) - 1,
+ SEEK_SET);
while (1) {
xenstat_domain *domain;
xenstat_network net;
unsigned int domid;
- int ret = fscanf(handle->procnetdev,
+ int ret = fscanf(node->handle->procnetdev,
"vif%u.%u:%llu%llu%llu%llu%*u%*u%*u%*u"
"%llu%llu%llu%llu%*u%*u%*u%*u\n",
&domid, &net.id,
@@ -536,7 +531,7 @@
if (ret != 10) {
unsigned int c;
do {
- c = fgetc(handle->procnetdev);
+ c = fgetc(node->handle->procnetdev);
} while (c != '\n' && c != EOF);
if (c == EOF)
break;
@@ -563,7 +558,7 @@
sizeof(xenstat_network));
}
if (domain->networks == NULL)
- return 1;
+ return 0;
domain->networks[domain->num_networks - 1] = net;
}
@@ -638,3 +633,37 @@
{
return network->tdrop;
}
+
+/*
+ * Xen version functions
+ */
+
+/* Collect Xen version information */
+static int xenstat_collect_xen_version(xenstat_node * node)
+{
+ long vnum = 0;
+ xen_extraversion_t version;
+
+ /* Collect Xen version information if not already collected */
+ if (node->handle->xen_version[0] == '\0') {
+ /* Get the Xen version number and extraversion string */
+ if (xi_get_xen_version(node->handle->xihandle,
+ &vnum, &version) < 0)
+ return 0;
+ /* Format the version information as a string and store it */
+ snprintf(node->handle->xen_version, VERSION_SIZE, "%ld.%ld%s",
+ ((vnum >> 16) & 0xFFFF), vnum & 0xFFFF, version);
+ }
+
+ return 1;
+}
+
+/* Free Xen version information in node - nothing to do */
+static void xenstat_free_xen_version(xenstat_node * node)
+{
+}
+
+/* Free Xen version information in handle - nothing to do */
+static void xenstat_uninit_xen_version(xenstat_handle * handle)
+{
+}
diff -r de3576a1c62c -r dfaf788ab18c tools/xenstat/libxenstat/src/xenstat.h
--- a/tools/xenstat/libxenstat/src/xenstat.h Thu Aug 25 20:52:38 2005
+++ b/tools/xenstat/libxenstat/src/xenstat.h Fri Aug 26 20:47:16 2005
@@ -31,10 +31,13 @@
/* Release the handle to libxc, free resources, etc. */
void xenstat_uninit(xenstat_handle * handle);
-/* Get all available information about a node */
+/* Flags for types of information to collect in xenstat_get_node */
#define XENSTAT_VCPU 0x1
#define XENSTAT_NETWORK 0x2
-#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK)
+#define XENSTAT_XEN_VERSION 0x4
+#define XENSTAT_ALL (XENSTAT_VCPU|XENSTAT_NETWORK|XENSTAT_XEN_VERSION)
+
+/* Get all available information about a node */
xenstat_node *xenstat_get_node(xenstat_handle * handle, unsigned int flags);
/* Free the information */
@@ -51,8 +54,9 @@
/* Get the domain with the given index; used to loop over all domains. */
xenstat_domain *xenstat_node_domain_by_index(xenstat_node * node,
unsigned index);
+
/* Get xen version of the node */
-const char *xenstat_node_xen_ver(xenstat_node * node);
+const char *xenstat_node_xen_version(xenstat_node * node);
/* Get amount of total memory on a node */
unsigned long long xenstat_node_tot_mem(xenstat_node * node);
diff -r de3576a1c62c -r dfaf788ab18c tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c Thu Aug 25 20:52:38 2005
+++ b/tools/xenstat/xentop/xentop.c Fri Aug 26 20:47:16 2005
@@ -519,8 +519,8 @@
print("%4u", xenstat_domain_num_networks(domain));
}
-/* Compares number of total network tx bytes of two domains, returning -1,0,1 for
- * <,=,> */
+/* Compares number of total network tx bytes of two domains, returning -1,0,1
+ * for <,=,> */
static int compare_net_tx(xenstat_domain *domain1, xenstat_domain *domain2)
{
return -compare(tot_net_bytes(domain1, FALSE),
@@ -533,8 +533,8 @@
print("%8llu", tot_net_bytes(domain, FALSE)/1024);
}
-/* Compares number of total network rx bytes of two domains, returning -1,0,1 for
- * <,=,> */
+/* Compares number of total network rx bytes of two domains, returning -1,0,1
+ * for <,=,> */
static int compare_net_rx(xenstat_domain *domain1, xenstat_domain *domain2)
{
return -compare(tot_net_bytes(domain1, TRUE),
@@ -555,7 +555,7 @@
int i = 0;
xenstat_network *network;
unsigned num_networks = 0;
- unsigned long long total = 0;
+ unsigned long long total = 0;
/* How many networks? */
num_networks = xenstat_domain_num_networks(domain);
@@ -564,12 +564,13 @@
for (i=0; i < num_networks; i++) {
/* Next get the network information */
network = xenstat_domain_network(domain,i);
- if (rx_flag)
+ if (rx_flag)
total += xenstat_network_rbytes(network);
- else
+ else
total += xenstat_network_tbytes(network);
}
- return (total);
+
+ return total;
}
/* Compares security id (ssid) of two domains, returning -1,0,1 for <,=,> */
@@ -592,6 +593,7 @@
#define TIME_STR_LEN 9
const char *TIME_STR_FORMAT = "%H:%M:%S";
char time_str[TIME_STR_LEN];
+ const char *ver_str;
unsigned run = 0, block = 0, pause = 0,
crash = 0, dying = 0, shutdown = 0;
unsigned i, num_domains = 0;
@@ -602,7 +604,8 @@
strftime(time_str, TIME_STR_LEN, TIME_STR_FORMAT,
localtime(&curtime.tv_sec));
num_domains = xenstat_node_num_domains(cur_node);
- print("xentop - %s\n", time_str);
+ ver_str = xenstat_node_xen_version(cur_node);
+ print("xentop - %s Xen %s\n", time_str, ver_str);
/* Tabulate what states domains are in for summary */
for (i=0; i < num_domains; i++) {
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/Makefile Fri Aug 26 20:47:16 2005
@@ -14,8 +14,11 @@
irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \
grant_table.o sn_console.o
+# TMP holder to contain *.0 moved out of CONFIG_VTI
+OBJS += vmx_init.o
+
ifeq ($(CONFIG_VTI),y)
-OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o \
+OBJS += vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o\
vmx_phy_mode.o vmx_utility.o vmx_interrupt.o vmx_entry.o vmmu.o \
vtlb.o mmio.o vlsapic.o vmx_hypercall.o mm.o vmx_support.o pal_emul.o
endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/asm-offsets.c Fri Aug 26 20:47:16 2005
@@ -296,4 +296,11 @@
//DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
//DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
//DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+ DEFINE(IA64_KR_CURRENT_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_CURRENT]));
+ DEFINE(IA64_KR_PT_BASE_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_PT_BASE]));
+ DEFINE(IA64_KR_IO_BASE_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_IO_BASE]));
+ DEFINE(IA64_KR_PERCPU_DATA_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_PER_CPU_DATA]));
+ DEFINE(IA64_KR_IO_BASE_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_IO_BASE]));
+ DEFINE(IA64_KR_CURRENT_STACK_OFFSET, offsetof (cpu_kr_ia64_t, _kr[IA64_KR_CURRENT_STACK]));
+
}
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/domain.c
--- a/xen/arch/ia64/domain.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/domain.c Fri Aug 26 20:47:16 2005
@@ -38,25 +38,17 @@
#include <asm/vcpu.h> /* for function declarations */
#include <public/arch-ia64.h>
-#ifdef CONFIG_VTI
#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vmx_vpd.h>
#include <asm/pal.h>
#include <public/io/ioreq.h>
-#endif // CONFIG_VTI
#define CONFIG_DOMAIN0_CONTIGUOUS
unsigned long dom0_start = -1L;
-#ifdef CONFIG_VTI
unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
unsigned long dom0_align = 256*1024*1024;
-#else // CONFIG_VTI
-unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
-//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
-unsigned long dom0_align = 64*1024*1024;
-#endif // CONFIG_VTI
#ifdef DOMU_BUILD_STAGING
unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
unsigned long domU_staging_start;
@@ -187,60 +179,6 @@
memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96);
}
-#ifdef CONFIG_VTI
-void arch_do_createdomain(struct vcpu *v)
-{
- struct domain *d = v->domain;
- struct thread_info *ti = alloc_thread_info(v);
-
- /* Clear thread_info to clear some important fields, like preempt_count */
- memset(ti, 0, sizeof(struct thread_info));
- init_switch_stack(v);
-
- /* Shared info area is required to be allocated at domain
- * creation, since control panel will write some I/O info
- * between front end and back end to that area. However for
- * vmx domain, our design is to let domain itself to allcoate
- * shared info area, to keep machine page contiguous. So this
- * page will be released later when domainN issues request
- * after up.
- */
- d->shared_info = (void *)alloc_xenheap_page();
- /* Now assume all vcpu info and event indicators can be
- * held in one shared page. Definitely later we need to
- * consider more about it
- */
-
- memset(d->shared_info, 0, PAGE_SIZE);
- d->shared_info->vcpu_data[v->vcpu_id].arch.privregs =
- alloc_xenheap_pages(get_order(sizeof(mapped_regs_t)));
- printf("arch_vcpu_info=%p\n",
d->shared_info->vcpu_data[0].arch.privregs);
- memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0,
PAGE_SIZE);
- v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
- /* Mask all events, and specific port will be unmasked
- * when customer subscribes to it.
- */
- if(v == d->vcpu[0]) {
- memset(&d->shared_info->evtchn_mask[0], 0xff,
- sizeof(d->shared_info->evtchn_mask));
- }
-
- /* Allocate per-domain vTLB and vhpt */
- v->arch.vtlb = init_domain_tlb(v);
-
- /* Physical->machine page table will be allocated when
- * final setup, since we have no the maximum pfn number in
- * this stage
- */
-
- /* FIXME: This is identity mapped address for xenheap.
- * Do we need it at all?
- */
- d->xen_vastart = XEN_START_ADDR;
- d->xen_vaend = XEN_END_ADDR;
- d->arch.breakimm = 0x1000;
-}
-#else // CONFIG_VTI
void arch_do_createdomain(struct vcpu *v)
{
struct domain *d = v->domain;
@@ -263,11 +201,26 @@
v->vcpu_info = &(d->shared_info->vcpu_data[0]);
d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME
- if ((d->arch.metaphysical_rr0 = allocate_metaphysical_rr0()) == -1UL)
+
+#ifdef CONFIG_VTI
+ /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick
+ * to this solution. Maybe it can be deferred until we know created
+ * one as vmx domain */
+ v->arch.vtlb = init_domain_tlb(v);
+#endif
+
+ /* We may also need emulation rid for region4, though it's unlikely
+ * to see guest issue uncacheable access in metaphysical mode. But
+ * keep such info here may be more sane.
+ */
+ if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL)
+ || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL))
BUG();
VCPU(v, metaphysical_mode) = 1;
v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0;
+ v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4;
v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0;
+ v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4;
#define DOMAIN_RID_BITS_DEFAULT 18
if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME
BUG();
@@ -292,7 +245,6 @@
return -ENOMEM;
}
}
-#endif // CONFIG_VTI
void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c)
{
@@ -312,16 +264,28 @@
c->shared = v->domain->shared_info->arch;
}
-#ifndef CONFIG_VTI
int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c)
{
struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
+ struct domain *d = v->domain;
+ int i, rc, ret;
+ unsigned long progress = 0;
printf("arch_set_info_guest\n");
+ if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+ return 0;
+
+ if (c->flags & VGCF_VMX_GUEST) {
+ if (!vmx_enabled) {
+ printk("No VMX hardware feature for vmx domain.\n");
+ return -EINVAL;
+ }
+
+ vmx_setup_platform(v, c);
+ }
+
*regs = c->regs;
- regs->cr_ipsr = IA64_PSR_IT|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IC|IA64_PSR_I|IA64_PSR_DFH|IA64_PSR_BN|IA64_PSR_SP|IA64_PSR_DI;
- regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT;
- regs->ar_rsc |= (2 << 2); /* force PL2/3 */
+ new_thread(v, regs->cr_iip, 0, 0);
v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector;
if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs,
@@ -330,100 +294,13 @@
return -EFAULT;
}
- init_all_rr(v);
-
- // this should be in userspace
- regs->r28 = dom_fw_setup(v->domain,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME
v->arch.domain_itm_last = -1L;
- VCPU(v, banknum) = 1;
- VCPU(v, metaphysical_mode) = 1;
-
- v->domain->shared_info->arch = c->shared;
+ d->shared_info->arch = c->shared;
+
+ /* Don't redo final setup */
+ set_bit(_VCPUF_initialised, &v->vcpu_flags);
return 0;
}
-#else // CONFIG_VTI
-int arch_set_info_guest(
- struct vcpu *v, struct vcpu_guest_context *c)
-{
- struct domain *d = v->domain;
- int i, rc, ret;
- unsigned long progress = 0;
- shared_iopage_t *sp;
-
- if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
- return 0;
-
- /* Lazy FP not implemented yet */
- clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
- if ( c->flags & VGCF_FPU_VALID )
- set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
-
- /* Sync d/i cache conservatively, after domain N is loaded */
- ret = ia64_pal_cache_flush(3, 0, &progress, NULL);
- if (ret != PAL_STATUS_SUCCESS)
- panic("PAL CACHE FLUSH failed for dom[%d].\n",
- v->domain->domain_id);
- DPRINTK("Sync i/d cache for dom%d image SUCC\n",
- v->domain->domain_id);
-
- /* Physical mode emulation initialization, including
- * emulation ID allcation and related memory request
- */
- physical_mode_init(v);
-
- /* FIXME: only support PMT table continuously by far */
- d->arch.pmt = __va(c->pt_base);
- d->arch.max_pfn = c->pt_max_pfn;
- d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
- sp = get_sp(d);
- memset((char *)sp,0,PAGE_SIZE);
- /* FIXME: temp due to old CP */
- sp->sp_global.eport = 2;
-#ifdef V_IOSAPIC_READY
- sp->vcpu_number = 1;
-#endif
- /* TEMP */
- d->arch.vmx_platform.pib_base = 0xfee00000UL;
-
-
- if (c->flags & VGCF_VMX_GUEST) {
- if (!vmx_enabled)
- panic("No VMX hardware feature for vmx domain.\n");
-
- vmx_final_setup_domain(d);
-
- /* One more step to enable interrupt assist */
- set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
- }
-
- vlsapic_reset(v);
- vtm_init(v);
-
- /* Only open one port for I/O and interrupt emulation */
- if (v == d->vcpu[0]) {
- memset(&d->shared_info->evtchn_mask[0], 0xff,
- sizeof(d->shared_info->evtchn_mask));
- clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
- }
- /* Setup domain context. Actually IA-64 is a bit different with
- * x86, with almost all system resources better managed by HV
- * directly. CP only needs to provide start IP of guest, which
- * ideally is the load address of guest Firmware.
- */
- new_thread(v, c->guest_iip, 0, 0);
-
-
- d->xen_vastart = XEN_START_ADDR;
- d->xen_vaend = XEN_END_ADDR;
- d->arch.breakimm = 0x1000 + d->domain_id;
- v->arch._thread.on_ustack = 0;
-
- /* Don't redo final setup */
- set_bit(_VCPUF_initialised, &v->vcpu_flags);
-
- return 0;
-}
-#endif // CONFIG_VTI
void arch_do_boot_vcpu(struct vcpu *v)
{
@@ -443,7 +320,8 @@
printf("domain_relinquish_resources: not implemented\n");
}
-#ifdef CONFIG_VTI
+// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
+// and linux/arch/ia64/kernel/process.c:kernel_thread()
void new_thread(struct vcpu *v,
unsigned long start_pc,
unsigned long start_stack,
@@ -453,7 +331,6 @@
struct pt_regs *regs;
struct ia64_boot_param *bp;
extern char saved_command_line[];
- //char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
@@ -471,61 +348,31 @@
regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
}
regs->cr_iip = start_pc;
- regs->cr_ifs = 0; /* why? - matthewc */
+ regs->cr_ifs = 1UL << 63; /* or clear? */
regs->ar_fpsr = FPSR_DEFAULT;
+
if (VMX_DOMAIN(v)) {
+#ifdef CONFIG_VTI
vmx_init_all_rr(v);
- } else
- init_all_rr(v);
-
- if (VMX_DOMAIN(v)) {
- if (d == dom0) {
+ if (d == dom0)
VMX_VPD(v,vgr[12]) =
dom_fw_setup(d,saved_command_line,256L);
- printk("new_thread, done with dom_fw_setup\n");
- }
/* Virtual processor context setup */
VMX_VPD(v, vpsr) = IA64_PSR_BN;
VPD_CR(v, dcr) = 0;
+#endif
} else {
- regs->r28 = dom_fw_setup(d,saved_command_line,256L);
+ init_all_rr(v);
+ if (d == dom0)
+ regs->r28 = dom_fw_setup(d,saved_command_line,256L);
+ else {
+ regs->ar_rsc |= (2 << 2); /* force PL2/3 */
+ regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME
+ }
VCPU(v, banknum) = 1;
VCPU(v, metaphysical_mode) = 1;
d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
}
}
-#else // CONFIG_VTI
-
-// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
-// and linux/arch/ia64/kernel/process.c:kernel_thread()
-void new_thread(struct vcpu *v,
- unsigned long start_pc,
- unsigned long start_stack,
- unsigned long start_info)
-{
- struct domain *d = v->domain;
- struct pt_regs *regs;
- struct ia64_boot_param *bp;
- extern char saved_command_line[];
-
-#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (d == dom0) start_pc += dom0_start;
-#endif
-
- regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
- regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
- | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
- & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
- regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
- regs->cr_iip = start_pc;
- regs->cr_ifs = 1UL << 63;
- regs->ar_fpsr = FPSR_DEFAULT;
- init_all_rr(v);
- regs->r28 = dom_fw_setup(d,saved_command_line,256L); //FIXME
- VCPU(v, banknum) = 1;
- VCPU(v, metaphysical_mode) = 1;
- d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0;
-}
-#endif // CONFIG_VTI
static struct page * map_new_domain0_page(unsigned long mpaddr)
{
@@ -903,44 +750,6 @@
}
#endif
-#ifdef CONFIG_VTI
-/* Up to whether domain is vmx one, different context may be setup
- * here.
- */
-void
-post_arch_do_create_domain(struct vcpu *v, int vmx_domain)
-{
- struct domain *d = v->domain;
-
- if (!vmx_domain) {
- d->shared_info = (void*)alloc_xenheap_page();
- if (!d->shared_info)
- panic("Allocate share info for non-vmx domain failed.\n");
- d->shared_info_va = 0xfffd000000000000;
-
- printk("Build shared info for non-vmx domain\n");
- build_shared_info(d);
- /* Setup start info area */
- }
-}
-
-/* For VMX domain, this is invoked when kernel model in domain
- * request actively
- */
-void build_shared_info(struct domain *d)
-{
- int i;
-
- /* Set up shared-info area. */
- update_dom_time(d);
-
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
-
- /* ... */
-}
-
/*
* Domain 0 has direct access to all devices absolutely. However
* the major point of this stub here, is to allow alloc_dom_mem
@@ -959,182 +768,12 @@
unsigned long initrd_start, unsigned long initrd_len,
char *cmdline)
{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pt_pages;
- unsigned long count;
- unsigned long alloc_start, alloc_end;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct vcpu *v = d->vcpu[0];
- struct domain_setup_info dsi;
- unsigned long p_start;
- unsigned long pkern_start;
- unsigned long pkern_entry;
- unsigned long pkern_end;
- unsigned long ret;
- unsigned long progress = 0;
-
-//printf("construct_dom0: starting\n");
- /* Sanity! */
-#ifndef CLONE_DOMAIN0
- if ( d != dom0 )
- BUG();
- if ( test_bit(_DOMF_constructed, &d->domain_flags) )
- BUG();
-#endif
-
- printk("##Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- alloc_start = dom0_start;
- alloc_end = dom0_start + dom0_size;
- d->tot_pages = d->max_pages = (alloc_end - alloc_start)/PAGE_SIZE;
- image_start = __va(ia64_boot_param->initrd_start);
- image_len = ia64_boot_param->initrd_size;
-
- dsi.image_addr = (unsigned long)image_start;
- dsi.image_len = image_len;
- rc = parseelfimage(&dsi);
- if ( rc != 0 )
- return rc;
-
- /* Temp workaround */
- if (running_on_sim)
- dsi.xen_section_string = (char *)1;
-
- if ((!vmx_enabled) && !dsi.xen_section_string) {
- printk("Lack of hardware support for unmodified vmx dom0\n");
- panic("");
- }
-
- if (vmx_enabled && !dsi.xen_section_string) {
- printk("Dom0 is vmx domain!\n");
- vmx_dom0 = 1;
- }
-
- p_start = dsi.v_start;
- pkern_start = dsi.v_kernstart;
- pkern_end = dsi.v_kernend;
- pkern_entry = dsi.v_kernentry;
-
- printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",
- p_start,pkern_start,pkern_end,pkern_entry);
-
- if ( (p_start & (PAGE_SIZE-1)) != 0 )
- {
- printk("Initial guest OS must load to a page boundary.\n");
- return -EINVAL;
- }
-
- printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %lx->%lx\n"
- " Entry address: %lx\n"
- " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
- pkern_start, pkern_end, pkern_entry);
-
- if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
-
- // Other sanity check about Dom0 image
-
- /* Construct a frame-allocation list for the initial domain, since these
- * pages are allocated by boot allocator and pfns are not set properly
- */
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page_set_owner(page, d);
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &d->page_list);
-
- /* Construct 1:1 mapping */
- machine_to_phys_mapping[mfn] = mfn;
- }
-
- post_arch_do_create_domain(v, vmx_dom0);
-
- /* Load Dom0 image to its own memory */
- loaddomainelfimage(d,image_start);
-
- /* Copy the initial ramdisk. */
-
- /* Sync d/i cache conservatively */
- ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
- if (ret != PAL_STATUS_SUCCESS)
- panic("PAL CACHE FLUSH failed for dom0.\n");
- printk("Sync i/d cache for dom0 image SUCC\n");
-
- /* Physical mode emulation initialization, including
- * emulation ID allcation and related memory request
- */
- physical_mode_init(v);
- /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
- * for dom0
- */
- d->arch.pmt = NULL;
-
- /* Give up the VGA console if DOM0 is configured to grab it. */
- if (cmdline != NULL)
- console_endboot(strstr(cmdline, "tty0") != NULL);
-
- /* VMX specific construction for Dom0, if hardware supports VMX
- * and Dom0 is unmodified image
- */
- printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
- if (vmx_dom0)
- vmx_final_setup_domain(dom0);
-
- /* vpd is ready now */
- vlsapic_reset(v);
- vtm_init(v);
-
- set_bit(_DOMF_constructed, &d->domain_flags);
- new_thread(v, pkern_entry, 0, 0);
-
- physdev_init_dom0(d);
- // FIXME: Hack for keyboard input
-#ifdef CLONE_DOMAIN0
-if (d == dom0)
-#endif
- serial_input_init();
- if (d == dom0) {
- VCPU(v, delivery_mask[0]) = -1L;
- VCPU(v, delivery_mask[1]) = -1L;
- VCPU(v, delivery_mask[2]) = -1L;
- VCPU(v, delivery_mask[3]) = -1L;
- }
- else __set_bit(0x30,VCPU(v, delivery_mask));
-
- return 0;
-}
-
-
-#else //CONFIG_VTI
-
-int construct_dom0(struct domain *d,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
char *dst;
int i, rc;
unsigned long pfn, mfn;
unsigned long nr_pt_pages;
unsigned long count;
- //l2_pgentry_t *l2tab, *l2start;
- //l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ unsigned long alloc_start, alloc_end;
struct pfn_info *page = NULL;
start_info_t *si;
struct vcpu *v = d->vcpu[0];
@@ -1144,6 +783,7 @@
unsigned long pkern_start;
unsigned long pkern_entry;
unsigned long pkern_end;
+ unsigned long ret, progress = 0;
//printf("construct_dom0: starting\n");
/* Sanity! */
@@ -1158,7 +798,9 @@
printk("*** LOADING DOMAIN 0 ***\n");
- d->max_pages = dom0_size/PAGE_SIZE;
+ alloc_start = dom0_start;
+ alloc_end = dom0_start + dom0_size;
+ d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE;
image_start = __va(ia64_boot_param->initrd_start);
image_len = ia64_boot_param->initrd_size;
//printk("image_start=%lx, image_len=%lx\n",image_start,image_len);
@@ -1171,6 +813,23 @@
if ( rc != 0 )
return rc;
+#ifdef CONFIG_VTI
+ /* Temp workaround */
+ if (running_on_sim)
+ dsi.xen_section_string = (char *)1;
+
+ /* Check whether dom0 is vti domain */
+ if ((!vmx_enabled) && !dsi.xen_section_string) {
+ printk("Lack of hardware support for unmodified vmx dom0\n");
+ panic("");
+ }
+
+ if (vmx_enabled && !dsi.xen_section_string) {
+ printk("Dom0 is vmx domain!\n");
+ vmx_dom0 = 1;
+ }
+#endif
+
p_start = dsi.v_start;
pkern_start = dsi.v_kernstart;
pkern_end = dsi.v_kernend;
@@ -1214,13 +873,42 @@
for ( i = 0; i < MAX_VIRT_CPUS; i++ )
d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+#ifdef CONFIG_VTI
+ /* Construct a frame-allocation list for the initial domain, since these
+ * pages are allocated by boot allocator and pfns are not set properly
+ */
+ for ( mfn = (alloc_start>>PAGE_SHIFT);
+ mfn < (alloc_end>>PAGE_SHIFT);
+ mfn++ )
+ {
+ page = &frame_table[mfn];
+ page_set_owner(page, d);
+ page->u.inuse.type_info = 0;
+ page->count_info = PGC_allocated | 1;
+ list_add_tail(&page->list, &d->page_list);
+
+ /* Construct 1:1 mapping */
+ machine_to_phys_mapping[mfn] = mfn;
+ }
+
+ /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
+ * for dom0
+ */
+ d->arch.pmt = NULL;
+#endif
+
/* Copy the OS image. */
- //(void)loadelfimage(image_start);
loaddomainelfimage(d,image_start);
/* Copy the initial ramdisk. */
//if ( initrd_len != 0 )
// memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+
+ /* Sync d/i cache conservatively */
+ ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+ if (ret != PAL_STATUS_SUCCESS)
+ panic("PAL CACHE FLUSH failed for dom0.\n");
+ printk("Sync i/d cache for dom0 image SUCC\n");
#if 0
/* Set up start info area. */
@@ -1257,14 +945,21 @@
#endif
/* Give up the VGA console if DOM0 is configured to grab it. */
-#ifdef IA64
if (cmdline != NULL)
-#endif
- console_endboot(strstr(cmdline, "tty0") != NULL);
+ console_endboot(strstr(cmdline, "tty0") != NULL);
+
+ /* VMX specific construction for Dom0, if hardware supports VMX
+ * and Dom0 is unmodified image
+ */
+ printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
+ if (vmx_dom0)
+ vmx_final_setup_domain(dom0);
set_bit(_DOMF_constructed, &d->domain_flags);
new_thread(v, pkern_entry, 0, 0);
+ physdev_init_dom0(d);
+
// FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
if (d == dom0)
@@ -1280,7 +975,6 @@
return 0;
}
-#endif // CONFIG_VTI
// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
int construct_domU(struct domain *d,
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/hyperprivop.S Fri Aug 26 20:47:16 2005
@@ -73,7 +73,8 @@
ld4 r20=[r20] ;;
cmp.eq p7,p0=r0,r20
(p7) br.cond.sptk.many 1f
- mov r20=IA64_KR(CURRENT);;
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
adds r21=IA64_VCPU_IRR0_OFFSET,r20;
adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;;
ld8 r23=[r21],16; ld8 r24=[r22],16;;
@@ -257,7 +258,8 @@
st8 [r21]=r20 ;;
// leave cr.ifs alone for later rfi
// set iip to go to domain IVA break instruction vector
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
adds r22=IA64_VCPU_IVA_OFFSET,r22;;
ld8 r23=[r22];;
movl r24=0x3000;;
@@ -306,7 +308,7 @@
mov r28=IA64_TIMER_VECTOR;;
cmp.ne p6,p0=r28,r30
(p6) br.cond.spnt.few rp;;
- movl r20=(PERCPU_ADDR)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+ movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
ld8 r26=[r20];;
mov r27=ar.itc;;
adds r27=200,r27;; // safety margin
@@ -340,7 +342,8 @@
(p6) br.cond.spnt.few fast_tick_reflect_done;;
extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector
extr.u r26=r20,6,2;; // r26 has irr index of itv.vector
- mov r19=IA64_KR(CURRENT);;
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r19=[r19];;
adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19
adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;;
ld8 r24=[r22];;
@@ -581,7 +584,8 @@
st8 [r18]=r0;;
// FIXME: need to save iipa and isr to be arch-compliant
// set iip to go to domain IVA break instruction vector
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
adds r22=IA64_VCPU_IVA_OFFSET,r22;;
ld8 r23=[r22];;
add r20=r20,r23;;
@@ -803,7 +807,8 @@
// r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
// make sure none of these get trashed in case going to just_do_rfi
- mov r30=IA64_KR(CURRENT);;
+ movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r30=[r30];;
adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
mov r25=192
adds r16=IA64_VCPU_IRR3_OFFSET,r30;;
@@ -1010,7 +1015,8 @@
ld4 r21=[r20];;
cmp.eq p7,p0=r21,r0 // meta==0?
(p7) br.spnt.many 1f ;; // already in virtual mode
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;;
ld4 r23=[r22];;
mov rr[r0]=r23;;
@@ -1045,7 +1051,8 @@
ld4 r21=[r20];;
cmp.ne p7,p0=r21,r0 // meta==0?
(p7) br.spnt.many 1f ;; // already in metaphysical mode
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
adds r22=IA64_VCPU_META_RR0_OFFSET,r22;;
ld4 r23=[r22];;
mov rr[r0]=r23;;
@@ -1137,7 +1144,8 @@
(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;;
(p7) st4 [r20]=r0;;
(p7) br.spnt.many 1f ;;
- mov r30=IA64_KR(CURRENT);;
+ movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r30=[r30];;
adds r24=IA64_VCPU_INSVC3_OFFSET,r30;;
mov r25=192
adds r22=IA64_VCPU_IRR3_OFFSET,r30;;
@@ -1242,7 +1250,8 @@
adds r21=1,r21;;
st8 [r20]=r21;;
#endif
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22=[r22];;
adds r22=IA64_VCPU_INSVC3_OFFSET,r22;;
ld8 r23=[r22];;
cmp.eq p6,p0=r23,r0;;
@@ -1305,9 +1314,10 @@
adds r21=1,r21;;
st8 [r20]=r21;;
#endif
- movl r20=(PERCPU_ADDR)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
+ movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;;
ld8 r21=[r20];;
- mov r20=IA64_KR(CURRENT);;
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;;
st8 [r20]=r8;;
cmp.geu p6,p0=r21,r8;;
@@ -1378,7 +1388,8 @@
st8 [r20]=r21;;
#endif
extr.u r26=r9,8,24 // r26 = r9.rid
- mov r20=IA64_KR(CURRENT);;
+ movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r20=[r20];;
adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;;
ld4 r22=[r21];;
adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;;
@@ -1544,7 +1555,8 @@
mov ar.lc=r30 ;;
mov r29=cr.ipsr
mov r30=cr.iip;;
- mov r27=IA64_KR(CURRENT);;
+ movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r27=[r27];;
adds r25=IA64_VCPU_DTLB_OFFSET,r27
adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
ld8 r24=[r25]
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/ivt.S Fri Aug 26 20:47:16 2005
@@ -136,7 +136,11 @@
;;
rsm psr.dt // use physical addressing for data
mov r31=pr // save the predicate registers
+#ifdef XEN
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
mov r19=IA64_KR(PT_BASE) // get page table base address
+#endif
shl r21=r16,3 // shift bit 60 into sign bit
shr.u r17=r16,61 // get the region number into r17
;;
@@ -503,7 +507,11 @@
* Clobbered: b0, r18, r19, r21, psr.dt (cleared)
*/
rsm psr.dt // switch to using physical data addressing
+#ifdef XEN
+ movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;;
+#else
mov r19=IA64_KR(PT_BASE) // get the page table base address
+#endif
shl r21=r16,3 // shift bit 60 into sign bit
;;
shr.u r17=r16,61 // get the region number into r17
@@ -833,7 +841,9 @@
cmp4.eq p7,p0=r0,r19
(p7) br.sptk.many fast_hyperprivop
;;
- mov r22=IA64_KR(CURRENT);;
+ movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r22 = [r22]
+ ;;
adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;;
ld4 r23=[r22];;
cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm?
@@ -842,7 +852,8 @@
br.sptk.many fast_break_reflect
;;
#endif
- mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
+ movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ ld8 r16=[r16]
mov r17=cr.iim
mov r18=__IA64_BREAK_SYSCALL
mov r21=ar.fpsr
@@ -934,7 +945,7 @@
// FIXME: this is a hack... use cpuinfo.ksoftirqd because its
// not used anywhere else and we need a place to stash ivr and
// there's no registers available unused by SAVE_MIN/REST
- movl r29=(PERCPU_ADDR)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+ movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
st8 [r29]=r30;;
movl r28=slow_interrupt;;
mov r29=rp;;
@@ -954,7 +965,7 @@
;;
alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
#ifdef XEN
- movl out0=(PERCPU_ADDR)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
+ movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;;
ld8 out0=[out0];;
#else
mov out0=cr.ivr // pass cr.ivr as first arg
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/linux-xen/entry.S
--- a/xen/arch/ia64/linux-xen/entry.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/linux-xen/entry.S Fri Aug 26 20:47:16 2005
@@ -191,7 +191,8 @@
adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
movl r25=init_task
- mov r27=IA64_KR(CURRENT_STACK)
+ movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_STACK_OFFSET;;
+ ld8 r27=[r27]
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
#ifdef XEN
dep r20=0,in0,60,4 // physical address of "next"
@@ -214,7 +215,8 @@
;;
(p6) srlz.d
ld8 sp=[r21] // load kernel stack pointer of new task
- mov IA64_KR(CURRENT)=in0 // update "current" application register
+ movl r8=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+ st8 [r8]=in0
mov r8=r13 // return pointer to previously running task
mov r13=in0 // set "current" pointer
;;
@@ -233,7 +235,8 @@
;;
cmp.eq p7,p0=r25,r23
;;
-(p7) mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
+(p7) movl r8=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_STACK_OFFSET;;
+(p7) st8 [r8]=r26
(p7) br.cond.sptk .done
#endif
rsm psr.ic // interrupts (psr.i) are already disabled here
@@ -247,8 +250,8 @@
mov cr.ifa=in0 // VA of next task...
;;
mov r25=IA64_TR_CURRENT_STACK
- mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
- ;;
+ movl r8=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_STACK_OFFSET;;
+ st8 [r8]=r26
itr.d dtr[r25]=r23 // wire in new mapping...
br.cond.sptk .done
END(ia64_switch_to)
@@ -947,7 +950,8 @@
ldf.fill f11=[r2]
bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
;;
-(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
+(pUStk) movl r18=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+(pUStk) ld8 r18=[r18]
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/linux-xen/head.S
--- a/xen/arch/ia64/linux-xen/head.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/linux-xen/head.S Fri Aug 26 20:47:16 2005
@@ -226,7 +226,7 @@
bsw.1
;;
#else // CONFIG_VTI
- mov IA64_KR(CURRENT)=r2 // virtual address
+ mov IA64_KR(CURRENT)=r2
mov IA64_KR(CURRENT_STACK)=r16
#endif // CONFIG_VTI
mov r13=r2
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/linux-xen/irq_ia64.c
--- a/xen/arch/ia64/linux-xen/irq_ia64.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/linux-xen/irq_ia64.c Fri Aug 26 20:47:16 2005
@@ -265,7 +265,7 @@
*/
vmx_irq_exit();
if ( wake_dom0 && current != dom0 )
- domain_wake(dom0->vcpu[0]);
+ vcpu_wake(dom0->vcpu[0]);
}
#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/linux-xen/setup.c Fri Aug 26 20:47:16 2005
@@ -51,9 +51,7 @@
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/unistd.h>
-#ifdef CONFIG_VTI
#include <asm/vmx.h>
-#endif // CONFIG_VTI
#include <asm/io.h>
#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
@@ -66,6 +64,7 @@
#endif
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(cpu_kr_ia64_t, cpu_kr);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
@@ -401,9 +400,9 @@
cpu_physical_id(0) = hard_smp_processor_id();
#endif
-#ifdef CONFIG_VTI
+#ifdef XEN
identify_vmx_feature();
-#endif // CONFIG_VTI
+#endif
cpu_init(); /* initialize the bootstrap CPU */
@@ -599,7 +598,7 @@
c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
-#ifdef CONFIG_VTI
+#ifdef XEN
/* If vmx feature is on, do necessary initialization for vmx */
if (vmx_enabled)
vmx_init_env();
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/linux/minstate.h
--- a/xen/arch/ia64/linux/minstate.h Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/linux/minstate.h Fri Aug 26 20:47:16 2005
@@ -61,7 +61,9 @@
;;
#ifdef MINSTATE_VIRT
-# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT)
+# define MINSTATE_GET_CURRENT(reg) \
+ movl reg=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;\
+ ld8 reg=[reg]
# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
#endif
@@ -170,7 +172,8 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
- mov r13=IA64_KR(CURRENT); /* establish `current' */ \
+ movl r13=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; \
+ ld8 r13=[r13]; /* establish 'current' */ \
;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
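
The hunks above in ivt.S, entry.S and minstate.h all follow one pattern: every read or write of a hardware kernel register such as IA64_KR(CURRENT) or IA64_KR(PT_BASE) is replaced by a load or store on a per-CPU shadow area, so that the ar.k* registers themselves can later be surrendered to the guest (see the cpu_kr initialisation added to xensetup.c further down). A minimal C-level sketch of the same access, assuming the cpu_kr_ia64_t union and cpu_kr per-CPU variable introduced later in this patch; read_current() is a hypothetical helper used only for illustration:

/* Sketch: what the two-instruction sequence
 *   movl reg=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; ld8 reg=[reg]
 * amounts to, written with the C accessor already used in xenmisc.c. */
static inline struct vcpu *read_current(void)
{
    /* old: return (struct vcpu *)ia64_get_kr(IA64_KR_CURRENT);     */
    /* new: read the shadow copy kept in the per-CPU cpu_kr area.   */
    return (struct vcpu *)__get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT];
}
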
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/regionreg.c
--- a/xen/arch/ia64/regionreg.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/regionreg.c Fri Aug 26 20:47:16 2005
@@ -29,9 +29,6 @@
#define MAX_RID_BLOCKS (1 <<
(IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS))
#define RIDS_PER_RIDBLOCK MIN_RIDS
-// This is the one global memory representation of the default Xen region reg
-ia64_rr xen_rr;
-
#if 0
// following already defined in include/asm-ia64/gcc_intrin.h
// it should probably be ifdef'd out from there to ensure all region
@@ -65,7 +62,7 @@
// returns -1 if none available
-unsigned long allocate_metaphysical_rr0(void)
+unsigned long allocate_metaphysical_rr(void)
{
ia64_rr rrv;
@@ -79,17 +76,6 @@
{
// fix this when the increment allocation mechanism is fixed.
return 1;
-}
-
-
-void init_rr(void)
-{
- xen_rr.rrval = 0;
- xen_rr.ve = 0;
- xen_rr.rid = allocate_reserved_rid();
- xen_rr.ps = PAGE_SHIFT;
-
- printf("initialized xen_rr.rid=0x%lx\n", xen_rr.rid);
}
/*************************************
@@ -186,34 +172,6 @@
return 1;
}
-
-// This function is purely for performance... apparently scrambling
-// bits in the region id makes for better hashing, which means better
-// use of the VHPT, which means better performance
-// Note that the only time a RID should be mangled is when it is stored in
-// a region register; anytime it is "viewable" outside of this module,
-// it should be unmangled
-
-// NOTE: this function is also implemented in assembly code in hyper_set_rr!!
-// Must ensure these two remain consistent!
-static inline unsigned long
-vmMangleRID(unsigned long RIDVal)
-{
- union bits64 { unsigned char bytes[4]; unsigned long uint; };
-
- union bits64 t;
- unsigned char tmp;
-
- t.uint = RIDVal;
- tmp = t.bytes[1];
- t.bytes[1] = t.bytes[3];
- t.bytes[3] = tmp;
-
- return t.uint;
-}
-
-// since vmMangleRID is symmetric, use it for unmangling also
-#define vmUnmangleRID(x) vmMangleRID(x)
static inline void
set_rr_no_srlz(unsigned long rr, unsigned long rrval)
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vcpu.c
--- a/xen/arch/ia64/vcpu.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vcpu.c Fri Aug 26 20:47:16 2005
@@ -14,9 +14,7 @@
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/delay.h>
-#ifdef CONFIG_VTI
#include <asm/vmx_vcpu.h>
-#endif // CONFIG_VTI
typedef union {
struct ia64_psr ia64_psr;
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vmmu.c
--- a/xen/arch/ia64/vmmu.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vmmu.c Fri Aug 26 20:47:16 2005
@@ -81,10 +81,10 @@
/*
* The VRN bits of va stand for which rr to get.
*/
-rr_t vmmu_get_rr(VCPU *vcpu, u64 va)
-{
- rr_t vrr;
- vmx_vcpu_get_rr(vcpu, va, &vrr.value);
+ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va)
+{
+ ia64_rr vrr;
+ vmx_vcpu_get_rr(vcpu, va, &vrr.rrval);
return vrr;
}
@@ -240,7 +240,7 @@
u64 saved_itir, saved_ifa, saved_rr;
u64 pages;
thash_data_t mtlb;
- rr_t vrr;
+ ia64_rr vrr;
unsigned int cl = tlb->cl;
mtlb.ifa = tlb->vadr;
@@ -264,7 +264,7 @@
/* Only access memory stack which is mapped by TR,
* after rr is switched.
*/
- ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.value));
+ ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval));
ia64_srlz_d();
if ( cl == ISIDE_TLB ) {
ia64_itci(mtlb.page_flags);
@@ -287,12 +287,12 @@
u64 hash_addr, tag;
unsigned long psr;
struct vcpu *v = current;
- rr_t vrr;
+ ia64_rr vrr;
saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
saved_rr0 = ia64_get_rr(0);
- vrr.value = saved_rr0;
+ vrr.rrval = saved_rr0;
vrr.rid = rid;
vrr.ps = ps;
@@ -300,7 +300,7 @@
// TODO: Set to enforce lazy mode
local_irq_save(psr);
ia64_setreg(_IA64_REG_CR_PTA, pta.val);
- ia64_set_rr(0, vmx_vrrtomrr(v, vrr.value));
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
ia64_srlz_d();
hash_addr = ia64_thash(va);
@@ -318,19 +318,19 @@
u64 hash_addr, tag;
u64 psr;
struct vcpu *v = current;
- rr_t vrr;
+ ia64_rr vrr;
// TODO: Set to enforce lazy mode
saved_pta = ia64_getreg(_IA64_REG_CR_PTA);
saved_rr0 = ia64_get_rr(0);
- vrr.value = saved_rr0;
+ vrr.rrval = saved_rr0;
vrr.rid = rid;
vrr.ps = ps;
va = (va << 3) >> 3; // set VRN to 0.
local_irq_save(psr);
ia64_setreg(_IA64_REG_CR_PTA, pta.val);
- ia64_set_rr(0, vmx_vrrtomrr(v, vrr.value));
+ ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval));
ia64_srlz_d();
tag = ia64_ttag(va);
@@ -354,15 +354,15 @@
{
u64 saved_rr0;
u64 psr;
- rr_t vrr;
+ ia64_rr vrr;
va = (va << 3) >> 3; // set VRN to 0.
saved_rr0 = ia64_get_rr(0);
- vrr.value = saved_rr0;
+ vrr.rrval = saved_rr0;
vrr.rid = rid;
vrr.ps = ps;
local_irq_save(psr);
- ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.value) );
+ ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) );
ia64_srlz_d();
ia64_ptcl(va, ps << 2);
ia64_set_rr( 0, saved_rr0 );
@@ -421,14 +421,14 @@
u64 gpip; // guest physical IP
u64 mpa;
thash_data_t *tlb;
- rr_t vrr;
+ ia64_rr vrr;
u64 mfn;
if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode
gpip = gip;
}
else {
- vmx_vcpu_get_rr(vcpu, gip, &vrr.value);
+ vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval);
tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu),
vrr.rid, gip, ISIDE_TLB );
if ( tlb == NULL ) panic("No entry found in ITLB\n");
@@ -448,7 +448,7 @@
thash_data_t data, *ovl;
thash_cb_t *hcb;
search_section_t sections;
- rr_t vrr;
+ ia64_rr vrr;
hcb = vmx_vcpu_get_vtlb(vcpu);
data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
@@ -481,7 +481,7 @@
thash_data_t data, *ovl;
thash_cb_t *hcb;
search_section_t sections;
- rr_t vrr;
+ ia64_rr vrr;
hcb = vmx_vcpu_get_vtlb(vcpu);
data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
@@ -511,7 +511,7 @@
{
thash_cb_t *hcb;
- rr_t vrr;
+ ia64_rr vrr;
u64 preferred_size;
vmx_vcpu_get_rr(vcpu, va, &vrr);
@@ -527,7 +527,7 @@
thash_data_t data, *ovl;
thash_cb_t *hcb;
search_section_t sections;
- rr_t vrr;
+ ia64_rr vrr;
hcb = vmx_vcpu_get_vtlb(vcpu);
data.page_flags=pte & ~PAGE_FLAGS_RV_MASK;
@@ -559,7 +559,7 @@
thash_data_t data, *ovl;
thash_cb_t *hcb;
search_section_t sections;
- rr_t vrr;
+ ia64_rr vrr;
hcb = vmx_vcpu_get_vtlb(vcpu);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vmx_init.c
--- a/xen/arch/ia64/vmx_init.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vmx_init.c Fri Aug 26 20:47:16 2005
@@ -22,6 +22,9 @@
*/
/*
+ * 05/08/16 Kun tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
+ * Disable double mapping
+ *
* 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>:
* Simplied design in first step:
* - One virtual environment
@@ -39,6 +42,7 @@
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/arch-ia64.h>
+#include <public/io/ioreq.h>
#include <asm/vmx_phy_mode.h>
#include <asm/processor.h>
#include <asm/vmx.h>
@@ -126,8 +130,43 @@
else
ASSERT(tmp_base != __vsa_base);
+#ifdef XEN_DBL_MAPPING
/* Init stub for rr7 switch */
vmx_init_double_mapping_stub();
+#endif
+}
+
+void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c)
+{
+ struct domain *d = v->domain;
+ shared_iopage_t *sp;
+
+ ASSERT(d != dom0); /* only for non-privileged vti domain */
+ d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg);
+ sp = get_sp(d);
+ memset((char *)sp,0,PAGE_SIZE);
+ /* FIXME: temp due to old CP */
+ sp->sp_global.eport = 2;
+#ifdef V_IOSAPIC_READY
+ sp->vcpu_number = 1;
+#endif
+ /* TEMP */
+ d->arch.vmx_platform.pib_base = 0xfee00000UL;
+
+ /* One more step to enable interrupt assist */
+ set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags);
+ /* Only open one port for I/O and interrupt emulation */
+ if (v == d->vcpu[0]) {
+ memset(&d->shared_info->evtchn_mask[0], 0xff,
+ sizeof(d->shared_info->evtchn_mask));
+ clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]);
+ }
+
+ /* FIXME: only a contiguous PMT table is supported so far */
+ d->arch.pmt = __va(c->pt_base);
+ d->arch.max_pfn = c->pt_max_pfn;
+
+ vmx_final_setup_domain(d);
}
typedef union {
@@ -171,7 +210,7 @@
}
-
+#ifdef CONFIG_VTI
/*
* Create a VP on intialized VMX environment.
*/
@@ -190,6 +229,7 @@
panic("ia64_pal_vp_create failed. \n");
}
+#ifdef XEN_DBL_MAPPING
void vmx_init_double_mapping_stub(void)
{
u64 base, psr;
@@ -206,6 +246,7 @@
ia64_srlz_i();
printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n",
(u64)(__pa(base)));
}
+#endif
/* Other non-context related tasks can be done in context switch */
void
@@ -219,12 +260,14 @@
if (status != PAL_STATUS_SUCCESS)
panic("Save vp status failed\n");
+#ifdef XEN_DBL_MAPPING
/* FIXME: Do we really need purge double mapping for old vcpu?
* Since rid is completely different between prev and next,
* it's not overlap and thus no MCA possible... */
dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
vmx_purge_double_mapping(dom_rr7, KERNEL_START,
(u64)v->arch.vtlb->ts->vhpt->hash);
+#endif
/* Need to save KR when domain switch, though HV itself doesn;t
* use them.
@@ -252,12 +295,14 @@
if (status != PAL_STATUS_SUCCESS)
panic("Restore vp status failed\n");
+#ifdef XEN_DBL_MAPPING
dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7]));
pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL));
pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >>
PAGE_SHIFT), PAGE_KERNEL));
vmx_insert_double_mapping(dom_rr7, KERNEL_START,
(u64)v->arch.vtlb->ts->vhpt->hash,
pte_xen, pte_vhpt);
+#endif
ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
@@ -271,6 +316,7 @@
* anchored in vcpu */
}
+#ifdef XEN_DBL_MAPPING
/* Purge old double mapping and insert new one, due to rr7 change */
void
vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7)
@@ -287,6 +333,8 @@
vhpt_base,
pte_xen, pte_vhpt);
}
+#endif // XEN_DBL_MAPPING
+#endif // CONFIG_VTI
/*
* Initialize VMX envirenment for guest. Only the 1st vp/vcpu
@@ -307,12 +355,21 @@
v->arch.arch_vmx.vpd = vpd;
vpd->virt_env_vaddr = vm_buffer;
+#ifdef CONFIG_VTI
/* v->arch.schedule_tail = arch_vmx_do_launch; */
vmx_create_vp(v);
/* Set this ed to be vmx */
set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags);
+ /* Physical mode emulation initialization, including
+ * emulation ID allocation and related memory request
+ */
+ physical_mode_init(v);
+
+ vlsapic_reset(v);
+ vtm_init(v);
+#endif
+
/* Other vmx specific initialization work */
}
-
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx_phy_mode.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vmx_phy_mode.c Fri Aug 26 20:47:16 2005
@@ -104,22 +104,8 @@
UINT64 psr;
struct domain * d = vcpu->domain;
- vcpu->domain->arch.emul_phy_rr0.rid = XEN_RR7_RID+((d->domain_id)<<3);
- /* FIXME */
-#if 0
- vcpu->domain->arch.emul_phy_rr0.ps = 28; /* set page size to 256M */
-#endif
- vcpu->domain->arch.emul_phy_rr0.ps = EMUL_PHY_PAGE_SHIFT; /* set page size to 4k */
- vcpu->domain->arch.emul_phy_rr0.ve = 1; /* enable VHPT walker on this region */
-
- vcpu->domain->arch.emul_phy_rr4.rid = XEN_RR7_RID + ((d->domain_id)<<3) + 4;
- vcpu->domain->arch.emul_phy_rr4.ps = EMUL_PHY_PAGE_SHIFT; /* set page size to 4k */
- vcpu->domain->arch.emul_phy_rr4.ve = 1; /* enable VHPT walker on this region */
-
vcpu->arch.old_rsc = 0;
vcpu->arch.mode_flags = GUEST_IN_PHY;
-
- return;
}
extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages);
@@ -246,8 +232,12 @@
vmx_load_all_rr(VCPU *vcpu)
{
unsigned long psr;
+ ia64_rr phy_rr;
psr = ia64_clear_ic();
+
+ phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
+ phy_rr.ve = 1;
/* WARNING: not allow co-exist of both virtual mode and physical
* mode in same region
@@ -255,10 +245,10 @@
if (is_physical_mode(vcpu)) {
if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
panic("Unexpected domain switch in phy emul\n");
- ia64_set_rr((VRN0 << VRN_SHIFT),
- vcpu->domain->arch.emul_phy_rr0.rrval);
- ia64_set_rr((VRN4 << VRN_SHIFT),
- vcpu->domain->arch.emul_phy_rr4.rrval);
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+ ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval);
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+ ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval);
} else {
ia64_set_rr((VRN0 << VRN_SHIFT),
vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0])));
@@ -284,13 +274,18 @@
switch_to_physical_rid(VCPU *vcpu)
{
UINT64 psr;
+ ia64_rr phy_rr;
+
+ phy_rr.ps = EMUL_PHY_PAGE_SHIFT;
+ phy_rr.ve = 1;
/* Save original virtual mode rr[0] and rr[4] */
-
psr=ia64_clear_ic();
- ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr0.rrval);
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr0;
+ ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval);
ia64_srlz_d();
- ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr4.rrval);
+ phy_rr.rid = vcpu->domain->arch.metaphysical_rr4;
+ ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval);
ia64_srlz_d();
ia64_set_psr(psr);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vmx_vcpu.c
--- a/xen/arch/ia64/vmx_vcpu.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vmx_vcpu.c Fri Aug 26 20:47:16 2005
@@ -234,9 +234,11 @@
case VRN7:
VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
/* Change double mapping for this domain */
+#ifdef XEN_DBL_MAPPING
vmx_change_double_mapping(vcpu,
vmx_vrrtomrr(vcpu,oldrr.rrval),
vmx_vrrtomrr(vcpu,newrr.rrval));
+#endif
break;
default:
ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/vtlb.c
--- a/xen/arch/ia64/vtlb.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/vtlb.c Fri Aug 26 20:47:16 2005
@@ -283,7 +283,7 @@
thash_data_t *vhpt)
{
u64 pages,mfn;
- rr_t vrr;
+ ia64_rr vrr;
ASSERT ( hcb->ht == THASH_VHPT );
vrr = (hcb->get_rr_fn)(hcb->vcpu,va);
@@ -361,7 +361,7 @@
{
thash_data_t *hash_table, *cch;
int flag;
- rr_t vrr;
+ ia64_rr vrr;
u64 gppn;
u64 ppns, ppne;
@@ -397,7 +397,7 @@
static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
{
thash_data_t *hash_table, *cch;
- rr_t vrr;
+ ia64_rr vrr;
hash_table = (hcb->hash_func)(hcb->pta,
va, entry->rid, entry->ps);
@@ -425,7 +425,7 @@
void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va)
{
thash_data_t *hash_table;
- rr_t vrr;
+ ia64_rr vrr;
vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr);
if ( entry->ps != vrr.ps && entry->tc ) {
@@ -556,7 +556,7 @@
thash_data_t *hash_table;
thash_internal_t *priv = &hcb->priv;
u64 tag;
- rr_t vrr;
+ ia64_rr vrr;
priv->_curva = va & ~(size-1);
priv->_eva = priv->_curva + size;
@@ -580,7 +580,7 @@
thash_data_t *hash_table;
thash_internal_t *priv = &hcb->priv;
u64 tag;
- rr_t vrr;
+ ia64_rr vrr;
priv->_curva = va & ~(size-1);
priv->_eva = priv->_curva + size;
@@ -633,7 +633,7 @@
thash_data_t *ovl;
thash_internal_t *priv = &hcb->priv;
u64 addr,rr_psize;
- rr_t vrr;
+ ia64_rr vrr;
if ( priv->s_sect.tr ) {
ovl = vtr_find_next_overlap (hcb);
@@ -665,7 +665,7 @@
thash_data_t *ovl;
thash_internal_t *priv = &hcb->priv;
u64 addr,rr_psize;
- rr_t vrr;
+ ia64_rr vrr;
vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva);
rr_psize = PSIZE(vrr.ps);
@@ -800,7 +800,7 @@
{
thash_data_t *hash_table, *cch;
u64 tag;
- rr_t vrr;
+ ia64_rr vrr;
ASSERT ( hcb->ht == THASH_VTLB );
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/xenirq.c
--- a/xen/arch/ia64/xenirq.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/xenirq.c Fri Aug 26 20:47:16 2005
@@ -50,7 +50,7 @@
#endif
//FIXME: TEMPORARY HACK!!!!
vcpu_pend_interrupt(dom0->vcpu[0],vector);
- domain_wake(dom0->vcpu[0]);
+ vcpu_wake(dom0->vcpu[0]);
return(1);
}
return(0);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/xenmem.c
--- a/xen/arch/ia64/xenmem.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/xenmem.c Fri Aug 26 20:47:16 2005
@@ -30,8 +30,8 @@
*/
#ifdef CONFIG_VTI
unsigned long *mpt_table;
-unsigned long *mpt_table_size;
-#endif
+unsigned long mpt_table_size;
+#endif // CONFIG_VTI
void
paging_init (void)
@@ -53,21 +53,6 @@
printk("machine to physical table: 0x%lx\n", (u64)mpt_table);
memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size);
-
- /* Any more setup here? On VMX enabled platform,
- * there's no need to keep guest linear pg table,
- * and read only mpt table. MAP cache is not used
- * in this stage, and later it will be in region 5.
- * IO remap is in region 6 with identity mapping.
- */
- /* HV_tlb_init(); */
-
-#else // CONFIG_VTI
-
- /* Allocate and map the machine-to-phys table */
- if ((pg = alloc_domheap_pages(NULL, 10, 0)) == NULL)
- panic("Not enough memory to bootstrap Xen.\n");
- memset(page_to_virt(pg), 0x55, 16UL << 20);
#endif // CONFIG_VTI
/* Other mapping setup */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/xenmisc.c Fri Aug 26 20:47:16 2005
@@ -58,9 +58,7 @@
/* calls in xen/common code that are unused on ia64 */
-void sync_lazy_execstate_cpu(unsigned int cpu) {}
-void sync_lazy_execstate_mask(cpumask_t mask) {}
-void sync_lazy_execstate_all(void) {}
+void sync_vcpu_execstate(struct vcpu *v) {}
#ifdef CONFIG_VTI
int grant_table_create(struct domain *d) { return 0; }
@@ -340,7 +338,8 @@
loop:
printf("$$$$$ PANIC in domain %d (k6=%p): ",
- v->domain->domain_id, ia64_get_kr(IA64_KR_CURRENT));
+ v->domain->domain_id,
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]);
va_start(args, fmt);
(void)vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/xensetup.c
--- a/xen/arch/ia64/xensetup.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/xensetup.c Fri Aug 26 20:47:16 2005
@@ -183,11 +183,6 @@
printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
xen_pstart, xenheap_phys_end);
-#ifdef CONFIG_VTI
- /* If we want to enable vhpt for all regions, related initialization
- * for HV TLB must be done earlier before first TLB miss
- */
-#endif // CONFIG_VTI
/* Find next hole */
firsthole_start = 0;
efi_memmap_walk(xen_find_first_hole, &firsthole_start);
@@ -267,6 +262,14 @@
do_initcalls();
printk("About to call sort_main_extable()\n");
sort_main_extable();
+
+ /* surrender usage of kernel registers to domain, use percpu area instead */
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT);
+ __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE);
/* Create initial domain 0. */
printk("About to call do_createdomain()\n");
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/xentime.c
--- a/xen/arch/ia64/xentime.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/ia64/xentime.c Fri Aug 26 20:47:16 2005
@@ -162,14 +162,14 @@
if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
vcpu_pend_timer(dom0->vcpu[0]);
//vcpu_set_next_timer(dom0->vcpu[0]);
- domain_wake(dom0->vcpu[0]);
+ vcpu_wake(dom0->vcpu[0]);
}
if (!is_idle_task(current->domain) && current->domain != dom0) {
if (vcpu_timer_expired(current)) {
vcpu_pend_timer(current);
// ensure another timer interrupt happens even if domain doesn't
vcpu_set_next_timer(current);
- domain_wake(current);
+ vcpu_wake(current);
}
}
raise_actimer_softirq();
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/audit.c
--- a/xen/arch/x86/audit.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/audit.c Fri Aug 26 20:47:16 2005
@@ -735,7 +735,6 @@
if ( d != current->domain )
domain_pause(d);
- sync_lazy_execstate_all();
// Maybe we should just be using BIGLOCK?
//
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/domain.c Fri Aug 26 20:47:16 2005
@@ -885,27 +885,22 @@
return switch_required;
}
-void sync_lazy_execstate_cpu(unsigned int cpu)
-{
+void sync_vcpu_execstate(struct vcpu *v)
+{
+ unsigned int cpu = v->processor;
+
+ if ( !cpu_isset(cpu, v->domain->cpumask) )
+ return;
+
if ( cpu == smp_processor_id() )
+ {
(void)__sync_lazy_execstate();
+ }
else
+ {
+ /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
flush_tlb_mask(cpumask_of_cpu(cpu));
-}
-
-void sync_lazy_execstate_mask(cpumask_t mask)
-{
- if ( cpu_isset(smp_processor_id(), mask) )
- (void)__sync_lazy_execstate();
- /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
- flush_tlb_mask(mask);
-}
-
-void sync_lazy_execstate_all(void)
-{
- __sync_lazy_execstate();
- /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
- flush_tlb_mask(cpu_online_map);
+ }
}
unsigned long __hypercall_create_continuation(
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/x86_32/asm-offsets.c Fri Aug 26 20:47:16 2005
@@ -71,6 +71,9 @@
OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
BLANK();
+ DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+ BLANK();
+
OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
OFFSET(TRAPBOUNCE_cr2, struct trap_bounce, cr2);
OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/x86_32/entry.S Fri Aug 26 20:47:16 2005
@@ -60,6 +60,11 @@
#include <asm/apicdef.h>
#include <asm/page.h>
#include <public/xen.h>
+
+#define GET_GUEST_REGS(reg) \
+ movl $~(STACK_SIZE-1),reg; \
+ andl %esp,reg; \
+ orl $(STACK_SIZE-CPUINFO_sizeof),reg;
#define GET_CURRENT(reg) \
movl $STACK_SIZE-4, reg; \
@@ -279,7 +284,41 @@
GET_CURRENT(%ebx)
andl $(NR_hypercalls-1),%eax
PERFC_INCR(PERFC_hypercalls, %eax)
+#ifndef NDEBUG
+ /* Deliberately corrupt parameter regs not used by this hypercall. */
+ pushl %eax
+ pushl UREGS_eip+4(%esp)
+ pushl 28(%esp) # EBP
+ pushl 28(%esp) # EDI
+ pushl 28(%esp) # ESI
+ pushl 28(%esp) # EDX
+ pushl 28(%esp) # ECX
+ pushl 28(%esp) # EBX
+ movzb hypercall_args_table(,%eax,1),%ecx
+ leal (%esp,%ecx,4),%edi
+ subl $6,%ecx
+ negl %ecx
+ movl %eax,%esi
+ movl $0xDEADBEEF,%eax
+ rep stosl
+ movl %esi,%eax
+#endif
call *hypercall_table(,%eax,4)
+#ifndef NDEBUG
+ /* Deliberately corrupt parameter regs used by this hypercall. */
+ addl $24,%esp # Shadow parameters
+ popl %ecx # Shadow EIP
+ cmpl %ecx,UREGS_eip(%esp)
+ popl %ecx # Shadow hypercall index
+ jne skip_clobber # If EIP has changed then don't clobber
+ movzb hypercall_args_table(,%ecx,1),%ecx
+ movl %esp,%edi
+ movl %eax,%esi
+ movl $0xDEADBEEF,%eax
+ rep stosl
+ movl %esi,%eax
+skip_clobber:
+#endif
movl %eax,UREGS_eax(%esp) # save the return value
test_all_events:
@@ -680,12 +719,14 @@
do_arch_sched_op:
# Ensure we return success even if we return via schedule_tail()
xorl %eax,%eax
- movl %eax,UREGS_eax+4(%esp)
+ GET_GUEST_REGS(%ecx)
+ movl %eax,UREGS_eax(%ecx)
jmp do_sched_op
do_switch_vm86:
- # Discard the return address
- addl $4,%esp
+ # Reset the stack pointer
+ GET_GUEST_REGS(%ecx)
+ movl %ecx,%esp
# GS:ESI == Ring-1 stack activation
movl UREGS_esp(%esp),%esi
@@ -774,3 +815,36 @@
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
+
+ENTRY(hypercall_args_table)
+ .byte 1 /* do_set_trap_table */ /* 0 */
+ .byte 4 /* do_mmu_update */
+ .byte 2 /* do_set_gdt */
+ .byte 2 /* do_stack_switch */
+ .byte 4 /* do_set_callbacks */
+ .byte 1 /* do_fpu_taskswitch */ /* 5 */
+ .byte 2 /* do_arch_sched_op */
+ .byte 1 /* do_dom0_op */
+ .byte 2 /* do_set_debugreg */
+ .byte 1 /* do_get_debugreg */
+ .byte 4 /* do_update_descriptor */ /* 10 */
+ .byte 0 /* do_ni_hypercall */
+ .byte 5 /* do_dom_mem_op */
+ .byte 2 /* do_multicall */
+ .byte 4 /* do_update_va_mapping */
+ .byte 2 /* do_set_timer_op */ /* 15 */
+ .byte 1 /* do_event_channel_op */
+ .byte 1 /* do_xen_version */
+ .byte 3 /* do_console_io */
+ .byte 1 /* do_physdev_op */
+ .byte 3 /* do_grant_table_op */ /* 20 */
+ .byte 2 /* do_vm_assist */
+ .byte 5 /* do_update_va_mapping_otherdomain */
+ .byte 0 /* do_switch_vm86 */
+ .byte 2 /* do_boot_vcpu */
+ .byte 0 /* do_ni_hypercall */ /* 25 */
+ .byte 4 /* do_mmuext_op */
+ .byte 1 /* do_acm_op */
+ .rept NR_hypercalls-(.-hypercall_args_table)
+ .byte 0 /* do_ni_hypercall */
+ .endr
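
Two mechanisms are added in this entry.S hunk. First, GET_GUEST_REGS recovers the guest register frame from any stack address: it masks the address down to the STACK_SIZE-aligned base of the per-CPU stack and ORs in STACK_SIZE-CPUINFO_sizeof, which is why do_arch_sched_op and do_switch_vm86 no longer assume the frame is adjacent to the current %esp. Second, in non-NDEBUG builds the hypercall path deliberately clobbers parameter registers with 0xDEADBEEF, driven by the per-hypercall argument counts in hypercall_args_table, so guests cannot rely on argument registers surviving a hypercall. A hedged C sketch of the address arithmetic only; guest_regs() is an illustrative helper, the addition is equivalent to the macro's OR because the stack base is STACK_SIZE-aligned, and it assumes the saved guest frame is the first member of struct cpu_info (which is what applying UREGS_* offsets to this pointer implies):

static inline struct cpu_user_regs *guest_regs(unsigned long sp)
{
    unsigned long stack_base = sp & ~(STACK_SIZE - 1);  /* per-CPU stack base */
    /* struct cpu_info sits at the very top of the per-CPU stack; the guest
     * register frame is assumed to be its first member. */
    return (struct cpu_user_regs *)(stack_base + STACK_SIZE - sizeof(struct cpu_info));
}
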
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/x86_64/asm-offsets.c Fri Aug 26 20:47:16 2005
@@ -71,6 +71,9 @@
OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
BLANK();
+ DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+ BLANK();
+
OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code);
OFFSET(TRAPBOUNCE_cr2, struct trap_bounce, cr2);
OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Thu Aug 25 20:52:38 2005
+++ b/xen/arch/x86/x86_64/entry.S Fri Aug 26 20:47:16 2005
@@ -11,6 +11,11 @@
#include <asm/apicdef.h>
#include <asm/page.h>
#include <public/xen.h>
+
+#define GET_GUEST_REGS(reg) \
+ movq $~(STACK_SIZE-1),reg; \
+ andq %rsp,reg; \
+ orq $(STACK_SIZE-CPUINFO_sizeof),reg;
#define GET_CURRENT(reg) \
movq $STACK_SIZE-8, reg; \
@@ -120,10 +125,42 @@
/*hypercall:*/
movq %r10,%rcx
andq $(NR_hypercalls-1),%rax
+#ifndef NDEBUG
+ /* Deliberately corrupt parameter regs not used by this hypercall. */
+ pushq %rdi; pushq %rsi; pushq %rdx; pushq %rcx; pushq %r8 ; pushq %r9
+ leaq hypercall_args_table(%rip),%r10
+ movq $6,%rcx
+ sub (%r10,%rax,1),%cl
+ movq %rsp,%rdi
+ movl $0xDEADBEEF,%eax
+ rep stosq
+ popq %r9 ; popq %r8 ; popq %rcx; popq %rdx; popq %rsi; popq %rdi
+ movq UREGS_rax(%rsp),%rax
+ andq $(NR_hypercalls-1),%rax
+ pushq %rax
+ pushq UREGS_rip+8(%rsp)
+#endif
leaq hypercall_table(%rip),%r10
PERFC_INCR(PERFC_hypercalls, %rax)
callq *(%r10,%rax,8)
- movq %rax,UREGS_rax(%rsp) # save the return value
+#ifndef NDEBUG
+ /* Deliberately corrupt parameter regs used by this hypercall. */
+ popq %r10 # Shadow RIP
+ cmpq %r10,UREGS_rip(%rsp)
+ popq %rcx # Shadow hypercall index
+ jne skip_clobber /* If RIP has changed then don't clobber. */
+ leaq hypercall_args_table(%rip),%r10
+ movb (%r10,%rcx,1),%cl
+ movl $0xDEADBEEF,%r10d
+ cmpb $1,%cl; jb skip_clobber; movq %r10,UREGS_rdi(%rsp)
+ cmpb $2,%cl; jb skip_clobber; movq %r10,UREGS_rsi(%rsp)
+ cmpb $3,%cl; jb skip_clobber; movq %r10,UREGS_rdx(%rsp)
+ cmpb $4,%cl; jb skip_clobber; movq %r10,UREGS_r10(%rsp)
+ cmpb $5,%cl; jb skip_clobber; movq %r10,UREGS_r8(%rsp)
+ cmpb $6,%cl; jb skip_clobber; movq %r10,UREGS_r9(%rsp)
+skip_clobber:
+#endif
+ movq %rax,UREGS_rax(%rsp) # save the return value
/* %rbx: struct vcpu */
test_all_events:
@@ -538,7 +575,8 @@
do_arch_sched_op:
# Ensure we return success even if we return via schedule_tail()
xorl %eax,%eax
- movq %rax,UREGS_rax+8(%rsp)
+ GET_GUEST_REGS(%r10)
+ movq %rax,UREGS_rax(%r10)
jmp do_sched_op
.data
@@ -597,3 +635,36 @@
.rept NR_hypercalls-((.-hypercall_table)/4)
.quad do_ni_hypercall
.endr
+
+ENTRY(hypercall_args_table)
+ .byte 1 /* do_set_trap_table */ /* 0 */
+ .byte 4 /* do_mmu_update */
+ .byte 2 /* do_set_gdt */
+ .byte 2 /* do_stack_switch */
+ .byte 3 /* do_set_callbacks */
+ .byte 1 /* do_fpu_taskswitch */ /* 5 */
+ .byte 2 /* do_arch_sched_op */
+ .byte 1 /* do_dom0_op */
+ .byte 2 /* do_set_debugreg */
+ .byte 1 /* do_get_debugreg */
+ .byte 2 /* do_update_descriptor */ /* 10 */
+ .byte 0 /* do_ni_hypercall */
+ .byte 5 /* do_dom_mem_op */
+ .byte 2 /* do_multicall */
+ .byte 3 /* do_update_va_mapping */
+ .byte 1 /* do_set_timer_op */ /* 15 */
+ .byte 1 /* do_event_channel_op */
+ .byte 1 /* do_xen_version */
+ .byte 3 /* do_console_io */
+ .byte 1 /* do_physdev_op */
+ .byte 3 /* do_grant_table_op */ /* 20 */
+ .byte 2 /* do_vm_assist */
+ .byte 4 /* do_update_va_mapping_otherdomain */
+ .byte 0 /* do_switch_to_user */
+ .byte 2 /* do_boot_vcpu */
+ .byte 2 /* do_set_segment_base */ /* 25 */
+ .byte 4 /* do_mmuext_op */
+ .byte 1 /* do_acm_op */
+ .rept NR_hypercalls-(.-hypercall_args_table)
+ .byte 0 /* do_ni_hypercall */
+ .endr
diff -r de3576a1c62c -r dfaf788ab18c xen/common/domain.c
--- a/xen/common/domain.c Thu Aug 25 20:52:38 2005
+++ b/xen/common/domain.c Fri Aug 26 20:47:16 2005
@@ -152,10 +152,7 @@
/* Make sure that every vcpu is descheduled before we finalise. */
for_each_vcpu ( d, v )
- while ( test_bit(_VCPUF_running, &v->vcpu_flags) )
- cpu_relax();
-
- sync_lazy_execstate_mask(d->cpumask);
+ vcpu_sleep_sync(v);
BUG_ON(!cpus_empty(d->cpumask));
sync_pagetable_state(d);
@@ -209,7 +206,7 @@
/* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
for_each_vcpu ( d, v )
- domain_sleep_nosync(v);
+ vcpu_sleep_nosync(v);
}
@@ -226,7 +223,7 @@
for_each_vcpu ( d, v )
{
set_bit(_VCPUF_ctrl_pause, &v->vcpu_flags);
- domain_sleep_nosync(v);
+ vcpu_sleep_nosync(v);
}
send_guest_virq(dom0->vcpu[0], VIRQ_DEBUGGER);
@@ -275,7 +272,7 @@
{
BUG_ON(v == current);
atomic_inc(&v->pausecnt);
- domain_sleep_sync(v);
+ vcpu_sleep_sync(v);
}
void domain_pause(struct domain *d)
@@ -286,7 +283,7 @@
{
BUG_ON(v == current);
atomic_inc(&v->pausecnt);
- domain_sleep_sync(v);
+ vcpu_sleep_sync(v);
}
}
@@ -294,7 +291,7 @@
{
BUG_ON(v == current);
if ( atomic_dec_and_test(&v->pausecnt) )
- domain_wake(v);
+ vcpu_wake(v);
}
void domain_unpause(struct domain *d)
@@ -313,7 +310,7 @@
{
BUG_ON(v == current);
if ( !test_and_set_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
- domain_sleep_sync(v);
+ vcpu_sleep_sync(v);
}
}
@@ -324,7 +321,7 @@
for_each_vcpu ( d, v )
{
if ( test_and_clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
- domain_wake(v);
+ vcpu_wake(v);
}
}
@@ -413,7 +410,7 @@
/* domain_unpause_by_systemcontroller */
if ( test_and_clear_bit(_VCPUF_ctrl_pause, &v->vcpu_flags) )
- domain_wake(v);
+ vcpu_wake(v);
xfree(c);
return 0;
diff -r de3576a1c62c -r dfaf788ab18c xen/common/multicall.c
--- a/xen/common/multicall.c Thu Aug 25 20:52:38 2005
+++ b/xen/common/multicall.c Fri Aug 26 20:47:16 2005
@@ -45,6 +45,18 @@
do_multicall_call(&mcs->call);
+#ifndef NDEBUG
+ {
+ /*
+ * Deliberately corrupt the contents of the multicall structure.
+ * The caller must depend only on the 'result' field on return.
+ */
+ multicall_entry_t corrupt;
+ memset(&corrupt, 0xAA, sizeof(corrupt));
+ (void)__copy_to_user(&call_list[i], &corrupt, sizeof(corrupt));
+ }
+#endif
+
if ( unlikely(__put_user(mcs->call.result, &call_list[i].result)) )
{
DPRINTK("Error writing result back to multicall block.\n");
diff -r de3576a1c62c -r dfaf788ab18c xen/common/schedule.c
--- a/xen/common/schedule.c Thu Aug 25 20:52:38 2005
+++ b/xen/common/schedule.c Fri Aug 26 20:47:16 2005
@@ -193,7 +193,7 @@
TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
}
-void domain_sleep_nosync(struct vcpu *v)
+void vcpu_sleep_nosync(struct vcpu *v)
{
unsigned long flags;
@@ -205,18 +205,23 @@
TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}
-void domain_sleep_sync(struct vcpu *v)
-{
- domain_sleep_nosync(v);
-
- while ( test_bit(_VCPUF_running, &v->vcpu_flags) && !domain_runnable(v) )
+void vcpu_sleep_sync(struct vcpu *v)
+{
+ vcpu_sleep_nosync(v);
+
+ /*
+ * We can be sure that the VCPU is finally descheduled after the running
+ * flag is cleared and the scheduler lock is released.
+ */
+ while ( test_bit(_VCPUF_running, &v->vcpu_flags)
+ && !domain_runnable(v)
+ && spin_is_locked(&schedule_data[v->processor].schedule_lock) )
cpu_relax();
- if ( cpu_isset(v->processor, v->domain->cpumask) )
- sync_lazy_execstate_cpu(v->processor);
-}
-
-void domain_wake(struct vcpu *v)
+ sync_vcpu_execstate(v);
+}
+
+void vcpu_wake(struct vcpu *v)
{
unsigned long flags;
@@ -293,7 +298,7 @@
return -ESRCH;
clear_bit(_VCPUF_down, &target->vcpu_flags);
/* wake vcpu */
- domain_wake(target);
+ vcpu_wake(target);
return 0;
}
@@ -457,10 +462,10 @@
}
}
}
- } while (!succ);
- //spin_lock_irq(&schedule_data[d->vcpu[0]->processor].schedule_lock);
+ } while ( !succ );
+
SCHED_OP(adjdom, d, cmd);
- //spin_unlock_irq(&schedule_data[d->vcpu[0]->processor].schedule_lock);
+
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (__get_cpu_bit(cpu, have_lock))
spin_unlock(&schedule_data[cpu].schedule_lock);
@@ -520,7 +525,8 @@
perfc_incrc(sched_ctx);
#if defined(WAKE_HISTO)
- if ( !is_idle_task(next->domain) && next->wokenup ) {
+ if ( !is_idle_task(next->domain) && next->wokenup )
+ {
ulong diff = (ulong)(now - next->wokenup);
diff /= (ulong)MILLISECS(1);
if (diff <= BUCKETS-2) schedule_data[cpu].hist[diff]++;
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/domain.h Fri Aug 26 20:47:16 2005
@@ -3,39 +3,28 @@
#include <linux/thread_info.h>
#include <asm/tlb.h>
-#ifdef CONFIG_VTI
#include <asm/vmx_vpd.h>
#include <asm/vmmu.h>
#include <asm/regionreg.h>
#include <public/arch-ia64.h>
#include <asm/vmx_platform.h>
-#endif // CONFIG_VTI
#include <xen/list.h>
extern void arch_do_createdomain(struct vcpu *);
extern void domain_relinquish_resources(struct domain *);
-#ifdef CONFIG_VTI
-struct trap_bounce {
- // TO add, FIXME Eddie
-};
-
-#define PMT_SIZE (32L*1024*1024) // 32M for PMT
-#endif // CONFIG_VTI
-
struct arch_domain {
struct mm_struct *active_mm;
struct mm_struct *mm;
int metaphysical_rr0;
+ int metaphysical_rr4;
int starting_rid; /* first RID assigned to domain */
int ending_rid; /* one beyond highest RID assigned to domain */
int rid_bits; /* number of virtual rid bits (default: 18) */
int breakimm;
-#ifdef CONFIG_VTI
+
int imp_va_msb;
- ia64_rr emul_phy_rr0;
- ia64_rr emul_phy_rr4;
unsigned long *pmt; /* physical to machine table */
/*
* max_pfn is the maximum page frame in guest physical space, including
@@ -44,7 +33,7 @@
*/
unsigned long max_pfn;
struct virutal_platform_def vmx_platform;
-#endif //CONFIG_VTI
+
u64 xen_vastart;
u64 xen_vaend;
u64 shared_info_va;
@@ -78,15 +67,15 @@
#endif
void *regs; /* temporary until find a better way to do privops */
int metaphysical_rr0; // from arch_domain (so is pinned)
+ int metaphysical_rr4; // from arch_domain (so is pinned)
int metaphysical_saved_rr0; // from arch_domain (so is pinned)
+ int metaphysical_saved_rr4; // from arch_domain (so is pinned)
int breakimm; // from arch_domain (so is pinned)
int starting_rid; /* first RID assigned to domain */
int ending_rid; /* one beyond highest RID assigned to domain */
struct mm_struct *active_mm;
struct thread_struct _thread; // this must be last
-#ifdef CONFIG_VTI
- void (*schedule_tail) (struct vcpu *);
- struct trap_bounce trap_bounce;
+
thash_cb_t *vtlb;
char irq_new_pending;
char irq_new_condition; // vpsr.i/vtpr change, check for pending VHPI
@@ -94,9 +83,7 @@
//for phycial emulation
unsigned long old_rsc;
int mode_flags;
-
struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
-#endif // CONFIG_VTI
};
#define active_mm arch.active_mm
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/linux-xen/asm/pal.h
--- a/xen/include/asm-ia64/linux-xen/asm/pal.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/linux-xen/asm/pal.h Fri Aug 26 20:47:16 2005
@@ -1559,9 +1559,7 @@
return iprv.status;
}
-#ifdef CONFIG_VTI
#include <asm/vmx_pal.h>
-#endif // CONFIG_VTI
#endif /* __ASSEMBLY__ */
#endif /* _ASM_IA64_PAL_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/linux-xen/asm/processor.h
--- a/xen/include/asm-ia64/linux-xen/asm/processor.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/linux-xen/asm/processor.h Fri Aug 26 20:47:16 2005
@@ -183,6 +183,22 @@
DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+typedef union {
+ struct {
+ __u64 kr0;
+ __u64 kr1;
+ __u64 kr2;
+ __u64 kr3;
+ __u64 kr4;
+ __u64 kr5;
+ __u64 kr6;
+ __u64 kr7;
+ };
+ __u64 _kr[8];
+} cpu_kr_ia64_t;
+
+DECLARE_PER_CPU(cpu_kr_ia64_t, cpu_kr);
+
/*
* The "local" data variable. It refers to the per-CPU data of the currently
executing
* CPU, much like "current" points to the per-task data of the currently
executing task.
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/mmu_context.h
--- a/xen/include/asm-ia64/mmu_context.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/mmu_context.h Fri Aug 26 20:47:16 2005
@@ -2,11 +2,7 @@
#define __ASM_MMU_CONTEXT_H
//dummy file to resolve non-arch-indep include
#ifdef XEN
-#ifndef CONFIG_VTI
#define IA64_REGION_ID_KERNEL 0
-#else // CONFIG_VTI
-#define IA64_REGION_ID_KERNEL 0x1e0000 /* Start from all 1 in highest 4 bits */
-#endif // CONFIG_VTI
#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61))
#ifndef __ASSEMBLY__
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/privop.h
--- a/xen/include/asm-ia64/privop.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/privop.h Fri Aug 26 20:47:16 2005
@@ -133,7 +133,6 @@
struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, x3:3, un1:1, major:4; };
} INST64_M46;
-#ifdef CONFIG_VTI
typedef union U_INST64_M47 {
IA64_INST inst;
struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
@@ -168,8 +167,6 @@
IA64_INST inst;
struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, x6:6, m:1, major:4; };
} INST64_M6;
-
-#endif // CONFIG_VTI
typedef union U_INST64 {
IA64_INST inst;
@@ -182,14 +179,12 @@
INST64_I26 I26; // mov register to ar (I unit)
INST64_I27 I27; // mov immediate to ar (I unit)
INST64_I28 I28; // mov from ar (I unit)
-#ifdef CONFIG_VTI
- INST64_M1 M1; // ld integer
+ INST64_M1 M1; // ld integer
INST64_M2 M2;
INST64_M3 M3;
- INST64_M4 M4; // st integer
+ INST64_M4 M4; // st integer
INST64_M5 M5;
- INST64_M6 M6; // ldfd floating pointer
-#endif // CONFIG_VTI
+ INST64_M6 M6; // ldfd floating pointer
INST64_M28 M28; // purge translation cache entry
INST64_M29 M29; // mov register to ar (M unit)
INST64_M30 M30; // mov immediate to ar (M unit)
@@ -204,9 +199,7 @@
INST64_M44 M44; // set/reset system mask
INST64_M45 M45; // translation purge
INST64_M46 M46; // translation access (tpa,tak)
-#ifdef CONFIG_VTI
INST64_M47 M47; // purge translation entry
-#endif // CONFIG_VTI
} INST64;
#define MASK_41 ((UINT64)0x1ffffffffff)
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/regionreg.h
--- a/xen/include/asm-ia64/regionreg.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/regionreg.h Fri Aug 26 20:47:16 2005
@@ -1,12 +1,6 @@
#ifndef _REGIONREG_H_
#define _REGIONREG_H_
-#ifdef CONFIG_VTI
-#define XEN_DEFAULT_RID 0xf00000
-#define DOMAIN_RID_SHIFT 20
-#define DOMAIN_RID_MASK (~(1U<<DOMAIN_RID_SHIFT -1))
-#else //CONFIG_VTI
#define XEN_DEFAULT_RID 7
-#endif // CONFIG_VTI
#define IA64_MIN_IMPL_RID_MSB 17
#define _REGION_ID(x) ({ia64_rr _v; _v.rrval = (long) (x); _v.rid;})
#define _REGION_PAGE_SIZE(x) ({ia64_rr _v; _v.rrval = (long) (x); _v.ps;})
@@ -42,4 +36,32 @@
int set_one_rr(unsigned long rr, unsigned long val);
+// This function is purely for performance... apparently scrambling
+// bits in the region id makes for better hashing, which means better
+// use of the VHPT, which means better performance
+// Note that the only time a RID should be mangled is when it is stored in
+// a region register; anytime it is "viewable" outside of this module,
+// it should be unmangled
+
+// NOTE: this function is also implemented in assembly code in hyper_set_rr!!
+// Must ensure these two remain consistent!
+static inline unsigned long
+vmMangleRID(unsigned long RIDVal)
+{
+ union bits64 { unsigned char bytes[4]; unsigned long uint; };
+
+ union bits64 t;
+ unsigned char tmp;
+
+ t.uint = RIDVal;
+ tmp = t.bytes[1];
+ t.bytes[1] = t.bytes[3];
+ t.bytes[3] = tmp;
+
+ return t.uint;
+}
+
+// since vmMangleRID is symmetric, use it for unmangling also
+#define vmUnmangleRID(x) vmMangleRID(x)
+
#endif /* !_REGIONREG_H_ */
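
vmMangleRID, moved into this header so vmx_vrrtomrr() can share it, simply swaps bytes 1 and 3 of the region ID before it is written to a region register; because the swap is its own inverse, vmUnmangleRID can be the same function. A small hedged sanity check, compilable on any little-endian host (the byte layout assumed here matches the little-endian mode Xen/ia64 runs in); mangle() is a local copy made only for this check:

#include <stdio.h>

/* Local copy of vmMangleRID from regionreg.h, for a host-side check only. */
static unsigned long mangle(unsigned long rid)
{
    union { unsigned char bytes[4]; unsigned long uint; } t;
    unsigned char tmp;

    t.uint = rid;
    tmp = t.bytes[1];
    t.bytes[1] = t.bytes[3];
    t.bytes[3] = tmp;
    return t.uint;
}

int main(void)
{
    unsigned long rid = 0x00123456UL;
    /* Prints 0x123456 -> 0x34120056 -> 0x123456: neighbouring RIDs spread out
     * (better VHPT hashing), and mangling twice is the identity. */
    printf("%#lx -> %#lx -> %#lx\n", rid, mangle(rid), mangle(mangle(rid)));
    return 0;
}
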
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/serial.h
--- a/xen/include/asm-ia64/serial.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/serial.h Fri Aug 26 20:47:16 2005
@@ -1,20 +1,0 @@
-#ifndef __ASM_SERIAL_H__
-#define __ASM_SERIAL_H__
-
-#include <asm/regs.h>
-#include <asm/irq.h>
-#include <xen/serial.h>
-#include <asm/hpsim_ssc.h>
-
-#ifndef CONFIG_VTI
-#define arch_serial_putc(_uart, _c) \
- ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
- ( longs_peak_putc(c), 1 ))
-#else
-#define arch_serial_putc(_uart, _c) \
- ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
- ( (inb((_uart)->io_base + LSR) & LSR_THRE) ? \
- (outb((_c), (_uart)->io_base + THR), 1) : 0 ))
-#endif
-
-#endif /* __ASM_SERIAL_H__ */
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/tlb.h
--- a/xen/include/asm-ia64/tlb.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/tlb.h Fri Aug 26 20:47:16 2005
@@ -35,17 +35,4 @@
unsigned long rid;
} TR_ENTRY;
-#ifdef CONFIG_VTI
-typedef union {
- unsigned long value;
- struct {
- unsigned long ve : 1;
- unsigned long rv1 : 1;
- unsigned long ps : 6;
- unsigned long rid : 24;
- unsigned long rv2 : 32;
- };
-} rr_t;
-#endif // CONFIG_VTI
-
#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/vmmu.h
--- a/xen/include/asm-ia64/vmmu.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/vmmu.h Fri Aug 26 20:47:16 2005
@@ -23,10 +23,11 @@
#ifndef XEN_TLBthash_H
#define XEN_TLBthash_H
-#include "xen/config.h"
-#include "xen/types.h"
-#include "public/xen.h"
-#include "asm/tlb.h"
+#include <xen/config.h>
+#include <xen/types.h>
+#include <public/xen.h>
+#include <asm/tlb.h>
+#include <asm/regionreg.h>
//#define THASH_TLB_TR 0
//#define THASH_TLB_TC 1
@@ -152,7 +153,7 @@
typedef u64 *(GET_MFN_FN)(domid_t d, u64 gpfn, u64 pages);
typedef void *(REM_NOTIFIER_FN)(struct hash_cb *hcb, thash_data_t *entry);
typedef void (RECYCLE_FN)(struct hash_cb *hc, u64 para);
-typedef rr_t (GET_RR_FN)(struct vcpu *vcpu, u64 reg);
+typedef ia64_rr (GET_RR_FN)(struct vcpu *vcpu, u64 reg);
typedef thash_data_t *(FIND_OVERLAP_FN)(struct thash_cb *hcb,
u64 va, u64 ps, int rid, char cl, search_section_t s_sect);
typedef thash_data_t *(FIND_NEXT_OVL_FN)(struct thash_cb *hcb);
@@ -329,7 +330,7 @@
extern u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps);
extern void purge_machine_tc_by_domid(domid_t domid);
extern void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb);
-extern rr_t vmmu_get_rr(struct vcpu *vcpu, u64 va);
+extern ia64_rr vmmu_get_rr(struct vcpu *vcpu, u64 va);
extern thash_cb_t *init_domain_tlb(struct vcpu *d);
#define VTLB_DEBUG
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/vmx.h
--- a/xen/include/asm-ia64/vmx.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/vmx.h Fri Aug 26 20:47:16 2005
@@ -32,10 +32,12 @@
extern void vmx_init_double_mapping_stub(void);
extern void vmx_save_state(struct vcpu *v);
extern void vmx_load_state(struct vcpu *v);
+extern void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c);
+#ifdef XEN_DBL_MAPPING
extern vmx_insert_double_mapping(u64,u64,u64,u64,u64);
extern void vmx_purge_double_mapping(u64, u64, u64);
extern void vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7);
-
+#endif
extern void vmx_wait_io(void);
extern void vmx_io_assist(struct vcpu *v);
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/vmx_vcpu.h Fri Aug 26 20:47:16 2005
@@ -308,7 +308,9 @@
vtm=&(vcpu->arch.arch_vmx.vtm);
VPD_CR(vcpu,itm)=val;
+#ifdef CONFIG_VTI
vtm_interruption_update(vcpu, vtm);
+#endif
return IA64_NO_FAULT;
}
static inline
@@ -414,7 +416,9 @@
IA64FAULT
vmx_vcpu_set_eoi(VCPU *vcpu, u64 val)
{
+#ifdef CONFIG_VTI
guest_write_eoi(vcpu);
+#endif
return IA64_NO_FAULT;
}
@@ -424,7 +428,9 @@
{
VPD_CR(vcpu,itv)=val;
+#ifdef CONFIG_VTI
vtm_set_itv(vcpu);
+#endif
return IA64_NO_FAULT;
}
static inline
@@ -465,13 +471,17 @@
static inline
IA64FAULT vmx_vcpu_set_itc(VCPU *vcpu, UINT64 val)
{
+#ifdef CONFIG_VTI
vtm_set_itc(vcpu, val);
+#endif
return IA64_NO_FAULT;
}
static inline
IA64FAULT vmx_vcpu_get_itc(VCPU *vcpu,UINT64 *val)
{
+#ifdef CONFIG_VTI
*val = vtm_get_itc(vcpu);
+#endif
return IA64_NO_FAULT;
}
static inline
@@ -584,15 +594,22 @@
return (IA64_NO_FAULT);
}
+/* Another hash performance algorithm */
#define redistribute_rid(rid) (((rid) & ~0xffff) | (((rid) << 8) & 0xff00) | (((rid) >> 8) & 0xff))
static inline unsigned long
-vmx_vrrtomrr(VCPU *vcpu,unsigned long val)
+vmx_vrrtomrr(VCPU *v, unsigned long val)
{
ia64_rr rr;
u64 rid;
+
rr.rrval=val;
+ rr.rid = vmMangleRID(v->arch.starting_rid + rr.rid);
+/* Disable this rid allocation algorithm for now */
+#if 0
rid=(((u64)vcpu->domain->domain_id)<<DOMAIN_RID_SHIFT) + rr.rid;
rr.rid = redistribute_rid(rid);
+#endif
+
rr.ve=1;
return rr.rrval;
}
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/vmx_vpd.h
--- a/xen/include/asm-ia64/vmx_vpd.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/vmx_vpd.h Fri Aug 26 20:47:16 2005
@@ -61,12 +61,6 @@
unsigned long lrr1;
unsigned long rsv6[46];
} cr_t;
-
-void vmx_enter_scheduler(void);
-
-//FIXME: Map for LID to vcpu, Eddie
-#define MAX_NUM_LPS (1UL<<16)
-extern struct vcpu *lid_edt[MAX_NUM_LPS];
struct arch_vmx_struct {
// struct virutal_platform_def vmx_platform;
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-ia64/xenprocessor.h
--- a/xen/include/asm-ia64/xenprocessor.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-ia64/xenprocessor.h Fri Aug 26 20:47:16 2005
@@ -50,16 +50,11 @@
__u64 ri : 2;
__u64 ed : 1;
__u64 bn : 1;
-#ifdef CONFIG_VTI
__u64 ia : 1;
__u64 vm : 1;
__u64 reserved5 : 17;
-#else // CONFIG_VTI
- __u64 reserved4 : 19;
-#endif // CONFIG_VTI
};
-#ifdef CONFIG_VTI
/* vmx like above but expressed as bitfields for more efficient access: */
typedef union{
__u64 val;
@@ -218,6 +213,4 @@
ret; \
})
-#endif // CONFIG_VTI
-
#endif // _ASM_IA64_XENPROCESSOR_H
diff -r de3576a1c62c -r dfaf788ab18c xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/asm-x86/mm.h Fri Aug 26 20:47:16 2005
@@ -370,6 +370,8 @@
void propagate_page_fault(unsigned long addr, u16 error_code);
+extern int __sync_lazy_execstate(void);
+
/*
* Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
* hold a reference to the page.
diff -r de3576a1c62c -r dfaf788ab18c xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Thu Aug 25 20:52:38 2005
+++ b/xen/include/xen/sched.h Fri Aug 26 20:47:16 2005
@@ -245,18 +245,16 @@
long sched_ctl(struct sched_ctl_cmd *);
long sched_adjdom(struct sched_adjdom_cmd *);
int sched_id();
-void domain_wake(struct vcpu *d);
-void domain_sleep_nosync(struct vcpu *d);
-void domain_sleep_sync(struct vcpu *d);
-
-/*
- * Force loading of currently-executing domain state on the specified set
- * of CPUs. This is used to counteract lazy state switching where required.
- */
-extern void sync_lazy_execstate_cpu(unsigned int cpu);
-extern void sync_lazy_execstate_mask(cpumask_t mask);
-extern void sync_lazy_execstate_all(void);
-extern int __sync_lazy_execstate(void);
+void vcpu_wake(struct vcpu *d);
+void vcpu_sleep_nosync(struct vcpu *d);
+void vcpu_sleep_sync(struct vcpu *d);
+
+/*
+ * Force synchronisation of given VCPU's state. If it is currently descheduled,
+ * this call will ensure that all its state is committed to memory and that
+ * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
+ */
+extern void sync_vcpu_execstate(struct vcpu *v);
/*
* Called by the scheduler to switch to another VCPU. On entry, although
@@ -268,7 +266,7 @@
* The callee must ensure that the local CPU is no longer running in @prev's
* context, and that the context is saved to memory, before returning.
* Alternatively, if implementing lazy context switching, it suffices to ensure
- * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ * that invoking sync_vcpu_execstate() will switch and commit @prev's state.
*/
extern void context_switch(
struct vcpu *prev,
@@ -287,7 +285,8 @@
extern void continue_running(
struct vcpu *same);
-int idle_cpu(int cpu); /* Is CPU 'cpu' idle right now? */
+/* Is CPU 'cpu' idle right now? */
+int idle_cpu(int cpu);
void startup_cpu_idle_loop(void);
@@ -410,7 +409,7 @@
static inline void vcpu_unblock(struct vcpu *v)
{
if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
- domain_wake(v);
+ vcpu_wake(v);
}
#define IS_PRIV(_d) \
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/include/xmalloc.h
--- /dev/null Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/include/xmalloc.h Fri Aug 26 20:47:16 2005
@@ -0,0 +1,23 @@
+#ifndef __XMALLOC_H__
+#define __XMALLOC_H__
+
+/* Allocate space for typed object. */
+#define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type)))
+
+/* Allocate space for array of typed objects. */
+#define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+
+/* Free any of the above. */
+extern void xfree(const void *);
+
+/* Underlying functions */
+extern void *_xmalloc(size_t size, size_t align);
+static inline void *_xmalloc_array(size_t size, size_t align, size_t num)
+{
+ /* Check for overflow. */
+ if (size && num > UINT_MAX / size)
+ return NULL;
+ return _xmalloc(size * num, align);
+}
+
+#endif /* __XMALLOC_H__ */
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/lib/xmalloc.c
--- /dev/null Thu Aug 25 20:52:38 2005
+++ b/extras/mini-os/lib/xmalloc.c Fri Aug 26 20:47:16 2005
@@ -0,0 +1,219 @@
+/*
+ ****************************************************************************
+ * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: xmalloc.c
+ * Author: Grzegorz Milos (gm281@xxxxxxxxx)
+ * Changes:
+ *
+ * Date: Aug 2005
+ *
+ * Environment: Xen Minimal OS
+ * Description: simple memory allocator
+ *
+ ****************************************************************************
+ * Simple allocator for Mini-os. If larger than a page, simply use the
+ * page-order allocator.
+ *
+ * Copy of the allocator for Xen by Rusty Russell:
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <os.h>
+#include <mm.h>
+#include <types.h>
+#include <lib.h>
+#include <list.h>
+
+static LIST_HEAD(freelist);
+/* static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED; */
+
+struct xmalloc_hdr
+{
+ /* Total including this hdr. */
+ size_t size;
+ struct list_head freelist;
+} __cacheline_aligned;
+
+static void maybe_split(struct xmalloc_hdr *hdr, size_t size, size_t block)
+{
+ struct xmalloc_hdr *extra;
+ size_t leftover = block - size;
+
+ /* If enough is left to make a block, put it on free list. */
+ if ( leftover >= (2 * sizeof(struct xmalloc_hdr)) )
+ {
+ extra = (struct xmalloc_hdr *)((unsigned long)hdr + size);
+ extra->size = leftover;
+ list_add(&extra->freelist, &freelist);
+ }
+ else
+ {
+ size = block;
+ }
+
+ hdr->size = size;
+ /* Debugging aid. */
+ hdr->freelist.next = hdr->freelist.prev = NULL;
+}
+
+static void *xmalloc_new_page(size_t size)
+{
+ struct xmalloc_hdr *hdr;
+ /* unsigned long flags; */
+
+ hdr = (struct xmalloc_hdr *)alloc_page();
+ if ( hdr == NULL )
+ return NULL;
+
+ /* spin_lock_irqsave(&freelist_lock, flags); */
+ maybe_split(hdr, size, PAGE_SIZE);
+ /* spin_unlock_irqrestore(&freelist_lock, flags); */
+
+ return hdr+1;
+}
+
+/* Big object? Just use the page allocator. */
+static void *xmalloc_whole_pages(size_t size)
+{
+ struct xmalloc_hdr *hdr;
+ unsigned int pageorder = get_order(size);
+
+ hdr = (struct xmalloc_hdr *)alloc_pages(pageorder);
+ if ( hdr == NULL )
+ return NULL;
+
+ hdr->size = (1 << (pageorder + PAGE_SHIFT));
+ /* Debugging aid. */
+ hdr->freelist.next = hdr->freelist.prev = NULL;
+
+ return hdr+1;
+}
+
+/* Return size, increased to alignment with align. */
+static inline size_t align_up(size_t size, size_t align)
+{
+ return (size + align - 1) & ~(align - 1);
+}
+
+void *_xmalloc(size_t size, size_t align)
+{
+ struct xmalloc_hdr *i;
+ /* unsigned long flags; */
+
+ /* Add room for header, pad to align next header. */
+ size += sizeof(struct xmalloc_hdr);
+ size = align_up(size, __alignof__(struct xmalloc_hdr));
+
+ /* For big allocs, give them whole pages. */
+ if ( size >= PAGE_SIZE )
+ return xmalloc_whole_pages(size);
+
+ /* Search free list. */
+ /* spin_lock_irqsave(&freelist_lock, flags); */
+ list_for_each_entry( i, &freelist, freelist )
+ {
+ if ( i->size < size )
+ continue;
+ list_del(&i->freelist);
+ maybe_split(i, size, i->size);
+ /* spin_unlock_irqrestore(&freelist_lock, flags); */
+ return i+1;
+ }
+ /* spin_unlock_irqrestore(&freelist_lock, flags); */
+
+ /* Alloc a new page and return from that. */
+ return xmalloc_new_page(size);
+}
+
+void xfree(const void *p)
+{
+ /* unsigned long flags; */
+ struct xmalloc_hdr *i, *tmp, *hdr;
+
+ if ( p == NULL )
+ return;
+
+ hdr = (struct xmalloc_hdr *)p - 1;
+
+ /* We know hdr will be on same page. */
+ if(((long)p & PAGE_MASK) != ((long)hdr & PAGE_MASK))
+ {
+ printk("Header should be on the same page\n");
+ *(int*)0=0;
+ }
+
+ /* Not previously freed. */
+ if(hdr->freelist.next || hdr->freelist.prev)
+ {
+ printk("Should not be previously freed\n");
+ *(int*)0=0;
+ }
+
+ /* Big allocs free directly. */
+ if ( hdr->size >= PAGE_SIZE )
+ {
+ free_pages(hdr, get_order(hdr->size));
+ return;
+ }
+
+ /* Merge with other free block, or put in list. */
+ /* spin_lock_irqsave(&freelist_lock, flags); */
+ list_for_each_entry_safe( i, tmp, &freelist, freelist )
+ {
+ unsigned long _i = (unsigned long)i;
+ unsigned long _hdr = (unsigned long)hdr;
+
+ /* Do not merge across page boundaries. */
+ if ( ((_i ^ _hdr) & PAGE_MASK) != 0 )
+ continue;
+
+ /* We follow this block? Swallow it. */
+ if ( (_i + i->size) == _hdr )
+ {
+ list_del(&i->freelist);
+ i->size += hdr->size;
+ hdr = i;
+ }
+
+ /* We precede this block? Swallow it. */
+ if ( (_hdr + hdr->size) == _i )
+ {
+ list_del(&i->freelist);
+ hdr->size += i->size;
+ }
+ }
+
+ /* Did we merge an entire page? */
+ if ( hdr->size == PAGE_SIZE )
+ {
+ if((((unsigned long)hdr) & (PAGE_SIZE-1)) != 0)
+ {
+ printk("Bug\n");
+ *(int*)0=0;
+ }
+ free_pages(hdr, 0);
+ }
+ else
+ {
+ list_add(&hdr->freelist, &freelist);
+ }
+
+ /* spin_unlock_irqrestore(&freelist_lock, flags); */
+}
+
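For reference, here is a minimal usage sketch of the allocator interface added above. It is not part of the patch; the xmalloc_bytes() helper macro and the local prototypes are assumptions for illustration, since the header that will export _xmalloc()/xfree() is not shown in this hunk.

#include <os.h>
#include <mm.h>
#include <types.h>
#include <lib.h>

/* Prototypes matching the definitions above; the real header may differ. */
void *_xmalloc(size_t size, size_t align);
void xfree(const void *p);

/* Illustrative helper, not an existing mini-os API. */
#define xmalloc_bytes(n) _xmalloc((n), __alignof__(unsigned long))

static void xmalloc_demo(void)
{
    char *small = xmalloc_bytes(100);           /* padded, served from the free list */
    void *big   = xmalloc_bytes(2 * PAGE_SIZE); /* large request, gets whole pages   */

    if ( small == NULL || big == NULL )
        printk("xmalloc failed\n");

    xfree(small);  /* merged back into the per-page free list */
    xfree(big);    /* returned directly to the page allocator */
}

Small requests pay sizeof(struct xmalloc_hdr) of overhead and are rounded up to the header's alignment, which is why a request close to PAGE_SIZE may silently fall through to the whole-page path.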
diff -r de3576a1c62c -r dfaf788ab18c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h
--- /dev/null Thu Aug 25 20:52:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h Fri Aug 26 20:47:16 2005
@@ -0,0 +1,33 @@
+#ifndef __x86_64_MMU_H
+#define __x86_64_MMU_H
+
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+
+/*
+ * The x86_64 doesn't have an mmu context, but
+ * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
+typedef struct {
+ void *ldt;
+ rwlock_t ldtlock;
+ int size;
+ struct semaphore sem;
+#ifdef CONFIG_XEN
+ unsigned pinned:1;
+ struct list_head unpinned;
+#endif
+} mm_context_t;
+
+#ifdef CONFIG_XEN
+extern struct list_head mm_unpinned;
+extern spinlock_t mm_unpinned_lock;
+
+/* mm/memory.c:exit_mmap hook */
+extern void _arch_exit_mmap(struct mm_struct *mm);
+#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+#endif
+
+#endif
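The arch_exit_mmap() hook above is meant to be invoked from the generic exit_mmap() path. The snippet below is only a sketch of that call-site shape under CONFIG_XEN; the actual mm/memory.c change is not part of this hunk, and the function name used here is hypothetical.

#include <asm/mmu.h>

struct mm_struct;

/* Hypothetical illustration of the hook's intended call site. */
static void exit_mmap_sketch(struct mm_struct *mm)
{
#ifdef arch_exit_mmap
    /* Gives Xen a chance to unpin the address space's page tables
     * before generic teardown proceeds. */
    arch_exit_mmap(mm);
#endif
    /* ... generic unmapping and page-table teardown follows ... */
}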
diff -r de3576a1c62c -r dfaf788ab18c tools/xenstore/testsuite/vg-suppressions
--- /dev/null Thu Aug 25 20:52:38 2005
+++ b/tools/xenstore/testsuite/vg-suppressions Fri Aug 26 20:47:16 2005
@@ -0,0 +1,9 @@
+{
+ Glibc goes boom from _start (Debian glibc 2.3.5-3)
+ Memcheck:Cond
+ obj:/lib/ld-2.3.5.so
+ obj:/lib/ld-2.3.5.so
+ obj:/lib/ld-2.3.5.so
+ obj:/lib/ld-2.3.5.so
+ obj:/lib/ld-2.3.5.so
+}
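Usage note (not part of the patch): Valgrind picks this file up via its --suppressions option, for example "valgrind --suppressions=tools/xenstore/testsuite/vg-suppressions ./xs_test" when running a testsuite binary by hand; the binary name here is illustrative. The entry names the error kind (Memcheck:Cond) and the loader frames to match, so the known glibc 2.3.5 ld.so warning is silenced without hiding other reports.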
diff -r de3576a1c62c -r dfaf788ab18c extras/mini-os/lib/malloc.c
--- a/extras/mini-os/lib/malloc.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,5697 +0,0 @@
-/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
- ****************************************************************************
- * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
- ****************************************************************************
- *
- * File: malloc.c
- * Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx)
- * Changes:
- *
- * Date: Aug 2003
- *
- * Environment: Xen Minimal OS
- * Description: Library functions, malloc et al.
- *
- ****************************************************************************
- * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
- ****************************************************************************
- */
-
-#include <os.h>
-#include <mm.h>
-#include <types.h>
-#include <lib.h>
-
-/* standard compile option */
-#define HAVE_MEMCOPY 1
-#define USE_MEMCPY 1
-#undef HAVE_MMAP
-#undef MMAP_CLEARS
-#undef HAVE_MREMAP
-#define malloc_getpagesize PAGE_SIZE
-#undef HAVE_USR_INCLUDE_MALLOC_H
-#define LACKS_UNISTD_H 1
-#define LACKS_SYS_PARAM_H 1
-#define LACKS_SYS_MMAN_H 1
-#define LACKS_FCNTL_H 1
-
-
-/* page allocator interface */
-#define MORECORE more_core
-#define MORECORE_CONTIGUOUS 0
-#define MORECORE_FAILURE 0
-#define MORECORE_CANNOT_TRIM 1
-
-static void *more_core(size_t n)
-{
- static void *last;
- unsigned long order, num_pages;
- void *ret;
-
- if (n == 0)
- return last;
-
- n = PFN_UP(n);
- for ( order = 0; n > 1; order++ )
- n >>= 1;
- ret = (void *)alloc_pages(order);
-
- /* work out pointer to end of chunk */
- if ( ret )
- {
- num_pages = 1 << order;
- last = (char *)ret + (num_pages * PAGE_SIZE);
- }
-
- return ret;
-}
-
-/* other options commented out below */
-#define __STD_C 1
-#define Void_t void
-#define assert(x) ((void)0)
-
-#define CHUNK_SIZE_T unsigned long
-#define PTR_UINT unsigned long
-#define INTERNAL_SIZE_T size_t
-#define SIZE_SZ (sizeof(INTERNAL_SIZE_T))
-#define MALLOC_ALIGNMENT (2 * SIZE_SZ)
-#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
-#define TRIM_FASTBINS 0
-
-#define M_MXFAST 1
-#define DEFAULT_MXFAST 64
-#define M_TRIM_THRESHOLD -1
-#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
-#define M_TOP_PAD -2
-#define DEFAULT_TOP_PAD (0)
-#define M_MMAP_THRESHOLD -3
-#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
-#define M_MMAP_MAX -4
-#define DEFAULT_MMAP_MAX (0)
-#define MALLOC_FAILURE_ACTION printf("malloc failure\n")
-
-#define cALLOc public_cALLOc
-#define fREe public_fREe
-#define cFREe public_cFREe
-#define mALLOc public_mALLOc
-#define mEMALIGn public_mEMALIGn
-#define rEALLOc public_rEALLOc
-#define vALLOc public_vALLOc
-#define pVALLOc public_pVALLOc
-#define mALLINFo public_mALLINFo
-#define mALLOPt public_mALLOPt
-#define mTRIm public_mTRIm
-#define mSTATs public_mSTATs
-#define mUSABLe public_mUSABLe
-#define iCALLOc public_iCALLOc
-#define iCOMALLOc public_iCOMALLOc
-
-#define public_cALLOc calloc
-#define public_fREe free
-#define public_cFREe cfree
-#define public_mALLOc malloc
-#define public_mEMALIGn memalign
-#define public_rEALLOc realloc
-#define public_vALLOc valloc
-#define public_pVALLOc pvalloc
-#define public_mALLINFo mallinfo
-#define public_mALLOPt mallopt
-#define public_mTRIm malloc_trim
-#define public_mSTATs malloc_stats
-#define public_mUSABLe malloc_usable_size
-#define public_iCALLOc independent_calloc
-#define public_iCOMALLOc independent_comalloc
-
-
-/*
- This is a version (aka dlmalloc) of malloc/free/realloc written by
- Doug Lea and released to the public domain. Use, modify, and
- redistribute this code without permission or acknowledgement in any
- way you wish. Send questions, comments, complaints, performance
- data, etc to dl@xxxxxxxxxxxxx
-
-* VERSION 2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
-
- Note: There may be an updated version of this malloc obtainable at
- ftp://gee.cs.oswego.edu/pub/misc/malloc.c
- Check before installing!
-
-* Quickstart
-
- This library is all in one file to simplify the most common usage:
- ftp it, compile it (-O), and link it into another program. All
- of the compile-time options default to reasonable values for use on
- most unix platforms. Compile -DWIN32 for reasonable defaults on windows.
- You might later want to step through various compile-time and dynamic
- tuning options.
-
- For convenience, an include file for code using this malloc is at:
- ftp://gee.cs.oswego.edu/pub/misc/malloc-2.7.1.h
- You don't really need this .h file unless you call functions not
- defined in your system include files. The .h file contains only the
- excerpts from this file needed for using this malloc on ANSI C/C++
- systems, so long as you haven't changed compile-time options about
- naming and tuning parameters. If you do, then you can create your
- own malloc.h that does include all settings by cutting at the point
- indicated below.
-
-* Why use this malloc?
-
- This is not the fastest, most space-conserving, most portable, or
- most tunable malloc ever written. However it is among the fastest
- while also being among the most space-conserving, portable and tunable.
- Consistent balance across these factors results in a good general-purpose
- allocator for malloc-intensive programs.
-
- The main properties of the algorithms are:
- * For large (>= 512 bytes) requests, it is a pure best-fit allocator,
- with ties normally decided via FIFO (i.e. least recently used).
- * For small (<= 64 bytes by default) requests, it is a caching
- allocator, that maintains pools of quickly recycled chunks.
- * In between, and for combinations of large and small requests, it does
- the best it can trying to meet both goals at once.
- * For very large requests (>= 128KB by default), it relies on system
- memory mapping facilities, if supported.
-
- For a longer but slightly out of date high-level description, see
- http://gee.cs.oswego.edu/dl/html/malloc.html
-
- You may already by default be using a C library containing a malloc
- that is based on some version of this malloc (for example in
- linux). You might still want to use the one in this file in order to
- customize settings or to avoid overheads associated with library
- versions.
-
-* Contents, described in more detail in "description of public routines" below.
-
- Standard (ANSI/SVID/...) functions:
- malloc(size_t n);
- calloc(size_t n_elements, size_t element_size);
- free(Void_t* p);
- realloc(Void_t* p, size_t n);
- memalign(size_t alignment, size_t n);
- valloc(size_t n);
- mallinfo()
- mallopt(int parameter_number, int parameter_value)
-
- Additional functions:
- independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]);
- independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
- pvalloc(size_t n);
- cfree(Void_t* p);
- malloc_trim(size_t pad);
- malloc_usable_size(Void_t* p);
- malloc_stats();
-
-* Vital statistics:
-
- Supported pointer representation: 4 or 8 bytes
- Supported size_t representation: 4 or 8 bytes
- Note that size_t is allowed to be 4 bytes even if pointers are 8.
- You can adjust this by defining INTERNAL_SIZE_T
-
- Alignment: 2 * sizeof(size_t) (default)
- (i.e., 8 byte alignment with 4byte size_t). This suffices for
- nearly all current machines and C compilers. However, you can
- define MALLOC_ALIGNMENT to be wider than this if necessary.
-
- Minimum overhead per allocated chunk: 4 or 8 bytes
- Each malloced chunk has a hidden word of overhead holding size
- and status information.
-
- Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead)
- 8-byte ptrs: 24/32 bytes (including, 4/8 overhead)
-
- When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte
- ptrs but 4 byte size) or 24 (for 8/8) additional bytes are
- needed; 4 (8) for a trailing size field and 8 (16) bytes for
- free list pointers. Thus, the minimum allocatable size is
- 16/24/32 bytes.
-
- Even a request for zero bytes (i.e., malloc(0)) returns a
- pointer to something of the minimum allocatable size.
-
- The maximum overhead wastage (i.e., number of extra bytes
- allocated than were requested in malloc) is less than or equal
- to the minimum size, except for requests >= mmap_threshold that
- are serviced via mmap(), where the worst case wastage is 2 *
- sizeof(size_t) bytes plus the remainder from a system page (the
- minimal mmap unit); typically 4096 or 8192 bytes.
-
- Maximum allocated size: 4-byte size_t: 2^32 minus about two pages
- 8-byte size_t: 2^64 minus about two pages
-
- It is assumed that (possibly signed) size_t values suffice to
- represent chunk sizes. `Possibly signed' is due to the fact
- that `size_t' may be defined on a system as either a signed or
- an unsigned type. The ISO C standard says that it must be
- unsigned, but a few systems are known not to adhere to this.
- Additionally, even when size_t is unsigned, sbrk (which is by
- default used to obtain memory from system) accepts signed
- arguments, and may not be able to handle size_t-wide arguments
- with negative sign bit. Generally, values that would
- appear as negative after accounting for overhead and alignment
- are supported only via mmap(), which does not have this
- limitation.
-
- Requests for sizes outside the allowed range will perform an optional
- failure action and then return null. (Requests may also
- fail because a system is out of memory.)
-
- Thread-safety: NOT thread-safe unless USE_MALLOC_LOCK defined
-
- When USE_MALLOC_LOCK is defined, wrappers are created to
- surround every public call with either a pthread mutex or
- a win32 spinlock (depending on WIN32). This is not
- especially fast, and can be a major bottleneck.
- It is designed only to provide minimal protection
- in concurrent environments, and to provide a basis for
- extensions. If you are using malloc in a concurrent program,
- you would be far better off obtaining ptmalloc, which is
- derived from a version of this malloc, and is well-tuned for
- concurrent programs. (See http://www.malloc.de) Note that
- even when USE_MALLOC_LOCK is defined, you can guarantee
- full thread-safety only if no threads acquire memory through
- direct calls to MORECORE or other system-level allocators.
-
- Compliance: I believe it is compliant with the 1997 Single Unix Specification
- (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Synopsis of compile-time options:
-
- People have reported using previous versions of this malloc on all
- versions of Unix, sometimes by tweaking some of the defines
- below. It has been tested most extensively on Solaris and
- Linux. It is also reported to work on WIN32 platforms.
- People also report using it in stand-alone embedded systems.
-
- The implementation is in straight, hand-tuned ANSI C. It is not
- at all modular. (Sorry!) It uses a lot of macros. To be at all
- usable, this code should be compiled using an optimizing compiler
- (for example gcc -O3) that can simplify expressions and control
- paths. (FAQ: some macros import variables as arguments rather than
- declare locals because people reported that some debuggers
- otherwise get confused.)
-
- OPTION DEFAULT VALUE
-
- Compilation Environment options:
-
- __STD_C derived from C compiler defines
- WIN32 NOT defined
- HAVE_MEMCPY defined
- USE_MEMCPY 1 if HAVE_MEMCPY is defined
- HAVE_MMAP defined as 1
- MMAP_CLEARS 1
- HAVE_MREMAP 0 unless linux defined
- malloc_getpagesize derived from system #includes, or 4096 if not
- HAVE_USR_INCLUDE_MALLOC_H NOT defined
- LACKS_UNISTD_H NOT defined unless WIN32
- LACKS_SYS_PARAM_H NOT defined unless WIN32
- LACKS_SYS_MMAN_H NOT defined unless WIN32
- LACKS_FCNTL_H NOT defined
-
- Changing default word sizes:
-
- INTERNAL_SIZE_T size_t
- MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T)
- PTR_UINT unsigned long
- CHUNK_SIZE_T unsigned long
-
- Configuration and functionality options:
-
- USE_DL_PREFIX NOT defined
- USE_PUBLIC_MALLOC_WRAPPERS NOT defined
- USE_MALLOC_LOCK NOT defined
- DEBUG NOT defined
- REALLOC_ZERO_BYTES_FREES NOT defined
- MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op
- TRIM_FASTBINS 0
- FIRST_SORTED_BIN_SIZE 512
-
- Options for customizing MORECORE:
-
- MORECORE sbrk
- MORECORE_CONTIGUOUS 1
- MORECORE_CANNOT_TRIM NOT defined
- MMAP_AS_MORECORE_SIZE (1024 * 1024)
-
- Tuning options that are also dynamically changeable via mallopt:
-
- DEFAULT_MXFAST 64
- DEFAULT_TRIM_THRESHOLD 256 * 1024
- DEFAULT_TOP_PAD 0
- DEFAULT_MMAP_THRESHOLD 256 * 1024
- DEFAULT_MMAP_MAX 65536
-
- There are several other #defined constants and macros that you
- probably don't want to touch unless you are extending or adapting malloc.
-*/
-
-/* RN: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
-#if 0
-
-/*
- WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix.
-*/
-
-/* #define WIN32 */
-
-#ifdef WIN32
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/* Win32 doesn't supply or need the following headers */
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-
-/* Use the supplied emulation of sbrk */
-#define MORECORE sbrk
-#define MORECORE_CONTIGUOUS 1
-#define MORECORE_FAILURE ((void*)(-1))
-
-/* Use the supplied emulation of mmap and munmap */
-#define HAVE_MMAP 1
-#define MUNMAP_FAILURE (-1)
-#define MMAP_CLEARS 1
-
-/* These values don't really matter in windows mmap emulation */
-#define MAP_PRIVATE 1
-#define MAP_ANONYMOUS 2
-#define PROT_READ 1
-#define PROT_WRITE 2
-
-/* Emulation functions defined at the end of this file */
-
-/* If USE_MALLOC_LOCK, use supplied critical-section-based lock functions */
-#ifdef USE_MALLOC_LOCK
-static int slwait(int *sl);
-static int slrelease(int *sl);
-#endif
-
-static long getpagesize(void);
-static long getregionsize(void);
-static void *sbrk(long size);
-static void *mmap(void *ptr, long size, long prot, long type, long handle, long arg);
-static long munmap(void *ptr, long size);
-
-static void vminfo (unsigned long*free, unsigned long*reserved, unsigned long*committed);
-static int cpuinfo (int whole, unsigned long*kernel, unsigned long*user);
-
-#endif
-
-/*
- __STD_C should be nonzero if using ANSI-standard C compiler, a C++
- compiler, or a C compiler sufficiently close to ANSI to get away
- with it.
-*/
-
-#ifndef __STD_C
-#if defined(__STDC__) || defined(_cplusplus)
-#define __STD_C 1
-#else
-#define __STD_C 0
-#endif
-#endif /*__STD_C*/
-
-
-/*
- Void_t* is the pointer type that malloc should say it returns
-*/
-
-#ifndef Void_t
-#if (__STD_C || defined(WIN32))
-#define Void_t void
-#else
-#define Void_t char
-#endif
-#endif /*Void_t*/
-
-#if __STD_C
-#include <stddef.h> /* for size_t */
-#else
-#include <sys/types.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */
-
-/* #define LACKS_UNISTD_H */
-
-#ifndef LACKS_UNISTD_H
-#include <unistd.h>
-#endif
-
-/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */
-
-/* #define LACKS_SYS_PARAM_H */
-
-
-#include <stdio.h> /* needed for malloc_stats */
-#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */
-
-
-/*
- Debugging:
-
- Because freed chunks may be overwritten with bookkeeping fields, this
- malloc will often die when freed memory is overwritten by user
- programs. This can be very effective (albeit in an annoying way)
- in helping track down dangling pointers.
-
- If you compile with -DDEBUG, a number of assertion checks are
- enabled that will catch more memory errors. You probably won't be
- able to make much sense of the actual assertion errors, but they
- should help you locate incorrectly overwritten memory. The
- checking is fairly extensive, and will slow down execution
- noticeably. Calling malloc_stats or mallinfo with DEBUG set will
- attempt to check every non-mmapped allocated and free chunk in the
- course of computing the summaries. (By nature, mmapped regions
- cannot be checked very much automatically.)
-
- Setting DEBUG may also be helpful if you are trying to modify
- this code. The assertions in the check routines spell out in more
- detail the assumptions and invariants underlying the algorithms.
-
- Setting DEBUG does NOT provide an automated mechanism for checking
- that all accesses to malloced memory stay within their
- bounds. However, there are several add-ons and adaptations of this
- or other mallocs available that do this.
-*/
-
-#if DEBUG
-#include <assert.h>
-#else
-#define assert(x) ((void)0)
-#endif
-
-/*
- The unsigned integer type used for comparing any two chunk sizes.
- This should be at least as wide as size_t, but should not be signed.
-*/
-
-#ifndef CHUNK_SIZE_T
-#define CHUNK_SIZE_T unsigned long
-#endif
-
-/*
- The unsigned integer type used to hold addresses when they are
- manipulated as integers. Except that it is not defined on all
- systems, intptr_t would suffice.
-*/
-#ifndef PTR_UINT
-#define PTR_UINT unsigned long
-#endif
-
-
-/*
- INTERNAL_SIZE_T is the word-size used for internal bookkeeping
- of chunk sizes.
-
- The default version is the same as size_t.
-
- While not strictly necessary, it is best to define this as an
- unsigned type, even if size_t is a signed type. This may avoid some
- artificial size limitations on some systems.
-
- On a 64-bit machine, you may be able to reduce malloc overhead by
- defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
- expense of not being able to handle more than 2^32 of malloced
- space. If this limitation is acceptable, you are encouraged to set
- this unless you are on a platform requiring 16byte alignments. In
- this case the alignment requirements turn out to negate any
- potential advantages of decreasing size_t word size.
-
- Implementors: Beware of the possible combinations of:
- - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
- and might be the same width as int or as long
- - size_t might have different width and signedness as INTERNAL_SIZE_T
- - int and long might be 32 or 64 bits, and might be the same width
- To deal with this, most comparisons and difference computations
- among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
- aware of the fact that casting an unsigned int to a wider long does
- not sign-extend. (This also makes checking for negative numbers
- awkward.) Some of these casts result in harmless compiler warnings
- on some systems.
-*/
-
-#ifndef INTERNAL_SIZE_T
-#define INTERNAL_SIZE_T size_t
-#endif
-
-/* The corresponding word size */
-#define SIZE_SZ (sizeof(INTERNAL_SIZE_T))
-
-
-
-/*
- MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
- It must be a power of two at least 2 * SIZE_SZ, even on machines
- for which smaller alignments would suffice. It may be defined as
- larger than this though. Note however that code and data structures
- are optimized for the case of 8-byte alignment.
-*/
-
-
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT (2 * SIZE_SZ)
-#endif
-
-/* The corresponding bit mask value */
-#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
-
-
-
-/*
- REALLOC_ZERO_BYTES_FREES should be set if a call to
- realloc with zero bytes should be the same as a call to free.
- Some people think it should. Otherwise, since this malloc
- returns a unique pointer for malloc(0), so does realloc(p, 0).
-*/
-
-/* #define REALLOC_ZERO_BYTES_FREES */
-
-/*
- TRIM_FASTBINS controls whether free() of a very small chunk can
- immediately lead to trimming. Setting to true (1) can reduce memory
- footprint, but will almost always slow down programs that use a lot
- of small chunks.
-
- Define this only if you are willing to give up some speed to more
- aggressively reduce system-level memory footprint when releasing
- memory in programs that use many small chunks. You can get
- essentially the same effect by setting MXFAST to 0, but this can
- lead to even greater slowdowns in programs using many small chunks.
- TRIM_FASTBINS is an in-between compile-time option, that disables
- only those chunks bordering topmost memory from being placed in
- fastbins.
-*/
-
-#ifndef TRIM_FASTBINS
-#define TRIM_FASTBINS 0
-#endif
-
-
-/*
- USE_DL_PREFIX will prefix all public routines with the string 'dl'.
- This is necessary when you only want to use this malloc in one part
- of a program, using your regular system malloc elsewhere.
-*/
-
-/* #define USE_DL_PREFIX */
-
-
-/*
- USE_MALLOC_LOCK causes wrapper functions to surround each
- callable routine with pthread mutex lock/unlock.
-
- USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined
-*/
-
-
-/* #define USE_MALLOC_LOCK */
-
-
-/*
- If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is
- actually a wrapper function that first calls MALLOC_PREACTION, then
- calls the internal routine, and follows it with
- MALLOC_POSTACTION. This is needed for locking, but you can also use
- this, without USE_MALLOC_LOCK, for purposes of interception,
- instrumentation, etc. It is a sad fact that using wrappers often
- noticeably degrades performance of malloc-intensive programs.
-*/
-
-#ifdef USE_MALLOC_LOCK
-#define USE_PUBLIC_MALLOC_WRAPPERS
-#else
-/* #define USE_PUBLIC_MALLOC_WRAPPERS */
-#endif
-
-
-/*
- Two-phase name translation.
- All of the actual routines are given mangled names.
- When wrappers are used, they become the public callable versions.
- When DL_PREFIX is used, the callable names are prefixed.
-*/
-
-#ifndef USE_PUBLIC_MALLOC_WRAPPERS
-#define cALLOc public_cALLOc
-#define fREe public_fREe
-#define cFREe public_cFREe
-#define mALLOc public_mALLOc
-#define mEMALIGn public_mEMALIGn
-#define rEALLOc public_rEALLOc
-#define vALLOc public_vALLOc
-#define pVALLOc public_pVALLOc
-#define mALLINFo public_mALLINFo
-#define mALLOPt public_mALLOPt
-#define mTRIm public_mTRIm
-#define mSTATs public_mSTATs
-#define mUSABLe public_mUSABLe
-#define iCALLOc public_iCALLOc
-#define iCOMALLOc public_iCOMALLOc
-#endif
-
-#ifdef USE_DL_PREFIX
-#define public_cALLOc dlcalloc
-#define public_fREe dlfree
-#define public_cFREe dlcfree
-#define public_mALLOc dlmalloc
-#define public_mEMALIGn dlmemalign
-#define public_rEALLOc dlrealloc
-#define public_vALLOc dlvalloc
-#define public_pVALLOc dlpvalloc
-#define public_mALLINFo dlmallinfo
-#define public_mALLOPt dlmallopt
-#define public_mTRIm dlmalloc_trim
-#define public_mSTATs dlmalloc_stats
-#define public_mUSABLe dlmalloc_usable_size
-#define public_iCALLOc dlindependent_calloc
-#define public_iCOMALLOc dlindependent_comalloc
-#else /* USE_DL_PREFIX */
-#define public_cALLOc calloc
-#define public_fREe free
-#define public_cFREe cfree
-#define public_mALLOc malloc
-#define public_mEMALIGn memalign
-#define public_rEALLOc realloc
-#define public_vALLOc valloc
-#define public_pVALLOc pvalloc
-#define public_mALLINFo mallinfo
-#define public_mALLOPt mallopt
-#define public_mTRIm malloc_trim
-#define public_mSTATs malloc_stats
-#define public_mUSABLe malloc_usable_size
-#define public_iCALLOc independent_calloc
-#define public_iCOMALLOc independent_comalloc
-#endif /* USE_DL_PREFIX */
-
-
-/*
- HAVE_MEMCPY should be defined if you are not otherwise using
- ANSI STD C, but still have memcpy and memset in your C library
- and want to use them in calloc and realloc. Otherwise simple
- macro versions are defined below.
-
- USE_MEMCPY should be defined as 1 if you actually want to
- have memset and memcpy called. People report that the macro
- versions are faster than libc versions on some systems.
-
- Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks
- (of <= 36 bytes) are manually unrolled in realloc and calloc.
-*/
-
-#define HAVE_MEMCPY
-
-#ifndef USE_MEMCPY
-#ifdef HAVE_MEMCPY
-#define USE_MEMCPY 1
-#else
-#define USE_MEMCPY 0
-#endif
-#endif
-
-
-#if (__STD_C || defined(HAVE_MEMCPY))
-
-#ifdef WIN32
-/* On Win32 memset and memcpy are already declared in windows.h */
-#else
-#if __STD_C
-void* memset(void*, int, size_t);
-void* memcpy(void*, const void*, size_t);
-#else
-Void_t* memset();
-Void_t* memcpy();
-#endif
-#endif
-#endif
-
-/*
- MALLOC_FAILURE_ACTION is the action to take before "return 0" when
- malloc fails to be able to return memory, either because memory is
- exhausted or because of illegal arguments.
-
- By default, sets errno if running on STD_C platform, else does nothing.
-*/
-
-#ifndef MALLOC_FAILURE_ACTION
-#if __STD_C
-#define MALLOC_FAILURE_ACTION \
- errno = ENOMEM;
-
-#else
-#define MALLOC_FAILURE_ACTION
-#endif
-#endif
-
-/*
- MORECORE-related declarations. By default, rely on sbrk
-*/
-
-
-#ifdef LACKS_UNISTD_H
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
-#if __STD_C
-extern Void_t* sbrk(ptrdiff_t);
-#else
-extern Void_t* sbrk();
-#endif
-#endif
-#endif
-
-/*
- MORECORE is the name of the routine to call to obtain more memory
- from the system. See below for general guidance on writing
- alternative MORECORE functions, as well as a version for WIN32 and a
- sample version for pre-OSX macos.
-*/
-
-#ifndef MORECORE
-#define MORECORE sbrk
-#endif
-
-/*
- MORECORE_FAILURE is the value returned upon failure of MORECORE
- as well as mmap. Since it cannot be an otherwise valid memory address,
- and must reflect values of standard sys calls, you probably ought not
- try to redefine it.
-*/
-
-#ifndef MORECORE_FAILURE
-#define MORECORE_FAILURE (-1)
-#endif
-
-/*
- If MORECORE_CONTIGUOUS is true, take advantage of fact that
- consecutive calls to MORECORE with positive arguments always return
- contiguous increasing addresses. This is true of unix sbrk. Even
- if not defined, when regions happen to be contiguous, malloc will
- permit allocations spanning regions obtained from different
- calls. But defining this when applicable enables some stronger
- consistency checks and space efficiencies.
-*/
-
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif
-
-/*
- Define MORECORE_CANNOT_TRIM if your version of MORECORE
- cannot release space back to the system when given negative
- arguments. This is generally necessary only if you are using
- a hand-crafted MORECORE function that cannot handle negative arguments.
-*/
-
-/* #define MORECORE_CANNOT_TRIM */
-
-
-/*
- Define HAVE_MMAP as true to optionally make malloc() use mmap() to
- allocate very large blocks. These will be returned to the
- operating system immediately after a free(). Also, if mmap
- is available, it is used as a backup strategy in cases where
- MORECORE fails to provide space from system.
-
- This malloc is best tuned to work with mmap for large requests.
- If you do not have mmap, operations involving very large chunks (1MB
- or so) may be slower than you'd like.
-*/
-
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif
-
-#if HAVE_MMAP
-/*
- Standard unix mmap using /dev/zero clears memory so calloc doesn't
- need to.
-*/
-
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif
-
-#else /* no mmap */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 0
-#endif
-#endif
-
-
-/*
- MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
- sbrk fails, and mmap is used as a backup (which is done only if
- HAVE_MMAP). The value must be a multiple of page size. This
- backup strategy generally applies only when systems have "holes" in
- address space, so sbrk cannot perform contiguous expansion, but
- there is still space available on system. On systems for which
- this is known to be useful (i.e. most linux kernels), this occurs
- only when programs allocate huge amounts of memory. Between this,
- and the fact that mmap regions tend to be limited, the size should
- be large, to avoid too many mmap calls and thus avoid running out
- of kernel resources.
-*/
-
-#ifndef MMAP_AS_MORECORE_SIZE
-#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
-#endif
-
-/*
- Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
- large blocks. This is currently only possible on Linux with
- kernel versions newer than 1.3.77.
-*/
-
-#ifndef HAVE_MREMAP
-#ifdef linux
-#define HAVE_MREMAP 1
-#else
-#define HAVE_MREMAP 0
-#endif
-
-#endif /* HAVE_MMAP */
-
-
-/*
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. Note that this value is
- cached during initialization into a field of malloc_state. So even
- if malloc_getpagesize is a function, it is only called once.
-
- The following mechanics for getpagesize were adapted from bsd/gnu
- getpagesize.h. If none of the system-probes here apply, a value of
- 4096 is used, which should be OK: If they don't apply, then using
- the actual value probably doesn't impact performance.
-*/
-
-
-#ifndef malloc_getpagesize
-
-#ifndef LACKS_UNISTD_H
-# include <unistd.h>
-#endif
-
-# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-# ifndef _SC_PAGE_SIZE
-# define _SC_PAGE_SIZE _SC_PAGESIZE
-# endif
-# endif
-
-# ifdef _SC_PAGE_SIZE
-# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-# else
-# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
- extern size_t getpagesize();
-# define malloc_getpagesize getpagesize()
-# else
-# ifdef WIN32 /* use supplied emulation of getpagesize */
-# define malloc_getpagesize getpagesize()
-# else
-# ifndef LACKS_SYS_PARAM_H
-# include <sys/param.h>
-# endif
-# ifdef EXEC_PAGESIZE
-# define malloc_getpagesize EXEC_PAGESIZE
-# else
-# ifdef NBPG
-# ifndef CLSIZE
-# define malloc_getpagesize NBPG
-# else
-# define malloc_getpagesize (NBPG * CLSIZE)
-# endif
-# else
-# ifdef NBPC
-# define malloc_getpagesize NBPC
-# else
-# ifdef PAGESIZE
-# define malloc_getpagesize PAGESIZE
-# else /* just guess */
-# define malloc_getpagesize (4096)
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-#endif
-
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any SVID/XPG compliant system that has
- a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
- install such a thing yourself, cut out the preliminary declarations
- as described above and below and save them in a malloc.h file. But
- there's no compelling reason to bother to do this.)
-
- The main declaration needed is the mallinfo struct that is returned
- (by-copy) by mallinfo(). The SVID/XPG mallinfo struct contains a
- bunch of fields that are not even meaningful in this version of
- malloc. These fields are instead filled by mallinfo() with
- other numbers that might be of interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else an SVID2/XPG2 compliant
- version is declared below. These must be precisely the same for
- mallinfo() to work. The original SVID version of this struct,
- defined on most systems with mallinfo, declares all fields as
- ints. But some others define as unsigned long. If your system
- defines the fields using a type of different width than listed here,
- you must #include your system version and #define
- HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else
-
-/* SVID2/XPG mallinfo structure */
-
-struct mallinfo {
- int arena; /* non-mmapped space allocated from system */
- int ordblks; /* number of free chunks */
- int smblks; /* number of fastbin blocks */
- int hblks; /* number of mmapped regions */
- int hblkhd; /* space in mmapped regions */
- int usmblks; /* maximum total allocated space */
- int fsmblks; /* space available in freed fastbin blocks */
- int uordblks; /* total allocated space */
- int fordblks; /* total free space */
- int keepcost; /* top-most, releasable (via malloc_trim) space */
-};
-
-/*
- SVID/XPG defines four standard parameter numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports other
- options in mallopt described below.
-*/
-#endif
-
-
-/* ---------- description of public routines ------------ */
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or null
- if no space is available. Additionally, on failure, errno is
- set to ENOMEM on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk. (The minimum
- size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
- systems.) On most systems, size_t is an unsigned type, so calls
- with negative arguments are interpreted as requests for huge amounts
- of space, which will often fail. The maximum supported value of n
- differs across systems, but is in all cases less than the maximum
- representable value of a size_t.
-*/
-#if __STD_C
-Void_t* public_mALLOc(size_t);
-#else
-Void_t* public_mALLOc();
-#endif
-
-/*
- free(Void_t* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. It can have arbitrary (i.e., bad!)
- effects if p has already been freed.
-
- Unless disabled (using mallopt), freeing very large spaces will
- when possible, automatically trigger operations that give
- back unused memory to the system, thus reducing program footprint.
-*/
-#if __STD_C
-void public_fREe(Void_t*);
-#else
-void public_fREe();
-#endif
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-#if __STD_C
-Void_t* public_cALLOc(size_t, size_t);
-#else
-Void_t* public_cALLOc();
-#endif
-
-/*
- realloc(Void_t* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p when possible, otherwise it employs the
- equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- if n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. Unless the #define
- REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
- zero (re)allocates a minimum-sized chunk.
-
- Large chunks that were internally obtained via mmap will always
- be reallocated using malloc-copy-free sequences unless
- the system supports MREMAP (currently only linux).
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-#if __STD_C
-Void_t* public_rEALLOc(Void_t*, size_t);
-#else
-Void_t* public_rEALLOc();
-#endif
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-#if __STD_C
-Void_t* public_mEMALIGn(size_t, size_t);
-#else
-Void_t* public_mEMALIGn();
-#endif
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-#if __STD_C
-Void_t* public_vALLOc(size_t);
-#else
-Void_t* public_vALLOc();
-#endif
-
-
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. Only one of these (M_MXFAST) is used
- in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
- so setting them has no effect. But this malloc also supports four
- other options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol param # default allowed param values
- M_MXFAST 1 64 0-80 (0 disables fastbins)
- M_TRIM_THRESHOLD -1 256*1024 any (-1U disables trimming)
- M_TOP_PAD -2 0 any
- M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
- M_MMAP_MAX -4 65536 any (0 disables use of mmap)
-*/
-#if __STD_C
-int public_mALLOPt(int, int);
-#else
-int public_mALLOPt();
-#endif
-
-
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena: current total non-mmapped bytes allocated from system
- ordblks: the number of free chunks
- smblks: the number of fastbin blocks (i.e., small chunks that
- have been freed but not yet reused or consolidated)
- hblks: current number of mmapped regions
- hblkhd: total bytes held in mmapped regions
- usmblks: the maximum total allocated space. This will be greater
- than current total if trimming has occurred.
- fsmblks: total bytes held in fastbin blocks
- uordblks: current total allocated space (normal or mmapped)
- fordblks: total free space
- keepcost: the maximum number of bytes that could ideally be released
- back to system via malloc_trim. ("ideally" means that
- it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-#if __STD_C
-struct mallinfo public_mALLINFo(void);
-#else
-struct mallinfo public_mALLINFo();
-#endif
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
- struct Node** pool;
- int n = read_number_of_nodes_needed();
- if (n <= 0) return 0;
- pool = (struct Node**) independent_calloc(n, sizeof(struct Node), 0);
- if (pool == 0) die();
- // organize into a linked list...
- struct Node* first = pool[0];
- for (i = 0; i < n-1; ++i)
- pool[i]->next = pool[i+1];
- free(pool); // Can now free the array (or not, if it is needed later)
- return first;
- }
-*/
-#if __STD_C
-Void_t** public_iCALLOc(size_t, size_t, Void_t**);
-#else
-Void_t** public_iCALLOc();
-#endif
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... }
- struct Foot { ... }
-
- void send_message(char* msg) {
- int msglen = strlen(msg);
- size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
- void* chunks[3];
- if (independent_comalloc(3, sizes, chunks) == 0)
- die();
- struct Head* head = (struct Head*)(chunks[0]);
- char* body = (char*)(chunks[1]);
- struct Foot* foot = (struct Foot*)(chunks[2]);
- // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-*/
-#if __STD_C
-Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**);
-#else
-Void_t** public_iCOMALLOc();
-#endif
-
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-#if __STD_C
-Void_t* public_pVALLOc(size_t);
-#else
-Void_t* public_pVALLOc();
-#endif
-
-/*
- cfree(Void_t* p);
- Equivalent to free(p).
-
- cfree is needed/defined on some systems that pair it with calloc,
- for odd historical reasons (such as: cfree is used in example
- code in the first edition of K&R).
-*/
-#if __STD_C
-void public_cFREe(Void_t*);
-#else
-void public_cFREe();
-#endif
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative
- arguments to sbrk) if there is unused memory at the `high' end of
- the malloc pool. You can call this after freeing large blocks of
- memory to potentially reduce the system-level memory requirements
- of a program. However, it cannot guarantee to reduce memory. Under
- some allocation patterns, some large free blocks of memory will be
- locked between two used chunks, so they cannot be given back to
- the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero,
- only the minimum amount of memory to maintain internal data
- structures will be left (one page or less). Non-zero arguments
- can be supplied to maintain enough trailing space to service
- future expected allocations without having to re-obtain memory
- from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
- On systems that do not support "negative sbrks", it will always
- return 0.
-*/
-#if __STD_C
-int public_mTRIm(size_t);
-#else
-int public_mTRIm();
-#endif
-
-/*
- malloc_usable_size(Void_t* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
- p = malloc(n);
- assert(malloc_usable_size(p) >= 256);
-
-*/
-#if __STD_C
-size_t public_mUSABLe(Void_t*);
-#else
-size_t public_mUSABLe();
-#endif
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
-
-*/
-#if __STD_C
-void public_mSTATs();
-#else
-void public_mSTATs();
-#endif
-
-/* mallopt tuning options */
-
-/*
- M_MXFAST is the maximum request size used for "fastbins", special bins
- that hold returned chunks without consolidating their spaces. This
- enables future requests for chunks of the same size to be handled
- very quickly, but can increase fragmentation, and thus increase the
- overall memory footprint of a program.
-
- This malloc manages fastbins very conservatively yet still
- efficiently, so fragmentation is rarely a problem for values less
- than or equal to the default. The maximum supported value of MXFAST
- is 80. You wouldn't want it any higher than this anyway. Fastbins
- are designed especially for use with many small structs, objects or
- strings -- the default handles structs/objects/arrays with sizes up
- to 16 4byte fields, or small strings representing words, tokens,
- etc. Using fastbins for larger objects normally worsens
- fragmentation without improving speed.
-
- M_MXFAST is set in REQUEST size units. It is internally used in
- chunksize units, which adds padding and alignment. You can reduce
- M_MXFAST to 0 to disable all use of fastbins. This causes the malloc
- algorithm to be a closer approximation of fifo-best-fit in all cases,
- not just for larger requests, but will generally cause it to be
- slower.
-*/
-
-
-/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
-#ifndef M_MXFAST
-#define M_MXFAST 1
-#endif
-
-#ifndef DEFAULT_MXFAST
-#define DEFAULT_MXFAST 64
-#endif
-
-
-/*
- M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
- to keep before releasing via malloc_trim in free().
-
- Automatic trimming is mainly useful in long-lived programs.
- Because trimming via sbrk can be slow on some systems, and can
- sometimes be wasteful (in cases where programs immediately
- afterward allocate more large chunks) the value should be high
- enough so that your overall system performance would improve by
- releasing this much memory.
-
- The trim threshold and the mmap control parameters (see below)
- can be traded off with one another. Trimming and mmapping are
- two different ways of releasing unused memory back to the
- system. Between these two, it is often possible to keep
- system-level demands of a long-lived program down to a bare
- minimum. For example, in one test suite of sessions measuring
- the XF86 X server on Linux, using a trim threshold of 128K and a
- mmap threshold of 192K led to near-minimal long term resource
- consumption.
-
- If you are using this malloc in a long-lived program, it should
- pay to experiment with these values. As a rough guide, you
- might set to a value close to the average size of a process
- (program) running on your system. Releasing this much memory
- would allow such a process to run in memory. Generally, it's
- worth it to tune for trimming rather than memory mapping when a
- program undergoes phases where several large chunks are
- allocated and released in ways that can reuse each other's
- storage, perhaps mixed with phases where there are no such
- chunks at all. And in well-behaved long-lived programs,
- controlling release of large blocks via trimming versus mapping
- is usually faster.
-
- However, in most programs, these parameters serve mainly as
- protection against the system-level effects of carrying around
- massive amounts of unneeded memory. Since frequent calls to
- sbrk, mmap, and munmap otherwise degrade performance, the default
- parameters are set to relatively high values that serve only as
- safeguards.
-
- The trim value must be greater than page size to have any useful
- effect. To disable trimming completely, you can set to
- (unsigned long)(-1)
-
- Trim settings interact with fastbin (MXFAST) settings: Unless
- TRIM_FASTBINS is defined, automatic trimming never takes place upon
- freeing a chunk with size less than or equal to MXFAST. Trimming is
- instead delayed until subsequent freeing of larger chunks. However,
- you can still force an attempted trim by calling malloc_trim.
-
- Also, trimming is not generally possible in cases where
- the main arena is obtained via mmap.
-
- Note that the trick some people use of mallocing a huge space and
- then freeing it at program startup, in an attempt to reserve system
- memory, doesn't have the intended effect under automatic trimming,
- since that memory will immediately be returned to the system.
-*/
-
-#define M_TRIM_THRESHOLD -1
-
-#ifndef DEFAULT_TRIM_THRESHOLD
-#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
-#endif
-
-/*
- M_TOP_PAD is the amount of extra `padding' space to allocate or
- retain whenever sbrk is called. It is used in two ways internally:
-
- * When sbrk is called to extend the top of the arena to satisfy
- a new malloc request, this much padding is added to the sbrk
- request.
-
- * When malloc_trim is called automatically from free(),
- it is used as the `pad' argument.
-
- In both cases, the actual amount of padding is rounded
- so that the end of the arena is always a system page boundary.
-
- The main reason for using padding is to avoid calling sbrk so
- often. Having even a small pad greatly reduces the likelihood
- that nearly every malloc request during program start-up (or
- after trimming) will invoke sbrk, which needlessly wastes
- time.
-
- Automatic rounding-up to page-size units is normally sufficient
- to avoid measurable overhead, so the default is 0. However, in
- systems where sbrk is relatively slow, it can pay to increase
- this value, at the expense of carrying around more memory than
- the program needs.
-*/
-
-#define M_TOP_PAD -2
-
-#ifndef DEFAULT_TOP_PAD
-#define DEFAULT_TOP_PAD (0)
-#endif
-
-/*
- M_MMAP_THRESHOLD is the request size threshold for using mmap()
- to service a request. Requests of at least this size that cannot
- be allocated using already-existing space will be serviced via mmap.
- (If enough normal freed space already exists it is used instead.)
-
- Using mmap segregates relatively large chunks of memory so that
- they can be individually obtained and released from the host
- system. A request serviced through mmap is never reused by any
- other request (at least not directly; the system may just so
- happen to remap successive requests to the same locations).
-
- Segregating space in this way has the benefits that:
-
- 1. Mmapped space can ALWAYS be individually released back
- to the system, which helps keep the system level memory
- demands of a long-lived program low.
- 2. Mapped memory can never become `locked' between
- other chunks, as can happen with normally allocated chunks, which
- means that even trimming via malloc_trim would not release them.
- 3. On some systems with "holes" in address spaces, mmap can obtain
- memory that sbrk cannot.
-
- However, it has the disadvantages that:
-
- 1. The space cannot be reclaimed, consolidated, and then
- used to service later requests, as happens with normal chunks.
- 2. It can lead to more wastage because of mmap page alignment
- requirements
- 3. It causes malloc performance to be more dependent on host
- system memory management support routines which may vary in
- implementation quality and may impose arbitrary
- limitations. Generally, servicing a request via normal
- malloc steps is faster than going through a system's mmap.
-
- The advantages of mmap nearly always outweigh disadvantages for
- "large" chunks, but the value of "large" varies across systems. The
- default is an empirically derived value that works well in most
- systems.
-*/
-
-#define M_MMAP_THRESHOLD -3
-
-#ifndef DEFAULT_MMAP_THRESHOLD
-#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
-#endif
-
-/*
- M_MMAP_MAX is the maximum number of requests to simultaneously
- service using mmap. This parameter exists because some systems have a
- limited number of internal tables for use by mmap, and using more than
- a few of them may degrade performance.
-
- The default is set to a value that serves only as a safeguard.
- Setting to 0 disables use of mmap for servicing large requests. If
- HAVE_MMAP is not set, the default value is 0, and attempts to set it
- to non-zero values in mallopt will fail.
-*/
-
-#define M_MMAP_MAX -4
-
-#ifndef DEFAULT_MMAP_MAX
-#if HAVE_MMAP
-#define DEFAULT_MMAP_MAX (65536)
-#else
-#define DEFAULT_MMAP_MAX (0)
-#endif
-#endif
-
-#ifdef __cplusplus
-}; /* end of extern "C" */
-#endif
-
-
-/* RN XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
-#endif
-
-/*
- ========================================================================
- To make a fully customizable malloc.h header file, cut everything
- above this line, put into file malloc.h, edit to suit, and #include it
- on the next line, as well as in programs that use this malloc.
- ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/* --------------------- public wrappers ---------------------- */
-
-#ifdef USE_PUBLIC_MALLOC_WRAPPERS
-
-/* Declare all routines as internal */
-#if __STD_C
-static Void_t* mALLOc(size_t);
-static void fREe(Void_t*);
-static Void_t* rEALLOc(Void_t*, size_t);
-static Void_t* mEMALIGn(size_t, size_t);
-static Void_t* vALLOc(size_t);
-static Void_t* pVALLOc(size_t);
-static Void_t* cALLOc(size_t, size_t);
-static Void_t** iCALLOc(size_t, size_t, Void_t**);
-static Void_t** iCOMALLOc(size_t, size_t*, Void_t**);
-static void cFREe(Void_t*);
-static int mTRIm(size_t);
-static size_t mUSABLe(Void_t*);
-static void mSTATs();
-static int mALLOPt(int, int);
-static struct mallinfo mALLINFo(void);
-#else
-static Void_t* mALLOc();
-static void fREe();
-static Void_t* rEALLOc();
-static Void_t* mEMALIGn();
-static Void_t* vALLOc();
-static Void_t* pVALLOc();
-static Void_t* cALLOc();
-static Void_t** iCALLOc();
-static Void_t** iCOMALLOc();
-static void cFREe();
-static int mTRIm();
-static size_t mUSABLe();
-static void mSTATs();
-static int mALLOPt();
-static struct mallinfo mALLINFo();
-#endif
-
-/*
- MALLOC_PREACTION and MALLOC_POSTACTION should be
- defined to return 0 on success, and nonzero on failure.
- The return value of MALLOC_POSTACTION is currently ignored
- in wrapper functions since there is no reasonable default
- action to take on failure.
-*/
-
-
-#ifdef USE_MALLOC_LOCK
-
-#ifdef WIN32
-
-static int mALLOC_MUTEx;
-#define MALLOC_PREACTION slwait(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION slrelease(&mALLOC_MUTEx)
-
-#else
-
-#include <pthread.h>
-
-static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER;
-
-#define MALLOC_PREACTION pthread_mutex_lock(&mALLOC_MUTEx)
-#define MALLOC_POSTACTION pthread_mutex_unlock(&mALLOC_MUTEx)
-
-#endif /* WIN32 */
-
-#else
-
-/* Substitute anything you like for these */
-
-#define MALLOC_PREACTION (0)
-#define MALLOC_POSTACTION (0)
-
-#endif
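-
-/*
-  For instance, a port with its own locking primitives could substitute
-  something like the following (an illustrative sketch; my_lock_t,
-  my_lock_acquire and my_lock_release are hypothetical names, not part
-  of this malloc):
-
-    static my_lock_t my_lock;
-    #define MALLOC_PREACTION   (my_lock_acquire(&my_lock), 0)
-    #define MALLOC_POSTACTION  (my_lock_release(&my_lock), 0)
-
-  Both must evaluate to 0 on success, per the convention described above.
-*/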
-
-Void_t* public_mALLOc(size_t bytes) {
- Void_t* m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = mALLOc(bytes);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-void public_fREe(Void_t* m) {
- if (MALLOC_PREACTION != 0) {
- return;
- }
- fREe(m);
- if (MALLOC_POSTACTION != 0) {
- }
-}
-
-Void_t* public_rEALLOc(Void_t* m, size_t bytes) {
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = rEALLOc(m, bytes);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-Void_t* public_mEMALIGn(size_t alignment, size_t bytes) {
- Void_t* m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = mEMALIGn(alignment, bytes);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-Void_t* public_vALLOc(size_t bytes) {
- Void_t* m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = vALLOc(bytes);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-Void_t* public_pVALLOc(size_t bytes) {
- Void_t* m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = pVALLOc(bytes);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-Void_t* public_cALLOc(size_t n, size_t elem_size) {
- Void_t* m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = cALLOc(n, elem_size);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-
-Void_t** public_iCALLOc(size_t n, size_t elem_size, Void_t** chunks) {
- Void_t** m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = iCALLOc(n, elem_size, chunks);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-Void_t** public_iCOMALLOc(size_t n, size_t sizes[], Void_t** chunks) {
- Void_t** m;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- m = iCOMALLOc(n, sizes, chunks);
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-void public_cFREe(Void_t* m) {
- if (MALLOC_PREACTION != 0) {
- return;
- }
- cFREe(m);
- if (MALLOC_POSTACTION != 0) {
- }
-}
-
-int public_mTRIm(size_t s) {
- int result;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- result = mTRIm(s);
- if (MALLOC_POSTACTION != 0) {
- }
- return result;
-}
-
-size_t public_mUSABLe(Void_t* m) {
- size_t result;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- result = mUSABLe(m);
- if (MALLOC_POSTACTION != 0) {
- }
- return result;
-}
-
-void public_mSTATs() {
- if (MALLOC_PREACTION != 0) {
- return;
- }
- mSTATs();
- if (MALLOC_POSTACTION != 0) {
- }
-}
-
-struct mallinfo public_mALLINFo() {
- struct mallinfo m;
- if (MALLOC_PREACTION != 0) {
- struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
- return nm;
- }
- m = mALLINFo();
- if (MALLOC_POSTACTION != 0) {
- }
- return m;
-}
-
-int public_mALLOPt(int p, int v) {
- int result;
- if (MALLOC_PREACTION != 0) {
- return 0;
- }
- result = mALLOPt(p, v);
- if (MALLOC_POSTACTION != 0) {
- }
- return result;
-}
-
-#endif
-
-
-
-/* ------------- Optional versions of memcopy ---------------- */
-
-
-#if USE_MEMCPY
-
-/*
- Note: memcpy is ONLY invoked with non-overlapping regions,
- so the (usually slower) memmove is not needed.
-*/
-
-#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes)
-#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes)
-
-#else /* !USE_MEMCPY */
-
-/* Use Duff's device for good zeroing/copying performance. */
-
-#define MALLOC_ZERO(charp, nbytes) \
-do { \
- INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \
- CHUNK_SIZE_T mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \
- long mcn; \
- if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \
- switch (mctmp) { \
- case 0: for(;;) { *mzp++ = 0; \
- case 7: *mzp++ = 0; \
- case 6: *mzp++ = 0; \
- case 5: *mzp++ = 0; \
- case 4: *mzp++ = 0; \
- case 3: *mzp++ = 0; \
- case 2: *mzp++ = 0; \
- case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \
- } \
-} while(0)
-
-#define MALLOC_COPY(dest,src,nbytes) \
-do { \
- INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \
- INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \
- CHUNK_SIZE_T mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \
- long mcn; \
- if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \
- switch (mctmp) { \
- case 0: for(;;) { *mcdst++ = *mcsrc++; \
- case 7: *mcdst++ = *mcsrc++; \
- case 6: *mcdst++ = *mcsrc++; \
- case 5: *mcdst++ = *mcsrc++; \
- case 4: *mcdst++ = *mcsrc++; \
- case 3: *mcdst++ = *mcsrc++; \
- case 2: *mcdst++ = *mcsrc++; \
- case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \
- } \
-} while(0)
-
-#endif
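-
-/*
-  Usage sketch (illustrative only): inside this malloc both macros are
-  applied to chunk-aligned regions, so nbytes is effectively a multiple
-  of sizeof(INTERNAL_SIZE_T).  For example, calloc-style zeroing of a
-  freshly obtained chunk p, using the chunk2mem/chunksize helpers
-  defined below, would look like:
-
-    Void_t* mem = chunk2mem(p);
-    MALLOC_ZERO(mem, chunksize(p) - SIZE_SZ);
-
-  The Duff's device variants unroll eight word stores/copies per loop
-  iteration, intended to help when the library memset/memcpy are not
-  well optimized.
-*/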
-
-/* ------------------ MMAP support ------------------ */
-
-
-#if HAVE_MMAP
-
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif
-
-#ifndef LACKS_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-/*
- Nearly all versions of mmap support MAP_ANONYMOUS,
- so the following is unlikely to be needed, but is
- supplied just in case.
-*/
-
-#ifndef MAP_ANONYMOUS
-
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-
-#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \
- (dev_zero_fd = open("/dev/zero", O_RDWR), \
- mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \
- mmap((addr), (size), (prot), (flags), dev_zero_fd, 0))
-
-#else
-
-#define MMAP(addr, size, prot, flags) \
- (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
-
-#endif
-
-
-#endif /* HAVE_MMAP */
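-
-/*
-  Either way, callers later in this file invoke it identically, e.g.
-
-    mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
-
-  so the /dev/zero fallback stays invisible to them.
-*/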
-
-
-/*
- ----------------------- Chunk representations -----------------------
-*/
-
-
-/*
- This struct declaration is misleading (but accurate and necessary).
- It declares a "view" into memory allowing access to necessary
- fields at known offsets from a given base. See explanation below.
-*/
-
-struct malloc_chunk {
-
- INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */
- INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */
-
- struct malloc_chunk* fd; /* double links -- used only if free. */
- struct malloc_chunk* bk;
-};
-
-
-typedef struct malloc_chunk* mchunkptr;
-
-/*
- malloc_chunk details:
-
- (The following includes lightly edited explanations by Colin Plumb.)
-
- Chunks of memory are maintained using a `boundary tag' method as
- described in e.g., Knuth or Standish. (See the paper by Paul
- Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a
- survey of such techniques.) Sizes of free chunks are stored both
- in the front of each chunk and at the end. This makes
- consolidating fragmented chunks into bigger chunks very fast. The
- size fields also hold bits representing whether chunks are free or
- in use.
-
- An allocated chunk looks like this:
-
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk, if allocated | |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | User data starts here... .
- . .
- . (malloc_usable_space() bytes) .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
- Where "chunk" is the front of the chunk for the purpose of most of
- the malloc code, but "mem" is the pointer that is returned to the
- user. "Nextchunk" is the beginning of the next contiguous chunk.
-
- Chunks always begin on even word boundaries, so the mem portion
- (which is returned to the user) is also on an even word boundary, and
- thus at least double-word aligned.
-
- Free chunks are stored in circular doubly-linked lists, and look like this:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space (may be 0 bytes long) .
- . .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- The P (PREV_INUSE) bit, stored in the unused low-order bit of the
- chunk size (which is always a multiple of two words), is an in-use
- bit for the *previous* chunk. If that bit is *clear*, then the
- word before the current chunk size contains the previous chunk
- size, and can be used to find the front of the previous chunk.
- The very first chunk allocated always has this bit set,
- preventing access to non-existent (or non-owned) memory. If
- prev_inuse is set for any given chunk, then you CANNOT determine
- the size of the previous chunk, and might even get a memory
- addressing fault when trying to do so.
-
- Note that the `foot' of the current chunk is actually represented
- as the prev_size of the NEXT chunk. This makes it easier to
- deal with alignments etc but can be very confusing when trying
- to extend or adapt this code.
-
- The two exceptions to all this are
-
- 1. The special chunk `top' doesn't bother using the
- trailing size field since there is no next contiguous chunk
- that would have to index off it. After initialization, `top'
- is forced to always exist. If it would become less than
- MINSIZE bytes long, it is replenished.
-
- 2. Chunks allocated via mmap, which have the second-lowest-order
- bit (IS_MMAPPED) set in their size fields. Because they are
- allocated one-by-one, each must contain its own trailing size field.
-
-*/
-
-/*
- ---------- Size and alignment checks and conversions ----------
-*/
-
-/* conversion from malloc headers to user pointers, and back */
-
-#define chunk2mem(p) ((Void_t*)((char*)(p) + 2*SIZE_SZ))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
-
-/* The smallest possible chunk */
-#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk))
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-
-#define MINSIZE \
- (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
-
-/* Check if m has acceptable alignment */
-
-#define aligned_OK(m) (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0)
-
-
-/*
- Check if a request is so large that it would wrap around zero when
- padded and aligned. To simplify some other code, the bound is made
- low enough so that adding MINSIZE will also not wrap around zero.
-*/
-
-#define REQUEST_OUT_OF_RANGE(req) \
- ((CHUNK_SIZE_T)(req) >= \
- (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-2 * MINSIZE))
-
-/* pad request bytes into a usable size -- internal version */
-
-#define request2size(req) \
- (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
- MINSIZE : \
- ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
-
-/* Same, except also perform argument check */
-
-#define checked_request2size(req, sz) \
- if (REQUEST_OUT_OF_RANGE(req)) { \
- MALLOC_FAILURE_ACTION; \
- return 0; \
- } \
- (sz) = request2size(req);
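-
-/*
-  A few worked values (assuming 4-byte SIZE_SZ and 8-byte alignment, so
-  MINSIZE is 16 and MALLOC_ALIGN_MASK is 7):
-
-    request2size(1)  == 16    (below MINSIZE, rounded up)
-    request2size(20) == 24    (20 + 4 bytes overhead, already aligned)
-    request2size(24) == 32    (24 + 4 = 28, rounded up to 32)
-
-  Only one SIZE_SZ of overhead is charged because the prev_size field
-  of the following chunk doubles as usable space while this chunk is
-  allocated (see the chunk diagrams above).
-*/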
-
-/*
- --------------- Physical chunk operations ---------------
-*/
-
-
-/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
-#define PREV_INUSE 0x1
-
-/* extract inuse bit of previous chunk */
-#define prev_inuse(p) ((p)->size & PREV_INUSE)
-
-
-/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
-#define IS_MMAPPED 0x2
-
-/* check for mmap()'ed chunk */
-#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
-
-/*
- Bits to mask off when extracting size
-
- Note: IS_MMAPPED is intentionally not masked off from size field in
- macros for which mmapped chunks should never be seen. This should
- cause helpful core dumps to occur if it is tried by accident by
- people extending or adapting this malloc.
-*/
-#define SIZE_BITS (PREV_INUSE|IS_MMAPPED)
-
-/* Get size, ignoring use bits */
-#define chunksize(p) ((p)->size & ~(SIZE_BITS))
-
-
-/* Ptr to next physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~PREV_INUSE) ))
-
-/* Ptr to previous physical malloc_chunk */
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
-
-/* Treat space at ptr + offset as a chunk */
-#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
-
-/* extract p's inuse bit */
-#define inuse(p)\
-((((mchunkptr)(((char*)(p))+((p)->size & ~PREV_INUSE)))->size) & PREV_INUSE)
-
-/* set/clear chunk as being inuse without otherwise disturbing */
-#define set_inuse(p)\
-((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size |= PREV_INUSE
-
-#define clear_inuse(p)\
-((mchunkptr)(((char*)(p)) + ((p)->size & ~PREV_INUSE)))->size &= ~(PREV_INUSE)
-
-
-/* check/set/clear inuse bits in known places */
-#define inuse_bit_at_offset(p, s)\
- (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
-
-#define set_inuse_bit_at_offset(p, s)\
- (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
-
-#define clear_inuse_bit_at_offset(p, s)\
- (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
-
-
-/* Set size at head, without disturbing its use bit */
-#define set_head_size(p, s) ((p)->size = (((p)->size & PREV_INUSE) | (s)))
-
-/* Set size/use field */
-#define set_head(p, s) ((p)->size = (s))
-
-/* Set size at footer (only when chunk is not in use) */
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
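-
-/*
-  Illustrative traversal using the macros above (a sketch only; p must
-  be a valid chunk, av->top the topmost chunk, and total_inuse /
-  total_free stand for whatever the caller accumulates):
-
-    mchunkptr q;
-    for (q = p; q != av->top; q = next_chunk(q)) {
-      if (inuse(q))
-        total_inuse += chunksize(q);      (allocated, including overhead)
-      else
-        total_free  += chunksize(q);      (free; size echoed in the foot)
-    }
-
-  The debug routine do_check_malloc_state below walks chunks in much
-  the same way.
-*/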
-
-
-/*
- -------------------- Internal data structures --------------------
-
- All internal state is held in an instance of malloc_state defined
- below. There are no other static variables, except in two optional
- cases:
- * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above.
- * If HAVE_MMAP is true, but mmap doesn't support
- MAP_ANONYMOUS, a dummy file descriptor for mmap.
-
- Beware of lots of tricks that minimize the total bookkeeping space
- requirements. The result is a little over 1K bytes (for 4-byte
- pointers and size_t).
-*/
-
-/*
- Bins
-
- An array of bin headers for free chunks. Each bin is doubly
- linked. The bins are approximately proportionally (log) spaced.
- There are a lot of these bins (128). This may look excessive, but
- works very well in practice. Most bins hold sizes that are
- unusual as malloc request sizes, but are more usual for fragments
- and consolidated sets of chunks, which is what these bins hold, so
- they can be found quickly. All procedures maintain the invariant
- that no consolidated chunk physically borders another one, so each
- chunk in a list is known to be preceded and followed by either
- inuse chunks or the ends of memory.
-
- Chunks in bins are kept in size order, with ties going to the
- approximately least recently used chunk. Ordering isn't needed
- for the small bins, which all contain the same-sized chunks, but
- facilitates best-fit allocation for larger chunks. These lists
- are just sequential. Keeping them in order almost never requires
- enough traversal to warrant using fancier ordered data
- structures.
-
- Chunks of the same size are linked with the most
- recently freed at the front, and allocations are taken from the
- back. This results in LRU (FIFO) allocation order, which tends
- to give each chunk an equal opportunity to be consolidated with
- adjacent freed chunks, resulting in larger free chunks and less
- fragmentation.
-
- To simplify use in double-linked lists, each bin header acts
- as a malloc_chunk. This avoids special-casing for headers.
- But to conserve space and improve locality, we allocate
- only the fd/bk pointers of bins, and then use repositioning tricks
- to treat these as the fields of a malloc_chunk*.
-*/
-
-typedef struct malloc_chunk* mbinptr;
-
-/* addressing -- note that bin_at(0) does not exist */
-#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1)))
-
-/* analog of ++bin */
-#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1)))
-
-/* Reminders about list directionality within bins */
-#define first(b) ((b)->fd)
-#define last(b) ((b)->bk)
-
-/* Take a chunk off a bin list */
-#define unlink(P, BK, FD) { \
- FD = P->fd; \
- BK = P->bk; \
- FD->bk = BK; \
- BK->fd = FD; \
-}
-
-/*
- Indexing
-
- Bins for sizes < 512 bytes contain chunks of all the same size, spaced
- 8 bytes apart. Larger bins are approximately logarithmically spaced:
-
- 64 bins of size 8
- 32 bins of size 64
- 16 bins of size 512
- 8 bins of size 4096
- 4 bins of size 32768
- 2 bins of size 262144
- 1 bin of size what's left
-
- The bins top out around 1MB because we expect to service large
- requests via mmap.
-*/
-
-#define NBINS 96
-#define NSMALLBINS 32
-#define SMALLBIN_WIDTH 8
-#define MIN_LARGE_SIZE 256
-
-#define in_smallbin_range(sz) \
- ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE)
-
-#define smallbin_index(sz) (((unsigned)(sz)) >> 3)
-
-/*
- Compute index for size. We expect this to be inlined when
- compiled with optimization, else not, which works out well.
-*/
-static int largebin_index(unsigned int sz) {
- unsigned int x = sz >> SMALLBIN_WIDTH;
- unsigned int m; /* bit position of highest set bit of x */
-
- if (x >= 0x10000) return NBINS-1;
-
- /* On intel, use BSRL instruction to find highest bit */
-#if defined(__GNUC__) && defined(i386)
-
- __asm__("bsrl %1,%0\n\t"
- : "=r" (m)
- : "g" (x));
-
-#else
- {
- /*
- Based on branch-free nlz algorithm in chapter 5 of Henry
- S. Warren Jr's book "Hacker's Delight".
- */
-
- unsigned int n = ((x - 0x100) >> 16) & 8;
- x <<= n;
- m = ((x - 0x1000) >> 16) & 4;
- n += m;
- x <<= m;
- m = ((x - 0x4000) >> 16) & 2;
- n += m;
- x = (x << m) >> 14;
- m = 13 - n + (x & ~(x>>1));
- }
-#endif
-
- /* Use next 2 bits to create finer-granularity bins */
- return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3);
-}
-
-#define bin_index(sz) \
- ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
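-
-/*
-  A few worked index values with the constants above (SMALLBIN_WIDTH 8,
-  NSMALLBINS 32, MIN_LARGE_SIZE 256):
-
-    bin_index(16)  == smallbin_index(16)   == 2
-    bin_index(248) == smallbin_index(248)  == 31   (last small bin)
-    bin_index(256) == largebin_index(256)  == 32   (first large bin)
-    bin_index(512) == largebin_index(512)  == 36
-*/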
-
-/*
- FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
- first bin that is maintained in sorted order. This must
- be the smallest size corresponding to a given bin.
-
- Normally, this should be MIN_LARGE_SIZE. But you can weaken
- best fit guarantees to sometimes speed up malloc by increasing value.
- Doing this means that malloc may choose a chunk that is
- non-best-fitting by up to the width of the bin.
-
- Some useful cutoff values:
- 512 - all bins sorted
- 2560 - leaves bins <= 64 bytes wide unsorted
- 12288 - leaves bins <= 512 bytes wide unsorted
- 65536 - leaves bins <= 4096 bytes wide unsorted
- 262144 - leaves bins <= 32768 bytes wide unsorted
- -1 - no bins sorted (not recommended!)
-*/
-
-#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
-/* #define FIRST_SORTED_BIN_SIZE 65536 */
-
-/*
- Unsorted chunks
-
- All remainders from chunk splits, as well as all returned chunks,
- are first placed in the "unsorted" bin. They are then placed
- in regular bins after malloc gives them ONE chance to be used before
- binning. So, basically, the unsorted_chunks list acts as a queue,
- with chunks being placed on it in free (and malloc_consolidate),
- and taken off (to be either used or placed in bins) in malloc.
-*/
-
-/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
-#define unsorted_chunks(M) (bin_at(M, 1))
-
-/*
- Top
-
- The top-most available chunk (i.e., the one bordering the end of
- available memory) is treated specially. It is never included in
- any bin, is used only if no other chunk is available, and is
- released back to the system if it is very large (see
- M_TRIM_THRESHOLD). Because top initially
- points to its own bin with initial zero size, thus forcing
- extension on the first malloc request, we avoid having any special
- code in malloc to check whether it even exists yet. But we still
- need to do so when getting memory from system, so we make
- initial_top treat the bin as a legal but unusable chunk during the
- interval between initialization and the first call to
- sYSMALLOc. (This is somewhat delicate, since it relies on
- the 2 preceding words to be zero during this interval as well.)
-*/
-
-/* Conveniently, the unsorted bin can be used as dummy top on first call */
-#define initial_top(M) (unsorted_chunks(M))
-
-/*
- Binmap
-
- To help compensate for the large number of bins, a one-level index
- structure is used for bin-by-bin searching. `binmap' is a
- bitvector recording whether bins are definitely empty so they can
- be skipped over during traversals. The bits are NOT always
- cleared as soon as bins are empty, but instead only
- when they are noticed to be empty during traversal in malloc.
-*/
-
-/* Conservatively use 32 bits per map word, even if on a 64-bit system */
-#define BINMAPSHIFT 5
-#define BITSPERMAP (1U << BINMAPSHIFT)
-#define BINMAPSIZE (NBINS / BITSPERMAP)
-
-#define idx2block(i) ((i) >> BINMAPSHIFT)
-#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
-
-#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
-#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
-#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i))
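-
-/*
-  Sketch of how the map is used (these lines mirror the real uses in
-  malloc further below):
-
-    mark_bin(av, victim_index);         on inserting a chunk into a bin
-    if (get_binmap(av, i)) ...          bin i may hold chunks; worth a look
-    av->binmap[block] = map &= ~bit;    lazily clear a bit found stale
-*/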
-
-/*
- Fastbins
-
- An array of lists holding recently freed small chunks. Fastbins
- are not doubly linked. It is faster to single-link them, and
- since chunks are never removed from the middles of these lists,
- double linking is not necessary. Also, unlike regular bins, they
- are not even processed in FIFO order (they use faster LIFO) since
- ordering doesn't much matter in the transient contexts in which
- fastbins are normally used.
-
- Chunks in fastbins keep their inuse bit set, so they cannot
- be consolidated with other free chunks. malloc_consolidate
- releases all chunks in fastbins and consolidates them with
- other free chunks.
-*/
-
-typedef struct malloc_chunk* mfastbinptr;
-
-/* offset 2 to use otherwise unindexable first 2 bins */
-#define fastbin_index(sz) ((((unsigned int)(sz)) >> 3) - 2)
-
-/* The maximum fastbin request size we support */
-#define MAX_FAST_SIZE 80
-
-#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1)
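-
-/*
-  Worked values (assuming 4-byte SIZE_SZ, so request2size(80) == 88):
-
-    fastbin_index(16) == 0        the smallest possible chunk
-    fastbin_index(24) == 1
-    fastbin_index(88) == 9   =>   NFASTBINS == 10
-*/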
-
-/*
- FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
- that triggers automatic consolidation of possibly-surrounding
- fastbin chunks. This is a heuristic, so the exact value should not
- matter too much. It is defined at half the default trim threshold as a
- compromise heuristic to only attempt consolidation if it is likely
- to lead to trimming. However, it is not dynamically tunable, since
- consolidation reduces fragmentation surrounding large chunks even
- if trimming is not used.
-*/
-
-#define FASTBIN_CONSOLIDATION_THRESHOLD \
- ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
-
-/*
- Since the lowest 2 bits in max_fast don't matter in size comparisons,
- they are used as flags.
-*/
-
-/*
- ANYCHUNKS_BIT held in max_fast indicates that there may be any
- freed chunks at all. It is set true when entering a chunk into any
- bin.
-*/
-
-#define ANYCHUNKS_BIT (1U)
-
-#define have_anychunks(M) (((M)->max_fast & ANYCHUNKS_BIT))
-#define set_anychunks(M) ((M)->max_fast |= ANYCHUNKS_BIT)
-#define clear_anychunks(M) ((M)->max_fast &= ~ANYCHUNKS_BIT)
-
-/*
- FASTCHUNKS_BIT held in max_fast indicates that there are probably
- some fastbin chunks. It is set true on entering a chunk into any
- fastbin, and cleared only in malloc_consolidate.
-*/
-
-#define FASTCHUNKS_BIT (2U)
-
-#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT))
-#define set_fastchunks(M) ((M)->max_fast |= (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
-#define clear_fastchunks(M) ((M)->max_fast &= ~(FASTCHUNKS_BIT))
-
-/*
- Set value of max_fast.
- Use impossibly small value if 0.
-*/
-
-#define set_max_fast(M, s) \
- (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
- ((M)->max_fast & (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
-
-#define get_max_fast(M) \
- ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
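-
-/*
-  For example, set_max_fast(av, 64) stores request2size(64) (72 with
-  4-byte SIZE_SZ) in the upper bits while preserving both flag bits,
-  and get_max_fast(av) masks the flags back off.  Passing 0 installs
-  SMALLBIN_WIDTH, which no real chunk can match, effectively disabling
-  fastbins.
-*/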
-
-
-/*
- morecore_properties is a status word holding dynamically discovered
- or controlled properties of the morecore function
-*/
-
-#define MORECORE_CONTIGUOUS_BIT (1U)
-
-#define contiguous(M) \
- (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT))
-#define noncontiguous(M) \
- (((M)->morecore_properties & MORECORE_CONTIGUOUS_BIT) == 0)
-#define set_contiguous(M) \
- ((M)->morecore_properties |= MORECORE_CONTIGUOUS_BIT)
-#define set_noncontiguous(M) \
- ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
-
-
-/*
- ----------- Internal state representation and initialization -----------
-*/
-
-struct malloc_state {
-
- /* The maximum chunk size to be eligible for fastbin */
- INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */
-
- /* Fastbins */
- mfastbinptr fastbins[NFASTBINS];
-
- /* Base of the topmost chunk -- not otherwise kept in a bin */
- mchunkptr top;
-
- /* The remainder from the most recent split of a small request */
- mchunkptr last_remainder;
-
- /* Normal bins packed as described above */
- mchunkptr bins[NBINS * 2];
-
- /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
- unsigned int binmap[BINMAPSIZE+1];
-
- /* Tunable parameters */
- CHUNK_SIZE_T trim_threshold;
- INTERNAL_SIZE_T top_pad;
- INTERNAL_SIZE_T mmap_threshold;
-
- /* Memory map support */
- int n_mmaps;
- int n_mmaps_max;
- int max_n_mmaps;
-
- /* Cache malloc_getpagesize */
- unsigned int pagesize;
-
- /* Track properties of MORECORE */
- unsigned int morecore_properties;
-
- /* Statistics */
- INTERNAL_SIZE_T mmapped_mem;
- INTERNAL_SIZE_T sbrked_mem;
- INTERNAL_SIZE_T max_sbrked_mem;
- INTERNAL_SIZE_T max_mmapped_mem;
- INTERNAL_SIZE_T max_total_mem;
-};
-
-typedef struct malloc_state *mstate;
-
-/*
- There is exactly one instance of this struct in this malloc.
- If you are adapting this malloc in a way that does NOT use a static
- malloc_state, you MUST explicitly zero-fill it before using. This
- malloc relies on the property that malloc_state is initialized to
- all zeroes (as is true of C statics).
-*/
-
-static struct malloc_state av_; /* never directly referenced */
-
-/*
- All uses of av_ are via get_malloc_state().
- At most one "call" to get_malloc_state is made per invocation of
- the public versions of malloc and free, but other routines
- that in turn invoke malloc and/or free may call it more than once.
- Also, it is called in check* routines if DEBUG is set.
-*/
-
-#define get_malloc_state() (&(av_))
-
-/*
- Initialize a malloc_state struct.
-
- This is called only from within malloc_consolidate, which needs to
- be called in the same contexts anyway. It is never called directly
- outside of malloc_consolidate because some optimizing compilers try
- to inline it at all call points, which turns out not to be an
- optimization at all. (Inlining it in malloc_consolidate is fine though.)
-*/
-
-#if __STD_C
-static void malloc_init_state(mstate av)
-#else
-static void malloc_init_state(av) mstate av;
-#endif
-{
- int i;
- mbinptr bin;
-
- /* Establish circular links for normal bins */
- for (i = 1; i < NBINS; ++i) {
- bin = bin_at(av,i);
- bin->fd = bin->bk = bin;
- }
-
- av->top_pad = DEFAULT_TOP_PAD;
- av->n_mmaps_max = DEFAULT_MMAP_MAX;
- av->mmap_threshold = DEFAULT_MMAP_THRESHOLD;
- av->trim_threshold = DEFAULT_TRIM_THRESHOLD;
-
-#if MORECORE_CONTIGUOUS
- set_contiguous(av);
-#else
- set_noncontiguous(av);
-#endif
-
-
- set_max_fast(av, DEFAULT_MXFAST);
-
- av->top = initial_top(av);
- av->pagesize = malloc_getpagesize;
-}
-
-/*
- Other internal utilities operating on mstates
-*/
-
-static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate);
-#ifndef MORECORE_CANNOT_TRIM
-static int sYSTRIm(size_t, mstate);
-#endif
-static void malloc_consolidate(mstate);
-static Void_t** iALLOc(size_t, size_t*, int, Void_t**);
-
-/*
- Debugging support
-
- These routines make a number of assertions about the states
- of data structures that should be true at all times. If any
- are not true, it's very likely that a user program has somehow
- trashed memory. (It's also possible that there is a coding error
- in malloc. In which case, please report it!)
-*/
-
-#if ! DEBUG
-
-#define check_chunk(P)
-#define check_free_chunk(P)
-#define check_inuse_chunk(P)
-#define check_remalloced_chunk(P,N)
-#define check_malloced_chunk(P,N)
-#define check_malloc_state()
-
-#else
-#define check_chunk(P) do_check_chunk(P)
-#define check_free_chunk(P) do_check_free_chunk(P)
-#define check_inuse_chunk(P) do_check_inuse_chunk(P)
-#define check_remalloced_chunk(P,N) do_check_remalloced_chunk(P,N)
-#define check_malloced_chunk(P,N) do_check_malloced_chunk(P,N)
-#define check_malloc_state() do_check_malloc_state()
-
-/*
- Properties of all chunks
-*/
-
-#if __STD_C
-static void do_check_chunk(mchunkptr p)
-#else
-static void do_check_chunk(p) mchunkptr p;
-#endif
-{
- mstate av = get_malloc_state();
- CHUNK_SIZE_T sz = chunksize(p);
- /* min and max possible addresses assuming contiguous allocation */
- char* max_address = (char*)(av->top) + chunksize(av->top);
- char* min_address = max_address - av->sbrked_mem;
-
- if (!chunk_is_mmapped(p)) {
-
- /* Has legal address ... */
- if (p != av->top) {
- if (contiguous(av)) {
- assert(((char*)p) >= min_address);
- assert(((char*)p + sz) <= ((char*)(av->top)));
- }
- }
- else {
- /* top size is always at least MINSIZE */
- assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
- /* top predecessor always marked inuse */
- assert(prev_inuse(p));
- }
-
- }
- else {
-#if HAVE_MMAP
- /* address is outside main heap */
- if (contiguous(av) && av->top != initial_top(av)) {
- assert(((char*)p) < min_address || ((char*)p) > max_address);
- }
- /* chunk is page-aligned */
- assert(((p->prev_size + sz) & (av->pagesize-1)) == 0);
- /* mem is aligned */
- assert(aligned_OK(chunk2mem(p)));
-#else
- /* force an appropriate assert violation if debug set */
- assert(!chunk_is_mmapped(p));
-#endif
- }
-}
-
-/*
- Properties of free chunks
-*/
-
-#if __STD_C
-static void do_check_free_chunk(mchunkptr p)
-#else
-static void do_check_free_chunk(p) mchunkptr p;
-#endif
-{
- mstate av = get_malloc_state();
-
- INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
- mchunkptr next = chunk_at_offset(p, sz);
-
- do_check_chunk(p);
-
- /* Chunk must claim to be free ... */
- assert(!inuse(p));
- assert (!chunk_is_mmapped(p));
-
- /* Unless a special marker, must have OK fields */
- if ((CHUNK_SIZE_T)(sz) >= MINSIZE)
- {
- assert((sz & MALLOC_ALIGN_MASK) == 0);
- assert(aligned_OK(chunk2mem(p)));
- /* ... matching footer field */
- assert(next->prev_size == sz);
- /* ... and is fully consolidated */
- assert(prev_inuse(p));
- assert (next == av->top || inuse(next));
-
- /* ... and has minimally sane links */
- assert(p->fd->bk == p);
- assert(p->bk->fd == p);
- }
- else /* markers are always of size SIZE_SZ */
- assert(sz == SIZE_SZ);
-}
-
-/*
- Properties of inuse chunks
-*/
-
-#if __STD_C
-static void do_check_inuse_chunk(mchunkptr p)
-#else
-static void do_check_inuse_chunk(p) mchunkptr p;
-#endif
-{
- mstate av = get_malloc_state();
- mchunkptr next;
- do_check_chunk(p);
-
- if (chunk_is_mmapped(p))
- return; /* mmapped chunks have no next/prev */
-
- /* Check whether it claims to be in use ... */
- assert(inuse(p));
-
- next = next_chunk(p);
-
- /* ... and is surrounded by OK chunks.
- Since more things can be checked with free chunks than inuse ones,
- if an inuse chunk borders them and debug is on, it's worth doing them.
- */
- if (!prev_inuse(p)) {
- /* Note that we cannot even look at prev unless it is not inuse */
- mchunkptr prv = prev_chunk(p);
- assert(next_chunk(prv) == p);
- do_check_free_chunk(prv);
- }
-
- if (next == av->top) {
- assert(prev_inuse(next));
- assert(chunksize(next) >= MINSIZE);
- }
- else if (!inuse(next))
- do_check_free_chunk(next);
-}
-
-/*
- Properties of chunks recycled from fastbins
-*/
-
-#if __STD_C
-static void do_check_remalloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_remalloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s;
-#endif
-{
- INTERNAL_SIZE_T sz = p->size & ~PREV_INUSE;
-
- do_check_inuse_chunk(p);
-
- /* Legal size ... */
- assert((sz & MALLOC_ALIGN_MASK) == 0);
- assert((CHUNK_SIZE_T)(sz) >= MINSIZE);
- /* ... and alignment */
- assert(aligned_OK(chunk2mem(p)));
- /* chunk is less than MINSIZE more than request */
- assert((long)(sz) - (long)(s) >= 0);
- assert((long)(sz) - (long)(s + MINSIZE) < 0);
-}
-
-/*
- Properties of nonrecycled chunks at the point they are malloced
-*/
-
-#if __STD_C
-static void do_check_malloced_chunk(mchunkptr p, INTERNAL_SIZE_T s)
-#else
-static void do_check_malloced_chunk(p, s) mchunkptr p; INTERNAL_SIZE_T s;
-#endif
-{
- /* same as recycled case ... */
- do_check_remalloced_chunk(p, s);
-
- /*
- ... plus, must obey implementation invariant that prev_inuse is
- always true of any allocated chunk; i.e., that each allocated
- chunk borders either a previously allocated and still in-use
- chunk, or the base of its memory arena. This is ensured
- by making all allocations from the `lowest' part of any found
- chunk. This does not necessarily hold however for chunks
- recycled via fastbins.
- */
-
- assert(prev_inuse(p));
-}
-
-
-/*
- Properties of malloc_state.
-
- This may be useful for debugging malloc, as well as detecting user
- programmer errors that somehow write into malloc_state.
-
- If you are extending or experimenting with this malloc, you can
- probably figure out how to hack this routine to print out or
- display chunk addresses, sizes, bins, and other instrumentation.
-*/
-
-static void do_check_malloc_state()
-{
- mstate av = get_malloc_state();
- int i;
- mchunkptr p;
- mchunkptr q;
- mbinptr b;
- unsigned int binbit;
- int empty;
- unsigned int idx;
- INTERNAL_SIZE_T size;
- CHUNK_SIZE_T total = 0;
- int max_fast_bin;
-
- /* internal size_t must be no wider than pointer type */
- assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*));
-
- /* alignment is a power of 2 */
- assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0);
-
- /* cannot run remaining checks until fully initialized */
- if (av->top == 0 || av->top == initial_top(av))
- return;
-
- /* pagesize is a power of 2 */
- assert((av->pagesize & (av->pagesize-1)) == 0);
-
- /* properties of fastbins */
-
- /* max_fast is in allowed range */
- assert(get_max_fast(av) <= request2size(MAX_FAST_SIZE));
-
- max_fast_bin = fastbin_index(av->max_fast);
-
- for (i = 0; i < NFASTBINS; ++i) {
- p = av->fastbins[i];
-
- /* all bins past max_fast are empty */
- if (i > max_fast_bin)
- assert(p == 0);
-
- while (p != 0) {
- /* each chunk claims to be inuse */
- do_check_inuse_chunk(p);
- total += chunksize(p);
- /* chunk belongs in this bin */
- assert(fastbin_index(chunksize(p)) == i);
- p = p->fd;
- }
- }
-
- if (total != 0)
- assert(have_fastchunks(av));
- else if (!have_fastchunks(av))
- assert(total == 0);
-
- /* check normal bins */
- for (i = 1; i < NBINS; ++i) {
- b = bin_at(av,i);
-
- /* binmap is accurate (except for bin 1 == unsorted_chunks) */
- if (i >= 2) {
- binbit = get_binmap(av,i);
- empty = last(b) == b;
- if (!binbit)
- assert(empty);
- else if (!empty)
- assert(binbit);
- }
-
- for (p = last(b); p != b; p = p->bk) {
- /* each chunk claims to be free */
- do_check_free_chunk(p);
- size = chunksize(p);
- total += size;
- if (i >= 2) {
- /* chunk belongs in bin */
- idx = bin_index(size);
- assert(idx == i);
- /* lists are sorted */
- if ((CHUNK_SIZE_T) size >= (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
- assert(p->bk == b ||
- (CHUNK_SIZE_T)chunksize(p->bk) >=
- (CHUNK_SIZE_T)chunksize(p));
- }
- }
- /* chunk is followed by a legal chain of inuse chunks */
- for (q = next_chunk(p);
- (q != av->top && inuse(q) &&
- (CHUNK_SIZE_T)(chunksize(q)) >= MINSIZE);
- q = next_chunk(q))
- do_check_inuse_chunk(q);
- }
- }
-
- /* top chunk is OK */
- check_chunk(av->top);
-
- /* sanity checks for statistics */
-
- assert(total <= (CHUNK_SIZE_T)(av->max_total_mem));
- assert(av->n_mmaps >= 0);
- assert(av->n_mmaps <= av->max_n_mmaps);
-
- assert((CHUNK_SIZE_T)(av->sbrked_mem) <=
- (CHUNK_SIZE_T)(av->max_sbrked_mem));
-
- assert((CHUNK_SIZE_T)(av->mmapped_mem) <=
- (CHUNK_SIZE_T)(av->max_mmapped_mem));
-
- assert((CHUNK_SIZE_T)(av->max_total_mem) >=
- (CHUNK_SIZE_T)(av->mmapped_mem) + (CHUNK_SIZE_T)(av->sbrked_mem));
-}
-#endif
-
-
-/* ----------- Routines dealing with system allocation -------------- */
-
-/*
- sysmalloc handles malloc cases requiring more memory from the system.
- On entry, it is assumed that av->top does not have enough
- space to service request for nb bytes, thus requiring that av->top
- be extended or replaced.
-*/
-
-#if __STD_C
-static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
-#else
-static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av;
-#endif
-{
- mchunkptr old_top; /* incoming value of av->top */
- INTERNAL_SIZE_T old_size; /* its size */
- char* old_end; /* its end address */
-
- long size; /* arg to first MORECORE or mmap call */
- char* brk; /* return value from MORECORE */
-
- long correction; /* arg to 2nd MORECORE call */
- char* snd_brk; /* 2nd return val */
-
- INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
- INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */
- char* aligned_brk; /* aligned offset into brk */
-
- mchunkptr p; /* the allocated/returned chunk */
- mchunkptr remainder; /* remainder from allocation */
- CHUNK_SIZE_T remainder_size; /* its size */
-
- CHUNK_SIZE_T sum; /* for updating stats */
-
- size_t pagemask = av->pagesize - 1;
-
- /*
- If there is space available in fastbins, consolidate and retry
- malloc from scratch rather than getting memory from system. This
- can occur only if nb is in smallbin range so we didn't consolidate
- upon entry to malloc. It is much easier to handle this case here
- than in malloc proper.
- */
-
- if (have_fastchunks(av)) {
- assert(in_smallbin_range(nb));
- malloc_consolidate(av);
- return mALLOc(nb - MALLOC_ALIGN_MASK);
- }
-
-
-#if HAVE_MMAP
-
- /*
- If have mmap, and the request size meets the mmap threshold, and
- the system supports mmap, and there are few enough currently
- allocated mmapped regions, try to directly map this request
- rather than expanding top.
- */
-
- if ((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) &&
- (av->n_mmaps < av->n_mmaps_max)) {
-
- char* mm; /* return value from mmap call*/
-
- /*
- Round up size to nearest page. For mmapped chunks, the overhead
- is one SIZE_SZ unit larger than for normal chunks, because there
- is no following chunk whose prev_size field could be used.
- */
- size = (nb + SIZE_SZ + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
-
- /* Don't try if size wraps around 0 */
- if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
- mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
-
- if (mm != (char*)(MORECORE_FAILURE)) {
-
- /*
- The offset to the start of the mmapped region is stored
- in the prev_size field of the chunk. This allows us to adjust
- returned start address to meet alignment requirements here
- and in memalign(), and still be able to compute proper
- address argument for later munmap in free() and realloc().
- */
-
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
- if (front_misalign > 0) {
- correction = MALLOC_ALIGNMENT - front_misalign;
- p = (mchunkptr)(mm + correction);
- p->prev_size = correction;
- set_head(p, (size - correction) |IS_MMAPPED);
- }
- else {
- p = (mchunkptr)mm;
- p->prev_size = 0;
- set_head(p, size|IS_MMAPPED);
- }
-
- /* update statistics */
-
- if (++av->n_mmaps > av->max_n_mmaps)
- av->max_n_mmaps = av->n_mmaps;
-
- sum = av->mmapped_mem += size;
- if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem))
- av->max_mmapped_mem = sum;
- sum += av->sbrked_mem;
- if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
- av->max_total_mem = sum;
-
- check_chunk(p);
-
- return chunk2mem(p);
- }
- }
- }
-#endif
-
- /* Record incoming configuration of top */
-
- old_top = av->top;
- old_size = chunksize(old_top);
- old_end = (char*)(chunk_at_offset(old_top, old_size));
-
- brk = snd_brk = (char*)(MORECORE_FAILURE);
-
- /*
- If not the first time through, we require old_size to be
- at least MINSIZE and to have prev_inuse set.
- */
-
- assert((old_top == initial_top(av) && old_size == 0) ||
- ((CHUNK_SIZE_T) (old_size) >= MINSIZE &&
- prev_inuse(old_top)));
-
- /* Precondition: not enough current space to satisfy nb request */
- assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE));
-
- /* Precondition: all fastbins are consolidated */
- assert(!have_fastchunks(av));
-
-
- /* Request enough space for nb + pad + overhead */
-
- size = nb + av->top_pad + MINSIZE;
-
- /*
- If contiguous, we can subtract out existing space that we hope to
- combine with new space. We add it back later only if
- we don't actually get contiguous space.
- */
-
- if (contiguous(av))
- size -= old_size;
-
- /*
- Round to a multiple of page size.
- If MORECORE is not contiguous, this ensures that we only call it
- with whole-page arguments. And if MORECORE is contiguous and
- this is not first time through, this preserves page-alignment of
- previous calls. Otherwise, we correct to page-align below.
- */
-
- size = (size + pagemask) & ~pagemask;
-
- /*
- Don't try to call MORECORE if argument is so big as to appear
- negative. Note that since mmap takes size_t arg, it may succeed
- below even if we cannot call MORECORE.
- */
-
- if (size > 0)
- brk = (char*)(MORECORE(size));
-
- /*
- If have mmap, try using it as a backup when MORECORE fails or
- cannot be used. This is worth doing on systems that have "holes" in
- address space, so sbrk cannot extend to give contiguous space, but
- space is available elsewhere. Note that we ignore mmap max count
- and threshold limits, since the space will not be used as a
- segregated mmap region.
- */
-
-#if HAVE_MMAP
- if (brk == (char*)(MORECORE_FAILURE)) {
-
- /* Cannot merge with old top, so add its size back in */
- if (contiguous(av))
- size = (size + old_size + pagemask) & ~pagemask;
-
- /* If we are relying on mmap as backup, then use larger units */
- if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE))
- size = MMAP_AS_MORECORE_SIZE;
-
- /* Don't try if size wraps around 0 */
- if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
-
- brk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
-
- if (brk != (char*)(MORECORE_FAILURE)) {
-
- /* We do not need, and cannot use, another sbrk call to find end */
- snd_brk = brk + size;
-
- /*
- Record that we no longer have a contiguous sbrk region.
- After the first time mmap is used as backup, we do not
- ever rely on contiguous space since this could incorrectly
- bridge regions.
- */
- set_noncontiguous(av);
- }
- }
- }
-#endif
-
- if (brk != (char*)(MORECORE_FAILURE)) {
- av->sbrked_mem += size;
-
- /*
- If MORECORE extends previous space, we can likewise extend top size.
- */
-
- if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) {
- set_head(old_top, (size + old_size) | PREV_INUSE);
- }
-
- /*
- Otherwise, make adjustments:
-
- * If the first time through or noncontiguous, we need to call sbrk
- just to find out where the end of memory lies.
-
- * We need to ensure that all returned chunks from malloc will meet
- MALLOC_ALIGNMENT
-
- * If there was an intervening foreign sbrk, we need to adjust sbrk
- request size to account for fact that we will not be able to
- combine new space with existing space in old_top.
-
- * Almost all systems internally allocate whole pages at a time, in
- which case we might as well use the whole last page of request.
- So we allocate enough more memory to hit a page boundary now,
- which in turn causes future contiguous calls to page-align.
- */
-
- else {
- front_misalign = 0;
- end_misalign = 0;
- correction = 0;
- aligned_brk = brk;
-
- /*
- If MORECORE returns an address lower than we have seen before,
- we know it isn't really contiguous. This and some subsequent
- checks help cope with non-conforming MORECORE functions and
- the presence of "foreign" calls to MORECORE from outside of
- malloc or by other threads. We cannot guarantee to detect
- these in all cases, but cope with the ones we do detect.
- */
- if (contiguous(av) && old_size != 0 && brk < old_end) {
- set_noncontiguous(av);
- }
-
- /* handle contiguous cases */
- if (contiguous(av)) {
-
- /*
- We can tolerate forward non-contiguities here (usually due
- to foreign calls) but treat them as part of our space for
- stats reporting.
- */
- if (old_size != 0)
- av->sbrked_mem += brk - old_end;
-
- /* Guarantee alignment of first new chunk made from this space */
-
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
- if (front_misalign > 0) {
-
- /*
- Skip over some bytes to arrive at an aligned position.
- We don't need to specially mark these wasted front bytes.
- They will never be accessed anyway because
- prev_inuse of av->top (and any chunk created from its start)
- is always true after initialization.
- */
-
- correction = MALLOC_ALIGNMENT - front_misalign;
- aligned_brk += correction;
- }
-
- /*
- If this isn't adjacent to existing space, then we will not
- be able to merge with old_top space, so must add to 2nd request.
- */
-
- correction += old_size;
-
- /* Extend the end address to hit a page boundary */
- end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
- correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
-
- assert(correction >= 0);
- snd_brk = (char*)(MORECORE(correction));
-
- if (snd_brk == (char*)(MORECORE_FAILURE)) {
- /*
- If can't allocate correction, try to at least find out current
- brk. It might be enough to proceed without failing.
- */
- correction = 0;
- snd_brk = (char*)(MORECORE(0));
- }
- else if (snd_brk < brk) {
- /*
- If the second call gives noncontiguous space even though
- it says it won't, the only course of action is to ignore
- results of second call, and conservatively estimate where
- the first call left us. Also set noncontiguous, so this
- won't happen again, leaving at most one hole.
-
- Note that this check is intrinsically incomplete. Because
- MORECORE is allowed to give more space than we ask for,
- there is no reliable way to detect a noncontiguity
- producing a forward gap for the second call.
- */
- snd_brk = brk + size;
- correction = 0;
- set_noncontiguous(av);
- }
-
- }
-
- /* handle non-contiguous cases */
- else {
- /* MORECORE/mmap must correctly align */
- assert(aligned_OK(chunk2mem(brk)));
-
- /* Find out current end of memory */
- if (snd_brk == (char*)(MORECORE_FAILURE)) {
- snd_brk = (char*)(MORECORE(0));
- av->sbrked_mem += snd_brk - brk - size;
- }
- }
-
- /* Adjust top based on results of second sbrk */
- if (snd_brk != (char*)(MORECORE_FAILURE)) {
- av->top = (mchunkptr)aligned_brk;
- set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
- av->sbrked_mem += correction;
-
- /*
- If not the first time through, we either have a
- gap due to foreign sbrk or a non-contiguous region. Insert a
- double fencepost at old_top to prevent consolidation with space
- we don't own. These fenceposts are artificial chunks that are
- marked as inuse and are in any case too small to use. We need
- two to make sizes and alignments work out.
- */
-
- if (old_size != 0) {
- /*
- Shrink old_top to insert fenceposts, keeping size a
- multiple of MALLOC_ALIGNMENT. We know there is at least
- enough space in old_top to do this.
- */
- old_size = (old_size - 3*SIZE_SZ) & ~MALLOC_ALIGN_MASK;
- set_head(old_top, old_size | PREV_INUSE);
-
- /*
- Note that the following assignments completely overwrite
- old_top when old_size was previously MINSIZE. This is
- intentional. We need the fencepost, even if old_top otherwise gets
- lost.
- */
- chunk_at_offset(old_top, old_size )->size =
- SIZE_SZ|PREV_INUSE;
-
- chunk_at_offset(old_top, old_size + SIZE_SZ)->size =
- SIZE_SZ|PREV_INUSE;
-
- /*
- If possible, release the rest, suppressing trimming.
- */
- if (old_size >= MINSIZE) {
- INTERNAL_SIZE_T tt = av->trim_threshold;
- av->trim_threshold = (INTERNAL_SIZE_T)(-1);
- fREe(chunk2mem(old_top));
- av->trim_threshold = tt;
- }
- }
- }
- }
-
- /* Update statistics */
- sum = av->sbrked_mem;
- if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem))
- av->max_sbrked_mem = sum;
-
- sum += av->mmapped_mem;
- if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
- av->max_total_mem = sum;
-
- check_malloc_state();
-
- /* finally, do the allocation */
-
- p = av->top;
- size = chunksize(p);
-
- /* check that one of the above allocation paths succeeded */
- if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
- remainder_size = size - nb;
- remainder = chunk_at_offset(p, nb);
- av->top = remainder;
- set_head(p, nb | PREV_INUSE);
- set_head(remainder, remainder_size | PREV_INUSE);
- check_malloced_chunk(p, nb);
- return chunk2mem(p);
- }
-
- }
-
- /* catch all failure paths */
- MALLOC_FAILURE_ACTION;
- return 0;
-}
-
-
-
-
-#ifndef MORECORE_CANNOT_TRIM
-/*
- sYSTRIm is an inverse of sorts to sYSMALLOc. It gives memory back
- to the system (via negative arguments to sbrk) if there is unused
- memory at the `high' end of the malloc pool. It is called
- automatically by free() when top space exceeds the trim
- threshold. It is also called by the public malloc_trim routine. It
- returns 1 if it actually released any memory, else 0.
-*/
-
-#if __STD_C
-static int sYSTRIm(size_t pad, mstate av)
-#else
-static int sYSTRIm(pad, av) size_t pad; mstate av;
-#endif
-{
- long top_size; /* Amount of top-most memory */
- long extra; /* Amount to release */
- long released; /* Amount actually released */
- char* current_brk; /* address returned by pre-check sbrk call */
- char* new_brk; /* address returned by post-check sbrk call */
- size_t pagesz;
-
- pagesz = av->pagesize;
- top_size = chunksize(av->top);
-
- /* Release in pagesize units, keeping at least one page */
- extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz;
-
- if (extra > 0) {
-
- /*
- Only proceed if end of memory is where we last set it.
- This avoids problems if there were foreign sbrk calls.
- */
- current_brk = (char*)(MORECORE(0));
- if (current_brk == (char*)(av->top) + top_size) {
-
- /*
- Attempt to release memory. We ignore MORECORE return value,
- and instead call again to find out where new end of memory is.
- This avoids problems if first call releases less than we asked,
- or if failure somehow altered brk value. (We could still
- encounter problems if it altered brk in some very bad way,
- but the only thing we can do is adjust anyway, which will cause
- some downstream failure.)
- */
-
- MORECORE(-extra);
- new_brk = (char*)(MORECORE(0));
-
- if (new_brk != (char*)MORECORE_FAILURE) {
- released = (long)(current_brk - new_brk);
-
- if (released != 0) {
- /* Success. Adjust top. */
- av->sbrked_mem -= released;
- set_head(av->top, (top_size - released) | PREV_INUSE);
- check_malloc_state();
- return 1;
- }
- }
- }
- }
- return 0;
-}
-#endif
-
-/*
- ------------------------------ malloc ------------------------------
-*/
-
-
-#if __STD_C
-Void_t* mALLOc(size_t bytes)
-#else
- Void_t* mALLOc(bytes) size_t bytes;
-#endif
-{
- mstate av = get_malloc_state();
-
- INTERNAL_SIZE_T nb; /* normalized request size */
- unsigned int idx; /* associated bin index */
- mbinptr bin; /* associated bin */
- mfastbinptr* fb; /* associated fastbin */
-
- mchunkptr victim; /* inspected/selected chunk */
- INTERNAL_SIZE_T size; /* its size */
- int victim_index; /* its bin index */
-
- mchunkptr remainder; /* remainder from a split */
- CHUNK_SIZE_T remainder_size; /* its size */
-
- unsigned int block; /* bit map traverser */
- unsigned int bit; /* bit map traverser */
- unsigned int map; /* current word of binmap */
-
- mchunkptr fwd; /* misc temp for linking */
- mchunkptr bck; /* misc temp for linking */
-
- /*
- Convert request size to internal form by adding SIZE_SZ bytes
- overhead plus possibly more to obtain necessary alignment and/or
- to obtain a size of at least MINSIZE, the smallest allocatable
- size. Also, checked_request2size traps (returning 0) request sizes
- that are so large that they wrap around zero when padded and
- aligned.
- */
-
- checked_request2size(bytes, nb);
-
- /*
- Bypass search if no frees yet
- */
- if (!have_anychunks(av)) {
- if (av->max_fast == 0) /* initialization check */
- malloc_consolidate(av);
- goto use_top;
- }
-
- /*
- If the size qualifies as a fastbin, first check corresponding bin.
- */
-
- if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) {
- fb = &(av->fastbins[(fastbin_index(nb))]);
- if ( (victim = *fb) != 0) {
- *fb = victim->fd;
- check_remalloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
- }
-
- /*
- If a small request, check regular bin. Since these "smallbins"
- hold one size each, no searching within bins is necessary.
- (For a large request, we need to wait until unsorted chunks are
- processed to find best fit. But for small ones, fits are exact
- anyway, so we can check now, which is faster.)
- */
-
- if (in_smallbin_range(nb)) {
- idx = smallbin_index(nb);
- bin = bin_at(av,idx);
-
- if ( (victim = last(bin)) != bin) {
- bck = victim->bk;
- set_inuse_bit_at_offset(victim, nb);
- bin->bk = bck;
- bck->fd = bin;
-
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
- }
-
- /*
- If this is a large request, consolidate fastbins before continuing.
- While it might look excessive to kill all fastbins before
- even seeing if there is space available, this avoids
- fragmentation problems normally associated with fastbins.
- Also, in practice, programs tend to have runs of either small or
- large requests, but less often mixtures, so consolidation is not
- invoked all that often in most programs. And the programs in which
- it is called frequently otherwise tend to fragment.
- */
-
- else {
- idx = largebin_index(nb);
- if (have_fastchunks(av))
- malloc_consolidate(av);
- }
-
- /*
- Process recently freed or remaindered chunks, taking one only if
- it is an exact fit, or, if this is a small request, the chunk is the
- remainder from the most recent non-exact fit. Place other traversed chunks in
- bins. Note that this step is the only place in any routine where
- chunks are placed in bins.
- */
-
- while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
- bck = victim->bk;
- size = chunksize(victim);
-
- /*
- If a small request, try to use last remainder if it is the
- only chunk in unsorted bin. This helps promote locality for
- runs of consecutive small requests. This is the only
- exception to best-fit, and applies only when there is
- no exact fit for a small chunk.
- */
-
- if (in_smallbin_range(nb) &&
- bck == unsorted_chunks(av) &&
- victim == av->last_remainder &&
- (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) {
-
- /* split and reattach remainder */
- remainder_size = size - nb;
- remainder = chunk_at_offset(victim, nb);
- unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
- av->last_remainder = remainder;
- remainder->bk = remainder->fd = unsorted_chunks(av);
-
- set_head(victim, nb | PREV_INUSE);
- set_head(remainder, remainder_size | PREV_INUSE);
- set_foot(remainder, remainder_size);
-
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
-
- /* remove from unsorted list */
- unsorted_chunks(av)->bk = bck;
- bck->fd = unsorted_chunks(av);
-
- /* Take now instead of binning if exact fit */
-
- if (size == nb) {
- set_inuse_bit_at_offset(victim, size);
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
-
- /* place chunk in bin */
-
- if (in_smallbin_range(size)) {
- victim_index = smallbin_index(size);
- bck = bin_at(av, victim_index);
- fwd = bck->fd;
- }
- else {
- victim_index = largebin_index(size);
- bck = bin_at(av, victim_index);
- fwd = bck->fd;
-
- if (fwd != bck) {
- /* if smaller than smallest, place first */
- if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) {
- fwd = bck;
- bck = bck->bk;
- }
- else if ((CHUNK_SIZE_T)(size) >=
- (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
-
- /* maintain large bins in sorted order */
- size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */
- while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size))
- fwd = fwd->fd;
- bck = fwd->bk;
- }
- }
- }
-
- mark_bin(av, victim_index);
- victim->bk = bck;
- victim->fd = fwd;
- fwd->bk = victim;
- bck->fd = victim;
- }
-
- /*
- If a large request, scan through the chunks of current bin to
- find one that fits. (This will be the smallest that fits unless
- FIRST_SORTED_BIN_SIZE has been changed from default.) This is
- the only step where an unbounded number of chunks might be
- scanned without doing anything useful with them. However the
- lists tend to be short.
- */
-
- if (!in_smallbin_range(nb)) {
- bin = bin_at(av, idx);
-
- for (victim = last(bin); victim != bin; victim = victim->bk) {
- size = chunksize(victim);
-
- if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) {
- remainder_size = size - nb;
- unlink(victim, bck, fwd);
-
- /* Exhaust */
- if (remainder_size < MINSIZE) {
- set_inuse_bit_at_offset(victim, size);
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
- /* Split */
- else {
- remainder = chunk_at_offset(victim, nb);
- unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
- remainder->bk = remainder->fd = unsorted_chunks(av);
- set_head(victim, nb | PREV_INUSE);
- set_head(remainder, remainder_size | PREV_INUSE);
- set_foot(remainder, remainder_size);
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
- }
- }
- }
-
- /*
- Search for a chunk by scanning bins, starting with next largest
- bin. This search is strictly by best-fit; i.e., the smallest
- (with ties going to approximately the least recently used) chunk
- that fits is selected.
-
- The bitmap avoids needing to check that most blocks are nonempty.
- */
-
- ++idx;
- bin = bin_at(av,idx);
- block = idx2block(idx);
- map = av->binmap[block];
- bit = idx2bit(idx);
-
- for (;;) {
-
- /* Skip rest of block if there are no more set bits in this block. */
- if (bit > map || bit == 0) {
- do {
- if (++block >= BINMAPSIZE) /* out of bins */
- goto use_top;
- } while ( (map = av->binmap[block]) == 0);
-
- bin = bin_at(av, (block << BINMAPSHIFT));
- bit = 1;
- }
-
- /* Advance to bin with set bit. There must be one. */
- while ((bit & map) == 0) {
- bin = next_bin(bin);
- bit <<= 1;
- assert(bit != 0);
- }
-
- /* Inspect the bin. It is likely to be non-empty */
- victim = last(bin);
-
- /* If a false alarm (empty bin), clear the bit. */
- if (victim == bin) {
- av->binmap[block] = map &= ~bit; /* Write through */
- bin = next_bin(bin);
- bit <<= 1;
- }
-
- else {
- size = chunksize(victim);
-
- /* We know the first chunk in this bin is big enough to use. */
- assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb));
-
- remainder_size = size - nb;
-
- /* unlink */
- bck = victim->bk;
- bin->bk = bck;
- bck->fd = bin;
-
- /* Exhaust */
- if (remainder_size < MINSIZE) {
- set_inuse_bit_at_offset(victim, size);
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
-
- /* Split */
- else {
- remainder = chunk_at_offset(victim, nb);
-
- unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
- remainder->bk = remainder->fd = unsorted_chunks(av);
- /* advertise as last remainder */
- if (in_smallbin_range(nb))
- av->last_remainder = remainder;
-
- set_head(victim, nb | PREV_INUSE);
- set_head(remainder, remainder_size | PREV_INUSE);
- set_foot(remainder, remainder_size);
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
- }
- }
-
- use_top:
- /*
- If large enough, split off the chunk bordering the end of memory
- (held in av->top). Note that this is in accord with the best-fit
- search rule. In effect, av->top is treated as larger (and thus
- less well fitting) than any other available chunk since it can
- be extended to be as large as necessary (up to system
- limitations).
-
- We require that av->top always exists (i.e., has size >=
- MINSIZE) after initialization, so if it would otherwise be
- exhausted by the current request, it is replenished. (The main
- reason for ensuring it exists is that we may need MINSIZE space
- to put in fenceposts in sysmalloc.)
- */
-
- victim = av->top;
- size = chunksize(victim);
-
- if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
- remainder_size = size - nb;
- remainder = chunk_at_offset(victim, nb);
- av->top = remainder;
- set_head(victim, nb | PREV_INUSE);
- set_head(remainder, remainder_size | PREV_INUSE);
-
- check_malloced_chunk(victim, nb);
- return chunk2mem(victim);
- }
-
- /*
- If no space in top, relay to handle system-dependent cases
- */
- return sYSMALLOc(nb, av);
-}
-
-/*
- ------------------------------ free ------------------------------
-*/
-
-#if __STD_C
-void fREe(Void_t* mem)
-#else
-void fREe(mem) Void_t* mem;
-#endif
-{
- mstate av = get_malloc_state();
-
- mchunkptr p; /* chunk corresponding to mem */
- INTERNAL_SIZE_T size; /* its size */
- mfastbinptr* fb; /* associated fastbin */
- mchunkptr nextchunk; /* next contiguous chunk */
- INTERNAL_SIZE_T nextsize; /* its size */
- int nextinuse; /* true if nextchunk is used */
- INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
- mchunkptr bck; /* misc temp for linking */
- mchunkptr fwd; /* misc temp for linking */
-
- /* free(0) has no effect */
- if (mem != 0) {
- p = mem2chunk(mem);
- size = chunksize(p);
-
- check_inuse_chunk(p);
-
- /*
- If eligible, place chunk on a fastbin so it can be found
- and used quickly in malloc.
- */
-
- if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast)
-
-#if TRIM_FASTBINS
- /*
- If TRIM_FASTBINS set, don't place chunks
- bordering top into fastbins
- */
- && (chunk_at_offset(p, size) != av->top)
-#endif
- ) {
-
- set_fastchunks(av);
- fb = &(av->fastbins[fastbin_index(size)]);
- p->fd = *fb;
- *fb = p;
- }
-
- /*
- Consolidate other non-mmapped chunks as they arrive.
- */
-
- else if (!chunk_is_mmapped(p)) {
- set_anychunks(av);
-
- nextchunk = chunk_at_offset(p, size);
- nextsize = chunksize(nextchunk);
-
- /* consolidate backward */
- if (!prev_inuse(p)) {
- prevsize = p->prev_size;
- size += prevsize;
- p = chunk_at_offset(p, -((long) prevsize));
- unlink(p, bck, fwd);
- }
-
- if (nextchunk != av->top) {
- /* get and clear inuse bit */
- nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
- set_head(nextchunk, nextsize);
-
- /* consolidate forward */
- if (!nextinuse) {
- unlink(nextchunk, bck, fwd);
- size += nextsize;
- }
-
- /*
- Place the chunk in unsorted chunk list. Chunks are
- not placed into regular bins until after they have
- been given one chance to be used in malloc.
- */
-
- bck = unsorted_chunks(av);
- fwd = bck->fd;
- p->bk = bck;
- p->fd = fwd;
- bck->fd = p;
- fwd->bk = p;
-
- set_head(p, size | PREV_INUSE);
- set_foot(p, size);
-
- check_free_chunk(p);
- }
-
- /*
- If the chunk borders the current high end of memory,
- consolidate into top
- */
-
- else {
- size += nextsize;
- set_head(p, size | PREV_INUSE);
- av->top = p;
- check_chunk(p);
- }
-
- /*
- If freeing a large space, consolidate possibly-surrounding
- chunks. Then, if the total unused topmost memory exceeds trim
- threshold, ask malloc_trim to reduce top.
-
- Unless max_fast is 0, we don't know if there are fastbins
- bordering top, so we cannot tell for sure whether threshold
- has been reached unless fastbins are consolidated. But we
- don't want to consolidate on each free. As a compromise,
- consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
- is reached.
- */
-
- if ((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
- if (have_fastchunks(av))
- malloc_consolidate(av);
-
-#ifndef MORECORE_CANNOT_TRIM
- if ((CHUNK_SIZE_T)(chunksize(av->top)) >=
- (CHUNK_SIZE_T)(av->trim_threshold))
- sYSTRIm(av->top_pad, av);
-#endif
- }
-
- }
- /*
- If the chunk was allocated via mmap, release via munmap()
- Note that if HAVE_MMAP is false but chunk_is_mmapped is
- true, then the user must have overwritten memory. There's nothing
- we can do to catch this error unless DEBUG is set, in which case
- check_inuse_chunk (above) will have triggered an error.
- */
-
- else {
-#if HAVE_MMAP
- int ret;
- INTERNAL_SIZE_T offset = p->prev_size;
- av->n_mmaps--;
- av->mmapped_mem -= (size + offset);
- ret = munmap((char*)p - offset, size + offset);
- /* munmap returns non-zero on failure */
- assert(ret == 0);
-#endif
- }
- }
-}
-
-/*
- ------------------------- malloc_consolidate -------------------------
-
- malloc_consolidate is a specialized version of free() that tears
- down chunks held in fastbins. Free itself cannot be used for this
- purpose since, among other things, it might place chunks back onto
- fastbins. So, instead, we need to use a minor variant of the same
- code.
-
- Also, because this routine needs to be called the first time through
- malloc anyway, it turns out to be the perfect place to trigger
- initialization code.
-*/
-
-#if __STD_C
-static void malloc_consolidate(mstate av)
-#else
-static void malloc_consolidate(av) mstate av;
-#endif
-{
- mfastbinptr* fb; /* current fastbin being consolidated */
- mfastbinptr* maxfb; /* last fastbin (for loop control) */
- mchunkptr p; /* current chunk being consolidated */
- mchunkptr nextp; /* next chunk to consolidate */
- mchunkptr unsorted_bin; /* bin header */
- mchunkptr first_unsorted; /* chunk to link to */
-
- /* These have same use as in free() */
- mchunkptr nextchunk;
- INTERNAL_SIZE_T size;
- INTERNAL_SIZE_T nextsize;
- INTERNAL_SIZE_T prevsize;
- int nextinuse;
- mchunkptr bck;
- mchunkptr fwd;
-
- /*
- If max_fast is 0, we know that av hasn't
- yet been initialized, in which case do so below
- */
-
- if (av->max_fast != 0) {
- clear_fastchunks(av);
-
- unsorted_bin = unsorted_chunks(av);
-
- /*
- Remove each chunk from fast bin and consolidate it, placing it
- then in unsorted bin. Among other reasons for doing this,
- placing in unsorted bin avoids needing to calculate actual bins
- until malloc is sure that chunks aren't immediately going to be
- reused anyway.
- */
-
- maxfb = &(av->fastbins[fastbin_index(av->max_fast)]);
- fb = &(av->fastbins[0]);
- do {
- if ( (p = *fb) != 0) {
- *fb = 0;
-
- do {
- check_inuse_chunk(p);
- nextp = p->fd;
-
- /* Slightly streamlined version of consolidation code in free() */
- size = p->size & ~PREV_INUSE;
- nextchunk = chunk_at_offset(p, size);
- nextsize = chunksize(nextchunk);
-
- if (!prev_inuse(p)) {
- prevsize = p->prev_size;
- size += prevsize;
- p = chunk_at_offset(p, -((long) prevsize));
- unlink(p, bck, fwd);
- }
-
- if (nextchunk != av->top) {
- nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
- set_head(nextchunk, nextsize);
-
- if (!nextinuse) {
- size += nextsize;
- unlink(nextchunk, bck, fwd);
- }
-
- first_unsorted = unsorted_bin->fd;
- unsorted_bin->fd = p;
- first_unsorted->bk = p;
-
- set_head(p, size | PREV_INUSE);
- p->bk = unsorted_bin;
- p->fd = first_unsorted;
- set_foot(p, size);
- }
-
- else {
- size += nextsize;
- set_head(p, size | PREV_INUSE);
- av->top = p;
- }
-
- } while ( (p = nextp) != 0);
-
- }
- } while (fb++ != maxfb);
- }
- else {
- malloc_init_state(av);
- check_malloc_state();
- }
-}
-
-/*
- ------------------------------ realloc ------------------------------
-*/
-
-
-#if __STD_C
-Void_t* rEALLOc(Void_t* oldmem, size_t bytes)
-#else
-Void_t* rEALLOc(oldmem, bytes) Void_t* oldmem; size_t bytes;
-#endif
-{
- mstate av = get_malloc_state();
-
- INTERNAL_SIZE_T nb; /* padded request size */
-
- mchunkptr oldp; /* chunk corresponding to oldmem */
- INTERNAL_SIZE_T oldsize; /* its size */
-
- mchunkptr newp; /* chunk to return */
- INTERNAL_SIZE_T newsize; /* its size */
- Void_t* newmem; /* corresponding user mem */
-
- mchunkptr next; /* next contiguous chunk after oldp */
-
- mchunkptr remainder; /* extra space at end of newp */
- CHUNK_SIZE_T remainder_size; /* its size */
-
- mchunkptr bck; /* misc temp for linking */
- mchunkptr fwd; /* misc temp for linking */
-
- CHUNK_SIZE_T copysize; /* bytes to copy */
- unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */
- INTERNAL_SIZE_T* s; /* copy source */
- INTERNAL_SIZE_T* d; /* copy destination */
-
-
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- fREe(oldmem);
- return 0;
- }
-#endif
-
- /* realloc of null is supposed to be same as malloc */
- if (oldmem == 0) return mALLOc(bytes);
-
- checked_request2size(bytes, nb);
-
- oldp = mem2chunk(oldmem);
- oldsize = chunksize(oldp);
-
- check_inuse_chunk(oldp);
-
- if (!chunk_is_mmapped(oldp)) {
-
- if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb)) {
- /* already big enough; split below */
- newp = oldp;
- newsize = oldsize;
- }
-
- else {
- next = chunk_at_offset(oldp, oldsize);
-
- /* Try to expand forward into top */
- if (next == av->top &&
- (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
- (CHUNK_SIZE_T)(nb + MINSIZE)) {
- set_head_size(oldp, nb);
- av->top = chunk_at_offset(oldp, nb);
- set_head(av->top, (newsize - nb) | PREV_INUSE);
- return chunk2mem(oldp);
- }
-
- /* Try to expand forward into next chunk; split off remainder below */
- else if (next != av->top &&
- !inuse(next) &&
- (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
- (CHUNK_SIZE_T)(nb)) {
- newp = oldp;
- unlink(next, bck, fwd);
- }
-
- /* allocate, copy, free */
- else {
- newmem = mALLOc(nb - MALLOC_ALIGN_MASK);
- if (newmem == 0)
- return 0; /* propagate failure */
-
- newp = mem2chunk(newmem);
- newsize = chunksize(newp);
-
- /*
- Avoid copy if newp is next chunk after oldp.
- */
- if (newp == next) {
- newsize += oldsize;
- newp = oldp;
- }
- else {
- /*
- Unroll copy of <= 36 bytes (72 if 8byte sizes)
- We know that contents have an odd number of
- INTERNAL_SIZE_T-sized words; minimally 3.
- */
-
- copysize = oldsize - SIZE_SZ;
- s = (INTERNAL_SIZE_T*)(oldmem);
- d = (INTERNAL_SIZE_T*)(newmem);
- ncopies = copysize / sizeof(INTERNAL_SIZE_T);
- assert(ncopies >= 3);
-
- if (ncopies > 9)
- MALLOC_COPY(d, s, copysize);
-
- else {
- *(d+0) = *(s+0);
- *(d+1) = *(s+1);
- *(d+2) = *(s+2);
- if (ncopies > 4) {
- *(d+3) = *(s+3);
- *(d+4) = *(s+4);
- if (ncopies > 6) {
- *(d+5) = *(s+5);
- *(d+6) = *(s+6);
- if (ncopies > 8) {
- *(d+7) = *(s+7);
- *(d+8) = *(s+8);
- }
- }
- }
- }
-
- fREe(oldmem);
- check_inuse_chunk(newp);
- return chunk2mem(newp);
- }
- }
- }
-
- /* If possible, free extra space in old or extended chunk */
-
- assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb));
-
- remainder_size = newsize - nb;
-
- if (remainder_size < MINSIZE) { /* not enough extra to split off */
- set_head_size(newp, newsize);
- set_inuse_bit_at_offset(newp, newsize);
- }
- else { /* split remainder */
- remainder = chunk_at_offset(newp, nb);
- set_head_size(newp, nb);
- set_head(remainder, remainder_size | PREV_INUSE);
- /* Mark remainder as inuse so free() won't complain */
- set_inuse_bit_at_offset(remainder, remainder_size);
- fREe(chunk2mem(remainder));
- }
-
- check_inuse_chunk(newp);
- return chunk2mem(newp);
- }
-
- /*
- Handle mmap cases
- */
-
- else {
-#if HAVE_MMAP
-
-#if HAVE_MREMAP
- INTERNAL_SIZE_T offset = oldp->prev_size;
- size_t pagemask = av->pagesize - 1;
- char *cp;
- CHUNK_SIZE_T sum;
-
- /* Note the extra SIZE_SZ overhead */
- newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask;
-
- /* don't need to remap if still within same page */
- if (oldsize == newsize - offset)
- return oldmem;
-
- cp = (char*)mremap((char*)oldp - offset, oldsize + offset, newsize, 1);
-
- if (cp != (char*)MORECORE_FAILURE) {
-
- newp = (mchunkptr)(cp + offset);
- set_head(newp, (newsize - offset)|IS_MMAPPED);
-
- assert(aligned_OK(chunk2mem(newp)));
- assert((newp->prev_size == offset));
-
- /* update statistics */
- sum = av->mmapped_mem += newsize - oldsize;
- if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem))
- av->max_mmapped_mem = sum;
- sum += av->sbrked_mem;
- if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
- av->max_total_mem = sum;
-
- return chunk2mem(newp);
- }
-#endif
-
- /* Note the extra SIZE_SZ overhead. */
- if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ))
- newmem = oldmem; /* do nothing */
- else {
- /* Must alloc, copy, free. */
- newmem = mALLOc(nb - MALLOC_ALIGN_MASK);
- if (newmem != 0) {
- MALLOC_COPY(newmem, oldmem, oldsize - 2*SIZE_SZ);
- fREe(oldmem);
- }
- }
- return newmem;
-
-#else
- /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */
- check_malloc_state();
- MALLOC_FAILURE_ACTION;
- return 0;
-#endif
- }
-}
-
-/*
- ------------------------------ memalign ------------------------------
-*/
-
-#if __STD_C
-Void_t* mEMALIGn(size_t alignment, size_t bytes)
-#else
-Void_t* mEMALIGn(alignment, bytes) size_t alignment; size_t bytes;
-#endif
-{
- INTERNAL_SIZE_T nb; /* padded request size */
- char* m; /* memory returned by malloc call */
- mchunkptr p; /* corresponding chunk */
- char* brk; /* alignment point within p */
- mchunkptr newp; /* chunk to return */
- INTERNAL_SIZE_T newsize; /* its size */
- INTERNAL_SIZE_T leadsize; /* leading space before alignment point */
- mchunkptr remainder; /* spare room at end to split off */
- CHUNK_SIZE_T remainder_size; /* its size */
- INTERNAL_SIZE_T size;
-
- /* If need less alignment than we give anyway, just relay to malloc */
-
- if (alignment <= MALLOC_ALIGNMENT) return mALLOc(bytes);
-
- /* Otherwise, ensure that it is at least a minimum chunk size */
-
- if (alignment < MINSIZE) alignment = MINSIZE;
-
- /* Make sure alignment is power of 2 (in case MINSIZE is not). */
- if ((alignment & (alignment - 1)) != 0) {
- size_t a = MALLOC_ALIGNMENT * 2;
- while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) a <<= 1;
- alignment = a;
- }
-
- checked_request2size(bytes, nb);
-
- /*
- Strategy: find a spot within that chunk that meets the alignment
- request, and then possibly free the leading and trailing space.
- */
-
-
- /* Call malloc with worst case padding to hit alignment. */
-
- m = (char*)(mALLOc(nb + alignment + MINSIZE));
-
- if (m == 0) return 0; /* propagate failure */
-
- p = mem2chunk(m);
-
- if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */
-
- /*
- Find an aligned spot inside chunk. Since we need to give back
- leading space in a chunk of at least MINSIZE, if the first
- calculation places us at a spot with less than MINSIZE leader,
- we can move to the next aligned spot -- we've allocated enough
- total room so that this is always possible.
- */
-
- brk = (char*)mem2chunk((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) &
- -((signed long) alignment)));
- if ((CHUNK_SIZE_T)(brk - (char*)(p)) < MINSIZE)
- brk += alignment;
-
- newp = (mchunkptr)brk;
- leadsize = brk - (char*)(p);
- newsize = chunksize(p) - leadsize;
-
- /* For mmapped chunks, just adjust offset */
- if (chunk_is_mmapped(p)) {
- newp->prev_size = p->prev_size + leadsize;
- set_head(newp, newsize|IS_MMAPPED);
- return chunk2mem(newp);
- }
-
- /* Otherwise, give back leader, use the rest */
- set_head(newp, newsize | PREV_INUSE);
- set_inuse_bit_at_offset(newp, newsize);
- set_head_size(p, leadsize);
- fREe(chunk2mem(p));
- p = newp;
-
- assert (newsize >= nb &&
- (((PTR_UINT)(chunk2mem(p))) % alignment) == 0);
- }
-
- /* Also give back spare room at the end */
- if (!chunk_is_mmapped(p)) {
- size = chunksize(p);
- if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) {
- remainder_size = size - nb;
- remainder = chunk_at_offset(p, nb);
- set_head(remainder, remainder_size | PREV_INUSE);
- set_head_size(p, nb);
- fREe(chunk2mem(remainder));
- }
- }
-
- check_inuse_chunk(p);
- return chunk2mem(p);
-}
-
-/*
- ------------------------------ calloc ------------------------------
-*/
-
-#if __STD_C
-Void_t* cALLOc(size_t n_elements, size_t elem_size)
-#else
-Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size;
-#endif
-{
- mchunkptr p;
- CHUNK_SIZE_T clearsize;
- CHUNK_SIZE_T nclears;
- INTERNAL_SIZE_T* d;
-
- Void_t* mem = mALLOc(n_elements * elem_size);
-
- if (mem != 0) {
- p = mem2chunk(mem);
-
- if (!chunk_is_mmapped(p))
- {
- /*
- Unroll clear of <= 36 bytes (72 if 8byte sizes)
- We know that contents have an odd number of
- INTERNAL_SIZE_T-sized words; minimally 3.
- */
-
- d = (INTERNAL_SIZE_T*)mem;
- clearsize = chunksize(p) - SIZE_SZ;
- nclears = clearsize / sizeof(INTERNAL_SIZE_T);
- assert(nclears >= 3);
-
- if (nclears > 9)
- MALLOC_ZERO(d, clearsize);
-
- else {
- *(d+0) = 0;
- *(d+1) = 0;
- *(d+2) = 0;
- if (nclears > 4) {
- *(d+3) = 0;
- *(d+4) = 0;
- if (nclears > 6) {
- *(d+5) = 0;
- *(d+6) = 0;
- if (nclears > 8) {
- *(d+7) = 0;
- *(d+8) = 0;
- }
- }
- }
- }
- }
-#if ! MMAP_CLEARS
- else
- {
- d = (INTERNAL_SIZE_T*)mem;
- /*
- Note the additional SIZE_SZ
- */
- clearsize = chunksize(p) - 2*SIZE_SZ;
- MALLOC_ZERO(d, clearsize);
- }
-#endif
- }
- return mem;
-}
-
-/*
- ------------------------------ cfree ------------------------------
-*/
-
-#if __STD_C
-void cFREe(Void_t *mem)
-#else
-void cFREe(mem) Void_t *mem;
-#endif
-{
- fREe(mem);
-}
-
-/*
- ------------------------- independent_calloc -------------------------
-*/
-
-#if __STD_C
-Void_t** iCALLOc(size_t n_elements, size_t elem_size, Void_t* chunks[])
-#else
-Void_t** iCALLOc(n_elements, elem_size, chunks) size_t n_elements; size_t elem_size; Void_t* chunks[];
-#endif
-{
- size_t sz = elem_size; /* serves as 1-element array */
- /* opts arg of 3 means all elements are same size, and should be cleared */
- return iALLOc(n_elements, &sz, 3, chunks);
-}
-
-/*
- ------------------------- independent_comalloc -------------------------
-*/
-
-#if __STD_C
-Void_t** iCOMALLOc(size_t n_elements, size_t sizes[], Void_t* chunks[])
-#else
-Void_t** iCOMALLOc(n_elements, sizes, chunks) size_t n_elements; size_t sizes[]; Void_t* chunks[];
-#endif
-{
- return iALLOc(n_elements, sizes, 0, chunks);
-}
-
-
-/*
- ------------------------------ ialloc ------------------------------
- ialloc provides common support for independent_X routines, handling all of
- the combinations that can result.
-
- The opts arg has:
- bit 0 set if all elements are same size (using sizes[0])
- bit 1 set if elements should be zeroed
-*/
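/*
  Editorial sketch (not part of the original file): one plausible use of the
  independent_comalloc wrapper defined above on top of iALLOc.  It assumes
  the usual dlmalloc public names (independent_comalloc, free); the struct
  names are hypothetical.

  struct header { int tag; };
  struct body   { char payload[64]; };

  void example_comalloc(void)
  {
      size_t sizes[2]  = { sizeof(struct header), sizeof(struct body) };
      void  *chunks[2];

      // opts == 0 here: per-element sizes (bit 0 clear), no zeroing (bit 1 clear)
      if (independent_comalloc(2, sizes, chunks) != 0) {
          struct header *h = chunks[0];
          struct body   *b = chunks[1];
          h->tag = 1;
          b->payload[0] = 0;
          free(h);     // each element may be freed independently
          free(b);
      }
  }
*/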
-
-
-#if __STD_C
-static Void_t** iALLOc(size_t n_elements,
- size_t* sizes,
- int opts,
- Void_t* chunks[])
-#else
-static Void_t** iALLOc(n_elements, sizes, opts, chunks) size_t n_elements; size_t* sizes; int opts; Void_t* chunks[];
-#endif
-{
- mstate av = get_malloc_state();
- INTERNAL_SIZE_T element_size; /* chunksize of each element, if all same */
- INTERNAL_SIZE_T contents_size; /* total size of elements */
- INTERNAL_SIZE_T array_size; /* request size of pointer array */
- Void_t* mem; /* malloced aggregate space */
- mchunkptr p; /* corresponding chunk */
- INTERNAL_SIZE_T remainder_size; /* remaining bytes while splitting */
- Void_t** marray; /* either "chunks" or malloced ptr array */
- mchunkptr array_chunk; /* chunk for malloced ptr array */
- int mmx; /* to disable mmap */
- INTERNAL_SIZE_T size;
- size_t i;
-
- /* Ensure initialization */
- if (av->max_fast == 0) malloc_consolidate(av);
-
- /* compute array length, if needed */
- if (chunks != 0) {
- if (n_elements == 0)
- return chunks; /* nothing to do */
- marray = chunks;
- array_size = 0;
- }
- else {
- /* if empty req, must still return chunk representing empty array */
- if (n_elements == 0)
- return (Void_t**) mALLOc(0);
- marray = 0;
- array_size = request2size(n_elements * (sizeof(Void_t*)));
- }
-
- /* compute total element size */
- if (opts & 0x1) { /* all-same-size */
- element_size = request2size(*sizes);
- contents_size = n_elements * element_size;
- }
- else { /* add up all the sizes */
- element_size = 0;
- contents_size = 0;
- for (i = 0; i != n_elements; ++i)
- contents_size += request2size(sizes[i]);
- }
-
- /* subtract out alignment bytes from total to minimize overallocation */
- size = contents_size + array_size - MALLOC_ALIGN_MASK;
-
- /*
- Allocate the aggregate chunk.
- But first disable mmap so malloc won't use it, since
- we would not be able to later free/realloc space internal
- to a segregated mmap region.
- */
- mmx = av->n_mmaps_max; /* disable mmap */
- av->n_mmaps_max = 0;
- mem = mALLOc(size);
- av->n_mmaps_max = mmx; /* reset mmap */
- if (mem == 0)
- return 0;
-
- p = mem2chunk(mem);
- assert(!chunk_is_mmapped(p));
- remainder_size = chunksize(p);
-
- if (opts & 0x2) { /* optionally clear the elements */
- MALLOC_ZERO(mem, remainder_size - SIZE_SZ - array_size);
- }
-
- /* If not provided, allocate the pointer array as final part of chunk */
- if (marray == 0) {
- array_chunk = chunk_at_offset(p, contents_size);
- marray = (Void_t**) (chunk2mem(array_chunk));
- set_head(array_chunk, (remainder_size - contents_size) | PREV_INUSE);
- remainder_size = contents_size;
- }
-
- /* split out elements */
- for (i = 0; ; ++i) {
- marray[i] = chunk2mem(p);
- if (i != n_elements-1) {
- if (element_size != 0)
- size = element_size;
- else
- size = request2size(sizes[i]);
- remainder_size -= size;
- set_head(p, size | PREV_INUSE);
- p = chunk_at_offset(p, size);
- }
- else { /* the final element absorbs any overallocation slop */
- set_head(p, remainder_size | PREV_INUSE);
- break;
- }
- }
-
-#if DEBUG
- if (marray != chunks) {
- /* final element must have exactly exhausted chunk */
- if (element_size != 0)
- assert(remainder_size == element_size);
- else
- assert(remainder_size == request2size(sizes[i]));
- check_inuse_chunk(mem2chunk(marray));
- }
-
- for (i = 0; i != n_elements; ++i)
- check_inuse_chunk(mem2chunk(marray[i]));
-#endif
-
- return marray;
-}
-
-
-/*
- ------------------------------ valloc ------------------------------
-*/
-
-#if __STD_C
-Void_t* vALLOc(size_t bytes)
-#else
-Void_t* vALLOc(bytes) size_t bytes;
-#endif
-{
- /* Ensure initialization */
- mstate av = get_malloc_state();
- if (av->max_fast == 0) malloc_consolidate(av);
- return mEMALIGn(av->pagesize, bytes);
-}
-
-/*
- ------------------------------ pvalloc ------------------------------
-*/
-
-
-#if __STD_C
-Void_t* pVALLOc(size_t bytes)
-#else
-Void_t* pVALLOc(bytes) size_t bytes;
-#endif
-{
- mstate av = get_malloc_state();
- size_t pagesz;
-
- /* Ensure initialization */
- if (av->max_fast == 0) malloc_consolidate(av);
- pagesz = av->pagesize;
- return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1));
-}
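/*
  Editorial note: the rounding used by pVALLOc above, shown with concrete
  numbers (assuming pagesz == 4096):
      bytes = 1     ->  (1    + 4095) & ~4095  == 4096
      bytes = 4096  ->  (4096 + 4095) & ~4095  == 4096
      bytes = 4097  ->  (4097 + 4095) & ~4095  == 8192
  i.e. the request is rounded up to a whole number of pages before being
  handed to mEMALIGn, whereas vALLOc above passes the request size through
  unchanged.
*/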
-
-
-/*
- ------------------------------ malloc_trim ------------------------------
-*/
-
-#if __STD_C
-int mTRIm(size_t pad)
-#else
-int mTRIm(pad) size_t pad;
-#endif
-{
- mstate av = get_malloc_state();
- /* Ensure initialization/consolidation */
- malloc_consolidate(av);
-
-#ifndef MORECORE_CANNOT_TRIM
- return sYSTRIm(pad, av);
-#else
- return 0;
-#endif
-}
-
-
-/*
- ------------------------- malloc_usable_size -------------------------
-*/
-
-#if __STD_C
-size_t mUSABLe(Void_t* mem)
-#else
-size_t mUSABLe(mem) Void_t* mem;
-#endif
-{
- mchunkptr p;
- if (mem != 0) {
- p = mem2chunk(mem);
- if (chunk_is_mmapped(p))
- return chunksize(p) - 2*SIZE_SZ;
- else if (inuse(p))
- return chunksize(p) - SIZE_SZ;
- }
- return 0;
-}
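/*
  Editorial note: as coded above, the usable size of an in-use, non-mmapped
  chunk is chunksize(p) - SIZE_SZ, which is never less than the original
  request because requests are padded up by request2size.  A minimal sketch
  of the implied contract:

      void *p = malloc(13);
      if (p != 0)
          assert(malloc_usable_size(p) >= 13);   // padding, if any, is usable
*/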
-
-/*
- ------------------------------ mallinfo ------------------------------
-*/
-
-struct mallinfo mALLINFo()
-{
- mstate av = get_malloc_state();
- struct mallinfo mi;
- int i;
- mbinptr b;
- mchunkptr p;
- INTERNAL_SIZE_T avail;
- INTERNAL_SIZE_T fastavail;
- int nblocks;
- int nfastblocks;
-
- /* Ensure initialization */
- if (av->top == 0) malloc_consolidate(av);
-
- check_malloc_state();
-
- /* Account for top */
- avail = chunksize(av->top);
- nblocks = 1; /* top always exists */
-
- /* traverse fastbins */
- nfastblocks = 0;
- fastavail = 0;
-
- for (i = 0; i < NFASTBINS; ++i) {
- for (p = av->fastbins[i]; p != 0; p = p->fd) {
- ++nfastblocks;
- fastavail += chunksize(p);
- }
- }
-
- avail += fastavail;
-
- /* traverse regular bins */
- for (i = 1; i < NBINS; ++i) {
- b = bin_at(av, i);
- for (p = last(b); p != b; p = p->bk) {
- ++nblocks;
- avail += chunksize(p);
- }
- }
-
- mi.smblks = nfastblocks;
- mi.ordblks = nblocks;
- mi.fordblks = avail;
- mi.uordblks = av->sbrked_mem - avail;
- mi.arena = av->sbrked_mem;
- mi.hblks = av->n_mmaps;
- mi.hblkhd = av->mmapped_mem;
- mi.fsmblks = fastavail;
- mi.keepcost = chunksize(av->top);
- mi.usmblks = av->max_total_mem;
- return mi;
-}
-
-/*
- ------------------------------ malloc_stats ------------------------------
-*/
-
-void mSTATs()
-{
- struct mallinfo mi = mALLINFo();
-
-#ifdef WIN32
- {
- CHUNK_SIZE_T free, reserved, committed;
- vminfo (&free, &reserved, &committed);
- fprintf(stderr, "free bytes = %10lu\n",
- free);
- fprintf(stderr, "reserved bytes = %10lu\n",
- reserved);
- fprintf(stderr, "committed bytes = %10lu\n",
- committed);
- }
-#endif
-
-/* RN XXX */
- printf("max system bytes = %10lu\n",
- (CHUNK_SIZE_T)(mi.usmblks));
- printf("system bytes = %10lu\n",
- (CHUNK_SIZE_T)(mi.arena + mi.hblkhd));
- printf("in use bytes = %10lu\n",
- (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd));
-
-#ifdef WIN32
- {
- CHUNK_SIZE_T kernel, user;
- if (cpuinfo (TRUE, &kernel, &user)) {
- fprintf(stderr, "kernel ms = %10lu\n",
- kernel);
- fprintf(stderr, "user ms = %10lu\n",
- user);
- }
- }
-#endif
-}
-
-
-/*
- ------------------------------ mallopt ------------------------------
-*/
-
-#if __STD_C
-int mALLOPt(int param_number, int value)
-#else
-int mALLOPt(param_number, value) int param_number; int value;
-#endif
-{
- mstate av = get_malloc_state();
- /* Ensure initialization/consolidation */
- malloc_consolidate(av);
-
- switch(param_number) {
- case M_MXFAST:
- if (value >= 0 && value <= MAX_FAST_SIZE) {
- set_max_fast(av, value);
- return 1;
- }
- else
- return 0;
-
- case M_TRIM_THRESHOLD:
- av->trim_threshold = value;
- return 1;
-
- case M_TOP_PAD:
- av->top_pad = value;
- return 1;
-
- case M_MMAP_THRESHOLD:
- av->mmap_threshold = value;
- return 1;
-
- case M_MMAP_MAX:
-#if !HAVE_MMAP
- if (value != 0)
- return 0;
-#endif
- av->n_mmaps_max = value;
- return 1;
-
- default:
- return 0;
- }
-}
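/*
  Editorial sketch: how a caller might tune the allocator through the public
  mallopt name wired to mALLOPt above.  Return convention per the code above:
  1 on success, 0 if the parameter or value is rejected.

  void example_tuning(void)
  {
      mallopt(M_MXFAST, 0);                  // accepted for 0..MAX_FAST_SIZE; 0 disables fastbins
      mallopt(M_TRIM_THRESHOLD, 256*1024);   // matches the 256K default noted in the history below
      if (!mallopt(M_MMAP_MAX, 64))          // rejected only when !HAVE_MMAP and value != 0
          ;                                  // mmap unavailable in this build
  }
*/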
-
-
-/*
- -------------------- Alternative MORECORE functions --------------------
-*/
-
-
-/*
- General Requirements for MORECORE.
-
- The MORECORE function must have the following properties:
-
- If MORECORE_CONTIGUOUS is false:
-
- * MORECORE must allocate in multiples of pagesize. It will
- only be called with arguments that are multiples of pagesize.
-
- * MORECORE(0) must return an address that is at least
- MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.)
-
- else (i.e. If MORECORE_CONTIGUOUS is true):
-
- * Consecutive calls to MORECORE with positive arguments
- return increasing addresses, indicating that space has been
- contiguously extended.
-
- * MORECORE need not allocate in multiples of pagesize.
- Calls to MORECORE need not have args of multiples of pagesize.
-
- * MORECORE need not page-align.
-
- In either case:
-
- * MORECORE may allocate more memory than requested. (Or even less,
- but this will generally result in a malloc failure.)
-
- * MORECORE must not allocate memory when given argument zero, but
- instead return one past the end address of memory from previous
- nonzero call. This malloc does NOT call MORECORE(0)
- until at least one call with positive arguments is made, so
- the initial value returned is not important.
-
- * Even though consecutive calls to MORECORE need not return contiguous
- addresses, it must be OK for malloc'ed chunks to span multiple
- regions in those cases where they do happen to be contiguous.
-
- * MORECORE need not handle negative arguments -- it may instead
- just return MORECORE_FAILURE when given negative arguments.
- Negative arguments are always multiples of pagesize. MORECORE
- must not misinterpret negative args as large positive unsigned
- args. You can suppress all such calls from even occurring by defining
- MORECORE_CANNOT_TRIM.
-
- There is some variation across systems about the type of the
- argument to sbrk/MORECORE. If size_t is unsigned, then it cannot
- actually be size_t, because sbrk supports negative args, so it is
- normally the signed type of the same width as size_t (sometimes
- declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much
- matter though. Internally, we use "long" as arguments, which should
- work across all reasonable possibilities.
-
- Additionally, if MORECORE ever returns failure for a positive
- request, and HAVE_MMAP is true, then mmap is used as a noncontiguous
- system allocator. This is a useful backup strategy for systems with
- holes in address spaces -- in this case sbrk cannot contiguously
- expand the heap, but mmap may be able to map noncontiguous space.
-
- If you'd like mmap to ALWAYS be used, you can define MORECORE to be
- a function that always returns MORECORE_FAILURE.
-
- Malloc only has limited ability to detect failures of MORECORE
- to supply contiguous space when it says it can. In particular,
- multithreaded programs that do not use locks may result in
- race conditions across calls to MORECORE that result in gaps
- that cannot be detected as such, and subsequent corruption.
-
- If you are using this malloc with something other than sbrk (or its
- emulation) to supply memory regions, you probably want to set
- MORECORE_CONTIGUOUS as false. As an example, here is a custom
- allocator kindly contributed for pre-OSX macOS. It uses virtually
- but not necessarily physically contiguous non-paged memory (locked
- in, present and won't get swapped out). You can use it by
- uncommenting this section, adding some #includes, and setting up the
- appropriate defines above:
-
- #define MORECORE osMoreCore
- #define MORECORE_CONTIGUOUS 0
-
- There is also a shutdown routine that should somehow be called for
- cleanup upon program exit.
-
- #define MAX_POOL_ENTRIES 100
- #define MINIMUM_MORECORE_SIZE (64 * 1024)
- static int next_os_pool;
- void *our_os_pools[MAX_POOL_ENTRIES];
-
- void *osMoreCore(int size)
- {
- void *ptr = 0;
- static void *sbrk_top = 0;
-
- if (size > 0)
- {
- if (size < MINIMUM_MORECORE_SIZE)
- size = MINIMUM_MORECORE_SIZE;
- if (CurrentExecutionLevel() == kTaskLevel)
- ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
- if (ptr == 0)
- {
- return (void *) MORECORE_FAILURE;
- }
- // save ptrs so they can be freed during cleanup
- our_os_pools[next_os_pool] = ptr;
- next_os_pool++;
- ptr = (void *) ((((CHUNK_SIZE_T) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
- sbrk_top = (char *) ptr + size;
- return ptr;
- }
- else if (size < 0)
- {
- // we don't currently support shrink behavior
- return (void *) MORECORE_FAILURE;
- }
- else
- {
- return sbrk_top;
- }
- }
-
- // cleanup any allocated memory pools
- // called as last thing before shutting down driver
-
- void osCleanupMem(void)
- {
- void **ptr;
-
- for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
- if (*ptr)
- {
- PoolDeallocate(*ptr);
- *ptr = 0;
- }
- }
-
-*/
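/*
  Editorial sketch of the "mmap only" configuration mentioned above: define
  MORECORE as a function that always fails, so every request is serviced via
  mmap.  The argument type follows the "long" convention described earlier;
  the function name is made up.

  static void *failing_morecore(long size)
  {
      (void) size;
      return (void *) MORECORE_FAILURE;
  }

  #define MORECORE            failing_morecore
  #define MORECORE_CONTIGUOUS 0
  #define HAVE_MMAP           1
*/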
-
-
-/*
- --------------------------------------------------------------
-
- Emulation of sbrk for win32.
- Donated by J. Walter <Walter@xxxxxxxxxxxx>.
- For additional information about this code, and malloc on Win32, see
- http://www.genesys-e.de/jwalter/
-*/
-
-
-#ifdef WIN32
-
-#ifdef _DEBUG
-/* #define TRACE */
-#endif
-
-/* Support for USE_MALLOC_LOCK */
-#ifdef USE_MALLOC_LOCK
-
-/* Wait for spin lock */
-static int slwait (int *sl) {
- while (InterlockedCompareExchange ((void **) sl, (void *) 1, (void *) 0) != 0)
- Sleep (0);
- return 0;
-}
-
-/* Release spin lock */
-static int slrelease (int *sl) {
- InterlockedExchange (sl, 0);
- return 0;
-}
-
-#ifdef NEEDED
-/* Spin lock for emulation code */
-static int g_sl;
-#endif
-
-#endif /* USE_MALLOC_LOCK */
-
-/* getpagesize for windows */
-static long getpagesize (void) {
- static long g_pagesize = 0;
- if (! g_pagesize) {
- SYSTEM_INFO system_info;
- GetSystemInfo (&system_info);
- g_pagesize = system_info.dwPageSize;
- }
- return g_pagesize;
-}
-static long getregionsize (void) {
- static long g_regionsize = 0;
- if (! g_regionsize) {
- SYSTEM_INFO system_info;
- GetSystemInfo (&system_info);
- g_regionsize = system_info.dwAllocationGranularity;
- }
- return g_regionsize;
-}
-
-/* A region list entry */
-typedef struct _region_list_entry {
- void *top_allocated;
- void *top_committed;
- void *top_reserved;
- long reserve_size;
- struct _region_list_entry *previous;
-} region_list_entry;
-
-/* Allocate and link a region entry in the region list */
-static int region_list_append (region_list_entry **last, void *base_reserved, long reserve_size) {
- region_list_entry *next = HeapAlloc (GetProcessHeap (), 0, sizeof (region_list_entry));
- if (! next)
- return FALSE;
- next->top_allocated = (char *) base_reserved;
- next->top_committed = (char *) base_reserved;
- next->top_reserved = (char *) base_reserved + reserve_size;
- next->reserve_size = reserve_size;
- next->previous = *last;
- *last = next;
- return TRUE;
-}
-/* Free and unlink the last region entry from the region list */
-static int region_list_remove (region_list_entry **last) {
- region_list_entry *previous = (*last)->previous;
- if (! HeapFree (GetProcessHeap (), sizeof (region_list_entry), *last))
- return FALSE;
- *last = previous;
- return TRUE;
-}
-
-#define CEIL(size,to) (((size)+(to)-1)&~((to)-1))
-#define FLOOR(size,to) ((size)&~((to)-1))
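/*
  Editorial note: concrete values for the rounding macros above, assuming a
  4096-byte granularity:
      CEIL (5000, 4096) == 8192      FLOOR(5000, 4096) == 4096
      CEIL (8192, 4096) == 8192      FLOOR(8192, 4096) == 8192
  i.e. both are identity operations on values that are already multiples of
  the second argument.
*/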
-
-#define SBRK_SCALE 0
-/* #define SBRK_SCALE 1 */
-/* #define SBRK_SCALE 2 */
-/* #define SBRK_SCALE 4 */
-
-/* sbrk for windows */
-static void *sbrk (long size) {
- static long g_pagesize, g_my_pagesize;
- static long g_regionsize, g_my_regionsize;
- static region_list_entry *g_last;
- void *result = (void *) MORECORE_FAILURE;
-#ifdef TRACE
- printf ("sbrk %d\n", size);
-#endif
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Wait for spin lock */
- slwait (&g_sl);
-#endif
- /* First time initialization */
- if (! g_pagesize) {
- g_pagesize = getpagesize ();
- g_my_pagesize = g_pagesize << SBRK_SCALE;
- }
- if (! g_regionsize) {
- g_regionsize = getregionsize ();
- g_my_regionsize = g_regionsize << SBRK_SCALE;
- }
- if (! g_last) {
- if (! region_list_append (&g_last, 0, 0))
- goto sbrk_exit;
- }
- /* Assert invariants */
- assert (g_last);
- assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated &&
- g_last->top_allocated <= g_last->top_committed);
- assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed &&
- g_last->top_committed <= g_last->top_reserved &&
- (unsigned) g_last->top_committed % g_pagesize == 0);
- assert ((unsigned) g_last->top_reserved % g_regionsize == 0);
- assert ((unsigned) g_last->reserve_size % g_regionsize == 0);
- /* Allocation requested? */
- if (size >= 0) {
- /* Allocation size is the requested size */
- long allocate_size = size;
- /* Compute the size to commit */
- long to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
- /* Do we reach the commit limit? */
- if (to_commit > 0) {
- /* Round size to commit */
- long commit_size = CEIL (to_commit, g_my_pagesize);
- /* Compute the size to reserve */
- long to_reserve = (char *) g_last->top_committed + commit_size - (char *) g_last->top_reserved;
- /* Do we reach the reserve limit? */
- if (to_reserve > 0) {
- /* Compute the remaining size to commit in the current region */
- long remaining_commit_size = (char *) g_last->top_reserved - (char *) g_last->top_committed;
- if (remaining_commit_size > 0) {
- /* Assert preconditions */
- assert ((unsigned) g_last->top_committed % g_pagesize == 0);
- assert (0 < remaining_commit_size && remaining_commit_size % g_pagesize == 0); {
- /* Commit this */
- void *base_committed = VirtualAlloc (g_last->top_committed, remaining_commit_size, MEM_COMMIT, PAGE_READWRITE);
- /* Check returned pointer for consistency */
- if (base_committed != g_last->top_committed)
- goto sbrk_exit;
- /* Assert postconditions */
- assert ((unsigned) base_committed % g_pagesize == 0);
-#ifdef TRACE
- printf ("Commit %p %d\n", base_committed,
remaining_commit_size);
-#endif
- /* Adjust the regions commit top */
- g_last->top_committed = (char *) base_committed + remaining_commit_size;
- }
- } {
- /* Now we are going to search and reserve. */
- int contiguous = -1;
- int found = FALSE;
- MEMORY_BASIC_INFORMATION memory_info;
- void *base_reserved;
- long reserve_size;
- do {
- /* Assume contiguous memory */
- contiguous = TRUE;
- /* Round size to reserve */
- reserve_size = CEIL (to_reserve, g_my_regionsize);
- /* Start with the current region's top */
- memory_info.BaseAddress = g_last->top_reserved;
- /* Assert preconditions */
- assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
- assert (0 < reserve_size && reserve_size % g_regionsize == 0);
- while (VirtualQuery (memory_info.BaseAddress, &memory_info, sizeof (memory_info))) {
- /* Assert postconditions */
- assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
-#ifdef TRACE
- printf ("Query %p %d %s\n",
memory_info.BaseAddress, memory_info.RegionSize,
- memory_info.State == MEM_FREE ? "FREE":
- (memory_info.State == MEM_RESERVE ?
"RESERVED":
- (memory_info.State == MEM_COMMIT ?
"COMMITTED": "?")));
-#endif
- /* Region is free, well aligned and big enough: we are done */
- if (memory_info.State == MEM_FREE &&
- (unsigned) memory_info.BaseAddress % g_regionsize == 0 &&
- memory_info.RegionSize >= (unsigned) reserve_size) {
- found = TRUE;
- break;
- }
- /* From now on we can't get contiguous memory! */
- contiguous = FALSE;
- /* Recompute size to reserve */
- reserve_size = CEIL (allocate_size, g_my_regionsize);
- memory_info.BaseAddress = (char *) memory_info.BaseAddress + memory_info.RegionSize;
- /* Assert preconditions */
- assert ((unsigned) memory_info.BaseAddress % g_pagesize == 0);
- assert (0 < reserve_size && reserve_size % g_regionsize == 0);
- }
- /* Search failed? */
- if (! found)
- goto sbrk_exit;
- /* Assert preconditions */
- assert ((unsigned) memory_info.BaseAddress % g_regionsize == 0);
- assert (0 < reserve_size && reserve_size % g_regionsize == 0);
- /* Try to reserve this */
- base_reserved = VirtualAlloc (memory_info.BaseAddress, reserve_size, MEM_RESERVE, PAGE_NOACCESS);
- if (! base_reserved) {
- int rc = GetLastError ();
- if (rc != ERROR_INVALID_ADDRESS)
- goto sbrk_exit;
- }
- /* A null pointer signals (hopefully) a race condition with another thread. */
- /* In this case, we try again. */
- } while (! base_reserved);
- /* Check returned pointer for consistency */
- if (memory_info.BaseAddress && base_reserved != memory_info.BaseAddress)
- goto sbrk_exit;
- /* Assert postconditions */
- assert ((unsigned) base_reserved % g_regionsize == 0);
-#ifdef TRACE
- printf ("Reserve %p %d\n", base_reserved, reserve_size);
-#endif
- /* Did we get contiguous memory? */
- if (contiguous) {
- long start_size = (char *) g_last->top_committed - (char *) g_last->top_allocated;
- /* Adjust allocation size */
- allocate_size -= start_size;
- /* Adjust the regions allocation top */
- g_last->top_allocated = g_last->top_committed;
- /* Recompute the size to commit */
- to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
- /* Round size to commit */
- commit_size = CEIL (to_commit, g_my_pagesize);
- }
- /* Append the new region to the list */
- if (! region_list_append (&g_last, base_reserved, reserve_size))
- goto sbrk_exit;
- /* Didn't we get contiguous memory? */
- if (! contiguous) {
- /* Recompute the size to commit */
- to_commit = (char *) g_last->top_allocated + allocate_size - (char *) g_last->top_committed;
- /* Round size to commit */
- commit_size = CEIL (to_commit, g_my_pagesize);
- }
- }
- }
- /* Assert preconditions */
- assert ((unsigned) g_last->top_committed % g_pagesize == 0);
- assert (0 < commit_size && commit_size % g_pagesize == 0); {
- /* Commit this */
- void *base_committed = VirtualAlloc (g_last->top_committed, commit_size, MEM_COMMIT, PAGE_READWRITE);
- /* Check returned pointer for consistency */
- if (base_committed != g_last->top_committed)
- goto sbrk_exit;
- /* Assert postconditions */
- assert ((unsigned) base_committed % g_pagesize == 0);
-#ifdef TRACE
- printf ("Commit %p %d\n", base_committed, commit_size);
-#endif
- /* Adjust the regions commit top */
- g_last->top_committed = (char *) base_committed + commit_size;
- }
- }
- /* Adjust the regions allocation top */
- g_last->top_allocated = (char *) g_last->top_allocated + allocate_size;
- result = (char *) g_last->top_allocated - size;
- /* Deallocation requested? */
- } else if (size < 0) {
- long deallocate_size = - size;
- /* As long as we have a region to release */
- while ((char *) g_last->top_allocated - deallocate_size < (char *) g_last->top_reserved - g_last->reserve_size) {
- /* Get the size to release */
- long release_size = g_last->reserve_size;
- /* Get the base address */
- void *base_reserved = (char *) g_last->top_reserved - release_size;
- /* Assert preconditions */
- assert ((unsigned) base_reserved % g_regionsize == 0);
- assert (0 < release_size && release_size % g_regionsize == 0); {
- /* Release this */
- int rc = VirtualFree (base_reserved, 0,
- MEM_RELEASE);
- /* Check returned code for consistency */
- if (! rc)
- goto sbrk_exit;
-#ifdef TRACE
- printf ("Release %p %d\n", base_reserved, release_size);
-#endif
- }
- /* Adjust deallocation size */
- deallocate_size -= (char *) g_last->top_allocated - (char *) base_reserved;
- /* Remove the old region from the list */
- if (! region_list_remove (&g_last))
- goto sbrk_exit;
- } {
- /* Compute the size to decommit */
- long to_decommit = (char *) g_last->top_committed - ((char *) g_last->top_allocated - deallocate_size);
- if (to_decommit >= g_my_pagesize) {
- /* Compute the size to decommit */
- long decommit_size = FLOOR (to_decommit, g_my_pagesize);
- /* Compute the base address */
- void *base_committed = (char *) g_last->top_committed - decommit_size;
- /* Assert preconditions */
- assert ((unsigned) base_committed % g_pagesize == 0);
- assert (0 < decommit_size && decommit_size % g_pagesize == 0); {
- /* Decommit this */
- int rc = VirtualFree ((char *) base_committed, decommit_size, MEM_DECOMMIT);
- /* Check returned code for consistency */
- if (! rc)
- goto sbrk_exit;
-#ifdef TRACE
- printf ("Decommit %p %d\n", base_committed, decommit_size);
-#endif
- }
- /* Adjust deallocation size and regions commit and allocate top */
- deallocate_size -= (char *) g_last->top_allocated - (char *) base_committed;
- g_last->top_committed = base_committed;
- g_last->top_allocated = base_committed;
- }
- }
- /* Adjust regions allocate top */
- g_last->top_allocated = (char *) g_last->top_allocated - deallocate_size;
- /* Check for underflow */
- if ((char *) g_last->top_reserved - g_last->reserve_size > (char *) g_last->top_allocated ||
- g_last->top_allocated > g_last->top_committed) {
- /* Adjust regions allocate top */
- g_last->top_allocated = (char *) g_last->top_reserved - g_last->reserve_size;
- goto sbrk_exit;
- }
- result = g_last->top_allocated;
- }
- /* Assert invariants */
- assert (g_last);
- assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_allocated &&
- g_last->top_allocated <= g_last->top_committed);
- assert ((char *) g_last->top_reserved - g_last->reserve_size <= (char *) g_last->top_committed &&
- g_last->top_committed <= g_last->top_reserved &&
- (unsigned) g_last->top_committed % g_pagesize == 0);
- assert ((unsigned) g_last->top_reserved % g_regionsize == 0);
- assert ((unsigned) g_last->reserve_size % g_regionsize == 0);
-
-sbrk_exit:
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Release spin lock */
- slrelease (&g_sl);
-#endif
- return result;
-}
-
-/* mmap for windows */
-static void *mmap (void *ptr, long size, long prot, long type, long handle, long arg) {
- static long g_pagesize;
- static long g_regionsize;
-#ifdef TRACE
- printf ("mmap %d\n", size);
-#endif
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Wait for spin lock */
- slwait (&g_sl);
-#endif
- /* First time initialization */
- if (! g_pagesize)
- g_pagesize = getpagesize ();
- if (! g_regionsize)
- g_regionsize = getregionsize ();
- /* Assert preconditions */
- assert ((unsigned) ptr % g_regionsize == 0);
- assert (size % g_pagesize == 0);
- /* Allocate this */
- ptr = VirtualAlloc (ptr, size,
- MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, PAGE_READWRITE);
- if (! ptr) {
- ptr = (void *) MORECORE_FAILURE;
- goto mmap_exit;
- }
- /* Assert postconditions */
- assert ((unsigned) ptr % g_regionsize == 0);
-#ifdef TRACE
- printf ("Commit %p %d\n", ptr, size);
-#endif
-mmap_exit:
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Release spin lock */
- slrelease (&g_sl);
-#endif
- return ptr;
-}
-
-/* munmap for windows */
-static long munmap (void *ptr, long size) {
- static long g_pagesize;
- static long g_regionsize;
- int rc = MUNMAP_FAILURE;
-#ifdef TRACE
- printf ("munmap %p %d\n", ptr, size);
-#endif
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Wait for spin lock */
- slwait (&g_sl);
-#endif
- /* First time initialization */
- if (! g_pagesize)
- g_pagesize = getpagesize ();
- if (! g_regionsize)
- g_regionsize = getregionsize ();
- /* Assert preconditions */
- assert ((unsigned) ptr % g_regionsize == 0);
- assert (size % g_pagesize == 0);
- /* Free this */
- if (! VirtualFree (ptr, 0,
- MEM_RELEASE))
- goto munmap_exit;
- rc = 0;
-#ifdef TRACE
- printf ("Release %p %d\n", ptr, size);
-#endif
-munmap_exit:
-#if defined (USE_MALLOC_LOCK) && defined (NEEDED)
- /* Release spin lock */
- slrelease (&g_sl);
-#endif
- return rc;
-}
-
-static void vminfo (CHUNK_SIZE_T *free, CHUNK_SIZE_T *reserved, CHUNK_SIZE_T *committed) {
- MEMORY_BASIC_INFORMATION memory_info;
- memory_info.BaseAddress = 0;
- *free = *reserved = *committed = 0;
- while (VirtualQuery (memory_info.BaseAddress, &memory_info, sizeof (memory_info))) {
- switch (memory_info.State) {
- case MEM_FREE:
- *free += memory_info.RegionSize;
- break;
- case MEM_RESERVE:
- *reserved += memory_info.RegionSize;
- break;
- case MEM_COMMIT:
- *committed += memory_info.RegionSize;
- break;
- }
- memory_info.BaseAddress = (char *) memory_info.BaseAddress + memory_info.RegionSize;
- }
-}
-
-static int cpuinfo (int whole, CHUNK_SIZE_T *kernel, CHUNK_SIZE_T *user) {
- if (whole) {
- __int64 creation64, exit64, kernel64, user64;
- int rc = GetProcessTimes (GetCurrentProcess (),
- (FILETIME *) &creation64,
- (FILETIME *) &exit64,
- (FILETIME *) &kernel64,
- (FILETIME *) &user64);
- if (! rc) {
- *kernel = 0;
- *user = 0;
- return FALSE;
- }
- *kernel = (CHUNK_SIZE_T) (kernel64 / 10000);
- *user = (CHUNK_SIZE_T) (user64 / 10000);
- return TRUE;
- } else {
- __int64 creation64, exit64, kernel64, user64;
- int rc = GetThreadTimes (GetCurrentThread (),
- (FILETIME *) &creation64,
- (FILETIME *) &exit64,
- (FILETIME *) &kernel64,
- (FILETIME *) &user64);
- if (! rc) {
- *kernel = 0;
- *user = 0;
- return FALSE;
- }
- *kernel = (CHUNK_SIZE_T) (kernel64 / 10000);
- *user = (CHUNK_SIZE_T) (user64 / 10000);
- return TRUE;
- }
-}
-
-#endif /* WIN32 */
-
-/* ------------------------------------------------------------
-History:
- V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
- * Fix malloc_state bitmap array misdeclaration
-
- V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
- * Allow tuning of FIRST_SORTED_BIN_SIZE
- * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
- * Better detection and support for non-contiguousness of MORECORE.
- Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
- * Bypass most of malloc if no frees. Thanks To Emery Berger.
- * Fix freeing of old top non-contiguous chunk in sysmalloc.
- * Raised default trim and map thresholds to 256K.
- * Fix mmap-related #defines. Thanks to Lubos Lunak.
- * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
- * Branch-free bin calculation
- * Default trim and mmap thresholds now 256K.
-
- V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
- * Introduce independent_comalloc and independent_calloc.
- Thanks to Michael Pachos for motivation and help.
- * Make optional .h file available
- * Allow > 2GB requests on 32bit systems.
- * new WIN32 sbrk, mmap, munmap, lock code from <Walter@xxxxxxxxxxxx>.
- Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
- and Anonymous.
- * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
- helping test this.)
- * memalign: check alignment arg
- * realloc: don't try to shift chunks backwards, since this
- leads to more fragmentation in some programs and doesn't
- seem to help in any others.
- * Collect all cases in malloc requiring system memory into sYSMALLOc
- * Use mmap as backup to sbrk
- * Place all internal state in malloc_state
- * Introduce fastbins (although similar to 2.5.1)
- * Many minor tunings and cosmetic improvements
- * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
- * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
- Thanks to Tony E. Bennett <tbennett@xxxxxxxxxx> and others.
- * Include errno.h to support default failure action.
-
- V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
- * return null for negative arguments
- * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
- * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
- (e.g. WIN32 platforms)
- * Cleanup header file inclusion for WIN32 platforms
- * Cleanup code to avoid Microsoft Visual C++ compiler complaints
- * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
- memory allocation routines
- * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
- * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
- usage of 'assert' in non-WIN32 code
- * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
- avoid infinite loop
- * Always call 'fREe()' rather than 'free()'
-
- V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
- * Fixed ordering problem with boundary-stamping
-
- V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
- * Added pvalloc, as recommended by H.J. Liu
- * Added 64bit pointer support mainly from Wolfram Gloger
- * Added anonymously donated WIN32 sbrk emulation
- * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
- * malloc_extend_top: fix mask error that caused wastage after
- foreign sbrks
- * Add linux mremap support code from HJ Liu
-
- V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
- * Integrated most documentation with the code.
- * Add support for mmap, with help from
- Wolfram Gloger (Gloger@xxxxxxxxxxxxxxxxxxx).
- * Use last_remainder in more cases.
- * Pack bins using idea from colin@xxxxxxxxxxxxxxx
- * Use ordered bins instead of best-fit threshold
- * Eliminate block-local decls to simplify tracing and debugging.
- * Support another case of realloc via move into top
- * Fix error occurring when initial sbrk_base is not word-aligned.
- * Rely on page size for units instead of SBRK_UNIT to
- avoid surprises about sbrk alignment conventions.
- * Add mallinfo, mallopt. Thanks to Raymond Nijssen
- (raymond@xxxxxxxxxxxxx) for the suggestion.
- * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
- * More precautions for cases where other routines call sbrk,
- courtesy of Wolfram Gloger (Gloger@xxxxxxxxxxxxxxxxxxx).
- * Added macros etc., allowing use in linux libc from
- H.J. Lu (hjl@xxxxxxxxxxxxxx)
- * Inverted this history list
-
- V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
- * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
- * Removed all preallocation code since under current scheme
- the work required to undo bad preallocations exceeds
- the work saved in good cases for most test programs.
- * No longer use return list or unconsolidated bins since
- no scheme using them consistently outperforms those that don't
- given above changes.
- * Use best fit for very large chunks to prevent some worst-cases.
- * Added some support for debugging
-
- V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
- * Removed footers when chunks are in use. Thanks to
- Paul Wilson (wilson@xxxxxxxxxxxx) for the suggestion.
-
- V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
- * Added malloc_trim, with help from Wolfram Gloger
- (wmglo@xxxxxxxxxxxxxxxxxxxxxxxx).
-
- V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
-
- V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
- * realloc: try to expand in both directions
- * malloc: swap order of clean-bin strategy;
- * realloc: only conditionally expand backwards
- * Try not to scavenge used bins
- * Use bin counts as a guide to preallocation
- * Occasionally bin return list chunks in first scan
- * Add a few optimizations from colin@xxxxxxxxxxxxxxx
-
- V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
- * faster bin computation & slightly different binning
- * merged all consolidations to one part of malloc proper
- (eliminating old malloc_find_space & malloc_clean_bin)
- * Scan 2 returns chunks (not just 1)
- * Propagate failure in realloc if malloc returns 0
- * Add stuff to allow compilation on non-ANSI compilers
- from kpv@xxxxxxxxxxxxxxxx
-
- V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
- * removed potential for odd address access in prev_chunk
- * removed dependency on getpagesize.h
- * misc cosmetics and a bit more internal documentation
- * anticosmetics: mangled names in macros to evade debugger strangeness
- * tested on sparc, hp-700, dec-mips, rs6000
- with gcc & native cc (hp, dec only) allowing
- Detlefs & Zorn comparison study (in SIGPLAN Notices.)
-
- Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
- * Based loosely on libg++-1.2X malloc. (It retains some of the overall
- structure of old version, but most details differ.)
-
-*/
diff -r de3576a1c62c -r dfaf788ab18c tools/vnet/INSTALL
--- a/tools/vnet/INSTALL Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,31 +0,0 @@
-To compile and install run "make install"; if it fails or you need to reinstall
-run "make clean" first or the build will fail, at least that is what I have
-found under 2.6.10.
-
-Other important items:
-1) You will need to have your xen0 kernel compiled with HMAC_SUPPORT
- 2.6.x = (MAIN MENU: Cryptographic Options -> HMAC Support)
- BEFORE running "make install".
-
-2) You will want at least some of the other alogorithms listed under
- "Cryptographic Options" for the kernel compiled as modules.
-
-3) You will want the networking IPsec/VLAN options compiled in as modules
- 2.6.x = (MAIN MENU: Device Drivers -> Networking Support ->
- Networking Options ->
- IP: AH transformation
- IP: ESP transformation
- IP: IPComp transformation
- IP: tunnel transformation
-
- IPsec user configuration interface
-
- 802.1Q VLAN Support
-
-4) The module (vnet_module) will not properly load from the command line
- with a "modprobe vnet_module". Use network-vnet to properly configure
- your system and load the module for you.
-
-Please refer to the additional documentation found in tools/vnet/doc for
-proper syntax and config file parameters.
-
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/cpumask.h
--- a/xen/arch/ia64/patch/linux-2.6.11/cpumask.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,12 +0,0 @@
---- ../../linux-2.6.11/include/linux/cpumask.h 2005-03-02 00:38:00.000000000
-0700
-+++ include/asm-ia64/linux/cpumask.h 2005-04-28 13:21:20.000000000 -0600
-@@ -342,7 +342,9 @@
- */
-
- extern cpumask_t cpu_possible_map;
-+#ifndef XEN
- extern cpumask_t cpu_online_map;
-+#endif
- extern cpumask_t cpu_present_map;
-
- #if NR_CPUS > 1
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/efi.c
--- a/xen/arch/ia64/patch/linux-2.6.11/efi.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,50 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/kernel/efi.c 2005-03-02 00:37:47.000000000
-0700
-+++ arch/ia64/efi.c 2005-06-09 06:15:36.000000000 -0600
-@@ -320,6 +320,16 @@
- if (!(md->attribute & EFI_MEMORY_WB))
- continue;
-
-+#ifdef XEN
-+// this works around a problem in the ski bootloader
-+{
-+ extern long running_on_sim;
-+ if (running_on_sim && md->type != EFI_CONVENTIONAL_MEMORY)
-+ continue;
-+}
-+// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
-+ if (md->phys_addr >= 0x100000000) continue;
-+#endif
- /*
- * granule_addr is the base of md's first granule.
- * [granule_addr - first_non_wb_addr) is guaranteed to
-@@ -719,6 +729,30 @@
- return 0;
- }
-
-+#ifdef XEN
-+// variation of efi_get_iobase which returns entire memory descriptor
-+efi_memory_desc_t *
-+efi_get_io_md (void)
-+{
-+ void *efi_map_start, *efi_map_end, *p;
-+ efi_memory_desc_t *md;
-+ u64 efi_desc_size;
-+
-+ efi_map_start = __va(ia64_boot_param->efi_memmap);
-+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
-+
-+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-+ md = p;
-+ if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
-+ if (md->attribute & EFI_MEMORY_UC)
-+ return md;
-+ }
-+ }
-+ return 0;
-+}
-+#endif
-+
- u32
- efi_mem_type (unsigned long phys_addr)
- {
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/entry.S
--- a/xen/arch/ia64/patch/linux-2.6.11/entry.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,237 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/kernel/entry.S 2005-03-02
00:37:50.000000000 -0700
-+++ arch/ia64/entry.S 2005-05-23 16:49:23.000000000 -0600
-@@ -46,6 +46,7 @@
-
- #include "minstate.h"
-
-+#ifndef XEN
- /*
- * execve() is special because in case of success, we need to
- * setup a null register window frame.
-@@ -174,6 +175,7 @@
- mov rp=loc0
- br.ret.sptk.many rp
- END(sys_clone)
-+#endif /* !XEN */
-
- /*
- * prev_task <- ia64_switch_to(struct task_struct *next)
-@@ -191,7 +193,11 @@
- movl r25=init_task
- mov r27=IA64_KR(CURRENT_STACK)
- adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
-+#ifdef XEN
-+ dep r20=0,in0,60,4 // physical address of "next"
-+#else
- dep r20=0,in0,61,3 // physical address of "next"
-+#endif
- ;;
- st8 [r22]=sp // save kernel stack pointer of old task
- shr.u r26=r20,IA64_GRANULE_SHIFT
-@@ -220,6 +226,16 @@
- br.ret.sptk.many rp // boogie on out in new context
-
- .map:
-+#ifdef XEN
-+ // avoid overlapping with kernel TR
-+ movl r25=KERNEL_START
-+ dep r23=0,in0,0,KERNEL_TR_PAGE_SHIFT
-+ ;;
-+ cmp.eq p7,p0=r25,r23
-+ ;;
-+(p7) mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
-+(p7) br.cond.sptk .done
-+#endif
- rsm psr.ic // interrupts (psr.i) are already
disabled here
- movl r25=PAGE_KERNEL
- ;;
-@@ -376,7 +392,11 @@
- * - b7 holds address to return to
- * - must not touch r8-r11
- */
-+#ifdef XEN
-+GLOBAL_ENTRY(load_switch_stack)
-+#else
- ENTRY(load_switch_stack)
-+#endif
- .prologue
- .altrp b7
-
-@@ -470,6 +490,7 @@
- br.cond.sptk.many b7
- END(load_switch_stack)
-
-+#ifndef XEN
- GLOBAL_ENTRY(__ia64_syscall)
- .regstk 6,0,0,0
- mov r15=in5 // put syscall number in place
-@@ -588,6 +609,7 @@
- }
- .ret4: br.cond.sptk ia64_leave_kernel
- END(ia64_strace_leave_kernel)
-+#endif
-
- GLOBAL_ENTRY(ia64_ret_from_clone)
- PT_REGS_UNWIND_INFO(0)
-@@ -604,6 +626,15 @@
- */
- br.call.sptk.many rp=ia64_invoke_schedule_tail
- }
-+#ifdef XEN
-+ // new domains are cloned but not exec'ed so switch to user mode here
-+ cmp.ne pKStk,pUStk=r0,r0
-+#ifdef CONFIG_VTI
-+ br.cond.spnt ia64_leave_hypervisor
-+#else // CONFIG_VTI
-+ br.cond.spnt ia64_leave_kernel
-+#endif // CONFIG_VTI
-+#else
- .ret8:
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-@@ -614,6 +645,7 @@
- ;;
- cmp.ne p6,p0=r2,r0
- (p6) br.cond.spnt .strace_check_retval
-+#endif
- ;; // added stop bits to prevent
r8 dependency
- END(ia64_ret_from_clone)
- // fall through
-@@ -700,19 +732,27 @@
- .work_processed_syscall:
- adds r2=PT(LOADRS)+16,r12
- adds r3=PT(AR_BSPSTORE)+16,r12
-+#ifdef XEN
-+ ;;
-+#else
- adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- (p6) ld4 r31=[r18] // load
current_thread_info()->flags
-+#endif
- ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for
"loadrs"
- mov b7=r0 // clear b7
- ;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be
garbage)
- ld8 r18=[r2],PT(R9)-PT(B6) // load b6
-+#ifndef XEN
- (p6) and r15=TIF_WORK_MASK,r31 // any work other than
TIF_SYSCALL_TRACE?
-+#endif
- ;;
- mov r16=ar.bsp // M2 get existing backing
store pointer
-+#ifndef XEN
- (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
- (p6) br.cond.spnt .work_pending_syscall
-+#endif
- ;;
- // start restoring the state saved on the kernel stack (struct pt_regs):
- ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
-@@ -757,7 +797,11 @@
- ;;
- ld8.fill r12=[r2] // restore r12 (sp)
- ld8.fill r15=[r3] // restore r15
-+#ifdef XEN
-+ movl r3=THIS_CPU(ia64_phys_stacked_size_p8)
-+#else
- addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
-+#endif
- ;;
- (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
- (pUStk) st1 [r14]=r17
-@@ -814,9 +858,18 @@
- (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
- #endif
- .work_processed_kernel:
-+#ifdef XEN
-+ alloc loc0=ar.pfs,0,1,1,0
-+ adds out0=16,r12
-+ ;;
-+(p6) br.call.sptk.many b0=deliver_pending_interrupt
-+ mov ar.pfs=loc0
-+ mov r31=r0
-+#else
- adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- (p6) ld4 r31=[r17] // load
current_thread_info()->flags
-+#endif
- adds r21=PT(PR)+16,r12
- ;;
-
-@@ -828,17 +881,20 @@
- ld8 r28=[r2],8 // load b6
- adds r29=PT(R24)+16,r12
-
-- ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
-+ ld8.fill r16=[r3]
- adds r30=PT(AR_CCV)+16,r12
- (p6) and r19=TIF_WORK_MASK,r31 // any work other than
TIF_SYSCALL_TRACE?
- ;;
-+ adds r3=PT(AR_CSD)-PT(R16),r3
- ld8.fill r24=[r29]
- ld8 r15=[r30] // load ar.ccv
- (p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
- ;;
- ld8 r29=[r2],16 // load b7
- ld8 r30=[r3],16 // load ar.csd
-+#ifndef XEN
- (p6) br.cond.spnt .work_pending
-+#endif
- ;;
- ld8 r31=[r2],16 // load ar.ssd
- ld8.fill r8=[r3],16
-@@ -934,7 +990,11 @@
- shr.u r18=r19,16 // get byte size of existing "dirty" partition
- ;;
- mov r16=ar.bsp // get existing backing store pointer
-+#ifdef XEN
-+ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
-+#else
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-+#endif
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
- (pKStk) br.cond.dpnt skip_rbs_switch
-@@ -1069,6 +1129,7 @@
- mov pr=r31,-1 // I0
- rfi // B
-
-+#ifndef XEN
- /*
- * On entry:
- * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
-@@ -1130,6 +1191,7 @@
- ld8 r8=[r2]
- ld8 r10=[r3]
- br.cond.sptk.many .work_processed_syscall // re-check
-+#endif
-
- END(ia64_leave_kernel)
-
-@@ -1166,6 +1228,7 @@
- br.ret.sptk.many rp
- END(ia64_invoke_schedule_tail)
-
-+#ifndef XEN
- /*
- * Setup stack and call do_notify_resume_user(). Note that pSys and
pNonSys need to
- * be set up by the caller. We declare 8 input registers so the system
call
-@@ -1264,6 +1327,7 @@
- mov ar.unat=r9
- br.many b7
- END(sys_rt_sigreturn)
-+#endif
-
- GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
- .prologue
-@@ -1278,6 +1342,7 @@
- br.cond.sptk.many rp // goes to
ia64_leave_kernel
- END(ia64_prepare_handle_unaligned)
-
-+#ifndef XEN
- //
- // unw_init_running(void (*callback)(info, arg), void *arg)
- //
-@@ -1585,3 +1650,4 @@
- data8 sys_ni_syscall
-
- .org sys_call_table + 8*NR_syscalls // guard against failures to
increase NR_syscalls
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/entry.h
--- a/xen/arch/ia64/patch/linux-2.6.11/entry.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,37 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/arch/ia64/kernel/entry.h
2005-03-01 23:38:07.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/arch/ia64/entry.h
2005-05-18 14:00:53.000000000 -0700
-@@ -7,6 +7,12 @@
- #define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
- #define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
- #define PRED_USER_STACK 3 /* returning to user-stacks? */
-+#ifdef CONFIG_VTI
-+#define PRED_EMUL 2 /* Need to save r4-r7 for inst emulation */
-+#define PRED_NON_EMUL 3 /* No need to save r4-r7 for normal path */
-+#define PRED_BN0 6 /* Guest is in bank 0 */
-+#define PRED_BN1 7 /* Guest is in bank 1 */
-+#endif // CONFIG_VTI
- #define PRED_SYSCALL 4 /* inside a system call? */
- #define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
-
-@@ -17,12 +23,21 @@
- # define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
- # define pKStk PASTE(p,PRED_KERNEL_STACK)
- # define pUStk PASTE(p,PRED_USER_STACK)
-+#ifdef CONFIG_VTI
-+# define pEml PASTE(p,PRED_EMUL)
-+# define pNonEml PASTE(p,PRED_NON_EMUL)
-+# define pBN0 PASTE(p,PRED_BN0)
-+# define pBN1 PASTE(p,PRED_BN1)
-+#endif // CONFIG_VTI
- # define pSys PASTE(p,PRED_SYSCALL)
- # define pNonSys PASTE(p,PRED_NON_SYSCALL)
- #endif
-
- #define PT(f) (IA64_PT_REGS_##f##_OFFSET)
- #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
-+#ifdef CONFIG_VTI
-+#define VPD(f) (VPD_##f##_START_OFFSET)
-+#endif // CONFIG_VTI
-
- #define PT_REGS_SAVES(off) \
- .unwabi 3, 'i'; \
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h
--- a/xen/arch/ia64/patch/linux-2.6.11/gcc_intrin.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,69 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/gcc_intrin.h
2005-03-01 23:38:08.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/gcc_intrin.h
2005-05-18 14:00:53.000000000 -0700
-@@ -368,6 +368,66 @@
- #define ia64_mf() asm volatile ("mf" ::: "memory")
- #define ia64_mfa() asm volatile ("mf.a" ::: "memory")
-
-+#ifdef CONFIG_VTI
-+/*
-+ * Flushrs instruction stream.
-+ */
-+#define ia64_flushrs() asm volatile ("flushrs;;":::"memory")
-+
-+#define ia64_loadrs() asm volatile ("loadrs;;":::"memory")
-+
-+#define ia64_get_rsc() \
-+({ \
-+ unsigned long val; \
-+ asm volatile ("mov %0=ar.rsc;;" : "=r"(val) :: "memory"); \
-+ val; \
-+})
-+
-+#define ia64_set_rsc(val) \
-+ asm volatile ("mov ar.rsc=%0;;" :: "r"(val) : "memory")
-+
-+#define ia64_get_bspstore() \
-+({ \
-+ unsigned long val; \
-+ asm volatile ("mov %0=ar.bspstore;;" : "=r"(val) :: "memory"); \
-+ val; \
-+})
-+
-+#define ia64_set_bspstore(val) \
-+ asm volatile ("mov ar.bspstore=%0;;" :: "r"(val) : "memory")
-+
-+#define ia64_get_rnat() \
-+({ \
-+ unsigned long val; \
-+ asm volatile ("mov %0=ar.rnat;" : "=r"(val) :: "memory"); \
-+ val; \
-+})
-+
-+#define ia64_set_rnat(val) \
-+ asm volatile ("mov ar.rnat=%0;;" :: "r"(val) : "memory")
-+
-+#define ia64_ttag(addr)
\
-+({
\
-+ __u64 ia64_intri_res;
\
-+ asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr));
\
-+ ia64_intri_res;
\
-+})
-+
-+#define ia64_get_dcr() \
-+({ \
-+ __u64 result; \
-+ asm volatile ("mov %0=cr.dcr" : "=r"(result) : ); \
-+ result; \
-+})
-+
-+#define ia64_set_dcr(val) \
-+({ \
-+ asm volatile ("mov cr.dcr=%0" :: "r"(val) ); \
-+})
-+
-+#endif // CONFIG_VTI
-+
-+
- #define ia64_invala() asm volatile ("invala" ::: "memory")
-
- #define ia64_thash(addr)
\
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/hardirq.h
--- a/xen/arch/ia64/patch/linux-2.6.11/hardirq.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,14 +0,0 @@
---- ../../linux-2.6.11/include/linux/hardirq.h 2005-03-02 00:38:00.000000000
-0700
-+++ include/asm-ia64/linux/hardirq.h 2005-04-28 16:34:39.000000000 -0600
-@@ -60,7 +60,11 @@
- */
- #define in_irq() (hardirq_count())
- #define in_softirq() (softirq_count())
-+#ifndef XEN
- #define in_interrupt() (irq_count())
-+#else
-+#define in_interrupt() 0 // FIXME LATER
-+#endif
-
- #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
- # define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/head.S
--- a/xen/arch/ia64/patch/linux-2.6.11/head.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,120 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/head.S
2005-03-01 23:38:13.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/head.S
2005-05-18 12:40:50.000000000 -0700
-@@ -76,21 +76,21 @@
- * We initialize all of them to prevent inadvertently assuming
- * something about the state of address translation early in boot.
- */
-- mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r7=(0<<61)
-- mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r9=(1<<61)
-- mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r11=(2<<61)
-- mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r13=(3<<61)
-- mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r15=(4<<61)
-- mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
-+ movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT
<< 2) | 1)
- movl r17=(5<<61)
-- mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) |
(IA64_GRANULE_SHIFT << 2))
-+ movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) |
(IA64_GRANULE_SHIFT << 2))
- movl r19=(6<<61)
-- mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) |
(IA64_GRANULE_SHIFT << 2))
-+ movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) |
(IA64_GRANULE_SHIFT << 2))
- movl r21=(7<<61)
- ;;
- mov rr[r7]=r6
-@@ -129,8 +129,13 @@
- /*
- * Switch into virtual mode:
- */
-+#ifdef CONFIG_VTI
-+ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \
-+ |IA64_PSR_DI)
-+#else // CONFIG_VTI
- movl
r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI)
-+#endif // CONFIG_VTI
- ;;
- mov cr.ipsr=r16
- movl r17=1f
-@@ -143,7 +148,11 @@
- 1: // now we are in virtual mode
-
- // set IVT entry point---can't access I/O ports without it
-+#ifdef CONFIG_VTI
-+ movl r3=vmx_ia64_ivt
-+#else // CONFIG_VTI
- movl r3=ia64_ivt
-+#endif // CONFIG_VTI
- ;;
- mov cr.iva=r3
- movl r2=FPSR_DEFAULT
-@@ -187,7 +196,11 @@
- dep r18=0,r3,0,12
- ;;
- or r18=r17,r18
-+#ifdef XEN
-+ dep r2=-1,r3,60,4 // IMVA of task
-+#else
- dep r2=-1,r3,61,3 // IMVA of task
-+#endif
- ;;
- mov r17=rr[r2]
- shr.u r16=r3,IA64_GRANULE_SHIFT
-@@ -207,8 +220,15 @@
-
- .load_current:
- // load the "current" pointer (r13) and ar.k6 with the current task
-+#ifdef CONFIG_VTI
-+ mov r21=r2 // virtual address
-+ ;;
-+ bsw.1
-+ ;;
-+#else // CONFIG_VTI
- mov IA64_KR(CURRENT)=r2 // virtual address
- mov IA64_KR(CURRENT_STACK)=r16
-+#endif // CONFIG_VTI
- mov r13=r2
- /*
- * Reserve space at the top of the stack for "struct pt_regs". Kernel
threads
-@@ -227,7 +247,11 @@
- ;;
- mov ar.rsc=0x3 // place RSE in eager mode
-
-+#ifdef XEN
-+(isBP) dep r28=-1,r28,60,4 // make address virtual
-+#else
- (isBP) dep r28=-1,r28,61,3 // make address virtual
-+#endif
- (isBP) movl r2=ia64_boot_param
- ;;
- (isBP) st8 [r2]=r28 // save the address of the boot param
area passed by the bootloader
-@@ -254,7 +278,9 @@
- br.call.sptk.many b0=console_print
-
- self: hint @pause
-+ ;;
- br.sptk.many self // endless loop
-+ ;;
- END(_start)
-
- GLOBAL_ENTRY(ia64_save_debug_regs)
-@@ -850,7 +876,11 @@
- * intermediate precision so that we can produce a full 64-bit result.
- */
- GLOBAL_ENTRY(sched_clock)
-+#ifdef XEN
-+ movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
-+#else
- addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
-+#endif
- mov.m r9=ar.itc // fetch cycle-counter
(35 cyc)
- ;;
- ldf8 f8=[r8]
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h
--- a/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/hp/sim/hpsim_ssc.h
2005-03-01 23:38:17.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/hpsim_ssc.h
2005-05-18 12:40:19.000000000 -0700
-@@ -33,4 +33,23 @@
- */
- extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
-
-+#ifdef XEN
-+/* Note: These are declared in linux/arch/ia64/hp/sim/simscsi.c but belong
-+ * in linux/include/asm-ia64/hpsim_ssc.h, hence their addition here */
-+#define SSC_OPEN 50
-+#define SSC_CLOSE 51
-+#define SSC_READ 52
-+#define SSC_WRITE 53
-+#define SSC_GET_COMPLETION 54
-+#define SSC_WAIT_COMPLETION 55
-+
-+#define SSC_WRITE_ACCESS 2
-+#define SSC_READ_ACCESS 1
-+
-+struct ssc_disk_req {
-+ unsigned long addr;
-+ unsigned long len;
-+};
-+#endif
-+
- #endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/ia64regs.h
--- a/xen/arch/ia64/patch/linux-2.6.11/ia64regs.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,38 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/ia64regs.h
2005-03-01 23:38:07.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/ia64regs.h
2005-05-18 14:00:53.000000000 -0700
-@@ -87,6 +87,35 @@
- #define _IA64_REG_CR_LRR0 4176
- #define _IA64_REG_CR_LRR1 4177
-
-+#ifdef CONFIG_VTI
-+#define IA64_REG_CR_DCR 0
-+#define IA64_REG_CR_ITM 1
-+#define IA64_REG_CR_IVA 2
-+#define IA64_REG_CR_PTA 8
-+#define IA64_REG_CR_IPSR 16
-+#define IA64_REG_CR_ISR 17
-+#define IA64_REG_CR_IIP 19
-+#define IA64_REG_CR_IFA 20
-+#define IA64_REG_CR_ITIR 21
-+#define IA64_REG_CR_IIPA 22
-+#define IA64_REG_CR_IFS 23
-+#define IA64_REG_CR_IIM 24
-+#define IA64_REG_CR_IHA 25
-+#define IA64_REG_CR_LID 64
-+#define IA64_REG_CR_IVR 65
-+#define IA64_REG_CR_TPR 66
-+#define IA64_REG_CR_EOI 67
-+#define IA64_REG_CR_IRR0 68
-+#define IA64_REG_CR_IRR1 69
-+#define IA64_REG_CR_IRR2 70
-+#define IA64_REG_CR_IRR3 71
-+#define IA64_REG_CR_ITV 72
-+#define IA64_REG_CR_PMV 73
-+#define IA64_REG_CR_CMCV 74
-+#define IA64_REG_CR_LRR0 80
-+#define IA64_REG_CR_LRR1 81
-+#endif // CONFIG_VTI
-+
- /* Indirect Registers for getindreg() and setindreg() */
-
- #define _IA64_REG_INDR_CPUID 9000 /* getindreg only */
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/interrupt.h
--- a/xen/arch/ia64/patch/linux-2.6.11/interrupt.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,27 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/linux/interrupt.h
2005-03-01 23:38:09.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/linux/interrupt.h
2005-05-18 12:40:50.000000000 -0700
-@@ -33,6 +33,7 @@
- #define IRQ_HANDLED (1)
- #define IRQ_RETVAL(x) ((x) != 0)
-
-+#ifndef XEN
- struct irqaction {
- irqreturn_t (*handler)(int, void *, struct pt_regs *);
- unsigned long flags;
-@@ -49,6 +50,7 @@
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long, const char *, void *);
- extern void free_irq(unsigned int, void *);
-+#endif
-
-
- #ifdef CONFIG_GENERIC_HARDIRQS
-@@ -121,7 +123,7 @@
- };
-
- asmlinkage void do_softirq(void);
--extern void open_softirq(int nr, void (*action)(struct softirq_action*), void
*data);
-+//extern void open_softirq(int nr, void (*action)(struct softirq_action*),
void *data);
- extern void softirq_init(void);
- #define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL <<
(nr); } while (0)
- extern void FASTCALL(raise_softirq_irqoff(unsigned int nr));
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/io.h
--- a/xen/arch/ia64/patch/linux-2.6.11/io.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,14 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/io.h
2005-03-01 23:38:34.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/io.h
2005-05-18 12:40:50.000000000 -0700
-@@ -23,7 +23,11 @@
- #define __SLOW_DOWN_IO do { } while (0)
- #define SLOW_DOWN_IO do { } while (0)
-
-+#ifdef XEN
-+#define __IA64_UNCACHED_OFFSET 0xe800000000000000UL
-+#else
- #define __IA64_UNCACHED_OFFSET 0xc000000000000000UL /* region 6 */
-+#endif
-
- /*
- * The legacy I/O space defined by the ia64 architecture supports only 65536
ports, but
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c
--- a/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,126 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/irq_ia64.c
2005-03-01 23:38:07.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/irq_ia64.c
2005-05-18 12:40:51.000000000 -0700
-@@ -106,6 +106,9 @@
- unsigned long saved_tpr;
-
- #if IRQ_DEBUG
-+#ifdef XEN
-+ xen_debug_irq(vector, regs);
-+#endif
- {
- unsigned long bsp, sp;
-
-@@ -148,6 +151,9 @@
- ia64_setreg(_IA64_REG_CR_TPR, vector);
- ia64_srlz_d();
-
-+#ifdef XEN
-+ if (!xen_do_IRQ(vector))
-+#endif
- __do_IRQ(local_vector_to_irq(vector), regs);
-
- /*
-@@ -167,6 +173,103 @@
- irq_exit();
- }
-
-+#ifdef CONFIG_VTI
-+#define vmx_irq_enter() \
-+ add_preempt_count(HARDIRQ_OFFSET);
-+
-+/* Now softirq will be checked when leaving hypervisor, or else
-+ * scheduler irq will be executed too early.
-+ */
-+#define vmx_irq_exit(void) \
-+ sub_preempt_count(HARDIRQ_OFFSET);
-+/*
-+ * That's where the IVT branches when we get an external
-+ * interrupt. This branches to the correct hardware IRQ handler via
-+ * function ptr.
-+ */
-+void
-+vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
-+{
-+ unsigned long saved_tpr;
-+ int wake_dom0 = 0;
-+
-+
-+#if IRQ_DEBUG
-+ {
-+ unsigned long bsp, sp;
-+
-+ /*
-+ * Note: if the interrupt happened while executing in
-+ * the context switch routine (ia64_switch_to), we may
-+ * get a spurious stack overflow here. This is
-+ * because the register and the memory stack are not
-+ * switched atomically.
-+ */
-+ bsp = ia64_getreg(_IA64_REG_AR_BSP);
-+ sp = ia64_getreg(_IA64_REG_AR_SP);
-+
-+ if ((sp - bsp) < 1024) {
-+ static unsigned char count;
-+ static long last_time;
-+
-+ if (jiffies - last_time > 5*HZ)
-+ count = 0;
-+ if (++count < 5) {
-+ last_time = jiffies;
-+ printk("ia64_handle_irq: DANGER: less than "
-+ "1KB of free stack space!!\n"
-+ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
-+ }
-+ }
-+ }
-+#endif /* IRQ_DEBUG */
-+
-+ /*
-+ * Always set TPR to limit maximum interrupt nesting depth to
-+ * 16 (without this, it would be ~240, which could easily lead
-+ * to kernel stack overflows).
-+ */
-+ vmx_irq_enter();
-+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
-+ ia64_srlz_d();
-+ while (vector != IA64_SPURIOUS_INT_VECTOR) {
-+ if (!IS_RESCHEDULE(vector)) {
-+ ia64_setreg(_IA64_REG_CR_TPR, vector);
-+ ia64_srlz_d();
-+
-+ if (vector != IA64_TIMER_VECTOR) {
-+ /* FIXME: Leave IRQ re-route later */
-+ vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector);
-+ wake_dom0 = 1;
-+ }
-+ else { // FIXME: Handle Timer only now
-+ __do_IRQ(local_vector_to_irq(vector), regs);
-+ }
-+
-+ /*
-+ * Disable interrupts and send EOI:
-+ */
-+ local_irq_disable();
-+ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
-+ }
-+ else {
-+ printf("Oops: RESCHEDULE IPI absorbed by HV\n");
-+ }
-+ ia64_eoi();
-+ vector = ia64_get_ivr();
-+ }
-+ /*
-+ * This must be done *after* the ia64_eoi(). For example, the keyboard
softirq
-+ * handler needs to be able to wait for further keyboard interrupts,
which can't
-+ * come through until ia64_eoi() has been done.
-+ */
-+ vmx_irq_exit();
-+ if ( wake_dom0 && current != dom0 )
-+ domain_wake(dom0->vcpu[0]);
-+}
-+#endif
-+
-+
- #ifdef CONFIG_HOTPLUG_CPU
- /*
- * This function emulates a interrupt processing when a cpu is about to be
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/kregs.h
--- a/xen/arch/ia64/patch/linux-2.6.11/kregs.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,66 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/kregs.h
2005-03-01 23:37:49.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/kregs.h
2005-05-18 12:40:50.000000000 -0700
-@@ -29,8 +29,21 @@
- */
- #define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel
image (code & data) */
- #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as
required by EFI */
-+#ifdef CONFIG_VTI
-+#define IA64_TR_XEN_IN_DOM 6 /* itr6, dtr6: Double mapping for xen
image in domain space */
-+#endif // CONFIG_VTI
- #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
- #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- &
register-stacks */
-+#ifdef XEN
-+#define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */
-+#define IA64_TR_VHPT 4 /* dtr4: vhpt */
-+#define IA64_TR_ARCH_INFO 5
-+#ifdef CONFIG_VTI
-+#define IA64_TR_VHPT_IN_DOM 5 /* dtr5: Double mapping for vhpt table
in domain space */
-+#define IA64_TR_RR7_SWITCH_STUB 7 /* dtr7: mapping for rr7 switch
stub */
-+#define IA64_TEMP_PHYSICAL 8 /* itr8, dtr8: temp mapping for guest
physical memory 256M */
-+#endif // CONFIG_VTI
-+#endif
-
- /* Processor status register bits: */
- #define IA64_PSR_BE_BIT 1
-@@ -66,6 +78,9 @@
- #define IA64_PSR_ED_BIT 43
- #define IA64_PSR_BN_BIT 44
- #define IA64_PSR_IA_BIT 45
-+#ifdef CONFIG_VTI
-+#define IA64_PSR_VM_BIT 46
-+#endif // CONFIG_VTI
-
- /* A mask of PSR bits that we generally don't want to inherit across a
clone2() or an
- execve(). Only list flags here that need to be cleared/set for BOTH
clone2() and
-@@ -107,6 +122,9 @@
- #define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT)
- #define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT)
- #define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT)
-+#ifdef CONFIG_VTI
-+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-+#endif // CONFIG_VTI
-
- /* User mask bits: */
- #define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL
| IA64_PSR_MFH)
-@@ -160,4 +178,21 @@
- #define IA64_ISR_CODE_LFETCH 4
- #define IA64_ISR_CODE_PROBEF 5
-
-+#ifdef XEN
-+/* Interruption Function State */
-+#define IA64_IFS_V_BIT 63
-+#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
-+
-+/* Page Table Address */
-+#define IA64_PTA_VE_BIT 0
-+#define IA64_PTA_SIZE_BIT 2
-+#define IA64_PTA_VF_BIT 8
-+#define IA64_PTA_BASE_BIT 15
-+
-+#define IA64_PTA_VE (__IA64_UL(1) << IA64_PTA_VE_BIT)
-+#define IA64_PTA_SIZE (__IA64_UL(0x3f) << IA64_PTA_SIZE_BIT)
-+#define IA64_PTA_VF (__IA64_UL(1) << IA64_PTA_VF_BIT)
-+#define IA64_PTA_BASE (__IA64_UL(0) - ((__IA64_UL(1) << IA64_PTA_BASE_BIT)))
-+#endif
-+
- #endif /* _ASM_IA64_kREGS_H */
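
The XEN block above also introduces bit positions and masks for the cr.pta (page table address) register. As a rough, hedged illustration of how those fields compose, here is a standalone C sketch that builds a cr.pta value from a VHPT base and size; the #define values are copied from the hunk, while the base address, the size, and the reading of the fields (ve = enable, size = log2 of the VHPT, base = VHPT base address) are illustrative assumptions from the IA-64 architecture rather than anything this patch states.

#include <stdio.h>
#include <stdint.h>

#define IA64_PTA_VE_BIT    0
#define IA64_PTA_SIZE_BIT  2
#define IA64_PTA_VF_BIT    8
#define IA64_PTA_BASE_BIT  15

#define IA64_PTA_VE    (1ULL << IA64_PTA_VE_BIT)
#define IA64_PTA_SIZE  (0x3fULL << IA64_PTA_SIZE_BIT)
#define IA64_PTA_VF    (1ULL << IA64_PTA_VF_BIT)
#define IA64_PTA_BASE  (0ULL - (1ULL << IA64_PTA_BASE_BIT))   /* mask of bits 63..15 */

int main(void)
{
    uint64_t vhpt_base = 0xf200000000000000ULL;  /* illustrative VHPT base (assumption) */
    uint64_t log2_size = 24;                     /* illustrative: a 16MB VHPT (assumption) */
    uint64_t pta = (vhpt_base & IA64_PTA_BASE)
                 | ((log2_size << IA64_PTA_SIZE_BIT) & IA64_PTA_SIZE)
                 | IA64_PTA_VE;
    printf("cr.pta = 0x%016llx\n", (unsigned long long)pta);
    return 0;
}
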
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/mca_asm.h
--- a/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,32 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/mca_asm.h
2005-03-01 23:38:38.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/mca_asm.h
2005-05-18 12:40:19.000000000 -0700
-@@ -26,8 +26,13 @@
- * direct mapped to physical addresses.
- * 1. Lop off bits 61 thru 63 in the virtual address
- */
-+#ifdef XEN
-+#define INST_VA_TO_PA(addr)
\
-+ dep addr = 0, addr, 60, 4
-+#else // XEN
- #define INST_VA_TO_PA(addr)
\
- dep addr = 0, addr, 61, 3
-+#endif // XEN
- /*
- * This macro converts a data virtual address to a physical address
- * Right now for simulation purposes the virtual addresses are
-@@ -42,9 +47,15 @@
- * direct mapped to physical addresses.
- * 1. Put 0x7 in bits 61 thru 63.
- */
-+#ifdef XEN
-+#define DATA_PA_TO_VA(addr,temp)
\
-+ mov temp = 0xf ;;
\
-+ dep addr = temp, addr, 60, 4
-+#else // XEN
- #define DATA_PA_TO_VA(addr,temp)
\
- mov temp = 0x7 ;;
\
- dep addr = temp, addr, 61, 3
-+#endif // XEN
-
- #define GET_THIS_PADDR(reg, var) \
- mov reg = IA64_KR(PER_CPU_DATA);; \
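
Several hunks in this patch (entry.S, head.S, minstate.h, pal.S, and the two macros just above) make the same mechanical change: `dep ...,61,3` becomes `dep ...,60,4`, i.e. the identity mapping is addressed through a 4-bit region field at bit 60 (region 0xf, matching the PAGE_OFFSET change in the page.h hunk further down) rather than Linux's 3-bit field at bit 61 (region 7). A minimal standalone C sketch of the two conventions, with helper names invented here for illustration:

#include <stdio.h>
#include <stdint.h>

/* Linux convention: identity map in region 7, i.e. top 3 bits = 0b111. */
static uint64_t linux_va_to_pa(uint64_t va) { return va & ((1ULL << 61) - 1); }
static uint64_t linux_pa_to_va(uint64_t pa) { return pa | (0x7ULL << 61); }

/* Xen/ia64 convention per this patch: region 0xf, i.e. top 4 bits = 0b1111. */
static uint64_t xen_va_to_pa(uint64_t va)   { return va & ((1ULL << 60) - 1); }
static uint64_t xen_pa_to_va(uint64_t pa)   { return pa | (0xfULL << 60); }

int main(void)
{
    uint64_t pa = 0x0000000004000000ULL;  /* arbitrary example physical address */
    printf("linux va = 0x%016llx\n", (unsigned long long)linux_pa_to_va(pa)); /* 0xe000000004000000 */
    printf("xen   va = 0x%016llx\n", (unsigned long long)xen_pa_to_va(pa));   /* 0xf000000004000000 */
    printf("round trip ok: %d\n", xen_va_to_pa(xen_pa_to_va(pa)) == pa);      /* prints 1 */
    return 0;
}
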
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/minstate.h
--- a/xen/arch/ia64/patch/linux-2.6.11/minstate.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,25 +0,0 @@
- minstate.h | 4 ++--
- 1 files changed, 2 insertions(+), 2 deletions(-)
-
-Index: linux-2.6.11-xendiffs/arch/ia64/kernel/minstate.h
-===================================================================
---- linux-2.6.11-xendiffs.orig/arch/ia64/kernel/minstate.h 2005-04-06
22:51:31.170261541 -0500
-+++ linux-2.6.11-xendiffs/arch/ia64/kernel/minstate.h 2005-04-06
22:54:03.210575034 -0500
-@@ -48,7 +48,7 @@
- (pUStk) mov r24=ar.rnat;
\
- (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base
of memory stack */ \
- (pUStk) mov r23=ar.bspstore; /* save
ar.bspstore */ \
--(pUStk) dep r22=-1,r22,61,3; /* compute kernel
virtual addr of RBS */ \
-+(pUStk) dep r22=-1,r22,60,4; /* compute kernel
virtual addr of RBS */ \
- ;;
\
- (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode,
use sp (r12) */ \
- (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS
*/ \
-@@ -57,7 +57,7 @@
- (pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian,
loadrs=0 */ \
-
- #define MINSTATE_END_SAVE_MIN_PHYS
\
-- dep r12=-1,r12,61,3; /* make sp a kernel virtual address */
\
-+ dep r12=-1,r12,60,4; /* make sp a kernel virtual address */
\
- ;;
-
- #ifdef MINSTATE_VIRT
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/mm_contig.c
--- a/xen/arch/ia64/patch/linux-2.6.11/mm_contig.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,47 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/mm/contig.c 2005-03-02 00:37:55.000000000
-0700
-+++ arch/ia64/mm_contig.c 2005-04-28 16:13:52.000000000 -0600
-@@ -35,6 +35,7 @@
- *
- * Just walks the pages in the system and describes where they're allocated.
- */
-+#ifndef XEN
- void
- show_mem (void)
- {
-@@ -63,6 +64,7 @@
- printk("%d pages swap cached\n", cached);
- printk("%ld pages in page table cache\n", pgtable_cache_size);
- }
-+#endif
-
- /* physical address where the bootmem map is located */
- unsigned long bootmap_start;
-@@ -140,6 +142,7 @@
- * Walk the EFI memory map and find usable memory for the system, taking
- * into account reserved areas.
- */
-+#ifndef XEN
- void
- find_memory (void)
- {
-@@ -168,6 +171,7 @@
-
- find_initrd();
- }
-+#endif
-
- #ifdef CONFIG_SMP
- /**
-@@ -225,6 +229,7 @@
- * Set up the page tables.
- */
-
-+#ifndef XEN
- void
- paging_init (void)
- {
-@@ -297,3 +302,4 @@
- #endif /* !CONFIG_VIRTUAL_MEM_MAP */
- zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
- }
-+#endif /* !CONFIG_XEN */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/page.h
--- a/xen/arch/ia64/patch/linux-2.6.11/page.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,74 +0,0 @@
----
/home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/page.h
2005-03-01 23:37:48.000000000 -0800
-+++ /home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/page.h
2005-05-20 09:36:02.000000000 -0700
-@@ -32,6 +32,7 @@
- #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
-
- #define PERCPU_PAGE_SHIFT 16 /* log2() of max. size of per-CPU area
*/
-+
- #define PERCPU_PAGE_SIZE (__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT)
-
- #define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per
region addr limit */
-@@ -95,9 +96,15 @@
- #endif
-
- #ifndef CONFIG_DISCONTIGMEM
-+#ifdef XEN
-+# define pfn_valid(pfn) (0)
-+# define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
-+# define pfn_to_page(_pfn) (frame_table + (_pfn))
-+#else
- # define pfn_valid(pfn) (((pfn) < max_mapnr) &&
ia64_pfn_valid(pfn))
- # define page_to_pfn(page) ((unsigned long) (page - mem_map))
- # define pfn_to_page(pfn) (mem_map + (pfn))
-+#endif
- #else
- extern struct page *vmem_map;
- extern unsigned long max_low_pfn;
-@@ -109,6 +116,11 @@
- #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
- #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-
-+#ifdef XEN
-+#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
-+#define phys_to_page(kaddr) pfn_to_page(((kaddr) >> PAGE_SHIFT))
-+#endif
-+
- typedef union ia64_va {
- struct {
- unsigned long off : 61; /* intra-region offset */
-@@ -124,8 +136,23 @@
- * expressed in this way to ensure they result in a single "dep"
- * instruction.
- */
-+#ifdef XEN
-+typedef union xen_va {
-+ struct {
-+ unsigned long off : 60;
-+ unsigned long reg : 4;
-+ } f;
-+ unsigned long l;
-+ void *p;
-+} xen_va;
-+
-+// xen/drivers/console.c uses __va in a declaration (should be fixed!)
-+#define __pa(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = 0;
_v.l;})
-+#define __va(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = -1;
_v.p;})
-+#else
- #define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0;
_v.l;})
- #define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1;
_v.p;})
-+#endif
-
- #define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
- #define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
-@@ -197,7 +224,11 @@
- # define __pgprot(x) (x)
- #endif /* !STRICT_MM_TYPECHECKS */
-
-+#ifdef XEN
-+#define PAGE_OFFSET __IA64_UL_CONST(0xf000000000000000)
-+#else
- #define PAGE_OFFSET __IA64_UL_CONST(0xe000000000000000)
-+#endif
-
- #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE |
\
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC
| \
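
The xen_va union and the patched __pa()/__va() above can be sanity-checked in user space; the sketch below mirrors them with fixed-width types (the kernel version uses unsigned long, and writes -1 rather than 0xf into the 4-bit region field). The physical address is an arbitrary example value.

#include <stdio.h>
#include <stdint.h>

typedef union xen_va {
    struct {
        uint64_t off : 60;      /* intra-region offset */
        uint64_t reg : 4;       /* region number */
    } f;
    uint64_t l;
    void *p;
} xen_va;

/* Same shape as the patched __pa()/__va(): clear or set the 4-bit region field. */
static uint64_t xen_pa(uint64_t x)    { xen_va v; v.l = x; v.f.reg = 0;   return v.l; }
static uint64_t xen_va_of(uint64_t x) { xen_va v; v.l = x; v.f.reg = 0xf; return v.l; }

int main(void)
{
    uint64_t phys = 0x0000000004000000ULL;   /* arbitrary example */
    uint64_t virt = xen_va_of(phys);
    printf("__va -> 0x%016llx\n", (unsigned long long)virt);          /* 0xf000000004000000 */
    printf("__pa -> 0x%016llx\n", (unsigned long long)xen_pa(virt));  /* back to phys */
    return 0;
}
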
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/pal.S
--- a/xen/arch/ia64/patch/linux-2.6.11/pal.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/pal.S
2005-03-01 23:38:33.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/pal.S
2005-05-18 12:40:19.000000000 -0700
-@@ -166,7 +166,11 @@
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- mov loc4=ar.rsc // save RSE configuration
-+#ifdef XEN
-+ dep.z loc2=loc2,0,60 // convert pal entry point to physical
-+#else // XEN
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
-+#endif // XEN
- tpa r8=r8 // convert rp to physical
- ;;
- mov b7 = loc2 // install target to branch reg
-@@ -225,7 +229,11 @@
- mov loc3 = psr // save psr
- ;;
- mov loc4=ar.rsc // save RSE configuration
-+#ifdef XEN
-+ dep.z loc2=loc2,0,60 // convert pal entry point to physical
-+#else // XEN
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
-+#endif // XEN
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/pal.h
--- a/xen/arch/ia64/patch/linux-2.6.11/pal.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,12 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/pal.h
2005-03-01 23:38:13.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/pal.h
2005-05-18 14:00:53.000000000 -0700
-@@ -1559,6 +1559,9 @@
- return iprv.status;
- }
-
-+#ifdef CONFIG_VTI
-+#include <asm/vmx_pal.h>
-+#endif // CONFIG_VTI
- #endif /* __ASSEMBLY__ */
-
- #endif /* _ASM_IA64_PAL_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/pgalloc.h
--- a/xen/arch/ia64/patch/linux-2.6.11/pgalloc.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,76 +0,0 @@
---- ../../linux-2.6.11/include/asm-ia64/pgalloc.h 2005-03-02
00:37:31.000000000 -0700
-+++ include/asm-ia64/pgalloc.h 2005-06-09 13:40:48.000000000 -0600
-@@ -61,7 +61,12 @@
- pgd_t *pgd = pgd_alloc_one_fast(mm);
-
- if (unlikely(pgd == NULL)) {
-+#ifdef XEN
-+ pgd = (pgd_t *)alloc_xenheap_page();
-+ memset(pgd,0,PAGE_SIZE);
-+#else
- pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-+#endif
- }
- return pgd;
- }
-@@ -104,7 +109,12 @@
- static inline pmd_t*
- pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
- {
-+#ifdef XEN
-+ pmd_t *pmd = (pmd_t *)alloc_xenheap_page();
-+ memset(pmd,0,PAGE_SIZE);
-+#else
- pmd_t *pmd = (pmd_t
*)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-+#endif
-
- return pmd;
- }
-@@ -136,7 +146,12 @@
- static inline struct page *
- pte_alloc_one (struct mm_struct *mm, unsigned long addr)
- {
-+#ifdef XEN
-+ struct page *pte = alloc_xenheap_page();
-+ memset(pte,0,PAGE_SIZE);
-+#else
- struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-+#endif
-
- return pte;
- }
-@@ -144,7 +159,12 @@
- static inline pte_t *
- pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
- {
-+#ifdef XEN
-+ pte_t *pte = (pte_t *)alloc_xenheap_page();
-+ memset(pte,0,PAGE_SIZE);
-+#else
- pte_t *pte = (pte_t
*)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-+#endif
-
- return pte;
- }
-@@ -152,13 +172,21 @@
- static inline void
- pte_free (struct page *pte)
- {
-+#ifdef XEN
-+ free_xenheap_page(pte);
-+#else
- __free_page(pte);
-+#endif
- }
-
- static inline void
- pte_free_kernel (pte_t *pte)
- {
-+#ifdef XEN
-+ free_xenheap_page((unsigned long) pte);
-+#else
- free_page((unsigned long) pte);
-+#endif
- }
-
- #define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/processor.h
--- a/xen/arch/ia64/patch/linux-2.6.11/processor.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,37 +0,0 @@
----
/home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/processor.h
2005-03-01 23:37:58.000000000 -0800
-+++
/home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/processor.h
2005-05-20 09:36:02.000000000 -0700
-@@ -94,7 +94,11 @@
- #ifdef CONFIG_NUMA
- #include <asm/nodedata.h>
- #endif
-+#ifdef XEN
-+#include <asm/xenprocessor.h>
-+#endif
-
-+#ifndef XEN
- /* like above but expressed as bitfields for more efficient access: */
- struct ia64_psr {
- __u64 reserved0 : 1;
-@@ -133,6 +137,7 @@
- __u64 bn : 1;
- __u64 reserved4 : 19;
- };
-+#endif
-
- /*
- * CPU type, hardware bug flags, and per-CPU state. Frequently used
-@@ -408,12 +413,14 @@
- */
-
- /* Return TRUE if task T owns the fph partition of the CPU we're running on.
*/
-+#ifndef XEN
- #define ia64_is_local_fpu_owner(t)
\
- ({
\
- struct task_struct *__ia64_islfo_task = (t);
\
- (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id()
\
- && __ia64_islfo_task == (struct task_struct *)
ia64_get_kr(IA64_KR_FPU_OWNER)); \
- })
-+#endif
-
- /* Mark task T as owning the fph partition of the CPU we're running on. */
- #define ia64_set_local_fpu_owner(t) do {
\
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/ptrace.h
--- a/xen/arch/ia64/patch/linux-2.6.11/ptrace.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,20 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/test3.bk/xen/../../linux-2.6.11/include/asm-ia64/ptrace.h
2005-03-01 23:38:38.000000000 -0800
-+++ /home/adsharma/disk2/xen-ia64/test3.bk/xen/include/asm-ia64/ptrace.h
2005-05-18 14:00:53.000000000 -0700
-@@ -95,6 +95,9 @@
- * (because the memory stack pointer MUST ALWAYS be aligned this way)
- *
- */
-+#ifdef XEN
-+#include <public/arch-ia64.h>
-+#else
- struct pt_regs {
- /* The following registers are saved by SAVE_MIN: */
- unsigned long b6; /* scratch */
-@@ -170,6 +173,7 @@
- struct ia64_fpreg f10; /* scratch */
- struct ia64_fpreg f11; /* scratch */
- };
-+#endif
-
- /*
- * This structure contains the addition registers that need to
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/series
--- a/xen/arch/ia64/patch/linux-2.6.11/series Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,40 +0,0 @@
-bootmem.h
-current.h
-efi.c
-efi.h
-entry.S
-gcc_intrin.h
-hardirq.h
-head.S
-hpsim_irq.c
-hpsim_ssc.h
-hw_irq.h
-ide.h
-init_task.c
-init_task.h
-interrupt.h
-io.h
-irq.h
-irq_ia64.c
-ivt.S
-kregs.h
-lds.S
-linuxtime.h
-minstate.h
-mm_bootmem.c
-mm_contig.c
-mmzone.h
-page_alloc.c
-page.h
-processor.h
-sal.h
-setup.c
-slab.c
-slab.h
-system.h
-time.c
-kernel-time.c
-tlb.c
-types.h
-unaligned.c
-wait.h
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/setup.c
--- a/xen/arch/ia64/patch/linux-2.6.11/setup.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,151 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/kernel/setup.c 2005-03-02
00:37:49.000000000 -0700
-+++ arch/ia64/setup.c 2005-06-03 10:14:24.000000000 -0600
-@@ -51,6 +51,10 @@
- #include <asm/smp.h>
- #include <asm/system.h>
- #include <asm/unistd.h>
-+#ifdef CONFIG_VTI
-+#include <asm/vmx.h>
-+#endif // CONFIG_VTI
-+#include <asm/io.h>
-
- #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
- # error "struct cpuinfo_ia64 too big!"
-@@ -127,7 +131,16 @@
- range_end = min(end, rsvd_region[i].start);
-
- if (range_start < range_end)
-+#ifdef XEN
-+ {
-+ /* init_boot_pages requires "ps, pe" */
-+ printk("Init boot pages: 0x%lx -> 0x%lx.\n",
-+ __pa(range_start), __pa(range_end));
-+ (*func)(__pa(range_start), __pa(range_end), 0);
-+ }
-+#else
- call_pernode_memory(__pa(range_start), range_end -
range_start, func);
-+#endif
-
- /* nothing more available in this segment */
- if (range_end == end) return 0;
-@@ -185,7 +198,12 @@
- n++;
-
- rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
-+#ifdef XEN
-+ /* Reserve xen image/bitmap/xen-heap */
-+ rsvd_region[n].end = rsvd_region[n].start + xenheap_size;
-+#else
- rsvd_region[n].end = (unsigned long) ia64_imva(_end);
-+#endif
- n++;
-
- #ifdef CONFIG_BLK_DEV_INITRD
-@@ -299,17 +317,25 @@
- }
-
- void __init
-+#ifdef XEN
-+early_setup_arch (char **cmdline_p)
-+#else
- setup_arch (char **cmdline_p)
-+#endif
- {
- unw_init();
-
- ia64_patch_vtop((u64) __start___vtop_patchlist, (u64)
__end___vtop_patchlist);
-
- *cmdline_p = __va(ia64_boot_param->command_line);
-+#ifdef XEN
-+ efi_init();
-+#else
- strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
-
- efi_init();
- io_port_init();
-+#endif
-
- #ifdef CONFIG_IA64_GENERIC
- {
-@@ -336,6 +362,11 @@
- }
- #endif
-
-+#ifdef XEN
-+ early_cmdline_parse(cmdline_p);
-+ cmdline_parse(*cmdline_p);
-+#undef CONFIG_ACPI_BOOT
-+#endif
- if (early_console_setup(*cmdline_p) == 0)
- mark_bsp_online();
-
-@@ -351,8 +382,18 @@
- # endif
- #endif /* CONFIG_APCI_BOOT */
-
-+#ifndef XEN
- find_memory();
-+#else
-+ io_port_init();
-+}
-
-+void __init
-+late_setup_arch (char **cmdline_p)
-+{
-+#undef CONFIG_ACPI_BOOT
-+ acpi_table_init();
-+#endif
- /* process SAL system table: */
- ia64_sal_init(efi.sal_systab);
-
-@@ -360,6 +401,10 @@
- cpu_physical_id(0) = hard_smp_processor_id();
- #endif
-
-+#ifdef CONFIG_VTI
-+ identify_vmx_feature();
-+#endif // CONFIG_VTI
-+
- cpu_init(); /* initialize the bootstrap CPU */
-
- #ifdef CONFIG_ACPI_BOOT
-@@ -492,12 +537,14 @@
- {
- }
-
-+#ifndef XEN
- struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo
- };
-+#endif
-
- void
- identify_cpu (struct cpuinfo_ia64 *c)
-@@ -551,6 +598,12 @@
- }
- c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
- c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
-+
-+#ifdef CONFIG_VTI
-+ /* If vmx feature is on, do necessary initialization for vmx */
-+ if (vmx_enabled)
-+ vmx_init_env();
-+#endif
- }
-
- void
-@@ -659,7 +712,11 @@
- | IA64_DCR_DA | IA64_DCR_DD |
IA64_DCR_LC));
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
-+#ifdef XEN
-+ if (current->domain->arch.mm)
-+#else
- if (current->mm)
-+#endif
- BUG();
-
- ia64_mmu_init(ia64_imva(cpu_data));
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/sn_sal.h
--- a/xen/arch/ia64/patch/linux-2.6.11/sn_sal.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,33 +0,0 @@
---- /data/lwork/attica1/edwardsg/linux-2.6.11/include/asm-ia64/sn/sn_sal.h
2005-03-02 01:38:33 -06:00
-+++ include/asm-ia64/sn/sn_sal.h 2005-06-01 14:31:47 -05:00
-@@ -123,6 +123,7 @@
- #define SALRET_ERROR (-3)
-
-
-+#ifndef XEN
- /**
- * sn_sal_rev_major - get the major SGI SAL revision number
- *
-@@ -226,6 +227,7 @@ ia64_sn_get_klconfig_addr(nasid_t nasid)
- }
- return ret_stuff.v0 ? __va(ret_stuff.v0) : NULL;
- }
-+#endif /* !XEN */
-
- /*
- * Returns the next console character.
-@@ -304,6 +306,7 @@ ia64_sn_console_putb(const char *buf, in
- return (u64)0;
- }
-
-+#ifndef XEN
- /*
- * Print a platform error record
- */
-@@ -987,5 +990,5 @@ ia64_sn_hwperf_op(nasid_t nasid, u64 opc
- *v0 = (int) rv.v0;
- return (int) rv.status;
- }
--
-+#endif /* !XEN */
- #endif /* _ASM_IA64_SN_SN_SAL_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/system.h
--- a/xen/arch/ia64/patch/linux-2.6.11/system.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,38 +0,0 @@
----
/home/adsharma/xeno-unstable-ia64-staging.bk/xen/../../linux-2.6.11/include/asm-ia64/system.h
2005-03-01 23:38:07.000000000 -0800
-+++ /home/adsharma/xeno-unstable-ia64-staging.bk/xen/include/asm-ia64/system.h
2005-05-20 09:36:02.000000000 -0700
-@@ -18,14 +18,19 @@
- #include <asm/page.h>
- #include <asm/pal.h>
- #include <asm/percpu.h>
-+#ifdef XEN
-+#include <asm/xensystem.h>
-+#endif
-
- #define GATE_ADDR __IA64_UL_CONST(0xa000000000000000)
- /*
- * 0xa000000000000000+2*PERCPU_PAGE_SIZE
- * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
- */
-+#ifndef XEN
- #define KERNEL_START __IA64_UL_CONST(0xa000000100000000)
- #define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
-+#endif
-
- #ifndef __ASSEMBLY__
-
-@@ -218,6 +223,7 @@
- # define PERFMON_IS_SYSWIDE() (0)
- #endif
-
-+#ifndef XEN
- #define IA64_HAS_EXTRA_STATE(t)
\
- ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)
\
- || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE())
-@@ -230,6 +236,7 @@
- ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);
\
- (last) = ia64_switch_to((next));
\
- } while (0)
-+#endif
-
- #ifdef CONFIG_SMP
- /*
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/time.c
--- a/xen/arch/ia64/patch/linux-2.6.11/time.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,56 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/kernel/time.c 2005-03-02 00:37:50.000000000
-0700
-+++ arch/ia64/time.c 2005-05-02 11:19:29.000000000 -0600
-@@ -29,6 +29,9 @@
- #include <asm/sal.h>
- #include <asm/sections.h>
- #include <asm/system.h>
-+#ifdef XEN
-+#include <linux/jiffies.h> // not included by xen/sched.h
-+#endif
-
- extern unsigned long wall_jiffies;
-
-@@ -45,6 +48,7 @@
-
- #endif
-
-+#ifndef XEN
- static struct time_interpolator itc_interpolator = {
- .shift = 16,
- .mask = 0xffffffffffffffffLL,
-@@ -110,6 +114,7 @@
- } while (time_after_eq(ia64_get_itc(), new_itm));
- return IRQ_HANDLED;
- }
-+#endif
-
- /*
- * Encapsulate access to the itm structure for SMP.
-@@ -212,6 +217,7 @@
- + itc_freq/2)/itc_freq;
-
- if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
-+#ifndef XEN
- itc_interpolator.frequency = local_cpu_data->itc_freq;
- itc_interpolator.drift = itc_drift;
- #ifdef CONFIG_SMP
-@@ -228,12 +234,14 @@
- if (!nojitter) itc_interpolator.jitter = 1;
- #endif
- register_time_interpolator(&itc_interpolator);
-+#endif
- }
-
- /* Setup the CPU local timer tick */
- ia64_cpu_local_tick();
- }
-
-+#ifndef XEN
- static struct irqaction timer_irqaction = {
- .handler = timer_interrupt,
- .flags = SA_INTERRUPT,
-@@ -253,3 +261,4 @@
- */
- set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec,
-xtime.tv_nsec);
- }
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/tlb.c
--- a/xen/arch/ia64/patch/linux-2.6.11/tlb.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,38 +0,0 @@
---- ../../linux-2.6.11/arch/ia64/mm/tlb.c 2005-03-02 00:38:38.000000000
-0700
-+++ arch/ia64/tlb.c 2005-05-02 10:23:09.000000000 -0600
-@@ -43,6 +43,9 @@
- void
- wrap_mmu_context (struct mm_struct *mm)
- {
-+#ifdef XEN
-+printf("wrap_mmu_context: called, not implemented\n");
-+#else
- unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
- struct task_struct *tsk;
- int i;
-@@ -83,6 +86,7 @@
- put_cpu();
- }
- local_flush_tlb_all();
-+#endif
- }
-
- void
-@@ -132,6 +136,9 @@
- void
- flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned
long end)
- {
-+#ifdef XEN
-+printf("flush_tlb_range: called, not implemented\n");
-+#else
- struct mm_struct *mm = vma->vm_mm;
- unsigned long size = end - start;
- unsigned long nbits;
-@@ -163,6 +170,7 @@
- # endif
-
- ia64_srlz_i(); /* srlz.i implies srlz.d */
-+#endif
- }
- EXPORT_SYMBOL(flush_tlb_range);
-
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/types.h
--- a/xen/arch/ia64/patch/linux-2.6.11/types.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,44 +0,0 @@
---- ../../linux-2.6.11/include/asm-ia64/types.h 2005-03-04
10:26:30.000000000 -0700
-+++ include/asm-ia64/types.h 2005-04-11 15:23:49.000000000 -0600
-@@ -1,5 +1,12 @@
- #ifndef _ASM_IA64_TYPES_H
- #define _ASM_IA64_TYPES_H
-+#ifdef XEN
-+#ifndef __ASSEMBLY__
-+typedef unsigned long ssize_t;
-+typedef unsigned long size_t;
-+typedef long long loff_t;
-+#endif
-+#endif
-
- /*
- * This file is never included by application software unless explicitly
requested (e.g.,
-@@ -61,6 +68,28 @@
- typedef __s64 s64;
- typedef __u64 u64;
-
-+#ifdef XEN
-+/*
-+ * Below are truly Linux-specific types that should never collide with
-+ * any application/library that wants linux/types.h.
-+ */
-+
-+#ifdef __CHECKER__
-+#define __bitwise __attribute__((bitwise))
-+#else
-+#define __bitwise
-+#endif
-+
-+typedef __u16 __bitwise __le16;
-+typedef __u16 __bitwise __be16;
-+typedef __u32 __bitwise __le32;
-+typedef __u32 __bitwise __be32;
-+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
-+typedef __u64 __bitwise __le64;
-+typedef __u64 __bitwise __be64;
-+#endif
-+#endif
-+
- #define BITS_PER_LONG 64
-
- /* DMA addresses are 64-bits wide, in general. */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.11/uaccess.h
--- a/xen/arch/ia64/patch/linux-2.6.11/uaccess.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,41 +0,0 @@
---- ../../linux-2.6.11/include/asm-ia64/uaccess.h 2005-03-02
00:37:53.000000000 -0700
-+++ include/asm-ia64/uaccess.h 2005-06-21 21:53:20.000000000 -0600
-@@ -32,6 +32,10 @@
- * David Mosberger-Tang <davidm@xxxxxxxxxx>
- */
-
-+#ifdef CONFIG_VTI
-+#include <asm/vmx_uaccess.h>
-+#else // CONFIG_VTI
-+
- #include <linux/compiler.h>
- #include <linux/errno.h>
- #include <linux/sched.h>
-@@ -60,6 +64,11 @@
- * address TASK_SIZE is never valid. We also need to make sure that the
address doesn't
- * point inside the virtually mapped linear page table.
- */
-+#ifdef XEN
-+/* VT-i reserves bit 60 for the VMM; guest addresses have bit 60 = bit 59 */
-+#define IS_VMM_ADDRESS(addr) ((((addr) >> 60) ^ ((addr) >> 59)) & 1)
-+#define __access_ok(addr, size, segment) (!IS_VMM_ADDRESS((unsigned
long)(addr)))
-+#else
- #define __access_ok(addr, size, segment)
\
- ({
\
- __chk_user_ptr(addr);
\
-@@ -67,6 +76,7 @@
- && ((segment).seg == KERNEL_DS.seg
\
- || likely(REGION_OFFSET((unsigned long) (addr)) <
RGN_MAP_LIMIT))); \
- })
-+#endif
- #define access_ok(type, addr, size) __access_ok((addr), (size), get_fs())
-
- static inline int
-@@ -343,6 +353,7 @@
- __su_ret; \
- })
-
-+#endif // CONFIG_VTI
- /* Generic code can't deal with the location-relative format that we use for
compactness. */
- #define ARCH_HAS_SORT_EXTABLE
- #define ARCH_HAS_SEARCH_EXTABLE
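
The XEN branch of __access_ok above collapses the range check into a single bit test: the hunk's comment notes that VT-i reserves bit 60 for the VMM, so a guest address must have bit 60 equal to bit 59. A small standalone sketch of that predicate, with example addresses chosen to match the region layout used elsewhere in this patch (0xe... guest identity map, 0xf... Xen):

#include <stdio.h>
#include <stdint.h>

/* Mirrors IS_VMM_ADDRESS from the hunk above: flag addresses whose bit 60
 * differs from bit 59. */
static int is_vmm_address(uint64_t addr)
{
    return (int)(((addr >> 60) ^ (addr >> 59)) & 1);
}

int main(void)
{
    printf("%d\n", is_vmm_address(0xe000000004000000ULL)); /* bit60=0, bit59=0 -> 0 (guest) */
    printf("%d\n", is_vmm_address(0xf000000004000000ULL)); /* bit60=1, bit59=0 -> 1 (VMM)   */
    return 0;
}
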
diff -r de3576a1c62c -r dfaf788ab18c
xen/arch/ia64/patch/linux-2.6.11/unaligned.c
--- a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,227 +0,0 @@
----
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/unaligned.c
2005-03-01 23:38:25.000000000 -0800
-+++
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/unaligned.c
2005-05-18 12:40:50.000000000 -0700
-@@ -201,7 +201,11 @@
-
- RPT(r1), RPT(r2), RPT(r3),
-
-+#ifdef CONFIG_VTI
-+ RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-+#else //CONFIG_VTI
- RSW(r4), RSW(r5), RSW(r6), RSW(r7),
-+#endif //CONFIG_VTI
-
- RPT(r8), RPT(r9), RPT(r10), RPT(r11),
- RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-@@ -291,6 +295,121 @@
- return reg;
- }
-
-+#ifdef CONFIG_VTI
-+static void
-+set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val,
unsigned long nat)
-+{
-+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
-+ unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
-+ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
-+ unsigned long rnats, nat_mask;
-+ unsigned long old_rsc,new_rsc;
-+ unsigned long on_kbs,rnat;
-+ long sof = (regs->cr_ifs) & 0x7f;
-+ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
-+ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-+ long ridx = r1 - 32;
-+
-+ if (ridx >= sof) {
-+ /* this should never happen, as the "rsvd register fault" has
higher priority */
-+ DPRINT("ignoring write to r%lu; only %lu registers are
allocated!\n", r1, sof);
-+ return;
-+ }
-+
-+ if (ridx < sor)
-+ ridx = rotate_reg(sor, rrb_gr, ridx);
-+
-+ old_rsc=ia64_get_rsc();
-+ new_rsc=old_rsc&(~0x3);
-+ ia64_set_rsc(new_rsc);
-+
-+ bspstore = ia64_get_bspstore();
-+ bsp =kbs + (regs->loadrs >> 19);//16+3
-+
-+ addr = ia64_rse_skip_regs(bsp, -sof + ridx);
-+ nat_mask = 1UL << ia64_rse_slot_num(addr);
-+ rnat_addr = ia64_rse_rnat_addr(addr);
-+
-+ if(addr >= bspstore){
-+
-+ ia64_flushrs ();
-+ ia64_mf ();
-+ *addr = val;
-+ bspstore = ia64_get_bspstore();
-+ rnat = ia64_get_rnat ();
-+ if(bspstore < rnat_addr){
-+ rnat=rnat&(~nat_mask);
-+ }else{
-+ *rnat_addr = (*rnat_addr)&(~nat_mask);
-+ }
-+ ia64_mf();
-+ ia64_loadrs();
-+ ia64_set_rnat(rnat);
-+ }else{
-+
-+ rnat = ia64_get_rnat ();
-+ *addr = val;
-+ if(bspstore < rnat_addr){
-+ rnat=rnat&(~nat_mask);
-+ }else{
-+ *rnat_addr = (*rnat_addr)&(~nat_mask);
-+ }
-+ ia64_set_bspstore (bspstore);
-+ ia64_set_rnat(rnat);
-+ }
-+ ia64_set_rsc(old_rsc);
-+}
-+
-+
-+static void
-+get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, unsigned long *nat)
-+{
-+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
-+ unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
-+ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
-+ unsigned long rnats, nat_mask;
-+ unsigned long on_kbs;
-+ unsigned long old_rsc, new_rsc;
-+ long sof = (regs->cr_ifs) & 0x7f;
-+ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
-+ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-+ long ridx = r1 - 32;
-+
-+ if (ridx >= sof) {
-+ /* read of out-of-frame register returns an undefined value; 0
in our case. */
-+ DPRINT("ignoring read from r%lu; only %lu registers are
allocated!\n", r1, sof);
-+ panic("wrong stack register number");
-+ }
-+
-+ if (ridx < sor)
-+ ridx = rotate_reg(sor, rrb_gr, ridx);
-+
-+ old_rsc=ia64_get_rsc();
-+ new_rsc=old_rsc&(~(0x3));
-+ ia64_set_rsc(new_rsc);
-+
-+ bspstore = ia64_get_bspstore();
-+ bsp =kbs + (regs->loadrs >> 19); //16+3;
-+
-+ addr = ia64_rse_skip_regs(bsp, -sof + ridx);
-+ nat_mask = 1UL << ia64_rse_slot_num(addr);
-+ rnat_addr = ia64_rse_rnat_addr(addr);
-+
-+ if(addr >= bspstore){
-+
-+ ia64_flushrs ();
-+ ia64_mf ();
-+ bspstore = ia64_get_bspstore();
-+ }
-+ *val=*addr;
-+ if(bspstore < rnat_addr){
-+ *nat=!!(ia64_get_rnat()&nat_mask);
-+ }else{
-+ *nat = !!((*rnat_addr)&nat_mask);
-+ }
-+ ia64_set_rsc(old_rsc);
-+}
-+#else // CONFIG_VTI
- static void
- set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
- {
-@@ -435,9 +554,14 @@
- *nat = 0;
- return;
- }
-+#endif // CONFIG_VTI
-
-
-+#ifdef XEN
-+void
-+#else
- static void
-+#endif
- setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
- {
- struct switch_stack *sw = (struct switch_stack *) regs - 1;
-@@ -466,7 +590,11 @@
- unat = &sw->ar_unat;
- } else {
- addr = (unsigned long)regs;
-+#ifdef CONFIG_VTI
-+ unat = ®s->eml_unat;
-+#else //CONFIG_VTI
- unat = &sw->caller_unat;
-+#endif //CONFIG_VTI
- }
- DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
- addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
-@@ -522,7 +650,11 @@
- */
- if (regnum >= IA64_FIRST_ROTATING_FR) {
- ia64_sync_fph(current);
-+#ifdef XEN
-+ current->arch._thread.fph[fph_index(regs, regnum)] = *fpval;
-+#else
- current->thread.fph[fph_index(regs, regnum)] = *fpval;
-+#endif
- } else {
- /*
- * pt_regs or switch_stack ?
-@@ -581,7 +713,11 @@
- */
- if (regnum >= IA64_FIRST_ROTATING_FR) {
- ia64_flush_fph(current);
-+#ifdef XEN
-+ *fpval = current->arch._thread.fph[fph_index(regs, regnum)];
-+#else
- *fpval = current->thread.fph[fph_index(regs, regnum)];
-+#endif
- } else {
- /*
- * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
-@@ -611,7 +747,11 @@
- }
-
-
-+#ifdef XEN
-+void
-+#else
- static void
-+#endif
- getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
- {
- struct switch_stack *sw = (struct switch_stack *) regs - 1;
-@@ -640,7 +780,11 @@
- unat = &sw->ar_unat;
- } else {
- addr = (unsigned long)regs;
-+#ifdef CONFIG_VTI
-+ unat = ®s->eml_unat;;
-+#else //CONFIG_VTI
- unat = &sw->caller_unat;
-+#endif //CONFIG_VTI
- }
-
- DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
-@@ -1294,6 +1438,9 @@
- void
- ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
- {
-+#ifdef XEN
-+printk("ia64_handle_unaligned: called, not working yet\n");
-+#else
- struct ia64_psr *ipsr = ia64_psr(regs);
- mm_segment_t old_fs = get_fs();
- unsigned long bundle[2];
-@@ -1502,4 +1649,5 @@
- si.si_imm = 0;
- force_sig_info(SIGBUS, &si, current);
- goto done;
-+#endif
- }
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/bootmem.h
--- a/xen/arch/ia64/patch/linux-2.6.7/bootmem.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,12 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/bootmem.h 2004-06-15 23:19:52.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/bootmem.h 2004-08-25 19:28:13.000000000 -0600
-@@ -41,7 +41,9 @@
- extern void __init free_bootmem (unsigned long addr, unsigned long size);
- extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
- #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-+#ifndef XEN
- extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
-+#endif
- #define alloc_bootmem(x) \
- __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
- #define alloc_bootmem_low(x) \
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/current.h
--- a/xen/arch/ia64/patch/linux-2.6.7/current.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,17 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/current.h 2004-06-15 23:19:52.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/current.h 2004-08-25 19:28:12.000000000 -0600
-@@ -12,6 +12,14 @@
- * In kernel mode, thread pointer (r13) is used to point to the current task
- * structure.
- */
-+#ifdef XEN
-+struct domain;
-+#define get_current() ((struct vcpu *) ia64_getreg(_IA64_REG_TP))
-+#define current get_current()
-+//#define set_current(d) ia64_setreg(_IA64_REG_TP,(void *)d);
-+#define set_current(d) (ia64_r13 = (void *)d)
-+#else
- #define current ((struct task_struct *) ia64_getreg(_IA64_REG_TP))
-+#endif
-
- #endif /* _ASM_IA64_CURRENT_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/efi.c
--- a/xen/arch/ia64/patch/linux-2.6.7/efi.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,85 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/efi.c 2004-06-15 23:18:55.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/efi.c 2004-12-17 13:47:03.000000000 -0700
-@@ -25,6 +25,9 @@
- #include <linux/types.h>
- #include <linux/time.h>
- #include <linux/efi.h>
-+#ifdef XEN
-+#include <xen/sched.h>
-+#endif
-
- #include <asm/io.h>
- #include <asm/kregs.h>
-@@ -49,7 +52,10 @@
- { \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
-+ efi_time_cap_t *atc = NULL; \
- \
-+ if (tc) \
-+ atc = adjust_arg(tc); \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), \
- adjust_arg(tc)); \
-@@ -201,6 +207,7 @@
- if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS)
- return;
-
-+ dummy();
- ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
- ts->tv_nsec = tm.nanosecond;
- }
-@@ -303,6 +310,10 @@
- if (!(md->attribute & EFI_MEMORY_WB))
- continue;
-
-+#ifdef XEN
-+// this is a temporary hack to avoid CONFIG_VIRTUAL_MEM_MAP
-+ if (md->phys_addr >= 0x100000000) continue;
-+#endif
- /*
- * granule_addr is the base of md's first granule.
- * [granule_addr - first_non_wb_addr) is guaranteed to
-@@ -456,9 +467,11 @@
-
- cpu = smp_processor_id();
-
-+#ifndef XEN
- /* insert this TR into our list for MCA recovery purposes */
- ia64_mca_tlb_list[cpu].pal_base = vaddr & mask;
- ia64_mca_tlb_list[cpu].pal_paddr = pte_val(mk_pte_phys(md->phys_addr, PAGE_KERNEL));
-+#endif
- }
- }
-
-@@ -680,6 +693,30 @@
- return 0;
- }
-
-+#ifdef XEN
-+// variation of efi_get_iobase which returns entire memory descriptor
-+efi_memory_desc_t *
-+efi_get_io_md (void)
-+{
-+ void *efi_map_start, *efi_map_end, *p;
-+ efi_memory_desc_t *md;
-+ u64 efi_desc_size;
-+
-+ efi_map_start = __va(ia64_boot_param->efi_memmap);
-+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
-+
-+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-+ md = p;
-+ if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
-+ if (md->attribute & EFI_MEMORY_UC)
-+ return md;
-+ }
-+ }
-+ return 0;
-+}
-+#endif
-+
- u32
- efi_mem_type (unsigned long phys_addr)
- {
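The efi_get_io_md() helper added above shows the standard way to walk an EFI memory map: descriptors are packed at the firmware-reported stride (efi_memdesc_size), not sizeof the C struct, so the loop advances a byte pointer by that stride. A self-contained sketch of the same scan (the struct is a trimmed stand-in for efi_memory_desc_t; the type and attribute values follow the EFI spec):

#include <stddef.h>
#include <stdint.h>

#define EFI_MEMORY_MAPPED_IO_PORT_SPACE 12    /* EFI memory type number */
#define EFI_MEMORY_UC                   0x1UL /* uncacheable attribute bit */

struct memory_desc {            /* trimmed stand-in for efi_memory_desc_t */
    uint32_t type;
    uint64_t phys_addr;
    uint64_t num_pages;
    uint64_t attribute;
};

/* Walk a packed memory map using the firmware's descriptor stride and
 * return the first uncacheable I/O-port-space descriptor, as the removed
 * efi_get_io_md() does. */
static struct memory_desc *
find_io_md(void *map, size_t map_size, size_t desc_size)
{
    char *p, *end = (char *)map + map_size;

    for (p = map; p < end; p += desc_size) {
        struct memory_desc *md = (struct memory_desc *)p;
        if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE &&
            (md->attribute & EFI_MEMORY_UC))
            return md;
    }
    return NULL;
}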
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/efi.h
--- a/xen/arch/ia64/patch/linux-2.6.7/efi.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,13 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/efi.h 2004-06-15 23:20:03.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/efi.h 2004-08-25 19:28:13.000000000 -0600
-@@ -15,8 +15,10 @@
- #include <linux/string.h>
- #include <linux/time.h>
- #include <linux/types.h>
-+#ifndef XEN
- #include <linux/proc_fs.h>
- #include <linux/rtc.h>
-+#endif
- #include <linux/ioport.h>
-
- #include <asm/page.h>
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/entry.S
--- a/xen/arch/ia64/patch/linux-2.6.7/entry.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,195 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/entry.S 2005-03-24 19:39:56.000000000 -0700
-+++ arch/ia64/entry.S 2005-04-01 12:56:01.000000000 -0700
-@@ -35,7 +35,9 @@
-
- #include <asm/asmmacro.h>
- #include <asm/cache.h>
-+#ifndef XEN
- #include <asm/errno.h>
-+#endif
- #include <asm/kregs.h>
- #include <asm/offsets.h>
- #include <asm/pgtable.h>
-@@ -46,6 +48,23 @@
-
- #include "minstate.h"
-
-+#ifdef XEN
-+#define sys_execve 0
-+#define do_fork 0
-+#define syscall_trace 0
-+#define schedule 0
-+#define do_notify_resume_user 0
-+#define ia64_rt_sigsuspend 0
-+#define ia64_rt_sigreturn 0
-+#define ia64_handle_unaligned 0
-+#define errno 0
-+#define sys_ni_syscall 0
-+#define unw_init_frame_info 0
-+#define sys_call_table 0
-+#endif
-+
-+ /*
-+
- /*
- * execve() is special because in case of success, we need to
- * setup a null register window frame.
-@@ -178,11 +197,14 @@
- DO_SAVE_SWITCH_STACK
- .body
-
-+#ifdef XEN
-+//#undef IA64_TASK_THREAD_KSP_OFFSET
-+//#define IA64_TASK_THREAD_KSP_OFFSET 0x38
- adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
- movl r25=init_task
- mov r27=IA64_KR(CURRENT_STACK)
- adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
-- dep r20=0,in0,61,3 // physical address of "current"
-+ dep r20=0,in0,60,4 // physical address of "current"
- ;;
- st8 [r22]=sp // save kernel stack pointer of old task
- shr.u r26=r20,IA64_GRANULE_SHIFT
-@@ -194,6 +216,22 @@
- (p6) cmp.eq p7,p6=r26,r27
- (p6) br.cond.dpnt .map
- ;;
-+#else
-+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
-+ mov r27=IA64_KR(CURRENT_STACK)
-+ dep r20=0,in0,61,3 // physical address of "current"
-+ ;;
-+ st8 [r22]=sp // save kernel stack pointer of old task
-+ shr.u r26=r20,IA64_GRANULE_SHIFT
-+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
-+ ;;
-+ /*
-+ * If we've already mapped this task's page, we can skip doing it again.
-+ */
-+ cmp.eq p7,p6=r26,r27
-+(p6) br.cond.dpnt .map
-+ ;;
-+#endif
- .done:
- (p6) ssm psr.ic // if we we had to map, renable the psr.ic bit FIRST!!!
- ;;
-@@ -211,6 +249,16 @@
- br.ret.sptk.many rp // boogie on out in new context
-
- .map:
-+#ifdef XEN
-+ // avoid overlapping with kernel TR
-+ movl r25=KERNEL_START
-+ dep r23=0,in0,0,KERNEL_TR_PAGE_SHIFT
-+ ;;
-+ cmp.eq p7,p0=r25,r23
-+ ;;
-+(p7) mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
-+(p7) br.cond.sptk .done
-+#endif
- rsm psr.ic // interrupts (psr.i) are already disabled here
- movl r25=PAGE_KERNEL
- ;;
-@@ -367,7 +415,11 @@
- * - b7 holds address to return to
- * - must not touch r8-r11
- */
-+#ifdef XEN
-+GLOBAL_ENTRY(load_switch_stack)
-+#else
- ENTRY(load_switch_stack)
-+#endif
- .prologue
- .altrp b7
-
-@@ -595,6 +647,11 @@
- */
- br.call.sptk.many rp=ia64_invoke_schedule_tail
- }
-+#ifdef XEN
-+ // new domains are cloned but not exec'ed so switch to user mode here
-+ cmp.ne pKStk,pUStk=r0,r0
-+ br.cond.spnt ia64_leave_kernel
-+#else
- .ret8:
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
-@@ -603,6 +660,7 @@
- mov r8=0
- tbit.nz p6,p0=r2,TIF_SYSCALL_TRACE
- (p6) br.cond.spnt .strace_check_retval
-+#endif
- ;; // added stop bits to prevent r8 dependency
- END(ia64_ret_from_clone)
- // fall through
-@@ -684,9 +742,14 @@
- #endif /* CONFIG_PREEMPT */
- adds r16=PT(LOADRS)+16,r12
- adds r17=PT(AR_BSPSTORE)+16,r12
-+#ifdef XEN
-+ mov r31=r0
-+ ;;
-+#else
- adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- (p6) ld4 r31=[r18] // load current_thread_info()->flags
-+#endif
- ld8 r19=[r16],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- nop.i 0
- ;;
-@@ -745,7 +808,11 @@
- mov b7=r0 // clear b7
- ;;
- (pUStk) st1 [r14]=r3
-+#ifdef XEN
-+ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
-+#else
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-+#endif
- ;;
- mov r16=ar.bsp // get existing backing store pointer
- srlz.i // ensure interruption collection is off
-@@ -796,9 +863,18 @@
- ;;
- (p6) cmp.eq.unc p6,p0=r21,r0 // p6 <- p6 && (r21 == 0)
- #endif /* CONFIG_PREEMPT */
-+#ifdef XEN
-+ alloc loc0=ar.pfs,0,1,1,0
-+ adds out0=16,r12
-+ ;;
-+(p6) br.call.sptk.many b0=deliver_pending_interrupt
-+ mov ar.pfs=loc0
-+ mov r31=r0
-+#else
- adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
- ;;
- (p6) ld4 r31=[r17] // load current_thread_info()->flags
-+#endif
- adds r21=PT(PR)+16,r12
- ;;
-
-@@ -912,7 +988,11 @@
- shr.u r18=r19,16 // get byte size of existing "dirty" partition
- ;;
- mov r16=ar.bsp // get existing backing store pointer
-+#ifdef XEN
-+ movl r17=THIS_CPU(ia64_phys_stacked_size_p8)
-+#else
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-+#endif
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
- (pKStk) br.cond.dpnt skip_rbs_switch
-@@ -1264,6 +1344,7 @@
- br.ret.sptk.many rp
- END(unw_init_running)
-
-+#ifndef XEN
- .rodata
- .align 8
- .globl sys_call_table
-@@ -1526,3 +1607,4 @@
- data8 sys_ni_syscall
-
- .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h
--- a/xen/arch/ia64/patch/linux-2.6.7/gcc_intrin.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,20 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/gcc_intrin.h 2005-01-23 13:23:36.000000000 -0700
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/gcc_intrin.h 2004-08-25 19:28:13.000000000 -0600
-@@ -92,6 +92,9 @@
-
- #define ia64_hint_pause 0
-
-+#ifdef XEN
-+#define ia64_hint(mode) 0
-+#else
- #define ia64_hint(mode) \
- ({ \
- switch (mode) { \
-@@ -100,6 +103,7 @@
- break; \
- } \
- })
-+#endif
-
-
- /* Integer values for mux1 instruction */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/hardirq.h
--- a/xen/arch/ia64/patch/linux-2.6.7/hardirq.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,22 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/hardirq.h 2004-06-15 23:19:02.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hardirq.h 2004-12-17 13:47:03.000000000 -0700
-@@ -81,10 +81,19 @@
- */
- #define in_irq() (hardirq_count())
- #define in_softirq() (softirq_count())
-+#ifdef XEN
- #define in_interrupt() (irq_count())
-+#else
-+#define in_interrupt() 0 // FIXME LATER
-+#endif
-
-+#ifdef XEN
-+#define hardirq_trylock(cpu) (!in_interrupt())
-+#define hardirq_endlock(cpu) do { } while (0)
-+#else
- #define hardirq_trylock() (!in_interrupt())
- #define hardirq_endlock() do { } while (0)
-+#endif
-
- #ifdef CONFIG_PREEMPT
- # include <linux/smp_lock.h>
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/head.S
--- a/xen/arch/ia64/patch/linux-2.6.7/head.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,93 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/head.S 2005-03-24 19:39:56.000000000 -0700
-+++ arch/ia64/head.S 2005-04-01 12:56:01.000000000 -0700
-@@ -1,3 +1,8 @@
-+#ifdef XEN
-+#define console_print printf
-+#define kernel_thread_helper 0
-+#define sys_exit 0
-+#endif
- /*
- * Here is where the ball gets rolling as far as the kernel is concerned.
- * When control is transferred to _start, the bootload has already
-@@ -166,7 +171,11 @@
- dep r18=0,r3,0,12
- ;;
- or r18=r17,r18
-+#ifdef XEN
-+ dep r2=-1,r3,60,4 // IMVA of task
-+#else
- dep r2=-1,r3,61,3 // IMVA of task
-+#endif
- ;;
- mov r17=rr[r2]
- ;;
-@@ -205,7 +214,11 @@
- ;;
- mov ar.rsc=0x3 // place RSE in eager mode
-
-+#ifdef XEN
-+(isBP) dep r28=-1,r28,60,4 // make address virtual
-+#else
- (isBP) dep r28=-1,r28,61,3 // make address virtual
-+#endif
- (isBP) movl r2=ia64_boot_param
- ;;
- (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
-@@ -238,14 +251,30 @@
- br.call.sptk.many rp=sys_fw_init
- .ret1:
- #endif
-+#ifdef XEN
-+ alloc r2=ar.pfs,8,0,2,0
-+ ;;
-+#define fake_mbi_magic 0
-+#define MULTIBOOT_INFO_SIZE 1024
-+ .rodata
-+fake_mbi:
-+ .skip MULTIBOOT_INFO_SIZE
-+ .previous
-+ movl out0=fake_mbi
-+ ;;
-+ br.call.sptk.many rp=cmain
-+#else
- br.call.sptk.many rp=start_kernel
-+#endif
- .ret2: addl r3=@ltoff(halt_msg),gp
- ;;
- alloc r2=ar.pfs,8,0,2,0
- ;;
- ld8 out0=[r3]
- br.call.sptk.many b0=console_print
-+ ;;
- self: br.sptk.many self // endless loop
-+ ;;
- END(_start)
-
- GLOBAL_ENTRY(ia64_save_debug_regs)
-@@ -781,8 +810,13 @@
- movl r18=KERNEL_START
- dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
- dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
-+#ifdef XEN
-+ dep r17=-1,r17,60,4
-+ dep sp=-1,sp,60,4
-+#else
- dep r17=-1,r17,61,3
- dep sp=-1,sp,61,3
-+#endif
- ;;
- or r3=r3,r18
- or r14=r14,r18
-@@ -838,7 +872,12 @@
- * intermediate precision so that we can produce a full 64-bit result.
- */
- GLOBAL_ENTRY(sched_clock)
-+#ifdef XEN
-+ break 0;; // FIX IA64_CPUINFO_NSEC_PER_CYC_OFFSET
-+ //movl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET
-+#else
- addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
-+#endif
- mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
- ;;
- ldf8 f8=[r8]
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c
--- a/xen/arch/ia64/patch/linux-2.6.7/hpsim_irq.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,36 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/hp/sim/hpsim_irq.c 2004-06-15 23:20:26.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/hpsim_irq.c 2004-11-01 17:54:15.000000000 -0700
-@@ -9,7 +9,17 @@
- #include <linux/kernel.h>
- #include <linux/sched.h>
- #include <linux/irq.h>
-+#ifdef XEN
-+#include <asm/hw_irq.h>
-+#endif
-
-+#if 1
-+void __init
-+hpsim_irq_init (void)
-+{
-+ printf("*** hpsim_irq_init called: NOT NEEDED?!?!?\n");
-+}
-+#else
- static unsigned int
- hpsim_irq_startup (unsigned int irq)
- {
-@@ -19,6 +29,10 @@
- static void
- hpsim_irq_noop (unsigned int irq)
- {
-+#if 1
-+printf("hpsim_irq_noop: irq=%d\n",irq);
-+while(irq);
-+#endif
- }
-
- static struct hw_interrupt_type irq_type_hp_sim = {
-@@ -44,3 +58,4 @@
- idesc->handler = &irq_type_hp_sim;
- }
- }
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h
--- a/xen/arch/ia64/patch/linux-2.6.7/hpsim_ssc.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/hp/sim/hpsim_ssc.h 2004-06-15 23:19:43.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hpsim_ssc.h 2004-08-29 01:04:23.000000000 -0600
-@@ -33,4 +33,23 @@
- */
- extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
-
-+#ifdef XEN
-+/* Note: These are declared in linux/arch/ia64/hp/sim/simscsi.c but belong
-+ * in linux/include/asm-ia64/hpsim_ssc.h, hence their addition here */
-+#define SSC_OPEN 50
-+#define SSC_CLOSE 51
-+#define SSC_READ 52
-+#define SSC_WRITE 53
-+#define SSC_GET_COMPLETION 54
-+#define SSC_WAIT_COMPLETION 55
-+
-+#define SSC_WRITE_ACCESS 2
-+#define SSC_READ_ACCESS 1
-+
-+struct ssc_disk_req {
-+ unsigned long addr;
-+ unsigned long len;
-+};
-+#endif
-+
- #endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/hw_irq.h
--- a/xen/arch/ia64/patch/linux-2.6.7/hw_irq.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,24 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/hw_irq.h 2004-06-15 23:19:22.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/hw_irq.h 2004-08-27 09:07:38.000000000 -0600
-@@ -9,7 +9,9 @@
- #include <linux/interrupt.h>
- #include <linux/sched.h>
- #include <linux/types.h>
-+#ifndef XEN
- #include <linux/profile.h>
-+#endif
-
- #include <asm/machvec.h>
- #include <asm/ptrace.h>
-@@ -96,7 +98,11 @@
- * Default implementations for the irq-descriptor API:
- */
-
-+#ifdef XEN
-+#define _irq_desc irq_desc
-+#else
- extern irq_desc_t _irq_desc[NR_IRQS];
-+#endif
-
- #ifndef CONFIG_IA64_GENERIC
- static inline irq_desc_t *
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/ide.h
--- a/xen/arch/ia64/patch/linux-2.6.7/ide.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,35 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/ide.h 2004-06-15 23:19:36.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/ide.h 2004-08-25 19:28:13.000000000 -0600
-@@ -64,6 +64,32 @@
- #define ide_init_default_irq(base) ide_default_irq(base)
- #endif
-
-+#ifdef XEN
-+// this is moved to linux/ide.h in newer versions of linux
-+typedef union {
-+ unsigned all : 8; /* all of the bits together */
-+ struct {
-+ unsigned head : 4; /* always zeros here */
-+ unsigned unit : 1; /* drive select number, 0 or 1 */
-+ unsigned bit5 : 1; /* always 1 */
-+ unsigned lba : 1; /* using LBA instead of CHS */
-+ unsigned bit7 : 1; /* always 1 */
-+ } b;
-+} select_t;
-+
-+typedef union {
-+ unsigned all : 8; /* all of the bits together */
-+ struct {
-+ unsigned bit0 : 1;
-+ unsigned nIEN : 1; /* device INTRQ to host */
-+ unsigned SRST : 1; /* host soft reset bit */
-+ unsigned bit3 : 1; /* ATA-2 thingy */
-+ unsigned reserved456 : 3;
-+ unsigned HOB : 1; /* 48-bit address ordering */
-+ } b;
-+} control_t;
-+#endif
-+
- #include <asm-generic/ide_iops.h>
-
- #endif /* __KERNEL__ */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/init_task.c
--- a/xen/arch/ia64/patch/linux-2.6.7/init_task.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,35 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/init_task.c 2004-06-15 23:20:26.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/init_task.c 2004-08-27 00:06:35.000000000 -0600
-@@ -15,10 +15,12 @@
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
-
-+#ifndef XEN
- static struct fs_struct init_fs = INIT_FS;
- static struct files_struct init_files = INIT_FILES;
- static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
- static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-+#endif
- struct mm_struct init_mm = INIT_MM(init_mm);
-
- EXPORT_SYMBOL(init_mm);
-@@ -33,13 +35,19 @@
-
- union {
- struct {
-+#ifdef XEN
-+ struct domain task;
-+#else
- struct task_struct task;
- struct thread_info thread_info;
-+#endif
- } s;
- unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
- } init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
- .task = INIT_TASK(init_task_mem.s.task),
-+#ifndef XEN
- .thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
-+#endif
- }};
-
- EXPORT_SYMBOL(init_task);
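The init_task.c hunk above keeps Linux's trick for the boot task: a union overlays the control structure (struct domain here, instead of task_struct plus thread_info) with a kernel-stack-sized array, so the whole thing is one statically placed block. A generic sketch of the pattern (sizes and fields are illustrative, not the Xen definitions):

#include <stdio.h>

#define KERNEL_STACK_SIZE (16 * 1024)   /* illustrative, not the Xen value */

struct domain {                          /* trimmed stand-in */
    int domain_id;
    int refcnt;
};

/* Overlay the boot domain's control block with its stack: the structure
 * sits at the start of the block and the remainder is used as the stack.
 * In the patch this object also carries asm("init_task") and a
 * section(".data.init_task") attribute. */
static union {
    struct domain task;
    unsigned long stack[KERNEL_STACK_SIZE / sizeof(unsigned long)];
} init_task_mem = {
    .task = { .domain_id = 0, .refcnt = 1 },
};

int main(void)
{
    printf("block size: %zu bytes\n", sizeof(init_task_mem));
    printf("boot domain id: %d\n", init_task_mem.task.domain_id);
    return 0;
}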
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/init_task.h
--- a/xen/arch/ia64/patch/linux-2.6.7/init_task.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,53 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/init_task.h 2004-06-15 23:18:57.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/init_task.h 2004-11-15 17:06:20.000000000 -0700
-@@ -31,6 +31,18 @@
- .max_reqs = ~0U, \
- }
-
-+#ifdef XEN
-+#define INIT_MM(name) \
-+{ \
-+ .mm_rb = RB_ROOT, \
-+ .pgd = swapper_pg_dir, \
-+ .mm_users = ATOMIC_INIT(2), \
-+ .mm_count = ATOMIC_INIT(1), \
-+ .page_table_lock = SPIN_LOCK_UNLOCKED, \
-+ .mmlist = LIST_HEAD_INIT(name.mmlist), \
-+ .cpu_vm_mask = CPU_MASK_ALL, \
-+}
-+#else
- #define INIT_MM(name) \
- { \
- .mm_rb = RB_ROOT, \
-@@ -43,6 +55,7 @@
- .cpu_vm_mask = CPU_MASK_ALL, \
- .default_kioctx = INIT_KIOCTX(name.default_kioctx, name), \
- }
-+#endif
-
- #define INIT_SIGNALS(sig) { \
- .count = ATOMIC_INIT(1), \
-@@ -64,6 +77,15 @@
- * INIT_TASK is used to set up the first task table, touch at
- * your own risk!. Base=0, limit=0x1fffff (=2MB)
- */
-+#ifdef XEN
-+#define INIT_TASK(tsk) \
-+{ \
-+ /*processor: 0,*/ \
-+ /*domain_id: IDLE_DOMAIN_ID,*/ \
-+ /*domain_flags: DOMF_idle_domain,*/ \
-+ refcnt: ATOMIC_INIT(1) \
-+}
-+#else
- #define INIT_TASK(tsk) \
- { \
- .state = 0, \
-@@ -113,6 +135,7 @@
- .switch_lock = SPIN_LOCK_UNLOCKED, \
- .journal_info = NULL, \
- }
-+#endif
-
-
-
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/interrupt.h
--- a/xen/arch/ia64/patch/linux-2.6.7/interrupt.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,18 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/interrupt.h 2004-06-15 23:19:29.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/interrupt.h 2004-08-25 19:28:13.000000000 -0600
-@@ -32,6 +32,7 @@
- #define IRQ_HANDLED (1)
- #define IRQ_RETVAL(x) ((x) != 0)
-
-+#ifndef XEN
- struct irqaction {
- irqreturn_t (*handler)(int, void *, struct pt_regs *);
- unsigned long flags;
-@@ -46,6 +47,7 @@
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long, const char *, void *);
- extern void free_irq(unsigned int, void *);
-+#endif
-
- /*
- * Temporary defines for UP kernels, until all code gets fixed.
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/io.h
--- a/xen/arch/ia64/patch/linux-2.6.7/io.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,14 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/io.h 2004-06-15 23:18:57.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/io.h 2004-11-05 16:53:36.000000000 -0700
-@@ -23,7 +23,11 @@
- #define __SLOW_DOWN_IO do { } while (0)
- #define SLOW_DOWN_IO do { } while (0)
-
-+#ifdef XEN
-+#define __IA64_UNCACHED_OFFSET 0xdffc000000000000 /* region 6 */
-+#else
- #define __IA64_UNCACHED_OFFSET 0xc000000000000000 /* region 6 */
-+#endif
-
- /*
- * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/irq.h
--- a/xen/arch/ia64/patch/linux-2.6.7/irq.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,18 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/irq.h 2005-01-23 13:23:36.000000000 -0700
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/irq.h 2004-08-25 19:28:13.000000000 -0600
-@@ -30,6 +30,15 @@
- extern void enable_irq (unsigned int);
- extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
-
-+#ifdef XEN
-+// dup'ed from signal.h to avoid changes to includes
-+#define SA_NOPROFILE 0x02000000
-+#define SA_SHIRQ 0x04000000
-+#define SA_RESTART 0x10000000
-+#define SA_INTERRUPT 0x20000000
-+#define SA_SAMPLE_RANDOM SA_RESTART
-+#endif
-+
- #ifdef CONFIG_SMP
- extern void move_irq(int irq);
- #else
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c
--- a/xen/arch/ia64/patch/linux-2.6.7/irq_ia64.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,82 +0,0 @@
---- /home/djm/linux-2.6.7/arch/ia64/kernel/irq_ia64.c 2004-06-15 23:19:13.000000000 -0600
-+++ arch/ia64/irq_ia64.c 2005-02-17 13:17:16.000000000 -0700
-@@ -17,18 +17,26 @@
- #include <linux/config.h>
- #include <linux/module.h>
-
-+#ifndef XEN
- #include <linux/jiffies.h>
-+#endif
- #include <linux/errno.h>
- #include <linux/init.h>
- #include <linux/interrupt.h>
- #include <linux/ioport.h>
-+#ifndef XEN
- #include <linux/kernel_stat.h>
-+#endif
- #include <linux/slab.h>
-+#ifndef XEN
- #include <linux/ptrace.h>
- #include <linux/random.h> /* for rand_initialize_irq() */
- #include <linux/signal.h>
-+#endif
- #include <linux/smp.h>
-+#ifndef XEN
- #include <linux/smp_lock.h>
-+#endif
- #include <linux/threads.h>
-
- #include <asm/bitops.h>
-@@ -101,6 +109,24 @@
- ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
- {
- unsigned long saved_tpr;
-+#if 0
-+//FIXME: For debug only, can be removed
-+ static char firstirq = 1;
-+ static char firsttime[256];
-+ static char firstpend[256];
-+ if (firstirq) {
-+ int i;
-+ for (i=0;i<256;i++) firsttime[i] = 1;
-+ for (i=0;i<256;i++) firstpend[i] = 1;
-+ firstirq = 0;
-+ }
-+ if (firsttime[vector]) {
-+ printf("**** (entry) First received int on vector=%d,itc=%lx\n",
-+ (unsigned long) vector, ia64_get_itc());
-+ firsttime[vector] = 0;
-+ }
-+#endif
-+
-
- #if IRQ_DEBUG
- {
-@@ -145,6 +171,27 @@
- ia64_setreg(_IA64_REG_CR_TPR, vector);
- ia64_srlz_d();
-
-+#ifdef XEN
-+ if (vector != 0xef) {
-+ extern void vcpu_pend_interrupt(void *, int);
-+#if 0
-+ if (firsttime[vector]) {
-+ printf("**** (iterate) First received int on
vector=%d,itc=%lx\n",
-+ (unsigned long) vector, ia64_get_itc());
-+ firsttime[vector] = 0;
-+ }
-+ if (firstpend[vector]) {
-+ printf("**** First pended int on vector=%d,itc=%lx\n",
-+ (unsigned long) vector,ia64_get_itc());
-+ firstpend[vector] = 0;
-+ }
-+#endif
-+ //FIXME: TEMPORARY HACK!!!!
-+ vcpu_pend_interrupt(dom0->vcpu[0],vector);
-+ domain_wake(dom0->vcpu[0]);
-+ }
-+ else
-+#endif
- do_IRQ(local_vector_to_irq(vector), regs);
-
- /*
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/ivt.S
--- a/xen/arch/ia64/patch/linux-2.6.7/ivt.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,528 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/ivt.S 2004-06-15 23:18:59.000000000 -0600
-+++ arch/ia64/ivt.S 2005-04-01 12:56:01.000000000 -0700
-@@ -1,3 +1,21 @@
-+
-+#ifdef XEN
-+//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled??
-+// these are all hacked out for now as the entire IVT
-+// will eventually be replaced... just want to use it
-+// for startup code to handle TLB misses
-+//#define ia64_leave_kernel 0
-+//#define ia64_ret_from_syscall 0
-+//#define ia64_handle_irq 0
-+//#define ia64_fault 0
-+#define ia64_illegal_op_fault 0
-+#define ia64_prepare_handle_unaligned 0
-+#define ia64_bad_break 0
-+#define ia64_trace_syscall 0
-+#define sys_call_table 0
-+#define sys_ni_syscall 0
-+#include <asm/vhpt.h>
-+#endif
- /*
- * arch/ia64/kernel/ivt.S
- *
-@@ -76,6 +94,13 @@
- mov r19=n;; /* prepare to save predicates */ \
- br.sptk.many dispatch_to_fault_handler
-
-+#ifdef XEN
-+#define REFLECT(n) \
-+ mov r31=pr; \
-+ mov r19=n;; /* prepare to save predicates */ \
-+ br.sptk.many dispatch_reflection
-+#endif
-+
- .section .text.ivt,"ax"
-
- .align 32768 // align on 32KB boundary
-@@ -213,6 +238,13 @@
- // 0x0400 Entry 1 (size 64 bundles) ITLB (21)
- ENTRY(itlb_miss)
- DBG_FAULT(1)
-+#ifdef XEN
-+ VHPT_CCHAIN_LOOKUP(itlb_miss,i)
-+#ifdef VHPT_GLOBAL
-+ br.cond.sptk page_fault
-+ ;;
-+#endif
-+#endif
- /*
- * The ITLB handler accesses the L3 PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
-@@ -257,6 +289,13 @@
- // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
- ENTRY(dtlb_miss)
- DBG_FAULT(2)
-+#ifdef XEN
-+ VHPT_CCHAIN_LOOKUP(dtlb_miss,d)
-+#ifdef VHPT_GLOBAL
-+ br.cond.sptk page_fault
-+ ;;
-+#endif
-+#endif
- /*
- * The DTLB handler accesses the L3 PTE via the virtually mapped linear
- * page table. If a nested TLB miss occurs, we switch into physical
-@@ -301,6 +340,13 @@
- // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
- ENTRY(alt_itlb_miss)
- DBG_FAULT(3)
-+#ifdef XEN
-+//#ifdef VHPT_GLOBAL
-+// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i)
-+// br.cond.sptk page_fault
-+// ;;
-+//#endif
-+#endif
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
-@@ -339,6 +385,13 @@
- // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
- ENTRY(alt_dtlb_miss)
- DBG_FAULT(4)
-+#ifdef XEN
-+//#ifdef VHPT_GLOBAL
-+// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d)
-+// br.cond.sptk page_fault
-+// ;;
-+//#endif
-+#endif
- mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=PAGE_KERNEL
- mov r20=cr.isr
-@@ -368,6 +421,17 @@
- cmp.ne p8,p0=r0,r23
- (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
- (p8) br.cond.spnt page_fault
-+#ifdef XEN
-+ ;;
-+ // FIXME: inadequate test, this is where we test for Xen address
-+ // note that 0xf000 (cached) and 0xd000 (uncached) addresses
-+ // should be OK. (Though no I/O is done in Xen, EFI needs uncached
-+ // addresses and some domain EFI calls are passed through)
-+ tbit.nz p0,p8=r16,60
-+(p8) br.cond.spnt page_fault
-+//(p8) br.cond.spnt 0
-+ ;;
-+#endif
-
- dep r21=-1,r21,IA64_PSR_ED_BIT,1
- or r19=r19,r17 // insert PTE control bits into r19
-@@ -448,6 +512,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
- ENTRY(ikey_miss)
-+#ifdef XEN
-+ REFLECT(6)
-+#endif
- DBG_FAULT(6)
- FAULT(6)
- END(ikey_miss)
-@@ -460,9 +527,16 @@
- srlz.i
- ;;
- SAVE_MIN_WITH_COVER
-+#ifdef XEN
-+ alloc r15=ar.pfs,0,0,4,0
-+ mov out0=cr.ifa
-+ mov out1=cr.isr
-+ mov out3=cr.itir
-+#else
- alloc r15=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=cr.isr
-+#endif
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
-@@ -483,6 +557,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- ENTRY(dkey_miss)
-+#ifdef XEN
-+ REFLECT(7)
-+#endif
- DBG_FAULT(7)
- FAULT(7)
- END(dkey_miss)
-@@ -491,6 +568,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
- ENTRY(dirty_bit)
-+#ifdef XEN
-+ REFLECT(8)
-+#endif
- DBG_FAULT(8)
- /*
- * What we do here is to simply turn on the dirty bit in the PTE. We need to
-@@ -553,6 +633,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
- ENTRY(iaccess_bit)
-+#ifdef XEN
-+ REFLECT(9)
-+#endif
- DBG_FAULT(9)
- // Like Entry 8, except for instruction access
- mov r16=cr.ifa // get the address that caused the fault
-@@ -618,6 +701,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
- ENTRY(daccess_bit)
-+#ifdef XEN
-+ REFLECT(10)
-+#endif
- DBG_FAULT(10)
- // Like Entry 8, except for data access
- mov r16=cr.ifa // get the address that caused the fault
-@@ -686,6 +772,16 @@
- * to prevent leaking bits from kernel to user level.
- */
- DBG_FAULT(11)
-+#ifdef XEN
-+ mov r16=cr.isr
-+ mov r17=cr.iim
-+ mov r31=pr
-+ ;;
-+ cmp.eq p7,p0=r0,r17 // is this a psuedo-cover?
-+ // FIXME: may also need to check slot==2?
-+(p7) br.sptk.many dispatch_privop_fault
-+ br.sptk.many dispatch_break_fault
-+#endif
- mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
- mov r17=cr.iim
- mov r18=__IA64_BREAK_SYSCALL
-@@ -696,7 +792,9 @@
- mov r27=ar.rsc
- mov r26=ar.pfs
- mov r28=cr.iip
-+#ifndef XEN
- mov r31=pr // prepare to save predicates
-+#endif
- mov r20=r1
- ;;
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-@@ -792,6 +890,36 @@
- DBG_FAULT(13)
- FAULT(13)
-
-+#ifdef XEN
-+ // There is no particular reason for this code to be here, other than that
-+ // there happens to be space here that would go unused otherwise. If this
-+ // fault ever gets "unreserved", simply moved the following code to a more
-+ // suitable spot...
-+
-+ENTRY(dispatch_break_fault)
-+ SAVE_MIN_WITH_COVER
-+ ;;
-+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
-+ mov out0=cr.ifa
-+ adds out1=16,sp
-+ mov out2=cr.isr // FIXME: pity to make this slow access twice
-+ mov out3=cr.iim // FIXME: pity to make this slow access twice
-+
-+ ssm psr.ic | PSR_DEFAULT_BITS
-+ ;;
-+ srlz.i // guarantee that interruption collection is on
-+ ;;
-+(p15) ssm psr.i // restore psr.i
-+ adds r3=8,r2 // set up second base pointer
-+ ;;
-+ SAVE_REST
-+ movl r14=ia64_leave_kernel
-+ ;;
-+ mov rp=r14
-+ br.sptk.many ia64_prepare_handle_break
-+END(dispatch_break_fault)
-+#endif
-+
- .org ia64_ivt+0x3800
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x3800 Entry 14 (size 64 bundles) Reserved
-@@ -842,9 +970,11 @@
- * - ar.fpsr: set to kernel settings
- */
- GLOBAL_ENTRY(ia64_syscall_setup)
-+#ifndef XEN
- #if PT(B6) != 0
- # error This code assumes that b6 is the first field in pt_regs.
- #endif
-+#endif
- st8 [r1]=r19 // save b6
- add r16=PT(CR_IPSR),r1 // initialize first base pointer
- add r17=PT(R11),r1 // initialize second base pointer
-@@ -974,6 +1104,37 @@
- DBG_FAULT(16)
- FAULT(16)
-
-+#ifdef XEN
-+ // There is no particular reason for this code to be here, other than that
-+ // there happens to be space here that would go unused otherwise. If this
-+ // fault ever gets "unreserved", simply moved the following code to a more
-+ // suitable spot...
-+
-+ENTRY(dispatch_privop_fault)
-+ SAVE_MIN_WITH_COVER
-+ ;;
-+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
-+ mov out0=cr.ifa
-+ adds out1=16,sp
-+ mov out2=cr.isr // FIXME: pity to make this slow access twice
-+ mov out3=cr.itir
-+
-+ ssm psr.ic | PSR_DEFAULT_BITS
-+ ;;
-+ srlz.i // guarantee that interruption collection is on
-+ ;;
-+(p15) ssm psr.i // restore psr.i
-+ adds r3=8,r2 // set up second base pointer
-+ ;;
-+ SAVE_REST
-+ movl r14=ia64_leave_kernel
-+ ;;
-+ mov rp=r14
-+ br.sptk.many ia64_prepare_handle_privop
-+END(dispatch_privop_fault)
-+#endif
-+
-+
- .org ia64_ivt+0x4400
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x4400 Entry 17 (size 64 bundles) Reserved
-@@ -1090,6 +1251,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
- ENTRY(page_not_present)
-+#ifdef XEN
-+ REFLECT(20)
-+#endif
- DBG_FAULT(20)
- mov r16=cr.ifa
- rsm psr.dt
-@@ -1110,6 +1274,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
- ENTRY(key_permission)
-+#ifdef XEN
-+ REFLECT(21)
-+#endif
- DBG_FAULT(21)
- mov r16=cr.ifa
- rsm psr.dt
-@@ -1123,6 +1290,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
- ENTRY(iaccess_rights)
-+#ifdef XEN
-+ REFLECT(22)
-+#endif
- DBG_FAULT(22)
- mov r16=cr.ifa
- rsm psr.dt
-@@ -1136,6 +1306,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
- ENTRY(daccess_rights)
-+#ifdef XEN
-+ REFLECT(23)
-+#endif
- DBG_FAULT(23)
- mov r16=cr.ifa
- rsm psr.dt
-@@ -1153,8 +1326,13 @@
- mov r16=cr.isr
- mov r31=pr
- ;;
-+#ifdef XEN
-+ cmp4.ge p6,p0=0x20,r16
-+(p6) br.sptk.many dispatch_privop_fault
-+#else
- cmp4.eq p6,p0=0,r16
- (p6) br.sptk.many dispatch_illegal_op_fault
-+#endif
- ;;
- mov r19=24 // fault number
- br.sptk.many dispatch_to_fault_handler
-@@ -1164,6 +1342,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
- ENTRY(disabled_fp_reg)
-+#ifdef XEN
-+ REFLECT(25)
-+#endif
- DBG_FAULT(25)
- rsm psr.dfh // ensure we can access fph
- ;;
-@@ -1177,6 +1358,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
- ENTRY(nat_consumption)
-+#ifdef XEN
-+ REFLECT(26)
-+#endif
- DBG_FAULT(26)
- FAULT(26)
- END(nat_consumption)
-@@ -1185,6 +1369,10 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5700 Entry 27 (size 16 bundles) Speculation (40)
- ENTRY(speculation_vector)
-+#ifdef XEN
-+ // this probably need not reflect...
-+ REFLECT(27)
-+#endif
- DBG_FAULT(27)
- /*
- * A [f]chk.[as] instruction needs to take the branch to the recovery code but
-@@ -1228,6 +1416,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
- ENTRY(debug_vector)
-+#ifdef XEN
-+ REFLECT(29)
-+#endif
- DBG_FAULT(29)
- FAULT(29)
- END(debug_vector)
-@@ -1236,6 +1427,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
- ENTRY(unaligned_access)
-+#ifdef XEN
-+ REFLECT(30)
-+#endif
- DBG_FAULT(30)
- mov r16=cr.ipsr
- mov r31=pr // prepare to save predicates
-@@ -1247,6 +1441,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
- ENTRY(unsupported_data_reference)
-+#ifdef XEN
-+ REFLECT(31)
-+#endif
- DBG_FAULT(31)
- FAULT(31)
- END(unsupported_data_reference)
-@@ -1255,6 +1452,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
- ENTRY(floating_point_fault)
-+#ifdef XEN
-+ REFLECT(32)
-+#endif
- DBG_FAULT(32)
- FAULT(32)
- END(floating_point_fault)
-@@ -1263,6 +1463,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
- ENTRY(floating_point_trap)
-+#ifdef XEN
-+ REFLECT(33)
-+#endif
- DBG_FAULT(33)
- FAULT(33)
- END(floating_point_trap)
-@@ -1271,6 +1474,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
- ENTRY(lower_privilege_trap)
-+#ifdef XEN
-+ REFLECT(34)
-+#endif
- DBG_FAULT(34)
- FAULT(34)
- END(lower_privilege_trap)
-@@ -1279,6 +1485,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
- ENTRY(taken_branch_trap)
-+#ifdef XEN
-+ REFLECT(35)
-+#endif
- DBG_FAULT(35)
- FAULT(35)
- END(taken_branch_trap)
-@@ -1287,6 +1496,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
- ENTRY(single_step_trap)
-+#ifdef XEN
-+ REFLECT(36)
-+#endif
- DBG_FAULT(36)
- FAULT(36)
- END(single_step_trap)
-@@ -1343,6 +1555,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
- ENTRY(ia32_exception)
-+#ifdef XEN
-+ REFLECT(45)
-+#endif
- DBG_FAULT(45)
- FAULT(45)
- END(ia32_exception)
-@@ -1351,6 +1566,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
- ENTRY(ia32_intercept)
-+#ifdef XEN
-+ REFLECT(46)
-+#endif
- DBG_FAULT(46)
- #ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
-@@ -1381,6 +1599,9 @@
-
/////////////////////////////////////////////////////////////////////////////////////////
- // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
- ENTRY(ia32_interrupt)
-+#ifdef XEN
-+ REFLECT(47)
-+#endif
- DBG_FAULT(47)
- #ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
-@@ -1510,6 +1731,39 @@
- DBG_FAULT(67)
- FAULT(67)
-
-+#ifdef XEN
-+ .org ia64_ivt+0x8000
-+ENTRY(dispatch_reflection)
-+ /*
-+ * Input:
-+ * psr.ic: off
-+ * r19: intr type (offset into ivt, see ia64_int.h)
-+ * r31: contains saved predicates (pr)
-+ */
-+ SAVE_MIN_WITH_COVER_R19
-+ alloc r14=ar.pfs,0,0,5,0
-+ mov out4=r15
-+ mov out0=cr.ifa
-+ adds out1=16,sp
-+ mov out2=cr.isr
-+ mov out3=cr.iim
-+// mov out3=cr.itir
-+
-+ ssm psr.ic | PSR_DEFAULT_BITS
-+ ;;
-+ srlz.i // guarantee that interruption collection is on
-+ ;;
-+(p15) ssm psr.i // restore psr.i
-+ adds r3=8,r2 // set up second base pointer
-+ ;;
-+ SAVE_REST
-+ movl r14=ia64_leave_kernel
-+ ;;
-+ mov rp=r14
-+ br.sptk.many ia64_prepare_handle_reflection
-+END(dispatch_reflection)
-+#endif
-+
- #ifdef CONFIG_IA32_SUPPORT
-
- /*
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/kregs.h
--- a/xen/arch/ia64/patch/linux-2.6.7/kregs.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,13 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/kregs.h 2004-06-15 23:19:01.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/kregs.h 2004-09-17 18:27:22.000000000 -0600
-@@ -30,6 +30,10 @@
- #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */
- #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */
- #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */
-+#ifdef XEN
-+#define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */
-+#define IA64_TR_VHPT 4 /* dtr4: vhpt */
-+#endif
-
- /* Processor status register bits: */
- #define IA64_PSR_BE_BIT 1
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/lds.S
--- a/xen/arch/ia64/patch/linux-2.6.7/lds.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,17 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/vmlinux.lds.S 2004-06-15 23:19:52.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/xen.lds.S 2004-08-25 19:28:12.000000000 -0600
-@@ -11,12 +11,14 @@
- OUTPUT_FORMAT("elf64-ia64-little")
- OUTPUT_ARCH(ia64)
- ENTRY(phys_start)
-+#ifndef XEN
- jiffies = jiffies_64;
- PHDRS {
- code PT_LOAD;
- percpu PT_LOAD;
- data PT_LOAD;
- }
-+#endif
- SECTIONS
- {
- /* Sections to be discarded */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/linuxtime.h
--- a/xen/arch/ia64/patch/linux-2.6.7/linuxtime.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,34 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/time.h 2004-06-15 23:19:37.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/xen/linuxtime.h 2004-11-15 17:42:04.000000000 -0700
-@@ -1,6 +1,11 @@
- #ifndef _LINUX_TIME_H
- #define _LINUX_TIME_H
-
-+#ifdef XEN
-+typedef s64 time_t;
-+typedef s64 suseconds_t;
-+#endif
-+
- #include <asm/param.h>
- #include <linux/types.h>
-
-@@ -25,7 +30,9 @@
- #ifdef __KERNEL__
-
- #include <linux/spinlock.h>
-+#ifndef XEN
- #include <linux/seqlock.h>
-+#endif
- #include <linux/timex.h>
- #include <asm/div64.h>
- #ifndef div_long_long_rem
-@@ -322,7 +329,9 @@
-
- extern struct timespec xtime;
- extern struct timespec wall_to_monotonic;
-+#ifndef XEN
- extern seqlock_t xtime_lock;
-+#endif
-
- static inline unsigned long get_seconds(void)
- {
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/mca_asm.h
--- a/xen/arch/ia64/patch/linux-2.6.7/mca_asm.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,32 +0,0 @@
---- ../../linux-2.6.7/include/asm-ia64/mca_asm.h 2004-06-15 23:20:03.000000000 -0600
-+++ include/asm-ia64/mca_asm.h 2005-04-01 12:56:37.000000000 -0700
-@@ -26,8 +26,13 @@
- * direct mapped to physical addresses.
- * 1. Lop off bits 61 thru 63 in the virtual address
- */
-+#ifdef XEN
-+#define INST_VA_TO_PA(addr) \
-+ dep addr = 0, addr, 60, 4
-+#else // XEN
- #define INST_VA_TO_PA(addr) \
- dep addr = 0, addr, 61, 3
-+#endif // XEN
- /*
- * This macro converts a data virtual address to a physical address
- * Right now for simulation purposes the virtual addresses are
-@@ -42,9 +47,15 @@
- * direct mapped to physical addresses.
- * 1. Put 0x7 in bits 61 thru 63.
- */
-+#ifdef XEN
-+#define DATA_PA_TO_VA(addr,temp) \
-+ mov temp = 0xf ;; \
-+ dep addr = temp, addr, 60, 4
-+#else // XEN
- #define DATA_PA_TO_VA(addr,temp) \
- mov temp = 0x7 ;; \
- dep addr = temp, addr, 61, 3
-+#endif // XEN
-
- /*
- * This macro jumps to the instruction at the given virtual address
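The mca_asm.h change above (and the minstate.h change that follows) is the same address-layout tweak seen throughout this changeset: Linux keeps its identity mapping in region 7, i.e. bits 61-63 of the virtual address, while this Xen tree widens the field to bits 60-63 and uses 0xf. The dep instructions are plain bit-field deposits; a C sketch of both conversions (illustrative, not code from the tree):

#include <stdio.h>

/* Linux ia64: identity map lives in region 7 (top 3 bits = 0x7). */
static unsigned long linux_va_to_pa(unsigned long va) { return va & ((1UL << 61) - 1); }
static unsigned long linux_pa_to_va(unsigned long pa) { return pa | (0x7UL << 61); }

/* This Xen tree: identity map moved to a 4-bit field, value 0xf (bits 60-63). */
static unsigned long xen_va_to_pa(unsigned long va)   { return va & ((1UL << 60) - 1); }
static unsigned long xen_pa_to_va(unsigned long pa)   { return pa | (0xfUL << 60); }

int main(void)
{
    unsigned long pa = 0x4000000UL;   /* arbitrary physical address */

    printf("linux: va %016lx -> pa %016lx\n",
           linux_pa_to_va(pa), linux_va_to_pa(linux_pa_to_va(pa)));
    printf("xen:   va %016lx -> pa %016lx\n",
           xen_pa_to_va(pa), xen_va_to_pa(xen_pa_to_va(pa)));
    return 0;
}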
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/minstate.h
--- a/xen/arch/ia64/patch/linux-2.6.7/minstate.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,29 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/minstate.h 2004-06-15 23:19:52.000000000 -0600
-+++ arch/ia64/minstate.h 2005-04-01 12:56:01.000000000 -0700
-@@ -45,7 +45,7 @@
- (pKStk) tpa r1=sp; /* compute physical addr of sp */ \
- (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
- (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
--(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
-+(pUStk) dep r22=-1,r22,60,4; /* compute kernel virtual addr of RBS */ \
- ;; \
- (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
- (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
-@@ -65,7 +65,7 @@
- #endif
-
- #ifdef MINSTATE_PHYS
--# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,61,3
-+# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; dep reg=0,reg,60,4
- # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
- # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
- #endif
-@@ -172,7 +172,7 @@
- ;; \
- .mem.offset 0,0; st8.spill [r16]=r15,16; \
- .mem.offset 8,0; st8.spill [r17]=r14,16; \
-- dep r14=-1,r0,61,3; \
-+ dep r14=-1,r0,60,4; \
- ;; \
- .mem.offset 0,0; st8.spill [r16]=r2,16; \
- .mem.offset 8,0; st8.spill [r17]=r3,16; \
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c
--- a/xen/arch/ia64/patch/linux-2.6.7/mm_bootmem.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,92 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/bootmem.c 2004-06-15 23:19:09.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/mm_bootmem.c 2004-12-17 13:47:03.000000000 -0700
-@@ -10,7 +10,9 @@
- */
-
- #include <linux/mm.h>
-+#ifndef XEN
- #include <linux/kernel_stat.h>
-+#endif
- #include <linux/swap.h>
- #include <linux/interrupt.h>
- #include <linux/init.h>
-@@ -55,6 +57,9 @@
- bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
- bdata->node_boot_start = (start << PAGE_SHIFT);
- bdata->node_low_pfn = end;
-+#ifdef XEN
-+//printk("init_bootmem_core:
mapstart=%lx,start=%lx,end=%lx,bdata->node_bootmem_map=%lx,bdata->node_boot_start=%lx,bdata->node_low_pfn=%lx\n",mapstart,start,end,bdata->node_bootmem_map,bdata->node_boot_start,bdata->node_low_pfn);
-+#endif
-
- /*
- * Initially all pages are reserved - setup_arch() has to
-@@ -146,6 +151,9 @@
- unsigned long i, start = 0, incr, eidx;
- void *ret;
-
-+#ifdef XEN
-+//printf("__alloc_bootmem_core(%lx,%lx,%lx,%lx)
called\n",bdata,size,align,goal);
-+#endif
- if(!size) {
- printk("__alloc_bootmem_core(): zero-sized request\n");
- BUG();
-@@ -153,6 +161,9 @@
- BUG_ON(align & (align-1));
-
- eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
-+#ifdef XEN
-+//printf("__alloc_bootmem_core: eidx=%lx\n",eidx);
-+#endif
- offset = 0;
- if (align &&
- (bdata->node_boot_start & (align - 1UL)) != 0)
-@@ -182,6 +193,9 @@
- unsigned long j;
- i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
- i = ALIGN(i, incr);
-+#ifdef XEN
-+//if (i >= eidx) goto fail_block;
-+#endif
- if (test_bit(i, bdata->node_bootmem_map))
- continue;
- for (j = i + 1; j < i + areasize; ++j) {
-@@ -203,6 +217,9 @@
- return NULL;
-
- found:
-+#ifdef XEN
-+//printf("__alloc_bootmem_core: start=%lx\n",start);
-+#endif
- bdata->last_success = start << PAGE_SHIFT;
- BUG_ON(start >= eidx);
-
-@@ -262,6 +279,9 @@
- page = virt_to_page(phys_to_virt(bdata->node_boot_start));
- idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
- map = bdata->node_bootmem_map;
-+#ifdef XEN
-+//printk("free_all_bootmem_core: bdata=%lx, bdata->node_boot_start=%lx,
bdata->node_low_pfn=%lx,
bdata->node_bootmem_map=%lx\n",bdata,bdata->node_boot_start,bdata->node_low_pfn,bdata->node_bootmem_map);
-+#endif
- for (i = 0; i < idx; ) {
- unsigned long v = ~map[i / BITS_PER_LONG];
- if (v) {
-@@ -285,6 +305,9 @@
- * Now free the allocator bitmap itself, it's not
- * needed anymore:
- */
-+#ifdef XEN
-+//printk("About to free the allocator bitmap itself\n");
-+#endif
- page = virt_to_page(bdata->node_bootmem_map);
- count = 0;
- for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >>
PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
-@@ -327,6 +350,9 @@
- return(init_bootmem_core(&contig_page_data, start, 0, pages));
- }
-
-+#ifdef XEN
-+#undef reserve_bootmem
-+#endif
- #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
- void __init reserve_bootmem (unsigned long addr, unsigned long size)
- {
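The __alloc_bootmem_core() hunk above is mostly debug printks, but the loop being patched is the classic bootmem search: scan the page bitmap for a run of free (zero) bits starting at an aligned index. A minimal sketch of that search, with generic names rather than the kernel's bitmap helpers:

#include <stddef.h>

/* Return the index of the first run of 'run' clear bits starting at a
 * multiple of 'align' (align must be >= 1) in a bitmap of 'nbits' bits,
 * or -1 if none exists. This mirrors the find_next_zero_bit()/test_bit()
 * loop patched above. */
static long find_free_run(const unsigned long *map, size_t nbits,
                          size_t run, size_t align)
{
    size_t i, j;

    for (i = 0; i + run <= nbits; i += align) {
        for (j = 0; j < run; j++) {
            size_t bit = i + j;
            if (map[bit / (8 * sizeof(unsigned long))] &
                (1UL << (bit % (8 * sizeof(unsigned long)))))
                break;               /* bit set -> page already reserved */
        }
        if (j == run)
            return (long)i;          /* found a fully clear run */
    }
    return -1;
}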
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/mm_contig.c
--- a/xen/arch/ia64/patch/linux-2.6.7/mm_contig.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,216 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/mm/contig.c 2004-06-15 23:19:12.000000000 -0600
-+++ arch/ia64/mm_contig.c 2005-03-23 14:54:06.000000000 -0700
-@@ -15,11 +15,21 @@
- * memory.
- */
- #include <linux/config.h>
-+#ifdef XEN
-+#include <xen/sched.h>
-+#endif
- #include <linux/bootmem.h>
- #include <linux/efi.h>
- #include <linux/mm.h>
- #include <linux/swap.h>
-
-+#ifdef XEN
-+#undef reserve_bootmem
-+extern struct page *zero_page_memmap_ptr;
-+struct page *mem_map;
-+#define MAX_DMA_ADDRESS ~0UL // FIXME???
-+#endif
-+
- #include <asm/meminit.h>
- #include <asm/pgalloc.h>
- #include <asm/pgtable.h>
-@@ -37,30 +47,7 @@
- void
- show_mem (void)
- {
-- int i, total = 0, reserved = 0;
-- int shared = 0, cached = 0;
--
-- printk("Mem-info:\n");
-- show_free_areas();
--
-- printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-- i = max_mapnr;
-- while (i-- > 0) {
-- if (!pfn_valid(i))
-- continue;
-- total++;
-- if (PageReserved(mem_map+i))
-- reserved++;
-- else if (PageSwapCache(mem_map+i))
-- cached++;
-- else if (page_count(mem_map + i))
-- shared += page_count(mem_map + i) - 1;
-- }
-- printk("%d pages of RAM\n", total);
-- printk("%d reserved pages\n", reserved);
-- printk("%d pages shared\n", shared);
-- printk("%d pages swap cached\n", cached);
-- printk("%ld pages in page table cache\n", pgtable_cache_size);
-+ printk("Dummy show_mem\n");
- }
-
- /* physical address where the bootmem map is located */
-@@ -80,6 +67,9 @@
- {
- unsigned long *max_pfnp = arg, pfn;
-
-+#ifdef XEN
-+//printf("find_max_pfn: start=%lx, end=%lx, *arg=%lx\n",start,end,*(unsigned
long *)arg);
-+#endif
- pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
- if (pfn > *max_pfnp)
- *max_pfnp = pfn;
-@@ -133,41 +123,6 @@
- return 0;
- }
-
--/**
-- * find_memory - setup memory map
-- *
-- * Walk the EFI memory map and find usable memory for the system, taking
-- * into account reserved areas.
-- */
--void
--find_memory (void)
--{
-- unsigned long bootmap_size;
--
-- reserve_memory();
--
-- /* first find highest page frame number */
-- max_pfn = 0;
-- efi_memmap_walk(find_max_pfn, &max_pfn);
--
-- /* how many bytes to cover all the pages */
-- bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
--
-- /* look for a location to hold the bootmap */
-- bootmap_start = ~0UL;
-- efi_memmap_walk(find_bootmap_location, &bootmap_size);
-- if (bootmap_start == ~0UL)
-- panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
--
-- bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
--
-- /* Free all available memory, then mark bootmem-map as being in use. */
-- efi_memmap_walk(filter_rsvd_memory, free_bootmem);
-- reserve_bootmem(bootmap_start, bootmap_size);
--
-- find_initrd();
--}
--
- #ifdef CONFIG_SMP
- /**
- * per_cpu_init - setup per-cpu variables
-@@ -227,73 +182,42 @@
- void
- paging_init (void)
- {
-- unsigned long max_dma;
-- unsigned long zones_size[MAX_NR_ZONES];
--#ifdef CONFIG_VIRTUAL_MEM_MAP
-- unsigned long zholes_size[MAX_NR_ZONES];
-- unsigned long max_gap;
--#endif
--
-- /* initialize mem_map[] */
-+ struct pfn_info *pg;
-+ /* Allocate and map the machine-to-phys table */
-+ if ((pg = alloc_domheap_pages(NULL, 10)) == NULL)
-+ panic("Not enough memory to bootstrap Xen.\n");
-+ memset(page_to_virt(pg), 0x55, 16UL << 20);
-
-- memset(zones_size, 0, sizeof(zones_size));
-+ /* Other mapping setup */
-
-- num_physpages = 0;
-- efi_memmap_walk(count_pages, &num_physpages);
-
-- max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
--
--#ifdef CONFIG_VIRTUAL_MEM_MAP
-- memset(zholes_size, 0, sizeof(zholes_size));
--
-- num_dma_physpages = 0;
-- efi_memmap_walk(count_dma_pages, &num_dma_physpages);
--
-- if (max_low_pfn < max_dma) {
-- zones_size[ZONE_DMA] = max_low_pfn;
-- zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
-- } else {
-- zones_size[ZONE_DMA] = max_dma;
-- zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
-- if (num_physpages > num_dma_physpages) {
-- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
-- zholes_size[ZONE_NORMAL] =
-- ((max_low_pfn - max_dma) -
-- (num_physpages - num_dma_physpages));
-- }
-- }
--
-- max_gap = 0;
-- efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
-- if (max_gap < LARGE_GAP) {
-- vmem_map = (struct page *) 0;
-- free_area_init_node(0, &contig_page_data, NULL, zones_size, 0,
-- zholes_size);
-- mem_map = contig_page_data.node_mem_map;
-- } else {
-- unsigned long map_size;
--
-- /* allocate virtual_mem_map */
--
-- map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
-- vmalloc_end -= map_size;
-- vmem_map = (struct page *) vmalloc_end;
-- efi_memmap_walk(create_mem_map_page_table, 0);
--
-- free_area_init_node(0, &contig_page_data, vmem_map, zones_size,
-- 0, zholes_size);
--
-- mem_map = contig_page_data.node_mem_map;
-- printk("Virtual mem_map starts at 0x%p\n", mem_map);
-- }
--#else /* !CONFIG_VIRTUAL_MEM_MAP */
-- if (max_low_pfn < max_dma)
-- zones_size[ZONE_DMA] = max_low_pfn;
-- else {
-- zones_size[ZONE_DMA] = max_dma;
-- zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
-- }
-- free_area_init(zones_size);
--#endif /* !CONFIG_VIRTUAL_MEM_MAP */
- zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
- }
-+
-+struct pfn_info *frame_table;
-+unsigned long frame_table_size;
-+unsigned long max_page;
-+
-+/* FIXME: postpone support to machines with big holes between physical memorys.
-+ * Current hack allows only efi memdesc upto 4G place. (See efi.c)
-+ */
-+#ifndef CONFIG_VIRTUAL_MEM_MAP
-+#define FT_ALIGN_SIZE (16UL << 20)
-+void __init init_frametable(void)
-+{
-+ unsigned long i, p;
-+ frame_table_size = max_page * sizeof(struct pfn_info);
-+ frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
-+
-+ /* Request continuous trunk from boot allocator, since HV
-+ * address is identity mapped */
-+ p = alloc_boot_pages(frame_table_size>>PAGE_SHIFT, FT_ALIGN_SIZE>>PAGE_SHIFT) << PAGE_SHIFT;
-+ if (p == 0)
-+ panic("Not enough memory for frame table.\n");
-+
-+ frame_table = __va(p);
-+ memset(frame_table, 0, frame_table_size);
-+ printk("size of frame_table: %lukB\n",
-+ frame_table_size >> 10);
-+}
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/mmzone.h
--- a/xen/arch/ia64/patch/linux-2.6.7/mmzone.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,14 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/mmzone.h 2004-06-15 23:19:36.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/mmzone.h 2004-08-25 19:28:13.000000000 -0600
-@@ -185,7 +185,11 @@
- char *name;
- unsigned long spanned_pages; /* total size, including holes */
- unsigned long present_pages; /* amount of memory (excluding holes) */
-+#ifdef XEN
-+};
-+#else
- } ____cacheline_maxaligned_in_smp;
-+#endif
-
-
- /*
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/page.h
--- a/xen/arch/ia64/patch/linux-2.6.7/page.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,84 +0,0 @@
---- ../../linux-2.6.7/include/asm-ia64/page.h 2004-06-15 23:18:58.000000000 -0600
-+++ include/asm-ia64/page.h 2005-04-01 12:56:37.000000000 -0700
-@@ -12,6 +12,9 @@
- #include <asm/intrinsics.h>
- #include <asm/types.h>
-
-+#ifndef __ASSEMBLY__
-+#include <asm/flushtlb.h>
-+#endif
- /*
- * PAGE_SHIFT determines the actual kernel page size.
- */
-@@ -84,14 +87,22 @@
- #endif
-
- #ifndef CONFIG_DISCONTIGMEM
-+#ifdef XEN
-+#define pfn_valid(pfn) (0)
-+#else
- #define pfn_valid(pfn) (((pfn) < max_mapnr) && ia64_pfn_valid(pfn))
--#define page_to_pfn(page) ((unsigned long) (page - mem_map))
--#define pfn_to_page(pfn) (mem_map + (pfn))
-+#endif
- #endif /* CONFIG_DISCONTIGMEM */
-
--#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-+#define page_to_pfn(_page) ((unsigned long)((_page) - frame_table))
-+#define page_to_virt(_page) phys_to_virt(page_to_phys(_page))
-+
-+#define page_to_phys(_page) (page_to_pfn(_page) << PAGE_SHIFT)
- #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-
-+#define pfn_to_page(_pfn) (frame_table + (_pfn))
-+#define phys_to_page(kaddr) pfn_to_page(((kaddr) >> PAGE_SHIFT))
-+
- typedef union ia64_va {
- struct {
- unsigned long off : 61; /* intra-region offset */
-@@ -107,8 +118,25 @@
- * expressed in this way to ensure they result in a single "dep"
- * instruction.
- */
-+#ifdef XEN
-+typedef union xen_va {
-+ struct {
-+ unsigned long off : 60;
-+ unsigned long reg : 4;
-+ } f;
-+ unsigned long l;
-+ void *p;
-+} xen_va;
-+
-+// xen/drivers/console.c uses __va in a declaration (should be fixed!)
-+#define __pa(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
-+#define __va(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
-+//# define __pa(x) ((unsigned long)(((unsigned long)x) - PAGE_OFFSET))
-+//# define __va(x) ((void *)((char *)(x) + PAGE_OFFSET))
-+#else
- #define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
- #define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
-+#endif
-
- #define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
- #define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
-@@ -180,11 +208,19 @@
- # define __pgprot(x) (x)
- #endif /* !STRICT_MM_TYPECHECKS */
-
-+#ifdef XEN
-+#define PAGE_OFFSET 0xf000000000000000
-+#else
- #define PAGE_OFFSET 0xe000000000000000
-+#endif
-
- #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
-  VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \
-  (((current->thread.flags & IA64_THREAD_XSTACK) != 0) \
-  ? VM_EXEC : 0))
-
-+#ifdef XEN
-+#define __flush_tlb() do {} while(0);
-+#endif
-+
- #endif /* _ASM_IA64_PAGE_H */
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/page_alloc.c
--- a/xen/arch/ia64/patch/linux-2.6.7/page_alloc.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,305 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/page_alloc.c 2004-06-15 23:18:57.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/page_alloc.c 2004-12-17 13:47:03.000000000 -0700
-@@ -19,20 +19,28 @@
- #include <linux/mm.h>
- #include <linux/swap.h>
- #include <linux/interrupt.h>
-+#ifndef XEN
- #include <linux/pagemap.h>
-+#endif
- #include <linux/bootmem.h>
- #include <linux/compiler.h>
- #include <linux/module.h>
-+#ifndef XEN
- #include <linux/suspend.h>
- #include <linux/pagevec.h>
- #include <linux/blkdev.h>
-+#endif
- #include <linux/slab.h>
-+#ifndef XEN
- #include <linux/notifier.h>
-+#endif
- #include <linux/topology.h>
-+#ifndef XEN
- #include <linux/sysctl.h>
- #include <linux/cpu.h>
-
- #include <asm/tlbflush.h>
-+#endif
-
- DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
- struct pglist_data *pgdat_list;
-@@ -71,6 +79,9 @@
-
- static void bad_page(const char *function, struct page *page)
- {
-+#ifdef XEN
-+printk("bad_page: called but disabled\n");
-+#else
- printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
- function, current->comm, page);
- printk(KERN_EMERG "flags:0x%08lx mapping:%p mapcount:%d count:%d\n",
-@@ -91,6 +102,7 @@
- set_page_count(page, 0);
- page->mapping = NULL;
- page->mapcount = 0;
-+#endif
- }
-
- #ifndef CONFIG_HUGETLB_PAGE
-@@ -218,6 +230,7 @@
-
- static inline void free_pages_check(const char *function, struct page *page)
- {
-+#ifndef XEN
- if ( page_mapped(page) ||
- page->mapping != NULL ||
- page_count(page) != 0 ||
-@@ -233,6 +246,7 @@
- 1 << PG_swapcache |
- 1 << PG_writeback )))
- bad_page(function, page);
-+#endif
- if (PageDirty(page))
- ClearPageDirty(page);
- }
-@@ -276,6 +290,9 @@
-
- void __free_pages_ok(struct page *page, unsigned int order)
- {
-+#ifdef XEN
-+printk("__free_pages_ok: called but disabled\n");
-+#else
- LIST_HEAD(list);
- int i;
-
-@@ -285,6 +302,7 @@
- list_add(&page->lru, &list);
- kernel_map_pages(page, 1<<order, 0);
- free_pages_bulk(page_zone(page), 1, &list, order);
-+#endif
- }
-
- #define MARK_USED(index, order, area) \
-@@ -330,6 +348,7 @@
- */
- static void prep_new_page(struct page *page, int order)
- {
-+#ifndef XEN
- if (page->mapping || page_mapped(page) ||
- (page->flags & (
- 1 << PG_private |
-@@ -343,11 +362,14 @@
- 1 << PG_swapcache |
- 1 << PG_writeback )))
- bad_page(__FUNCTION__, page);
-+#endif
-
- page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
- 1 << PG_referenced | 1 << PG_arch_1 |
- 1 << PG_checked | 1 << PG_mappedtodisk);
-+#ifndef XEN
- page->private = 0;
-+#endif
- set_page_refs(page, order);
- }
-
-@@ -590,13 +612,17 @@
- unsigned long min;
- struct zone **zones;
- struct page *page;
-+#ifndef XEN
- struct reclaim_state reclaim_state;
-+#endif
- struct task_struct *p = current;
- int i;
- int alloc_type;
- int do_retry;
-
-+#ifndef XEN
- might_sleep_if(wait);
-+#endif
-
- zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
- if (zones[0] == NULL) /* no zones in the zonelist */
-@@ -610,12 +636,14 @@
-
- min = (1<<order) + z->protection[alloc_type];
-
-+#ifndef XEN
- /*
- * We let real-time tasks dip their real-time paws a little
- * deeper into reserves.
- */
- if (rt_task(p))
- min -= z->pages_low >> 1;
-+#endif
-
- if (z->free_pages >= min ||
- (!wait && z->free_pages >= z->pages_high)) {
-@@ -627,9 +655,11 @@
- }
- }
-
-+#ifndef XEN
- /* we're somewhat low on memory, failed to find what we needed */
- for (i = 0; zones[i] != NULL; i++)
- wakeup_kswapd(zones[i]);
-+#endif
-
- /* Go through the zonelist again, taking __GFP_HIGH into account */
- for (i = 0; zones[i] != NULL; i++) {
-@@ -639,8 +669,10 @@
-
- if (gfp_mask & __GFP_HIGH)
- min -= z->pages_low >> 2;
-+#ifndef XEN
- if (rt_task(p))
- min -= z->pages_low >> 1;
-+#endif
-
- if (z->free_pages >= min ||
- (!wait && z->free_pages >= z->pages_high)) {
-@@ -654,6 +686,7 @@
-
- /* here we're in the low on memory slow path */
-
-+#ifndef XEN
- rebalance:
- if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
- /* go through the zonelist yet again, ignoring mins */
-@@ -681,6 +714,7 @@
-
- p->reclaim_state = NULL;
- p->flags &= ~PF_MEMALLOC;
-+#endif
-
- /* go through the zonelist yet one more time */
- for (i = 0; zones[i] != NULL; i++) {
-@@ -698,6 +732,11 @@
- }
- }
-
-+#ifdef XEN
-+printk(KERN_WARNING "%s: page allocation failure."
-+ " order:%d, mode:0x%x\n",
-+ "(xen tasks have no comm)", order, gfp_mask);
-+#else
- /*
- * Don't let big-order allocations loop unless the caller explicitly
- * requests that. Wait for some write requests to complete then retry.
-@@ -724,6 +763,7 @@
- p->comm, order, gfp_mask);
- dump_stack();
- }
-+#endif
- return NULL;
- got_pg:
- kernel_map_pages(page, 1 << order, 1);
-@@ -808,6 +848,7 @@
-
- EXPORT_SYMBOL(get_zeroed_page);
-
-+#ifndef XEN
- void __pagevec_free(struct pagevec *pvec)
- {
- int i = pagevec_count(pvec);
-@@ -815,10 +856,15 @@
- while (--i >= 0)
- free_hot_cold_page(pvec->pages[i], pvec->cold);
- }
-+#endif
-
- fastcall void __free_pages(struct page *page, unsigned int order)
- {
-+#ifdef XEN
-+ if (!PageReserved(page)) {
-+#else
- if (!PageReserved(page) && put_page_testzero(page)) {
-+#endif
- if (order == 0)
- free_hot_page(page);
- else
-@@ -914,6 +960,13 @@
- return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
- }
-
-+#ifdef XEN
-+unsigned int nr_free_highpages (void)
-+{
-+printf("nr_free_highpages: called but not implemented\n");
-+}
-+#endif
-+
- #ifdef CONFIG_HIGHMEM
- unsigned int nr_free_highpages (void)
- {
-@@ -1022,6 +1075,7 @@
-
- void si_meminfo(struct sysinfo *val)
- {
-+#ifndef XEN
- val->totalram = totalram_pages;
- val->sharedram = 0;
- val->freeram = nr_free_pages();
-@@ -1034,6 +1088,7 @@
- val->freehigh = 0;
- #endif
- val->mem_unit = PAGE_SIZE;
-+#endif
- }
-
- EXPORT_SYMBOL(si_meminfo);
-@@ -1165,7 +1220,9 @@
- printk("= %lukB\n", K(total));
- }
-
-+#ifndef XEN
- show_swap_cache_info();
-+#endif
- }
-
- /*
-@@ -1530,6 +1587,9 @@
- zone->wait_table_size = wait_table_size(size);
- zone->wait_table_bits =
- wait_table_bits(zone->wait_table_size);
-+#ifdef XEN
-+//printf("free_area_init_core-1: calling alloc_bootmem_node(%lx,%lx)\n",pgdat,zone->wait_table_size * sizeof(wait_queue_head_t));
-+#endif
- zone->wait_table = (wait_queue_head_t *)
- alloc_bootmem_node(pgdat, zone->wait_table_size
- * sizeof(wait_queue_head_t));
-@@ -1584,6 +1644,9 @@
- */
- bitmap_size = (size-1) >> (i+4);
- bitmap_size = LONG_ALIGN(bitmap_size+1);
-+#ifdef XEN
-+//printf("free_area_init_core-2: calling alloc_bootmem_node(%lx,%lx)\n",pgdat, bitmap_size);
-+#endif
- zone->free_area[i].map =
- (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
- }
-@@ -1601,6 +1664,9 @@
- calculate_zone_totalpages(pgdat, zones_size, zholes_size);
- if (!node_mem_map) {
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-+#ifdef XEN
-+//printf("free_area_init_node: calling alloc_bootmem_node(%lx,%lx)\n",pgdat,size);
-+#endif
- node_mem_map = alloc_bootmem_node(pgdat, size);
- }
- pgdat->node_mem_map = node_mem_map;
-@@ -1784,6 +1850,7 @@
-
- #endif /* CONFIG_PROC_FS */
-
-+#ifndef XEN
- #ifdef CONFIG_HOTPLUG_CPU
- static int page_alloc_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-@@ -2011,3 +2078,4 @@
- setup_per_zone_protection();
- return 0;
- }
-+#endif
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/pal.S
--- a/xen/arch/ia64/patch/linux-2.6.7/pal.S Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/pal.S 2004-06-15 23:20:25.000000000
-0600
-+++ arch/ia64/pal.S 2005-04-01 12:56:01.000000000 -0700
-@@ -163,7 +163,11 @@
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- mov loc4=ar.rsc // save RSE configuration
-+#ifdef XEN
-+ dep.z loc2=loc2,0,60 // convert pal entry point to physical
-+#else // XEN
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
-+#endif // XEN
- tpa r8=r8 // convert rp to physical
- ;;
- mov b7 = loc2 // install target to branch reg
-@@ -218,7 +222,11 @@
- mov loc3 = psr // save psr
- ;;
- mov loc4=ar.rsc // save RSE configuration
-+#ifdef XEN
-+ dep.z loc2=loc2,0,60 // convert pal entry point to physical
-+#else // XEN
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
-+#endif // XEN
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/pgalloc.h
--- a/xen/arch/ia64/patch/linux-2.6.7/pgalloc.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,65 +0,0 @@
---- ../../linux-2.6.7/include/asm-ia64/pgalloc.h 2004-06-15 23:18:54.000000000 -0600
-+++ include/asm-ia64/pgalloc.h 2005-03-23 14:54:11.000000000 -0700
-@@ -34,6 +34,10 @@
- #define pmd_quicklist (local_cpu_data->pmd_quick)
- #define pgtable_cache_size (local_cpu_data->pgtable_cache_sz)
-
-+/* FIXME: Later 3 level page table should be over, to create
-+ * new interface upon xen memory allocator. To simplify first
-+ * effort moving to xen allocator, use xenheap pages temporarily.
-+ */
- static inline pgd_t*
- pgd_alloc_one_fast (struct mm_struct *mm)
- {
-@@ -55,7 +59,7 @@
- pgd_t *pgd = pgd_alloc_one_fast(mm);
-
- if (unlikely(pgd == NULL)) {
-- pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
-+ pgd = (pgd_t *)alloc_xenheap_page();
- if (likely(pgd != NULL))
- clear_page(pgd);
- }
-@@ -93,7 +97,7 @@
- static inline pmd_t*
- pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
- {
-- pmd_t *pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-+ pmd_t *pmd = (pmd_t *)alloc_xenheap_page();
-
- if (likely(pmd != NULL))
- clear_page(pmd);
-@@ -125,7 +129,7 @@
- static inline struct page *
- pte_alloc_one (struct mm_struct *mm, unsigned long addr)
- {
-- struct page *pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
-+ struct page *pte = alloc_xenheap_page();
-
- if (likely(pte != NULL))
- clear_page(page_address(pte));
-@@ -135,7 +139,7 @@
- static inline pte_t *
- pte_alloc_one_kernel (struct mm_struct *mm, unsigned long addr)
- {
-- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-+ pte_t *pte = (pte_t *)alloc_xenheap_page();
-
- if (likely(pte != NULL))
- clear_page(pte);
-@@ -145,13 +149,13 @@
- static inline void
- pte_free (struct page *pte)
- {
-- __free_page(pte);
-+ free_xenheap_page(pte);
- }
-
- static inline void
- pte_free_kernel (pte_t *pte)
- {
-- free_page((unsigned long) pte);
-+ free_xenheap_page((unsigned long) pte);
- }
-
- #define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte))
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/processor.h
--- a/xen/arch/ia64/patch/linux-2.6.7/processor.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,19 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/processor.h 2005-01-23 13:23:36.000000000 -0700
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/processor.h 2004-08-25 19:28:13.000000000 -0600
-@@ -406,12 +406,16 @@
- */
-
- /* Return TRUE if task T owns the fph partition of the CPU we're running on. */
-+#ifdef XEN
-+#define ia64_is_local_fpu_owner(t) 0
-+#else
- #define ia64_is_local_fpu_owner(t) \
- ({ \
- struct task_struct *__ia64_islfo_task = (t); \
- (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \
-  && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \
- })
-+#endif
-
- /* Mark task T as owning the fph partition of the CPU we're running on. */
- #define ia64_set_local_fpu_owner(t) do {
\
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/sal.h
--- a/xen/arch/ia64/patch/linux-2.6.7/sal.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/sal.h 2004-06-15 23:20:04.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/sal.h 2004-10-27 13:55:23.000000000 -0600
-@@ -646,7 +646,23 @@
- {
- struct ia64_sal_retval isrv;
-
-+//#ifdef XEN
-+#if 0
-+ unsigned long *x = (unsigned long *)ia64_sal;
-+ unsigned long *inst = (unsigned long *)*x;
-+ unsigned long __ia64_sc_flags;
-+ struct ia64_fpreg __ia64_sc_fr[6];
-+printf("ia64_sal_freq_base: about to save_scratch_fpregs\n");
-+ ia64_save_scratch_fpregs(__ia64_sc_fr);
-+ spin_lock_irqsave(&sal_lock, __ia64_sc_flags);
-+printf("ia64_sal_freq_base: about to call, ia64_sal=%p, ia64_sal[0]=%p,
ia64_sal[1]=%p\n",x,x[0],x[1]);
-+printf("first inst=%p,%p\n",inst[0],inst[1]);
-+ isrv = (*ia64_sal)(SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-+ spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags);
-+ ia64_load_scratch_fpregs(__ia64_sc_fr);
-+#else
- SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-+#endif
- *ticks_per_second = isrv.v0;
- *drift_info = isrv.v1;
- return isrv.status;
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/setup.c
--- a/xen/arch/ia64/patch/linux-2.6.7/setup.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,203 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/setup.c 2004-06-15 23:18:58.000000000 -0600
-+++ arch/ia64/setup.c 2005-04-04 22:31:09.000000000 -0600
-@@ -21,6 +21,9 @@
- #include <linux/init.h>
-
- #include <linux/acpi.h>
-+#ifdef XEN
-+#include <xen/sched.h>
-+#endif
- #include <linux/bootmem.h>
- #include <linux/console.h>
- #include <linux/delay.h>
-@@ -30,13 +33,17 @@
- #include <linux/seq_file.h>
- #include <linux/string.h>
- #include <linux/threads.h>
-+#ifndef XEN
- #include <linux/tty.h>
- #include <linux/serial.h>
- #include <linux/serial_core.h>
-+#endif
- #include <linux/efi.h>
- #include <linux/initrd.h>
-
-+#ifndef XEN
- #include <asm/ia32.h>
-+#endif
- #include <asm/machvec.h>
- #include <asm/mca.h>
- #include <asm/meminit.h>
-@@ -50,6 +57,11 @@
- #include <asm/smp.h>
- #include <asm/system.h>
- #include <asm/unistd.h>
-+#ifdef XEN
-+#include <linux/mm.h>
-+#include <asm/mmu_context.h>
-+extern unsigned long loops_per_jiffy; // from linux/init/main.c
-+#endif
-
- #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
- # error "struct cpuinfo_ia64 too big!"
-@@ -65,7 +77,9 @@
- DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
- unsigned long ia64_cycles_per_usec;
- struct ia64_boot_param *ia64_boot_param;
-+#ifndef XEN
- struct screen_info screen_info;
-+#endif
-
- unsigned long ia64_max_cacheline_size;
- unsigned long ia64_iobase; /* virtual address for I/O accesses */
-@@ -98,7 +112,6 @@
- struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
- int num_rsvd_regions;
-
--
- /*
- * Filter incoming memory segments based on the primitive map created from the boot
- * parameters. Segments contained in the map are removed from the memory ranges. A
-@@ -128,9 +141,12 @@
- for (i = 0; i < num_rsvd_regions; ++i) {
- range_start = max(start, prev_start);
- range_end = min(end, rsvd_region[i].start);
--
-- if (range_start < range_end)
-- call_pernode_memory(__pa(range_start), range_end -
range_start, func);
-+ /* init_boot_pages requires "ps, pe" */
-+ if (range_start < range_end) {
-+ printk("Init boot pages: 0x%lx -> 0x%lx.\n",
-+ __pa(range_start), __pa(range_end));
-+ (*func)(__pa(range_start), __pa(range_end), 0);
-+ }
-
- /* nothing more available in this segment */
- if (range_end == end) return 0;
-@@ -187,17 +203,17 @@
- + strlen(__va(ia64_boot_param->command_line)) + 1);
- n++;
-
-+ /* Reserve xen image/bitmap/xen-heap */
- rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
-- rsvd_region[n].end = (unsigned long) ia64_imva(_end);
-+ rsvd_region[n].end = rsvd_region[n].start + xenheap_size;
- n++;
-
--#ifdef CONFIG_BLK_DEV_INITRD
-+ /* This is actually dom0 image */
- if (ia64_boot_param->initrd_start) {
- rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
- rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size;
- n++;
- }
--#endif
-
- /* end of memory marker */
- rsvd_region[n].start = ~0UL;
-@@ -207,6 +223,16 @@
- num_rsvd_regions = n;
-
- sort_regions(rsvd_region, num_rsvd_regions);
-+
-+ {
-+ int i;
-+ printk("Reserved regions: \n");
-+ for (i = 0; i < num_rsvd_regions; i++)
-+ printk(" [%d] -> [0x%lx, 0x%lx]\n",
-+ i,
-+ rsvd_region[i].start,
-+ rsvd_region[i].end);
-+ }
- }
-
- /**
-@@ -280,23 +306,26 @@
- }
- #endif
-
-+#ifdef XEN
- void __init
--setup_arch (char **cmdline_p)
-+early_setup_arch(char **cmdline_p)
- {
- unw_init();
--
-- ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
--
-+
- *cmdline_p = __va(ia64_boot_param->command_line);
- strlcpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
--
-+ cmdline_parse(*cmdline_p);
-+
- efi_init();
-- io_port_init();
--
-+
- #ifdef CONFIG_IA64_GENERIC
- machvec_init(acpi_get_sysname());
- #endif
-
-+#ifdef XEN
-+#undef CONFIG_ACPI_BOOT
-+#endif
-+
- #ifdef CONFIG_ACPI_BOOT
- /* Initialize the ACPI boot-time table parser */
- acpi_table_init();
-@@ -308,9 +337,13 @@
- smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
- # endif
- #endif /* CONFIG_APCI_BOOT */
-+ io_port_init();
-+}
-+#endif
-
-- find_memory();
--
-+void __init
-+setup_arch (void)
-+{
- /* process SAL system table: */
- ia64_sal_init(efi.sal_systab);
-
-@@ -353,7 +386,6 @@
- /* enable IA-64 Machine Check Abort Handling */
- ia64_mca_init();
-
-- platform_setup(cmdline_p);
- paging_init();
- }
-
-@@ -413,6 +445,9 @@
- sprintf(cp, " 0x%lx", mask);
- }
-
-+#ifdef XEN
-+#define seq_printf(a,b...) printf(b)
-+#endif
- seq_printf(m,
- "processor : %d\n"
- "vendor : %s\n"
-@@ -616,7 +651,11 @@
- | IA64_DCR_DA | IA64_DCR_DD |
IA64_DCR_LC));
- atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
-+#ifdef XEN
-+ if (current->domain->arch.mm)
-+#else
- if (current->mm)
-+#endif
- BUG();
-
- ia64_mmu_init(ia64_imva(cpu_data));
-@@ -667,6 +706,8 @@
- void
- check_bugs (void)
- {
-+#ifndef XEN
- ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
- (unsigned long) __end___mckinley_e9_bundles);
-+#endif
- }
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/slab.c
--- a/xen/arch/ia64/patch/linux-2.6.7/slab.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,139 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/mm/slab.c 2004-06-15 23:19:44.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/slab.c 2004-12-17 13:47:03.000000000 -0700
-@@ -86,15 +86,30 @@
- #include <linux/init.h>
- #include <linux/compiler.h>
- #include <linux/seq_file.h>
-+#ifndef XEN
- #include <linux/notifier.h>
- #include <linux/kallsyms.h>
- #include <linux/cpu.h>
- #include <linux/sysctl.h>
- #include <linux/module.h>
-+#endif
-
- #include <asm/uaccess.h>
- #include <asm/cacheflush.h>
-+#ifndef XEN
- #include <asm/tlbflush.h>
-+#endif
-+
-+#ifdef XEN
-+#define lock_cpu_hotplug() do { } while (0)
-+#define unlock_cpu_hotplug() do { } while (0)
-+#define might_sleep_if(x) do { } while (0)
-+#define dump_stack() do { } while (0)
-+#define start_cpu_timer(cpu) do { } while (0)
-+static inline void __down(struct semaphore *sem) { }
-+static inline void __up(struct semaphore *sem) { }
-+static inline void might_sleep(void) { }
-+#endif
-
- /*
- * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
-@@ -530,7 +545,9 @@
- FULL
- } g_cpucache_up;
-
-+#ifndef XEN
- static DEFINE_PER_CPU(struct timer_list, reap_timers);
-+#endif
-
- static void reap_timer_fnc(unsigned long data);
- static void free_block(kmem_cache_t* cachep, void** objpp, int len);
-@@ -588,6 +605,7 @@
- * Add the CPU number into the expiry time to minimize the possibility of the
- * CPUs getting into lockstep and contending for the global cache chain lock.
- */
-+#ifndef XEN
- static void __devinit start_cpu_timer(int cpu)
- {
- struct timer_list *rt = &per_cpu(reap_timers, cpu);
-@@ -600,6 +618,7 @@
- add_timer_on(rt, cpu);
- }
- }
-+#endif
-
- #ifdef CONFIG_HOTPLUG_CPU
- static void stop_cpu_timer(int cpu)
-@@ -634,6 +653,7 @@
- return nc;
- }
-
-+#ifndef XEN
- static int __devinit cpuup_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-@@ -693,6 +713,7 @@
- }
-
- static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
-+#endif
-
- /* Initialisation.
- * Called after the gfp() functions have been enabled, and before smp_init().
-@@ -805,10 +826,14 @@
- /* Done! */
- g_cpucache_up = FULL;
-
-+#ifdef XEN
-+printk("kmem_cache_init: some parts commented out, ignored\n");
-+#else
- /* Register a cpu startup notifier callback
- * that initializes ac_data for all new cpus
- */
- register_cpu_notifier(&cpucache_notifier);
-+#endif
-
-
- /* The reap timers are started later, with a module init call:
-@@ -886,8 +911,10 @@
- page++;
- }
- sub_page_state(nr_slab, nr_freed);
-+#ifndef XEN
- if (current->reclaim_state)
- current->reclaim_state->reclaimed_slab += nr_freed;
-+#endif
- free_pages((unsigned long)addr, cachep->gfporder);
- if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
-@@ -1363,8 +1390,10 @@
- + cachep->num;
- }
-
-+#ifndef XEN
- cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
-+#endif
-
- /* Need the semaphore to access the chain. */
- down(&cache_chain_sem);
-@@ -2237,8 +2266,10 @@
-
- if (unlikely(addr < min_addr))
- goto out;
-+#ifndef XEN
- if (unlikely(addr > (unsigned long)high_memory - size))
- goto out;
-+#endif
- if (unlikely(addr & align_mask))
- goto out;
- if (unlikely(!kern_addr_valid(addr)))
-@@ -2769,6 +2800,7 @@
- */
- static void reap_timer_fnc(unsigned long cpu)
- {
-+#ifndef XEN
- struct timer_list *rt = &__get_cpu_var(reap_timers);
-
- /* CPU hotplug can drag us off cpu: don't run on wrong CPU */
-@@ -2776,6 +2808,7 @@
- cache_reap();
- mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu);
- }
-+#endif
- }
-
- #ifdef CONFIG_PROC_FS
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/slab.h
--- a/xen/arch/ia64/patch/linux-2.6.7/slab.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,14 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/slab.h 2004-06-15 23:20:26.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/slab.h 2004-08-25 19:28:13.000000000 -0600
-@@ -83,7 +83,11 @@
- goto found; \
- else \
- i++;
-+#ifdef XEN
-+#include <linux/kmalloc_sizes.h>
-+#else
- #include "kmalloc_sizes.h"
-+#endif
- #undef CACHE
- {
- extern void __you_cannot_kmalloc_that_much(void);
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/swiotlb.c
--- a/xen/arch/ia64/patch/linux-2.6.7/swiotlb.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,47 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/lib/swiotlb.c 2004-06-15 23:19:43.000000000 -0600
-+++ arch/ia64/lib/swiotlb.c 2005-03-23 14:54:05.000000000 -0700
-@@ -100,7 +100,11 @@
- /*
- * Get IO TLB memory from the low pages
- */
-- io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-+ /* FIXME: Do we really need swiotlb in HV? If all memory trunks
-+ * presented to guest as <4G, are actually <4G in machine range,
-+ * no DMA intevention from HV...
-+ */
-+ io_tlb_start = alloc_xenheap_pages(get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)));
- if (!io_tlb_start)
- BUG();
- io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-@@ -110,11 +114,11 @@
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
- */
-- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-+ io_tlb_list = alloc_xenheap_pages(get_order(io_tlb_nslabs * sizeof(int)));
- for (i = 0; i < io_tlb_nslabs; i++)
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_index = 0;
-- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
-+ io_tlb_orig_addr = alloc_xenheap_pages(get_order(io_tlb_nslabs * sizeof(char *)));
-
- printk(KERN_INFO "Placing software IO TLB between 0x%p - 0x%p\n",
- (void *) io_tlb_start, (void *) io_tlb_end);
-@@ -279,7 +283,7 @@
- /* XXX fix me: the DMA API should pass us an explicit DMA mask instead: */
- flags |= GFP_DMA;
-
-- ret = (void *)__get_free_pages(flags, get_order(size));
-+ ret = (void *)alloc_xenheap_pages(get_order(size));
- if (!ret)
- return NULL;
-
-@@ -294,7 +298,7 @@
- void
- swiotlb_free_coherent (struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
- {
-- free_pages((unsigned long) vaddr, get_order(size));
-+ free_xenheap_pages((unsigned long) vaddr, get_order(size));
- }
-
- /*
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/system.h
--- a/xen/arch/ia64/patch/linux-2.6.7/system.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,43 +0,0 @@
---- ../../linux-2.6.7/include/asm-ia64/system.h 2005-03-24 19:39:56.000000000 -0700
-+++ include/asm-ia64/system.h 2005-04-01 12:56:37.000000000 -0700
-@@ -24,8 +24,16 @@
- * 0xa000000000000000+2*PERCPU_PAGE_SIZE
- * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
- */
-+#ifdef XEN
-+//#define KERNEL_START 0xf000000100000000
-+#define KERNEL_START 0xf000000004000000
-+#define PERCPU_ADDR 0xf100000000000000-PERCPU_PAGE_SIZE
-+#define SHAREDINFO_ADDR 0xf100000000000000
-+#define VHPT_ADDR 0xf200000000000000
-+#else
- #define KERNEL_START 0xa000000100000000
- #define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
-+#endif
-
- #ifndef __ASSEMBLY__
-
-@@ -218,9 +226,13 @@
- # define PERFMON_IS_SYSWIDE() (0)
- #endif
-
-+#ifdef XEN
-+#define IA64_HAS_EXTRA_STATE(t) 0
-+#else
- #define IA64_HAS_EXTRA_STATE(t) \
- ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
- || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE())
-+#endif
-
- #define __switch_to(prev,next,last) do { \
- if (IA64_HAS_EXTRA_STATE(prev)) \
-@@ -249,6 +261,9 @@
- #else
- # define switch_to(prev,next,last) __switch_to(prev, next, last)
- #endif
-+//#ifdef XEN
-+//#undef switch_to
-+//#endif
-
- /*
- * On IA-64, we don't want to hold the runqueue's lock during the low-level context-switch,
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/time.c
--- a/xen/arch/ia64/patch/linux-2.6.7/time.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,338 +0,0 @@
---- ../../linux-2.6.7/arch/ia64/kernel/time.c 2004-06-15 23:19:01.000000000 -0600
-+++ arch/ia64/time.c 2005-03-14 17:27:11.000000000 -0700
-@@ -10,16 +10,22 @@
- */
- #include <linux/config.h>
-
-+#ifndef XEN
- #include <linux/cpu.h>
-+#endif
- #include <linux/init.h>
- #include <linux/kernel.h>
- #include <linux/module.h>
-+#ifndef XEN
- #include <linux/profile.h>
-+#endif
- #include <linux/sched.h>
- #include <linux/time.h>
- #include <linux/interrupt.h>
- #include <linux/efi.h>
-+#ifndef XEN
- #include <linux/profile.h>
-+#endif
- #include <linux/timex.h>
-
- #include <asm/machvec.h>
-@@ -29,6 +35,9 @@
- #include <asm/sal.h>
- #include <asm/sections.h>
- #include <asm/system.h>
-+#ifdef XEN
-+#include <asm/ia64_int.h>
-+#endif
-
- extern unsigned long wall_jiffies;
-
-@@ -45,6 +54,59 @@
-
- #endif
-
-+#ifdef XEN
-+volatile unsigned long last_nsec_offset;
-+extern rwlock_t xtime_lock;
-+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
-+static s_time_t stime_irq; /* System time at last 'time update' */
-+
-+static inline u64 get_time_delta(void)
-+{
-+ return ia64_get_itc();
-+}
-+
-+s_time_t get_s_time(void)
-+{
-+ s_time_t now;
-+ unsigned long flags;
-+
-+ read_lock_irqsave(&xtime_lock, flags);
-+
-+ now = stime_irq + get_time_delta();
-+
-+ /* Ensure that the returned system time is monotonically increasing. */
-+ {
-+ static s_time_t prev_now = 0;
-+ if ( unlikely(now < prev_now) )
-+ now = prev_now;
-+ prev_now = now;
-+ }
-+
-+ read_unlock_irqrestore(&xtime_lock, flags);
-+
-+ return now;
-+}
-+
-+void update_dom_time(struct vcpu *v)
-+{
-+// FIXME: implement this?
-+// printf("update_dom_time: called, not implemented, skipping\n");
-+ return;
-+}
-+
-+/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
-+void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
-+{
-+// FIXME: Should this be do_settimeofday (from linux)???
-+ printf("do_settime: called, not implemented, stopping\n");
-+ dummy();
-+}
-+#endif
-+
-+#if 0 /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-+#endif /* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
-+
-+#ifndef XEN
- static void
- itc_reset (void)
- {
-@@ -80,12 +142,15 @@
- return (elapsed_cycles*local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT;
- }
-
-+#ifndef XEN
- static struct time_interpolator itc_interpolator = {
- .get_offset = itc_get_offset,
- .update = itc_update,
- .reset = itc_reset
- };
-+#endif
-
-+#ifndef XEN
- int
- do_settimeofday (struct timespec *tv)
- {
-@@ -95,7 +160,9 @@
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
-+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_seqlock_irq(&xtime_lock);
-+#endif
- {
- /*
- * This is revolting. We need to set "xtime" correctly. However, the value
-@@ -117,12 +184,15 @@
- time_esterror = NTP_PHASE_LIMIT;
- time_interpolator_reset();
- }
-+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_sequnlock_irq(&xtime_lock);
-+#endif
- clock_was_set();
- return 0;
- }
-
- EXPORT_SYMBOL(do_settimeofday);
-+#endif
-
- void
- do_gettimeofday (struct timeval *tv)
-@@ -185,6 +255,7 @@
- }
-
- EXPORT_SYMBOL(do_gettimeofday);
-+#endif
-
- /*
- * The profiling function is SMP safe. (nothing can mess
-@@ -195,6 +266,9 @@
- static inline void
- ia64_do_profile (struct pt_regs * regs)
- {
-+#ifdef XEN
-+}
-+#else
- unsigned long ip, slot;
- extern cpumask_t prof_cpu_mask;
-
-@@ -231,24 +305,89 @@
- ip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[ip]);
- }
-+#endif
-+
-+#ifdef XEN
-+unsigned long domain0_ready = 0; // FIXME (see below)
-+#define typecheck(a,b) 1
-+/* FROM linux/include/linux/jiffies.h */
-+/*
-+ * These inlines deal with timer wrapping correctly. You are
-+ * strongly encouraged to use them
-+ * 1. Because people otherwise forget
-+ * 2. Because if the timer wrap changes in future you won't have to
-+ * alter your driver code.
-+ *
-+ * time_after(a,b) returns true if the time a is after time b.
-+ *
-+ * Do this with "<0" and ">=0" to only test the sign of the result. A
-+ * good compiler would generate better code (and a really good compiler
-+ * wouldn't care). Gcc is currently neither.
-+ */
-+#define time_after(a,b) \
-+ (typecheck(unsigned long, a) && \
-+ typecheck(unsigned long, b) && \
-+ ((long)(b) - (long)(a) < 0))
-+#define time_before(a,b) time_after(b,a)
-+
-+#define time_after_eq(a,b) \
-+ (typecheck(unsigned long, a) && \
-+ typecheck(unsigned long, b) && \
-+ ((long)(a) - (long)(b) >= 0))
-+#define time_before_eq(a,b) time_after_eq(b,a)
-+#endif
-
- static irqreturn_t
- timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
- {
- unsigned long new_itm;
-
-+#ifndef XEN
- if (unlikely(cpu_is_offline(smp_processor_id()))) {
- return IRQ_HANDLED;
- }
-+#endif
-+#ifdef XEN
-+ if (current->domain == dom0) {
-+ // FIXME: there's gotta be a better way of doing this...
-+ // We have to ensure that domain0 is launched before we
-+ // call vcpu_timer_expired on it
-+ //domain0_ready = 1; // moved to xensetup.c
-+ }
-+ if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) {
-+ vcpu_pend_timer(dom0->vcpu[0]);
-+ //vcpu_set_next_timer(dom0->vcpu[0]);
-+ domain_wake(dom0->vcpu[0]);
-+ }
-+ if (!is_idle_task(current->domain) && current->domain != dom0) {
-+ if (vcpu_timer_expired(current)) {
-+ vcpu_pend_timer(current);
-+ // ensure another timer interrupt happens even if domain doesn't
-+ vcpu_set_next_timer(current);
-+ domain_wake(current);
-+ }
-+ }
-+ raise_actimer_softirq();
-+#endif
-
-+#ifndef XEN
- platform_timer_interrupt(irq, dev_id, regs);
-+#endif
-
- new_itm = local_cpu_data->itm_next;
-
- if (!time_after(ia64_get_itc(), new_itm))
-+#ifdef XEN
-+ return;
-+#else
- printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
- ia64_get_itc(), new_itm);
-+#endif
-
-+#ifdef XEN
-+// printf("GOT TO HERE!!!!!!!!!!!\n");
-+ //while(1);
-+#endif
- ia64_do_profile(regs);
-
- while (1) {
-@@ -269,10 +408,16 @@
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
-+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_seqlock(&xtime_lock);
-+#endif
-+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- do_timer(regs);
-+#endif
- local_cpu_data->itm_next = new_itm;
-+#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN
- write_sequnlock(&xtime_lock);
-+#endif
- } else
- local_cpu_data->itm_next = new_itm;
-
-@@ -292,7 +437,12 @@
- */
- while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
- new_itm += local_cpu_data->itm_delta;
-+//#ifdef XEN
-+// vcpu_set_next_timer(current);
-+//#else
-+//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm);
- ia64_set_itm(new_itm);
-+//#endif
- /* double check, in case we got hit by a (slow) PMI: */
- } while (time_after_eq(ia64_get_itc(), new_itm));
- return IRQ_HANDLED;
-@@ -307,6 +457,7 @@
- int cpu = smp_processor_id();
- unsigned long shift = 0, delta;
-
-+printf("ia64_cpu_local_tick: about to call ia64_set_itv\n");
- /* arrange for the cycle counter to generate a timer interrupt: */
- ia64_set_itv(IA64_TIMER_VECTOR);
-
-@@ -320,6 +471,7 @@
- shift = (2*(cpu - hi) + 1) * delta/hi/2;
- }
- local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
-+printf("***** ia64_cpu_local_tick: Setting itm to
%lx\n",local_cpu_data->itm_next);
- ia64_set_itm(local_cpu_data->itm_next);
- }
-
-@@ -335,6 +487,7 @@
- * frequency and then a PAL call to determine the frequency ratio between the ITC
- * and the base frequency.
- */
-+
- status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
- &platform_base_freq, &platform_base_drift);
- if (status != 0) {
-@@ -384,9 +537,11 @@
- + itc_freq/2)/itc_freq;
-
- if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
-+#ifndef XEN
- itc_interpolator.frequency = local_cpu_data->itc_freq;
- itc_interpolator.drift = itc_drift;
- register_time_interpolator(&itc_interpolator);
-+#endif
- }
-
- /* Setup the CPU local timer tick */
-@@ -395,7 +550,9 @@
-
- static struct irqaction timer_irqaction = {
- .handler = timer_interrupt,
-+#ifndef XEN
- .flags = SA_INTERRUPT,
-+#endif
- .name = "timer"
- };
-
-@@ -403,12 +560,16 @@
- time_init (void)
- {
- register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
-+#ifndef XEN
- efi_gettimeofday(&xtime);
-+#endif
- ia64_init_itm();
-
-+#ifndef XEN
- /*
- * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
- * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
- */
- set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
-+#endif
- }
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/tlb.c
--- a/xen/arch/ia64/patch/linux-2.6.7/tlb.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,48 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/mm/tlb.c 2004-06-15 23:19:43.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/tlb.c 2004-08-25 19:28:12.000000000 -0600
-@@ -21,7 +21,9 @@
- #include <asm/mmu_context.h>
- #include <asm/pgalloc.h>
- #include <asm/pal.h>
-+#ifndef XEN
- #include <asm/tlbflush.h>
-+#endif
-
- static struct {
- unsigned long mask; /* mask of supported purge page-sizes */
-@@ -43,6 +45,9 @@
- void
- wrap_mmu_context (struct mm_struct *mm)
- {
-+#ifdef XEN
-+printf("wrap_mmu_context: called, not implemented\n");
-+#else
- unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
- struct task_struct *tsk;
- int i;
-@@ -83,6 +88,7 @@
- put_cpu();
- }
- local_flush_tlb_all();
-+#endif
- }
-
- void
-@@ -132,6 +138,9 @@
- void
- flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
- {
-+#ifdef XEN
-+printf("flush_tlb_range: called, not implemented\n");
-+#else
- struct mm_struct *mm = vma->vm_mm;
- unsigned long size = end - start;
- unsigned long nbits;
-@@ -163,6 +172,7 @@
- # endif
-
- ia64_srlz_i(); /* srlz.i implies srlz.d */
-+#endif
- }
- EXPORT_SYMBOL(flush_tlb_range);
-
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/types.h
--- a/xen/arch/ia64/patch/linux-2.6.7/types.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,15 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/asm-ia64/types.h 2004-06-15 23:19:01.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/types.h 2004-11-11 17:08:30.000000000 -0700
-@@ -1,5 +1,12 @@
- #ifndef _ASM_IA64_TYPES_H
- #define _ASM_IA64_TYPES_H
-+#ifdef XEN
-+#ifndef __ASSEMBLY__
-+typedef unsigned long ssize_t;
-+typedef unsigned long size_t;
-+typedef long long loff_t;
-+#endif
-+#endif
-
- /*
- * This file is never included by application software unless explicitly requested (e.g.,
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/unaligned.c
--- a/xen/arch/ia64/patch/linux-2.6.7/unaligned.c Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,97 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/arch/ia64/kernel/unaligned.c 2004-06-15 23:20:03.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/arch/ia64/unaligned.c 2004-08-25 19:28:12.000000000 -0600
-@@ -15,8 +15,10 @@
- */
- #include <linux/kernel.h>
- #include <linux/sched.h>
-+#ifndef XEN
- #include <linux/smp_lock.h>
- #include <linux/tty.h>
-+#endif
-
- #include <asm/intrinsics.h>
- #include <asm/processor.h>
-@@ -24,7 +26,16 @@
- #include <asm/uaccess.h>
- #include <asm/unaligned.h>
-
-+#ifdef XEN
-+#define ia64_peek(x...) printk("ia64_peek: called, not implemented\n")
-+#define ia64_poke(x...) printk("ia64_poke: called, not implemented\n")
-+#define ia64_sync_fph(x...) printk("ia64_sync_fph: called, not implemented\n")
-+#define ia64_flush_fph(x...) printk("ia64_flush_fph: called, not implemented\n")
-+#define die_if_kernel(x...) printk("die_if_kernel: called, not implemented\n")
-+#define jiffies 0
-+#else
- extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
-+#endif
-
- #undef DEBUG_UNALIGNED_TRAP
-
-@@ -437,7 +448,11 @@
- }
-
-
-+#ifdef XEN
-+void
-+#else
- static void
-+#endif
- setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
- {
- struct switch_stack *sw = (struct switch_stack *) regs - 1;
-@@ -611,7 +626,11 @@
- }
-
-
-+#ifdef XEN
-+void
-+#else
- static void
-+#endif
- getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
- {
- struct switch_stack *sw = (struct switch_stack *) regs - 1;
-@@ -1298,7 +1317,9 @@
- mm_segment_t old_fs = get_fs();
- unsigned long bundle[2];
- unsigned long opcode;
-+#ifndef XEN
- struct siginfo si;
-+#endif
- const struct exception_table_entry *eh = NULL;
- union {
- unsigned long l;
-@@ -1317,6 +1338,9 @@
- * user-level unaligned accesses. Otherwise, a clever program could trick this
- * handler into reading an arbitrary kernel addresses...
- */
-+#ifdef XEN
-+printk("ia64_handle_unaligned: called, not working yet\n");
-+#else
- if (!user_mode(regs))
- eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
- if (user_mode(regs) || eh) {
-@@ -1353,6 +1377,7 @@
-
- if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
- goto failure;
-+#endif
-
- /*
- * extract the instruction from the bundle given the slot number
-@@ -1493,6 +1518,7 @@
- /* NOT_REACHED */
- }
- force_sigbus:
-+#ifndef XEN
- si.si_signo = SIGBUS;
- si.si_errno = 0;
- si.si_code = BUS_ADRALN;
-@@ -1501,5 +1527,6 @@
- si.si_isr = 0;
- si.si_imm = 0;
- force_sig_info(SIGBUS, &si, current);
-+#endif
- goto done;
- }
diff -r de3576a1c62c -r dfaf788ab18c xen/arch/ia64/patch/linux-2.6.7/wait.h
--- a/xen/arch/ia64/patch/linux-2.6.7/wait.h Thu Aug 25 20:52:38 2005
+++ /dev/null Fri Aug 26 20:47:16 2005
@@ -1,26 +0,0 @@
---- /home/djm/src/xen/xeno-ia64.bk/xen/linux-2.6.7/include/linux/wait.h 2004-06-15 23:19:31.000000000 -0600
-+++ /home/djm/src/xen/xeno-ia64.bk/xen/include/asm-ia64/linux/wait.h 2004-08-25 19:28:13.000000000 -0600
-@@ -104,10 +104,15 @@
- list_del(&old->task_list);
- }
-
-+#ifdef XEN
-+void FASTCALL(__wake_up(struct task_struct *p));
-+#else
- void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
-+#endif
- extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
- extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
-
-+#ifndef XEN
- #define wake_up(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
- #define wake_up_nr(x, nr) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
- #define wake_up_all(x) __wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0, NULL)
-@@ -117,6 +122,7 @@
- #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
- #define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
- #define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
-+#endif
-
- #define __wait_event(wq, condition) \
- do { \
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog