WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] libxc domain builder rewrite, core bits.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] libxc domain builder rewrite, core bits.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 26 Jan 2007 05:35:26 -0800
Delivery-date: Fri, 26 Jan 2007 05:51:42 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
# Date 1169763412 0
# Node ID fd50500eee7ce4477b9e80413f1a2ea0dfe661c8
# Parent  50d9e2ddc377a746eb094578b264bba67045cbce
libxc domain builder rewrite, core bits.

Signed-off-by: Gerd Hoffmann <kraxel@xxxxxxx>
---
 tools/libxc/Makefile           |   14 
 tools/libxc/xc_dom.h           |  261 +++++++++++++
 tools/libxc/xc_dom_binloader.c |  294 +++++++++++++++
 tools/libxc/xc_dom_boot.c      |  515 +++++++++++++++++++++++++++
 tools/libxc/xc_dom_core.c      |  773 +++++++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_dom_elfloader.c |  283 +++++++++++++++
 tools/libxc/xc_dom_ia64.c      |  118 ++++++
 tools/libxc/xc_dom_powerpc64.c |  100 +++++
 tools/libxc/xc_dom_x86.c       |  559 +++++++++++++++++++++++++++++
 9 files changed, 2917 insertions(+)

diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Jan 25 22:16:52 2007 +0000
+++ b/tools/libxc/Makefile      Thu Jan 25 22:16:52 2007 +0000
@@ -44,6 +44,20 @@ libelf-relocate.o: libelf-relocate.c lib
 
 # add libelf bits to libxc
 GUEST_SRCS-y += $(LIBELF_SRCS)
+
+# new domain builder
+GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
+GUEST_SRCS-y += xc_dom_elfloader.c
+GUEST_SRCS-y += xc_dom_binloader.c
+
+ifeq ($(CONFIG_POWERPC),y)
+# big endian boxes
+GUEST_SRCS-y += xc_dom_powerpc64.c
+else
+# little endian boxes
+GUEST_SRCS-y += xc_dom_x86.c
+GUEST_SRCS-y += xc_dom_ia64.c
+endif
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom.h      Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,261 @@
+#include <xen/libelf.h>
+
+#define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
+
+/* --- typedefs and structs ---------------------------------------- */
+
+typedef uint64_t xen_vaddr_t;
+typedef uint64_t xen_paddr_t;
+
+/* FIXME: temporary hack ... */
+#ifndef PRIpfn
+#define PRIpfn "lx"
+#endif
+
+/* Guest virtual address range [vstart, vend) plus the first guest page
+ * frame backing it; xc_dom_seg_to_ptr() maps such a range. */
+struct xc_dom_seg {
+    xen_vaddr_t vstart;
+    xen_vaddr_t vend;
+    xen_pfn_t pfn;
+};
+
+/* List node referenced from xc_dom_image.memblocks ("malloc memory pool").
+ * NOTE(review): presumably mmap_ptr/mmap_len describe an mmap()ed region
+ * and memory[] is inline payload -- the allocator is not part of this
+ * header, confirm there. */
+struct xc_dom_mem {
+    struct xc_dom_mem *next;
+    void *mmap_ptr;
+    size_t mmap_len;
+    unsigned char memory[0];
+};
+
+/* List node referenced from xc_dom_image.phys_pages ("physical memory").
+ * NOTE(review): presumably ptr is the local mapping of `count` guest
+ * pages starting at pfn `first` -- confirm against xc_dom_pfn_to_ptr(). */
+struct xc_dom_phys {
+    struct xc_dom_phys *next;
+    void *ptr;
+    xen_pfn_t first;
+    xen_pfn_t count;
+};
+
+/* All state of one domain build, passed to every xc_dom_* function. */
+struct xc_dom_image {
+    /* files */
+    void *kernel_blob;
+    size_t kernel_size;
+    void *ramdisk_blob;
+    size_t ramdisk_size;
+
+    /* arguments and parameters */
+    char *cmdline;
+    uint32_t f_requested[XENFEAT_NR_SUBMAPS];
+
+    /* info from (elf) kernel image */
+    struct elf_dom_parms parms;
+    char *guest_type;
+
+    /* memory layout */
+    struct xc_dom_seg kernel_seg;
+    struct xc_dom_seg ramdisk_seg;
+    struct xc_dom_seg p2m_seg;
+    struct xc_dom_seg pgtables_seg;
+    xen_pfn_t start_info_pfn;
+    xen_pfn_t console_pfn;
+    xen_pfn_t xenstore_pfn;
+    xen_pfn_t shared_info_pfn;
+    xen_pfn_t bootstack_pfn;
+    xen_vaddr_t virt_alloc_end;
+    xen_vaddr_t bsd_symtab_start;
+
+    /* initial page tables */
+    unsigned int pgtables;
+    unsigned int pg_l4;
+    unsigned int pg_l3;
+    unsigned int pg_l2;
+    unsigned int pg_l1;
+    unsigned int alloc_bootstack;
+    unsigned int extra_pages;
+    xen_vaddr_t virt_pgtab_end;
+
+    /* other state info */
+    uint32_t f_active[XENFEAT_NR_SUBMAPS];
+    xen_pfn_t *p2m_host;
+    void *p2m_guest;
+
+    /* physical memory */
+    xen_pfn_t total_pages;
+    struct xc_dom_phys *phys_pages;
+
+    /* malloc memory pool */
+    struct xc_dom_mem *memblocks;
+
+    /* memory footprint stats */
+    size_t alloc_malloc;
+    size_t alloc_mem_map;
+    size_t alloc_file_map;
+    size_t alloc_domU_map;
+
+    /* misc xen domain config stuff */
+    unsigned long flags;
+    unsigned int console_evtchn;
+    unsigned int xenstore_evtchn;
+    xen_pfn_t shared_info_mfn;
+
+    int guest_xc;
+    domid_t guest_domid;
+    int shadow_enabled;
+
+    int xen_version;
+    xen_capabilities_info_t xen_caps;
+
+    /* kernel loader */
+    struct xc_dom_loader *kernel_loader;
+    void *private_loader;
+
+    /* arch hooks */
+    struct xc_dom_arch *arch_hooks;
+};
+
+/* --- pluggable kernel loader ------------------------------------- */
+
+/* One kernel image format handler: a name, three callbacks taking the
+ * domain image, and a `next` link.  Handlers are handed to
+ * xc_dom_register_loader(). */
+struct xc_dom_loader {
+    char *name;
+    int (*probe) (struct xc_dom_image * dom);
+    int (*parser) (struct xc_dom_image * dom);
+    int (*loader) (struct xc_dom_image * dom);
+
+    struct xc_dom_loader *next;
+};
+
+#define __init __attribute__ ((constructor))
+void xc_dom_register_loader(struct xc_dom_loader *loader);
+
+/* --- arch specific hooks ----------------------------------------- */
+
+/* Per guest-type hook table, handed to xc_dom_register_arch_hooks().
+ * page_shift is what XC_DOM_PAGE_SHIFT()/XC_DOM_PAGE_SIZE() read. */
+struct xc_dom_arch {
+    /* pagetable setup */
+    int (*alloc_magic_pages) (struct xc_dom_image * dom);
+    int (*count_pgtables) (struct xc_dom_image * dom);
+    int (*setup_pgtables) (struct xc_dom_image * dom);
+
+    /* arch-specific data structs setup */
+    int (*start_info) (struct xc_dom_image * dom);
+    int (*shared_info) (struct xc_dom_image * dom, void *shared_info);
+    int (*vcpu) (struct xc_dom_image * dom, void *vcpu_ctxt);
+
+    char *guest_type;
+    int page_shift;
+    int sizeof_pfn;
+
+    struct xc_dom_arch *next;
+};
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
+
+#define XC_DOM_PAGE_SHIFT(dom)  ((dom)->arch_hooks->page_shift)
+#define XC_DOM_PAGE_SIZE(dom)   (1 << (dom)->arch_hooks->page_shift)
+
+/* --- main functions ---------------------------------------------- */
+
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char 
*features);
+void xc_dom_release_phys(struct xc_dom_image *dom);
+void xc_dom_release(struct xc_dom_image *dom);
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+
+size_t xc_dom_check_gzip(void *blob, size_t ziplen);
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen);
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
+
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+                     size_t memsize);
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+                      size_t memsize);
+
+int xc_dom_parse_image(struct xc_dom_image *dom);
+int xc_dom_build_image(struct xc_dom_image *dom);
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom);
+
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid);
+int xc_dom_boot_mem_init(struct xc_dom_image *dom);
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                          xen_pfn_t count);
+int xc_dom_boot_image(struct xc_dom_image *dom);
+int xc_dom_compat_check(struct xc_dom_image *dom);
+
+/* --- debugging bits ---------------------------------------------- */
+
+extern FILE *xc_dom_logfile;
+
+void xc_dom_loginit(void);
+int xc_dom_printf(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+                     const char *fmt, ...)
+    __attribute__ ((format(printf, 4, 5)));
+#define xc_dom_panic(err, fmt, args...) \
+       xc_dom_panic_func(__FILE__, __LINE__, err, fmt, ## args)
+#define xc_dom_trace(mark) \
+       xc_dom_printf("%s:%d: trace %s\n", __FILE__, __LINE__, mark)
+
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
+
+/* --- simple memory pool ------------------------------------------ */
+
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                           const char *filename, size_t * size);
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
+
+/* --- alloc memory pool ------------------------------------------- */
+
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name);
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+                        struct xc_dom_seg *seg, char *name,
+                        xen_vaddr_t start, xen_vaddr_t size);
+
+/* --- misc bits --------------------------------------------------- */
+
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
+                       xen_pfn_t count);
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
+void xc_dom_unmap_all(struct xc_dom_image *dom);
+
+/*
+ * Map a whole segment: the segment's byte length is rounded up to full
+ * pages and that many pages, starting at seg->pfn, are requested from
+ * xc_dom_pfn_to_ptr().  Returns whatever xc_dom_pfn_to_ptr() returns.
+ */
+static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
+                                      struct xc_dom_seg *seg)
+{
+    unsigned int pgsz = XC_DOM_PAGE_SIZE(dom);
+    xen_vaddr_t bytes = seg->vend - seg->vstart;
+    xen_pfn_t npages = (bytes + pgsz - 1) / pgsz;
+
+    return xc_dom_pfn_to_ptr(dom, seg->pfn, npages);
+}
+
+/*
+ * Translate a guest virtual address into a pointer into the locally
+ * mapped guest memory.
+ *
+ * The address is taken relative to dom->parms.virt_base, split into a
+ * page number and an in-page offset, and the page is looked up through
+ * xc_dom_pfn_to_ptr() (with a count of 0, exactly as before).
+ *
+ * Returns NULL if vaddr lies below virt_base or the page lookup fails.
+ */
+static inline void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
+                                        xen_vaddr_t vaddr)
+{
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    xen_vaddr_t rel;
+    char *ptr;
+
+    /* The unsigned subtraction below would wrap around otherwise. */
+    if (vaddr < dom->parms.virt_base)
+        return NULL;
+    rel = vaddr - dom->parms.virt_base;
+
+    /* char *, because arithmetic on void * is a GNU extension. */
+    ptr = xc_dom_pfn_to_ptr(dom, rel / page_size, 0);
+    if (!ptr)
+        return NULL;
+    return ptr + (rel % page_size);
+}
+
+/* Result of elf_xen_feature_get() for XENFEAT_auto_translated_physmap in
+ * the domain's active feature map (dom->f_active). */
+static inline int xc_dom_feature_translated(struct xc_dom_image *dom)
+{
+    return elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active);
+}
+
+/*
+ * Frame number to use from the host side for a guest pfn: the pfn itself
+ * once dom->shadow_enabled is set, the dom->p2m_host[] entry otherwise.
+ */
+static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom,
+                                        xen_pfn_t pfn)
+{
+    return dom->shadow_enabled ? pfn : dom->p2m_host[pfn];
+}
+
+/*
+ * Frame number as the guest sees it: the pfn itself when the guest uses
+ * the auto-translated physmap feature, the dom->p2m_host[] entry
+ * otherwise.
+ */
+static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom,
+                                         xen_pfn_t pfn)
+{
+    return xc_dom_feature_translated(dom) ? pfn : dom->p2m_host[pfn];
+}
+
+/* --- arch bits --------------------------------------------------- */
+
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_binloader.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_binloader.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,294 @@
+/******************************************************************************
+ *
+ * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
+ * present. The only requirement is that it must have a xen_bin_image table
+ * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
+ * Those familiar with the multiboot specification should recognize this, it's
+ * (almost) the same as the multiboot header.
+ * The layout of the xen_bin_image table is:
+ *
+ * Offset Type Name          Note
+ * 0      uint32_t  magic         required
+ * 4      uint32_t  flags         required
+ * 8      uint32_t  checksum      required
+ * 12     uint32_t  header_addr   required
+ * 16     uint32_t  load_addr     required
+ * 20     uint32_t  load_end_addr required
+ * 24     uint32_t  bss_end_addr  required
+ * 28     uint32_t  entry_addr    required
+ *
+ * - magic
+ *   Magic number identifying the table. For images to be loaded by Xen 3, the
+ *   magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
+ * - flags
+ *   bit 0: indicates whether the image needs to be loaded on a page boundary
+ *   bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that memory info should be passed to the image)
+ *   bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that the bootloader should pass video mode info to the image)
+ *   bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
+ *           that the values in the fields header_addr - entry_addr are
+ *           valid)
+ *   All other bits should be set to 0.
+ * - checksum
+ *   When added to "magic" and "flags", the resulting value should be 0.
+ * - header_addr
+ *   Contains the virtual address corresponding to the beginning of the
+ *   table - the memory location at which the magic value is supposed to be
+ *   loaded. This field serves to synchronize the mapping between OS image
+ *   offsets and virtual memory addresses.
+ * - load_addr
+ *   Contains the virtual address of the beginning of the text segment. The
+ *   offset in the OS image file at which to start loading is defined by the
+ *   offset at which the table was found, minus (header addr - load addr).
+ *   load addr must be less than or equal to header addr.
+ * - load_end_addr
+ *   Contains the virtual address of the end of the data segment.
+ *   (load_end_addr - load_addr) specifies how much data to load. This implies
+ *   that the text and data segments must be consecutive in the OS image. If
+ *   this field is zero, the domain builder assumes that the text and data
+ *   segments occupy the whole OS image file.
+ * - bss_end_addr
+ *   Contains the virtual address of the end of the bss segment. The domain
+ *   builder initializes this area to zero, and reserves the memory it occupies
+ *   to avoid placing boot modules and other data relevant to the loaded image
+ *   in that area. If this field is zero, the domain builder assumes that no
+ *   bss segment is present.
+ * - entry_addr
+ *   The virtual address at which to start execution of the loaded image.
+ *
+ * Some of the field descriptions were copied from "The Multiboot
+ * Specification", Copyright 1995, 96 Bryan Ford <baford@xxxxxxxxxxx>,
+ * Erich Stefan Boleyn <erich@xxxxxxxx> Copyright 1999, 2000, 2001, 2002
+ * Free Software Foundation, Inc.
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE_X86-1))&PAGE_MASK_X86)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK_X86)
+
+/* In-image header located by find_table(); the meaning of each field is
+ * described in the comment at the top of this file. */
+struct xen_bin_image_table
+{
+    uint32_t magic;
+    uint32_t flags;
+    uint32_t checksum;
+    uint32_t header_addr;
+    uint32_t load_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t entry_addr;
+};
+
+#define XEN_MULTIBOOT_MAGIC3 0x336ec578
+
+#define XEN_MULTIBOOT_FLAG_ALIGN4K     0x00000001
+#define XEN_MULTIBOOT_FLAG_NEEDMEMINFO 0x00000002
+#define XEN_MULTIBOOT_FLAG_NEEDVIDINFO 0x00000004
+#define XEN_MULTIBOOT_FLAG_ADDRSVALID  0x00010000
+#define XEN_MULTIBOOT_FLAG_PAE_SHIFT   14
+#define XEN_MULTIBOOT_FLAG_PAE_MASK    (3 << XEN_MULTIBOOT_FLAG_PAE_SHIFT)
+
+/* Flags we test for */
+#define FLAGS_MASK     ((~ 0) & (~ XEN_MULTIBOOT_FLAG_ALIGN4K) & \
+                               (~ XEN_MULTIBOOT_FLAG_PAE_MASK))
+#define FLAGS_REQUIRED XEN_MULTIBOOT_FLAG_ADDRSVALID
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Search the start of the kernel image for a xen_bin_image_table.
+ *
+ * Every 32-bit aligned offset below 8192 at which a complete table still
+ * fits into the image is probed for XEN_MULTIBOOT_MAGIC3.  A candidate is
+ * accepted when magic + flags + checksum add up to zero (mod 2^32).
+ *
+ * Returns a pointer into dom->kernel_blob, or NULL if there is no valid
+ * table -- which includes images too small to contain one.
+ */
+static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *table;
+    char *blob = dom->kernel_blob;
+    size_t limit, off;
+
+    /* Without this guard the size subtraction below wraps around and the
+     * scan would read past the end of a tiny image. */
+    if (NULL == blob || dom->kernel_size < sizeof(*table))
+        return NULL;
+
+    /* Exclusive upper bound for the offset at which a table may start. */
+    limit = dom->kernel_size - sizeof(*table) + 1;
+    if (limit > 8192)
+        limit = 8192;
+
+    for (off = 0; off < limit; off += sizeof(uint32_t))
+    {
+        table = (struct xen_bin_image_table *)(blob + off);
+        if (XEN_MULTIBOOT_MAGIC3 != table->magic)
+            continue;
+        /* Checksum correct? */
+        if (0 == table->magic + table->flags + table->checksum)
+            return table;
+    }
+    return NULL;
+}
+
+/* Probe hook: 0 if find_table() locates a valid table, -EINVAL if not. */
+static int xc_dom_probe_bin_kernel(struct xc_dom_image *dom)
+{
+    return find_table(dom) ? 0 : -EINVAL;
+}
+
+/*
+ * Parser hook: validate the xen_bin_image_table and derive the kernel
+ * segment layout from it.
+ *
+ * On success fills in dom->kernel_seg.vstart/vend, dom->parms.virt_base,
+ * dom->parms.virt_entry and dom->guest_type and returns 0.  Returns
+ * -EINVAL if no table is found or if flags/addresses are inconsistent
+ * (after reporting the reason through xc_dom_panic()).
+ */
+static int xc_dom_parse_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t pae_flags;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    xc_dom_printf("%s: multiboot header fields\n", __FUNCTION__);
+    xc_dom_printf("  flags:         0x%" PRIx32 "\n", image_info->flags);
+    xc_dom_printf("  header_addr:   0x%" PRIx32 "\n", image_info->header_addr);
+    xc_dom_printf("  load_addr:     0x%" PRIx32 "\n", image_info->load_addr);
+    xc_dom_printf("  load_end_addr: 0x%" PRIx32 "\n",
+                  image_info->load_end_addr);
+    xc_dom_printf("  bss_end_addr:  0x%" PRIx32 "\n",
+                  image_info->bss_end_addr);
+    xc_dom_printf("  entry_addr:    0x%" PRIx32 "\n", image_info->entry_addr);
+
+    /* Check the flags */
+    if ( FLAGS_REQUIRED != (image_info->flags & FLAGS_MASK) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: xen_bin_image_table flags required "
+                     "0x%08" PRIx32 " found 0x%08" PRIx32 "\n",
+                     __FUNCTION__, FLAGS_REQUIRED,
+                     image_info->flags & FLAGS_MASK);
+        return -EINVAL;
+    }
+
+    /* Sanity check on the addresses: the table must not sit before the
+     * load address, and the part of the file in front of the table must
+     * be large enough to cover (header_addr - load_addr). */
+    if ( image_info->header_addr < image_info->load_addr ||
+         ((char *) image_info - image) <
+         (image_info->header_addr - image_info->load_addr) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid header_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Virtual address corresponding to file offset 0. */
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    xc_dom_printf("%s: calculated addresses\n", __FUNCTION__);
+    xc_dom_printf("  start_addr:    0x%" PRIx32 "\n", start_addr);
+    xc_dom_printf("  load_end_addr: 0x%" PRIx32 "\n", load_end_addr);
+    xc_dom_printf("  bss_end_addr:  0x%" PRIx32 "\n", bss_end_addr);
+
+    /* The loaded range must lie within the file and must not be
+     * negative; the loader computes load_end_addr - load_addr as an
+     * unsigned copy length. */
+    if ( start_addr + image_size < load_end_addr ||
+         load_end_addr < image_info->load_addr )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid load_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( bss_end_addr < load_end_addr)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid bss_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    dom->kernel_seg.vstart = image_info->load_addr;
+    dom->kernel_seg.vend   = bss_end_addr;
+    dom->parms.virt_base   = start_addr;
+    dom->parms.virt_entry  = image_info->entry_addr;
+
+    /* The two PAE flag bits select the guest type; all four values of
+     * the field are handled below. */
+    pae_flags = image_info->flags & XEN_MULTIBOOT_FLAG_PAE_MASK;
+    switch (pae_flags >> XEN_MULTIBOOT_FLAG_PAE_SHIFT) {
+    case 0:
+        dom->guest_type = "xen-3.0-x86_32";
+        break;
+    case 1:
+        dom->guest_type = "xen-3.0-x86_32p";
+        break;
+    case 2:
+        dom->guest_type = "xen-3.0-x86_64";
+        break;
+    case 3:
+        /* Kernel detects PAE at runtime.  So try to figure out whether
+         * xen supports PAE and advertise a PAE-capable kernel in case
+         * it does. */
+        dom->guest_type = "xen-3.0-x86_32";
+        if (strstr(dom->xen_caps, "xen-3.0-x86_32p")) {
+            xc_dom_printf("%s: PAE fixup\n", __FUNCTION__);
+            dom->guest_type = "xen-3.0-x86_32p";
+            dom->parms.pae  = 2;
+        }
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Loader hook: copy the text/data part of the image to the start of the
+ * kernel segment and zero the bss that follows it.
+ *
+ * The addresses are recomputed from the table exactly as in
+ * xc_dom_parse_bin_kernel().  Returns 0 on success, -EINVAL if no table
+ * is found and -1 if the kernel segment cannot be mapped.
+ */
+static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    char *dest;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t skip, text_size, bss_size;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    /* It's possible that we need to skip the first part of the image */
+    skip = image_info->load_addr - start_addr;
+    text_size = load_end_addr - image_info->load_addr;
+    bss_size = bss_end_addr - load_end_addr;
+
+    xc_dom_printf("%s: calculated sizes\n", __FUNCTION__);
+    xc_dom_printf("  skip:      0x%" PRIx32 "\n", skip);
+    xc_dom_printf("  text_size: 0x%" PRIx32 "\n", text_size);
+    xc_dom_printf("  bss_size:  0x%" PRIx32 "\n", bss_size);
+
+    /* xc_dom_vaddr_to_ptr() can return NULL; do not hand that to
+     * memcpy()/memset(). */
+    dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart);
+    if (NULL == dest)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to map kernel segment\n", __FUNCTION__);
+        return -1;
+    }
+    memcpy(dest, image + skip, text_size);
+    memset(dest + text_size, 0, bss_size);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Loader descriptor for the binary format handled in this file; `next`
+ * is left zero-initialized here. */
+static struct xc_dom_loader bin_loader = {
+    .name = "multiboot-binary",
+    .probe = xc_dom_probe_bin_kernel,
+    .parser = xc_dom_parse_bin_kernel,
+    .loader = xc_dom_load_bin_kernel,
+};
+
+/* __init expands to __attribute__((constructor)) (see xc_dom.h), so this
+ * registration runs without being called explicitly. */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&bin_loader);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * End:
+ */
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_boot.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_boot.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,515 @@
+/*
+ * Xen domain builder -- xen booter.
+ *
+ * This is the code which actually boots a fresh
+ * prepared domain image as xen guest domain.
+ *
+ * ==>  this is the only domain builder code piece
+ *          where xen hypercalls are allowed        <==
+ *
+ * This code is licensed under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+#include <xen/hvm/params.h>
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Issue XEN_DOMCTL_hypercall_init for the page containing
+ * dom->parms.virt_hypercall.
+ *
+ * A virt_hypercall value of -1 is treated as "nothing to do" and yields
+ * 0.  Otherwise returns the do_domctl() result; a non-zero result is
+ * also reported through xc_dom_panic().
+ */
+static int setup_hypercall_page(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    xen_pfn_t pfn;
+    int rc;
+
+    if (-1 == dom->parms.virt_hypercall)
+        return 0;
+    pfn = (dom->parms.virt_hypercall - dom->parms.virt_base)
+        >> XC_DOM_PAGE_SHIFT(dom);
+
+    xc_dom_printf("%s: vaddr=0x%" PRIx64 " pfn=0x%" PRIpfn "\n", __FUNCTION__,
+                  dom->parms.virt_hypercall, pfn);
+
+    /* Zero the request first, like the other domctl users in this file,
+     * so no uninitialized stack bytes are passed along. */
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.cmd = XEN_DOMCTL_hypercall_init;
+    domctl.domain = dom->guest_domid;
+    domctl.u.hypercall_init.gmfn = xc_dom_p2m_guest(dom, pfn);
+    rc = do_domctl(dom->guest_xc, &domctl);
+    if (0 != rc)
+        xc_dom_panic(XC_INTERNAL_ERROR, "%s: HYPERCALL_INIT failed (rc=%d)\n",
+                     __FUNCTION__, rc);
+    return rc;
+}
+
+/*
+ * Hand the vcpu context `ctxt` to Xen for vcpu 0 of `domid` via
+ * XEN_DOMCTL_setvcpucontext.  Returns the do_domctl() result; failures
+ * are additionally reported through xc_dom_panic().
+ */
+static int launch_vm(int xc, domid_t domid, void *ctxt)
+{
+    DECLARE_DOMCTL;
+    int rc;
+
+    xc_dom_printf("%s: called, ctxt=%p\n", __FUNCTION__, ctxt);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.u.vcpucontext.vcpu = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, ctxt);
+
+    rc = do_domctl(xc, &domctl);
+    if (0 == rc)
+        return 0;
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: SETVCPUCONTEXT failed (rc=%d)\n", __FUNCTION__, rc);
+    return rc;
+}
+
+/*
+ * Clear one guest page through xc_clear_domain_page().  pfn 0 is skipped
+ * and reported as success.  Returns the xc_clear_domain_page() result;
+ * failures are additionally reported through xc_dom_panic().
+ */
+static int clear_page(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    xen_pfn_t mfn;
+    int rc;
+
+    if (0 == pfn)
+        return 0;
+
+    mfn = xc_dom_p2m_host(dom, pfn);
+    xc_dom_printf("%s: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, pfn, mfn);
+
+    rc = xc_clear_domain_page(dom->guest_xc, dom->guest_domid, mfn);
+    if (0 == rc)
+        return 0;
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: xc_clear_domain_page failed (pfn 0x%" PRIpfn
+                 ", rc=%d)\n", __FUNCTION__, pfn, rc);
+    return rc;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: x86 bits                                                     */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+
+/*
+ * Issue the domctl associated with guest_type in the table below, if
+ * there is one.  Guest types without an entry are a no-op returning 0.
+ * A failing domctl is only logged as a warning; its result is returned.
+ * Without XEN_DOMCTL_set_compat at build time this just logs and
+ * returns 0.
+ */
+static int x86_compat(int xc, domid_t domid, char *guest_type)
+{
+#ifdef XEN_DOMCTL_set_compat
+    static const struct {
+        char           *guest;
+        unsigned long  cmd;
+    } types[] = {
+        { "xen-3.0-x86_32p", XEN_DOMCTL_set_compat },
+        { "xen-3.0-x86_64",  XEN_DOMCTL_set_native },
+    };
+    DECLARE_DOMCTL;
+    int idx, rc;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    for (idx = 0; idx < sizeof(types)/sizeof(types[0]); idx++)
+    {
+        if (0 != strcmp(types[idx].guest, guest_type))
+            continue;
+        domctl.cmd = types[idx].cmd;
+    }
+    if (0 == domctl.cmd)
+        return 0; /* nothing to do */
+
+    xc_dom_printf("%s: guest %s, cmd %d\n", __FUNCTION__,
+                  guest_type, domctl.cmd);
+    rc = do_domctl(xc, &domctl);
+    if (0 != rc)
+        xc_dom_printf("%s: warning: failed (rc=%d)\n",
+                      __FUNCTION__, rc);
+    return rc;
+#else
+    xc_dom_printf("%s: compiled without compat/native switching\n",
+                  __FUNCTION__);
+    return 0;
+#endif /* XEN_DOMCTL_set_compat */
+}
+
+
+/*
+ * Enable shadow mode (refcount + translate) for the domain through
+ * xc_shadow_control().  Returns its result; a failure is reported
+ * through xc_dom_panic().
+ */
+static int x86_shadow(int xc, domid_t domid)
+{
+    int mode = XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT |
+               XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE;
+    int rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    rc = xc_shadow_control(xc, domid,
+                           XEN_DOMCTL_SHADOW_OP_ENABLE,
+                           NULL, 0, NULL, mode, NULL);
+    if (0 == rc)
+    {
+        xc_dom_printf("%s: shadow enabled (mode=0x%x)\n", __FUNCTION__, mode);
+        return rc;
+    }
+
+    xc_dom_panic(XC_INTERNAL_ERROR,
+                 "%s: SHADOW_OP_ENABLE (mode=0x%x) failed (rc=%d)\n",
+                 __FUNCTION__, mode, rc);
+    return rc;
+}
+
+/*
+ * x86 early setup: runs x86_compat() (its return value is not checked
+ * here) and, for guests with the auto-translated physmap feature, marks
+ * shadow mode as enabled and switches it on.  Returns 0 or the
+ * x86_shadow() result.
+ */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    x86_compat(dom->guest_xc, dom->guest_domid, dom->guest_type);
+
+    if (!xc_dom_feature_translated(dom))
+        return 0;
+
+    dom->shadow_enabled = 1;
+    return x86_shadow(dom->guest_xc, dom->guest_domid);
+}
+
+/*
+ * x86 late setup.
+ *
+ * Non-translated guests: unmap and pin the top-level page table
+ * (dom->pgtables_seg.pfn) with the pin command matching the guest type.
+ * Auto-translated guests: add the shared info frame and the grant table
+ * frames to the guest physmap.
+ * Both: map the shared info page, run the arch shared_info hook on it
+ * and unmap it again.
+ *
+ * Returns 0 on success, the failing call's result otherwise, or -1 if
+ * the shared info page cannot be mapped.
+ *
+ * NOTE(review): pgd_type stays 0 when guest_type matches no table entry,
+ * and the shared_info hook's return value is ignored -- confirm both are
+ * intended.
+ */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    static const struct {
+        char *guest;
+        unsigned long pgd_type;
+    } types[] = {
+        { "xen-3.0-x86_32",  MMUEXT_PIN_L2_TABLE},
+        { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
+        { "xen-3.0-x86_64",  MMUEXT_PIN_L4_TABLE},
+    };
+    unsigned long pgd_type = 0;
+    shared_info_t *shared_info;
+    xen_pfn_t shinfo;
+    int i, rc;
+
+    for (i = 0; i < sizeof(types) / sizeof(types[0]); i++)
+        if (0 == strcmp(types[i].guest, dom->guest_type))
+            pgd_type = types[i].pgd_type;
+
+    if (!xc_dom_feature_translated(dom))
+    {
+        /* paravirtualized guest */
+        xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+        rc = pin_table(dom->guest_xc, pgd_type,
+                       xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
+                       dom->guest_domid);
+        if (0 != rc)
+        {
+            xc_dom_panic(XC_INTERNAL_ERROR,
+                         "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)\n",
+                         __FUNCTION__, dom->pgtables_seg.pfn, rc);
+            return rc;
+        }
+        shinfo = dom->shared_info_mfn;
+    }
+    else
+    {
+        /* paravirtualized guest with auto-translation */
+        struct xen_add_to_physmap xatp;
+
+        /* Map shared info frame into guest physmap. */
+        xatp.domid = dom->guest_domid;
+        xatp.space = XENMAPSPACE_shared_info;
+        xatp.idx = 0;
+        xatp.gpfn = dom->shared_info_pfn;
+        rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+        if (rc != 0)
+        {
+            xc_dom_panic(XC_INTERNAL_ERROR, "%s: mapping shared_info failed "
+                         "(pfn=0x%" PRIpfn ", rc=%d)\n",
+                         __FUNCTION__, xatp.gpfn, rc);
+            return rc;
+        }
+
+        /* Map grant table frames into guest physmap.  The loop reuses
+         * the function-scope counter (no shadowing declaration) and
+         * ends at the first EINVAL after at least one frame has been
+         * mapped. */
+        for (i = 0;; i++)
+        {
+            xatp.domid = dom->guest_domid;
+            xatp.space = XENMAPSPACE_grant_table;
+            xatp.idx = i;
+            xatp.gpfn = dom->total_pages + i;
+            rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+            if (rc != 0)
+            {
+                if (i > 0 && errno == EINVAL)
+                {
+                    xc_dom_printf("%s: %d grant tables mapped\n", __FUNCTION__,
+                                  i);
+                    break;
+                }
+                xc_dom_panic(XC_INTERNAL_ERROR,
+                             "%s: mapping grant tables failed " "(pfn=0x%"
+                             PRIpfn ", rc=%d)\n", __FUNCTION__, xatp.gpfn, rc);
+                return rc;
+            }
+        }
+        shinfo = dom->shared_info_pfn;
+    }
+
+    /* setup shared_info page */
+    xc_dom_printf("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
+    shared_info = xc_map_foreign_range(dom->guest_xc, dom->guest_domid,
+                                       PAGE_SIZE_X86,
+                                       PROT_READ | PROT_WRITE,
+                                       shinfo);
+    if (NULL == shared_info)
+        return -1;
+    dom->arch_hooks->shared_info(dom, shared_info);
+    munmap(shared_info, PAGE_SIZE_X86);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: ia64                                                         */
+
+#elif defined(__ia64__)
+
+/*
+ * ia64 early setup: issue XEN_DOMCTL_arch_setup with flags 0, bp placed
+ * sizeof(start_info_t) bytes into the start info page, and maxmem set
+ * to the domain's total memory in bytes.  Returns the do_domctl()
+ * result.
+ */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+
+    xc_dom_printf("%s: setup firmware\n", __FUNCTION__);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = dom->guest_domid;
+    domctl.cmd = XEN_DOMCTL_arch_setup;
+    domctl.u.arch_setup.flags = 0;
+    domctl.u.arch_setup.bp = (dom->start_info_pfn << PAGE_SHIFT)
+        + sizeof(start_info_t);
+    domctl.u.arch_setup.maxmem = dom->total_pages << PAGE_SHIFT;
+
+    return do_domctl(dom->guest_xc, &domctl);
+}
+
+/* ia64: no late setup; only logs and returns 0. */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: powerpc                                                      */
+
+#elif defined(__powerpc64__)
+
+/* powerpc: no early setup; only logs and returns 0. */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+/*
+ * powerpc late setup: placeholder until device tree setup is implemented.
+ * Maps the start info page (needed only by the disabled load_devtree()
+ * call below), logs a TODO and returns 0.
+ */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    start_info_t *si =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    /* Only referenced inside the #if 0 block for now. */
+    (void)si;
+
+    xc_dom_printf("%s: TODO: setup devtree\n", __FUNCTION__);
+
+#if 0
+    load_devtree(dom->guest_xc,
+                 dom->guest_domid,
+                 dom->p2m_host,
+                 devtree,           // FIXME
+                 devtree_addr,      // FIXME
+                 dom->ramdisk_seg.vstart,
+                 dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart,
+                 si,
+                 dom->start_info_pfn << PAGE_SHIFT);
+#endif
+    /* The original returned an undeclared `rc`, which does not compile. */
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: other                                                        */
+
+#else
+
+/* Fallback for other architectures: logs the call and returns 0. */
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+/* Fallback for other architectures: logs the call and returns 0. */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+#endif /* arch stuff */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Check whether the guest type of the image appears in the hypervisor's
+ * capability string (a space separated list).
+ *
+ * Returns the number of matching entries; 0 (after raising a panic)
+ * when the guest type is not supported.
+ */
+int xc_dom_compat_check(struct xc_dom_image *dom)
+{
+    xen_capabilities_info_t caps_copy;
+    char *saveptr;
+    char *cap;
+    int nr_matches = 0;
+
+    /* strtok_r() writes into its input, so tokenize a private copy */
+    strcpy(caps_copy, dom->xen_caps);
+    cap = strtok_r(caps_copy, " ", &saveptr);
+    while (cap != NULL)
+    {
+        int is_match = (strcmp(dom->guest_type, cap) == 0);
+
+        xc_dom_printf("%s: supported guest type: %s%s\n", __FUNCTION__,
+                      cap, is_match ? " <= matches" : "");
+        nr_matches += is_match;        /* is_match is 0 or 1 */
+        cap = strtok_r(NULL, " ", &saveptr);
+    }
+
+    if (nr_matches == 0)
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: guest type %s not supported by xen kernel, sorry\n",
+                     __FUNCTION__, dom->guest_type);
+
+    return nr_matches;
+}
+
+/*
+ * Record the control interface handle and domain id in 'dom' and query
+ * the hypervisor for its version and capability string.
+ *
+ * Returns 0 on success, -1 (after raising a panic) when the capability
+ * string cannot be read.
+ */
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid)
+{
+    dom->guest_xc = xc;
+    dom->guest_domid = domid;
+
+    dom->xen_version = xc_version(dom->guest_xc, XENVER_version, NULL);
+    if (xc_version(xc, XENVER_capabilities, &dom->xen_caps) < 0) {
+        xc_dom_panic(XC_INTERNAL_ERROR, "can't get xen capabilities\n");
+        return -1;
+    }
+    /* XENVER_version packs major:minor as 16:16 bits */
+    xc_dom_printf("%s: ver %d.%d, caps %s\n", __FUNCTION__,
+                  dom->xen_version >> 16, dom->xen_version & 0xffff,
+                  dom->xen_caps);
+    return 0;
+}
+
+/*
+ * Run the early arch setup hook, then ask the hypervisor to populate the
+ * guest physmap for all dom->total_pages pages listed in dom->p2m_host.
+ *
+ * Returns 0 on success, otherwise the failing call's return code.
+ */
+int xc_dom_boot_mem_init(struct xc_dom_image *dom)
+{
+    long rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    rc = arch_setup_early(dom);
+    if (rc != 0)
+        return rc;
+
+    /* allocate guest memory */
+    rc = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
+                                           dom->total_pages, 0, 0,
+                                           dom->p2m_host);
+    if (rc == 0)
+        return 0;
+
+    xc_dom_panic(XC_OUT_OF_MEMORY,
+                 "%s: can't allocate low memory for domain\n",
+                 __FUNCTION__);
+    return rc;
+}
+
+/*
+ * Map 'count' guest pages starting at guest pfn 'pfn' into this process.
+ *
+ * A region of count pages is reserved with mmap() on the control fd and
+ * then every page is backed by the machine frame that
+ * xc_dom_p2m_host() reports for it, via xc_map_foreign_ranges().
+ *
+ * Returns the mapping, or NULL (after raising a panic) on failure.
+ */
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                           xen_pfn_t count)
+{
+    int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    size_t map_len = (size_t) count << page_shift;
+    privcmd_mmap_entry_t *entries;
+    void *ptr;
+    xen_pfn_t i;
+    int rc;
+
+    entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
+    if (NULL == entries)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [malloc]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    ptr = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
+               MAP_SHARED, dom->guest_xc, 0);
+    if (MAP_FAILED == ptr)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [mmap]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    for (i = 0; i < count; i++)
+    {
+        /* shift in pointer width: an int shift overflows at 2GB */
+        entries[i].va = (uintptr_t) ptr + ((uintptr_t) i << page_shift);
+        entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
+        entries[i].npages = 1;
+    }
+
+    rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid,
+                               entries, count);
+    if (rc < 0)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc);
+        /* don't leak the reserved address range on failure */
+        munmap(ptr, map_len);
+        return NULL;
+    }
+    return ptr;
+}
+
+/*
+ * Final boot stage: verify the domain, finish the memory layout, fill in
+ * the start info and hypercall pages, build the vcpu context and launch
+ * the guest.
+ *
+ * Returns 0 on success, non-zero on failure (a panic has been raised
+ * where this function itself detects the error).
+ */
+int xc_dom_boot_image(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    void *ctxt;
+    int rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* collect some info */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = dom->guest_domid;
+    rc = do_domctl(dom->guest_xc, &domctl);
+    if (0 != rc)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: getdomaininfo failed (rc=%d)\n", __FUNCTION__, rc);
+        return rc;
+    }
+    if (domctl.domain != dom->guest_domid)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: Huh? domid mismatch (%d != %d)\n", __FUNCTION__,
+                     domctl.domain, dom->guest_domid);
+        return -1;
+    }
+    dom->shared_info_mfn = domctl.u.getdomaininfo.shared_info_frame;
+
+    /* sanity checks */
+    if (!xc_dom_compat_check(dom))
+        return -1;
+
+    /* initial mm setup */
+    if (0 != (rc = xc_dom_update_guest_p2m(dom)))
+        return rc;
+    if (dom->arch_hooks->setup_pgtables)
+        if (0 != (rc = dom->arch_hooks->setup_pgtables(dom)))
+            return rc;
+
+    if (0 != (rc = clear_page(dom, dom->console_pfn)))
+        return rc;
+    if (0 != (rc = clear_page(dom, dom->xenstore_pfn)))
+        return rc;
+
+    /* start info page */
+    if (dom->arch_hooks->start_info)
+        dom->arch_hooks->start_info(dom);
+
+    /* hypercall page */
+    if (0 != (rc = setup_hypercall_page(dom)))
+        return rc;
+    xc_dom_log_memory_footprint(dom);
+
+    /* arch specific late setup */
+    if (0 != (rc = arch_setup_late(dom)))
+        return rc;
+
+    /* let the vm run */
+    ctxt = xc_dom_malloc(dom, PAGE_SIZE * 2 /* FIXME */ );
+    if (NULL == ctxt)
+    {
+        /* the original fed a NULL pointer to memset() here */
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: can't allocate vcpu context\n", __FUNCTION__);
+        return -1;
+    }
+    memset(ctxt, 0, PAGE_SIZE * 2);
+    if (0 != (rc = dom->arch_hooks->vcpu(dom, ctxt)))
+        return rc;
+    /* drop all mappings of guest memory before the guest starts */
+    xc_dom_unmap_all(dom);
+    rc = launch_vm(dom->guest_xc, dom->guest_domid, ctxt);
+
+    return rc;
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_core.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_core.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,773 @@
+/*
+ * Xen domain builder -- core bits.
+ *
+ * The core code goes here:
+ *   - allocate and release domain structs.
+ *   - memory management functions.
+ *   - misc helper functions.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+/* debugging                                                                */
+
+FILE *xc_dom_logfile = NULL;
+
+/*
+ * Open the domain builder logfile (append mode, unbuffered) unless it is
+ * open already.  If the file cannot be opened, xc_dom_logfile stays NULL
+ * and logging remains disabled.
+ */
+void xc_dom_loginit(void)
+{
+    FILE *fp;
+
+    if (xc_dom_logfile)
+        return;
+    fp = fopen("/var/log/xen/domain-builder-ng.log", "a");
+    if (NULL == fp)
+        return;    /* passing NULL on to setvbuf() would be undefined */
+    setvbuf(fp, NULL, _IONBF, 0);
+    xc_dom_logfile = fp;
+    xc_dom_printf("### ----- xc domain builder logfile opened -----\n");
+}
+
+/*
+ * printf-style logging into xc_dom_logfile.
+ *
+ * Does nothing and returns 0 when no logfile is open.  Messages longer
+ * than the internal 1024 byte buffer are truncated.  Returns the
+ * fwrite() item count: 1 if the message was written, 0 otherwise.
+ */
+int xc_dom_printf(const char *fmt, ...)
+{
+    va_list args;
+    char buf[1024];
+    int len;
+
+    if (!xc_dom_logfile)
+        return 0;
+
+    va_start(args, fmt);
+    len = vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+
+    /* format error or empty message: nothing to write */
+    if (len <= 0)
+        return 0;
+    /*
+     * vsnprintf() reports the untruncated length; clamp it so that
+     * fwrite() never reads past the end of buf.
+     */
+    if ((size_t) len >= sizeof(buf))
+        len = sizeof(buf) - 1;
+
+    return fwrite(buf, len, 1, xc_dom_logfile);
+}
+
+/*
+ * Report a fatal builder error.
+ *
+ * The formatted message is stored through xc_set_error() under code
+ * 'err' and written, prefixed with "file:line: panic: ", to the logfile
+ * if one is open, else to stderr.  Returns the fprintf() result.
+ */
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+                      const char *fmt, ...)
+{
+    char msg[XC_MAX_ERROR_MSG_LEN];
+    char pos[256];
+    va_list args;
+    FILE *out = xc_dom_logfile ? xc_dom_logfile : stderr;
+
+    va_start(args, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, args);
+    va_end(args);
+    snprintf(pos, sizeof(pos), "%s:%d: panic: ", file, line);
+
+    xc_set_error(err, "%s", msg);
+    return fprintf(out, "%s%s", pos, msg);
+}
+
+/*
+ * Log "name : amount" with a unit chosen by size: MB above 32 MB,
+ * kB above 32 kB, plain bytes otherwise.
+ */
+static void print_mem(const char *name, size_t mem)
+{
+    /* size_t is unsigned, so the matching conversion is %zu, not %zd */
+    if (mem > 32 * 1024 * 1024)
+        xc_dom_printf("%-24s : %zu MB\n", name, mem / (1024 * 1024));
+    else if (mem > 32 * 1024)
+        xc_dom_printf("%-24s : %zu kB\n", name, mem / 1024);
+    else
+        xc_dom_printf("%-24s : %zu bytes\n", name, mem);
+}
+
+/*
+ * Log the builder's memory accounting counters kept in 'dom':
+ * malloc'ed bytes, anonymous mmaps, file mmaps and domU mappings.
+ */
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom)
+{
+    xc_dom_printf("domain builder memory footprint\n");
+    xc_dom_printf("   allocated\n");
+    print_mem("      malloc", dom->alloc_malloc);
+    print_mem("      anon mmap", dom->alloc_mem_map);
+    xc_dom_printf("   mapped\n");
+    print_mem("      file mmap", dom->alloc_file_map);
+    print_mem("      domU mmap", dom->alloc_domU_map);
+}
+
+/* ------------------------------------------------------------------------ */
+/* simple memory pool                                                       */
+
+/*
+ * Allocate 'size' zeroed bytes from the per-domain memory pool.
+ *
+ * The block is linked into dom->memblocks, so it is released together
+ * with everything else by xc_dom_free_all().  Returns NULL if malloc()
+ * fails.
+ */
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
+{
+    const size_t total = sizeof(struct xc_dom_mem) + size;
+    struct xc_dom_mem *blk = malloc(total);
+
+    if (!blk)
+        return NULL;
+    memset(blk, 0, total);
+
+    /* push onto the head of the pool list and account for it */
+    blk->next = dom->memblocks;
+    dom->memblocks = blk;
+    dom->alloc_malloc += total;
+
+    /* large allocations get a line in the log */
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return blk->memory;
+}
+
+/*
+ * Allocate 'size' bytes of anonymous mmap()ed memory, tracked in the
+ * per-domain pool so that xc_dom_free_all() unmaps it again.
+ * Returns NULL if either the bookkeeping malloc() or the mmap() fails.
+ */
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *blk = malloc(sizeof(*blk));
+    void *pages;
+
+    if (!blk)
+        return NULL;
+    memset(blk, 0, sizeof(*blk));
+
+    blk->mmap_len = size;
+    pages = mmap(NULL, blk->mmap_len,
+                 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                 -1, 0);
+    blk->mmap_ptr = pages;
+    if (pages == MAP_FAILED)
+    {
+        free(blk);
+        return NULL;
+    }
+
+    /* link into the pool and update the accounting */
+    blk->next = dom->memblocks;
+    dom->memblocks = blk;
+    dom->alloc_malloc += sizeof(*blk);
+    dom->alloc_mem_map += blk->mmap_len;
+
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return blk->mmap_ptr;
+}
+
+/*
+ * Map the file 'filename' read-only into memory and track the mapping in
+ * the per-domain pool.
+ *
+ * On success the mapping is returned and *size holds the file size.
+ * Returns NULL if the file cannot be opened, its size cannot be
+ * determined, or the allocation or mmap() fails.
+ */
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                            const char *filename, size_t * size)
+{
+    struct xc_dom_mem *block = NULL;
+    off_t len;
+    int fd = -1;
+
+    fd = open(filename, O_RDONLY);
+    if (-1 == fd)
+        goto err;
+
+    /* lseek() returns -1 on error; never turn that into a huge size */
+    len = lseek(fd, 0, SEEK_END);
+    if (len < 0)
+        goto err;
+    *size = len;
+
+    block = malloc(sizeof(*block));
+    if (NULL == block)
+        goto err;
+    memset(block, 0, sizeof(*block));
+    block->mmap_len = *size;
+    block->mmap_ptr = mmap(NULL, block->mmap_len, PROT_READ, MAP_SHARED,
+                           fd, 0);
+    if (MAP_FAILED == block->mmap_ptr)
+        goto err;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_file_map += block->mmap_len;
+    /* the mapping stays valid after the descriptor is closed */
+    close(fd);
+    if (*size > 100 * 1024)
+        print_mem(__FUNCTION__, *size);
+    return block->mmap_ptr;
+
+  err:
+    if (-1 != fd)
+        close(fd);
+    free(block);
+    return NULL;
+}
+
+/*
+ * Release every block of the per-domain memory pool: unmap blocks that
+ * carry an mmap()ed region, then free the block itself.
+ */
+static void xc_dom_free_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_mem *blk = dom->memblocks;
+
+    while (blk != NULL)
+    {
+        /* unlink first, so the list head never points at freed memory */
+        dom->memblocks = blk->next;
+        if (blk->mmap_ptr)
+            munmap(blk->mmap_ptr, blk->mmap_len);
+        free(blk);
+        blk = dom->memblocks;
+    }
+}
+
+/*
+ * strdup() variant that takes its memory from the per-domain pool.
+ * Returns NULL if the allocation fails.
+ */
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str)
+{
+    const size_t nbytes = strlen(str) + 1;    /* includes the NUL */
+    char *copy = xc_dom_malloc(dom, nbytes);
+
+    if (copy != NULL)
+        memcpy(copy, str, nbytes);
+    return copy;
+}
+
+/* ------------------------------------------------------------------------ */
+/* read files, copy memory blocks, with transparent gunzip                  */
+
+/*
+ * Check whether 'blob' (ziplen bytes) looks like gzip data.
+ *
+ * Returns 0 if the blob is not gzipped or the uncompressed size in its
+ * trailer looks implausible (smaller than the compressed size or more
+ * than eight times as large).  Otherwise returns the uncompressed size
+ * taken from the last four bytes of the blob (little endian), plus 16
+ * bytes of slack.
+ */
+size_t xc_dom_check_gzip(void *blob, size_t ziplen)
+{
+    const unsigned char *bytes = blob;
+    const unsigned char *gzlen;
+    size_t unziplen;
+
+    /* need room for the two magic bytes and the 4 byte size trailer */
+    if (ziplen < 6)
+        return 0;
+    if (bytes[0] != 0x1f || bytes[1] != 0x8b)
+        /* not gzipped */
+        return 0;
+
+    gzlen = bytes + ziplen - 4;
+    /* widen before shifting: 'gzlen[3] << 24' can overflow an int */
+    unziplen = (size_t) gzlen[3] << 24 | (size_t) gzlen[2] << 16 |
+        (size_t) gzlen[1] << 8 | (size_t) gzlen[0];
+    if (unziplen < ziplen || unziplen > ziplen * 8)
+    {
+        xc_dom_printf
+            ("%s: size (zip %zd, unzip %zd) looks insane, skip gunzip\n",
+             __FUNCTION__, ziplen, unziplen);
+        return 0;
+    }
+
+    return unziplen + 16;
+}
+
+/*
+ * Inflate the gzip stream at 'src' (srclen bytes) into 'dst', which has
+ * room for dstlen bytes, in a single inflate() call.
+ *
+ * Returns 0 on success, -1 (after raising a panic) if zlib cannot be
+ * initialized or the stream does not end within the output buffer.
+ */
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen)
+{
+    z_stream zStream;
+    int rc;
+
+    memset(&zStream, 0, sizeof(zStream));
+    zStream.next_in = src;
+    zStream.avail_in = srclen;
+    zStream.next_out = dst;
+    zStream.avail_out = dstlen;
+    /* +32 means "handle gzip" */
+    rc = inflateInit2(&zStream, (MAX_WBITS + 32));
+    if (rc != Z_OK)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflateInit2 failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+    rc = inflate(&zStream, Z_FINISH);
+    /* release zlib's internal state on the success and error path alike */
+    inflateEnd(&zStream);
+    if (rc != Z_STREAM_END)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflate failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+
+    xc_dom_printf("%s: unzip ok, 0x%zx -> 0x%zx\n",
+                  __FUNCTION__, srclen, dstlen);
+    return 0;
+}
+
+/*
+ * Transparently gunzip *blob if xc_dom_check_gzip() accepts it.
+ *
+ * On success *blob and *size are replaced by the pool-allocated unzipped
+ * copy and its buffer length.  Data that is not gzipped is left alone.
+ * Returns 0 in both of these cases, -1 if allocation or inflation fails.
+ */
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size)
+{
+    const size_t out_len = xc_dom_check_gzip(*blob, *size);
+    void *out;
+
+    if (out_len == 0)
+        return 0;            /* nothing to unzip */
+
+    out = xc_dom_malloc(dom, out_len);
+    if (out == NULL)
+        return -1;
+    if (xc_dom_do_gunzip(*blob, *size, out, out_len) == -1)
+        return -1;
+
+    *size = out_len;
+    *blob = out;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* domain memory                                                            */
+
+/*
+ * Translate a guest pfn into a pointer into the builder's address space.
+ *
+ * If 'pfn' lies in a block already recorded in dom->phys_pages, a pointer
+ * into that block is returned; with count != 0 the whole range must fit
+ * into that block.  If no block matches and count != 0, a new block of
+ * 'count' pages is created: a domU mapping when dom->guest_domid is
+ * non-zero, anonymous memory otherwise.
+ *
+ * Returns NULL on a range error, on a partial overlap with an existing
+ * block, when nothing is mapped and count is 0, or when mapping fails.
+ */
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
+                       xen_pfn_t count)
+{
+    struct xc_dom_phys *phys;
+    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    char *mode = "unset";
+
+    if (pfn > dom->total_pages)
+    {
+       xc_dom_printf("%s: pfn out of range (0x%" PRIpfn " > 0x%" PRIpfn ")\n",
+                     __FUNCTION__, pfn, dom->total_pages);
+       return NULL;
+    }
+
+    /* already allocated? */
+    for (phys = dom->phys_pages; NULL != phys; phys = phys->next)
+    {
+       /* request starts behind this block: try the next one */
+       if (pfn >= phys->first + phys->count)
+           continue;
+       if (count)
+       {
+           /* size given: must be completely within the already
+              allocated block */
+           if (pfn + count <= phys->first)
+               continue;
+           if (pfn < phys->first || pfn + count > phys->first + phys->count)
+           {
+               xc_dom_printf("%s: request overlaps allocated block"
+                             " (req 0x%" PRIpfn "+0x%" PRIpfn ","
+                             " blk 0x%" PRIpfn "+0x%" PRIpfn ")\n",
+                             __FUNCTION__, pfn, count, phys->first,
+                             phys->count);
+               return NULL;
+           }
+       }
+       else
+       {
+           /* no size given: block must be allocated already,
+              just hand out a pointer to it */
+           if (pfn < phys->first)
+               continue;
+       }
+       return phys->ptr + ((pfn - phys->first) << page_shift);
+    }
+
+    /* allocating is allowed with size specified only */
+    if (0 == count)
+    {
+       xc_dom_printf("%s: no block found, no size given,"
+                     " can't malloc (pfn 0x%" PRIpfn ")\n", __FUNCTION__, pfn);
+       return NULL;
+    }
+
+    /* not found, no overlap => allocate */
+    phys = xc_dom_malloc(dom, sizeof(*phys));
+    if (NULL == phys)
+       return NULL;
+    memset(phys, 0, sizeof(*phys));
+    phys->first = pfn;
+    phys->count = count;
+
+    if (dom->guest_domid)
+    {
+       /* a guest domain id is set: map the guest's own pages */
+       mode = "domU mapping";
+       phys->ptr = xc_dom_boot_domU_map(dom, phys->first, phys->count);
+       if (NULL == phys->ptr)
+           return NULL;
+       dom->alloc_domU_map += phys->count << page_shift;
+    }
+    else
+    {
+       /* guest_domid is 0: back the range with anonymous memory */
+       mode = "anonymous memory";
+       phys->ptr = mmap(NULL, phys->count << page_shift,
+                        PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                        -1, 0);
+       if (MAP_FAILED == phys->ptr)
+       {
+           xc_dom_panic(XC_OUT_OF_MEMORY,
+                        "%s: oom: can't allocate 0x%" PRIpfn " pages\n",
+                        __FUNCTION__, count);
+           return NULL;
+       }
+       dom->alloc_mem_map += phys->count << page_shift;
+    }
+
+#if 1
+    xc_dom_printf("%s: %s: pfn 0x%" PRIpfn "+0x%" PRIpfn " at %p\n",
+                 __FUNCTION__, mode, phys->first, phys->count, phys->ptr);
+#endif
+    /* record the new block at the head of the list */
+    phys->next = dom->phys_pages;
+    dom->phys_pages = phys;
+    return phys->ptr;
+}
+
+/*
+ * Allocate a page-granular segment of guest virtual address space.
+ *
+ * start == 0 places the segment at the current end of the allocated
+ * area; otherwise 'start' must be page aligned and must not lie below
+ * that end.  'size' is rounded up to whole pages.  The segment bounds
+ * and first pfn are stored in 'seg', dom->virt_alloc_end is advanced,
+ * and the pages are mapped and cleared.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+                         struct xc_dom_seg *seg, char *name,
+                         xen_vaddr_t start, xen_vaddr_t size)
+{
+    const unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    const xen_pfn_t nr_pages = (size + page_size - 1) / page_size;
+    void *mapping;
+
+    if (start == 0)
+        start = dom->virt_alloc_end;
+
+    if ((start & (page_size - 1)) != 0)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: segment start isn't page aligned (0x%" PRIx64 ")\n",
+                     __FUNCTION__, start);
+        return -1;
+    }
+    if (start < dom->virt_alloc_end)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: segment start too low (0x%" PRIx64 " < 0x%" PRIx64
+                     ")\n", __FUNCTION__, start, dom->virt_alloc_end);
+        return -1;
+    }
+
+    /* record the segment and move the allocation watermark */
+    seg->vstart = start;
+    seg->vend = start + nr_pages * page_size;
+    seg->pfn = (seg->vstart - dom->parms.virt_base) / page_size;
+    dom->virt_alloc_end = seg->vend;
+
+    xc_dom_printf("%-20s:   %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
+                  "  (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)\n",
+                  __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn,
+                  nr_pages);
+
+    /* map and clear pages */
+    mapping = xc_dom_seg_to_ptr(dom, seg);
+    if (mapping == NULL)
+        return -1;
+    memset(mapping, 0, nr_pages * page_size);
+
+    return 0;
+}
+
+/*
+ * Reserve a single page at the current end of the allocated virtual
+ * area and return its pfn.  The page is only reserved here; it is
+ * neither mapped nor cleared.
+ */
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name)
+{
+    const unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    const xen_vaddr_t vaddr = dom->virt_alloc_end;
+    const xen_pfn_t pfn = (vaddr - dom->parms.virt_base) / page_size;
+
+    dom->virt_alloc_end = vaddr + page_size;
+
+    xc_dom_printf("%-20s:   %-12s : 0x%" PRIx64 " (pfn 0x%" PRIpfn ")\n",
+                  __FUNCTION__, name, vaddr, pfn);
+    return pfn;
+}
+
+/*
+ * Unmap the block of dom->phys_pages that contains 'pfn' and unlink it
+ * from the list.  Logs a message and returns if no block contains it.
+ */
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    const unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    struct xc_dom_phys **link = &dom->phys_pages;
+    struct xc_dom_phys *blk;
+
+    /* 'link' always points at the pointer that refers to 'blk' */
+    while ((blk = *link) != NULL)
+    {
+        if (pfn >= blk->first && pfn < blk->first + blk->count)
+            break;
+        link = &blk->next;
+    }
+    if (blk == NULL)
+    {
+        xc_dom_printf("%s: Huh? no mapping with pfn 0x%" PRIpfn "\n",
+                      __FUNCTION__, pfn);
+        return;
+    }
+
+    munmap(blk->ptr, blk->count << page_shift);
+    *link = blk->next;
+}
+
+/* Unmap every block on dom->phys_pages, always taking the list head. */
+void xc_dom_unmap_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_phys *head;
+
+    while ((head = dom->phys_pages) != NULL)
+        xc_dom_unmap_one(dom, head->first);
+}
+
+/* ------------------------------------------------------------------------ */
+/* pluggable kernel loaders                                                 */
+
+static struct xc_dom_loader *first_loader = NULL;
+static struct xc_dom_arch *first_hook = NULL;
+
+/*
+ * Add a kernel loader to the head of the loader list, so loaders
+ * registered later are probed first by xc_dom_find_loader().
+ */
+void xc_dom_register_loader(struct xc_dom_loader *loader)
+{
+    loader->next = first_loader;
+    first_loader = loader;
+}
+
+/*
+ * Probe the registered loaders in list order and return the first one
+ * whose probe() hook returns 0.  Raises a panic and returns NULL if no
+ * loader accepts the image.
+ */
+static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *ldr;
+
+    for (ldr = first_loader; ldr != NULL; ldr = ldr->next)
+    {
+        xc_dom_printf("%s: trying %s loader ... ", __FUNCTION__, ldr->name);
+        if (ldr->probe(dom) != 0)
+        {
+            xc_dom_printf("failed\n");
+            continue;
+        }
+        xc_dom_printf("OK\n");
+        return ldr;
+    }
+
+    xc_dom_panic(XC_INVALID_KERNEL, "%s: no loader found\n", __FUNCTION__);
+    return NULL;
+}
+
+/* Add a set of arch hooks to the head of the list of known hooks. */
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks)
+{
+    hooks->next = first_hook;
+    first_hook = hooks;
+}
+
+/*
+ * Look up the registered arch hooks whose guest_type string equals
+ * 'guest_type'.  Raises a panic and returns NULL if none matches.
+ */
+static struct xc_dom_arch *xc_dom_find_arch_hooks(char *guest_type)
+{
+    struct xc_dom_arch *cur;
+
+    for (cur = first_hook; cur != NULL; cur = cur->next)
+    {
+        if (strcmp(cur->guest_type, guest_type) == 0)
+            return cur;
+    }
+
+    xc_dom_panic(XC_INVALID_KERNEL,
+                 "%s: not found (type %s)\n", __FUNCTION__, guest_type);
+    return NULL;
+}
+
+/* ------------------------------------------------------------------------ */
+/* public interface                                                         */
+
+/*
+ * Destroy a domain image: unmap any remaining guest page mappings,
+ * release the whole memory pool and free the struct itself.
+ * 'dom' must not be used afterwards.
+ */
+void xc_dom_release(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+    if (dom->phys_pages)
+       xc_dom_unmap_all(dom);
+    xc_dom_free_all(dom);
+    free(dom);
+}
+
+/*
+ * Allocate and initialize a new domain image descriptor.
+ *
+ * cmdline:  kernel command line, copied into the image; may be NULL.
+ * features: feature string handed to elf_xen_parse_features(); may be
+ *           NULL.
+ *
+ * Returns the new image (release it with xc_dom_release()), or NULL if
+ * memory allocation fails.
+ */
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char *features)
+{
+    struct xc_dom_image *dom;
+
+    /* both strings may be NULL; never hand NULL to a %s conversion */
+    xc_dom_printf("%s: cmdline=\"%s\", features=\"%s\"\n",
+                  __FUNCTION__, cmdline ? cmdline : "(null)",
+                  features ? features : "(null)");
+    dom = malloc(sizeof(*dom));
+    if (!dom)
+        return NULL;
+
+    memset(dom, 0, sizeof(*dom));
+    if (cmdline)
+    {
+        dom->cmdline = xc_dom_strdup(dom, cmdline);
+        if (NULL == dom->cmdline)
+            goto err;
+    }
+    if (features)
+        elf_xen_parse_features(features, dom->f_requested, NULL);
+
+    dom->parms.virt_base = UNSET_ADDR;
+    dom->parms.virt_entry = UNSET_ADDR;
+    dom->parms.virt_hypercall = UNSET_ADDR;
+    dom->parms.virt_hv_start_low = UNSET_ADDR;
+    dom->parms.elf_paddr_offset = UNSET_ADDR;
+
+    dom->alloc_malloc += sizeof(*dom);
+    return dom;
+
+  err:
+    xc_dom_release(dom);
+    return NULL;
+}
+
+/*
+ * Use the file 'filename' as kernel image: map it and gunzip it if it
+ * is compressed.  Returns 0 on success, -1 on failure.
+ */
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename)
+{
+    void *blob;
+
+    xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+
+    blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size);
+    dom->kernel_blob = blob;
+    if (blob == NULL)
+        return -1;
+
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use the file 'filename' as ramdisk: map it into memory.  The blob is
+ * deliberately not gunzipped at this point.
+ * Returns 0 on success, -1 if the file cannot be mapped.
+ */
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename)
+{
+    void *blob;
+
+    xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+
+    blob = xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size);
+    dom->ramdisk_blob = blob;
+
+    return (blob == NULL) ? -1 : 0;
+}
+
+/*
+ * Use the caller-provided buffer 'mem' (memsize bytes) as kernel image
+ * and gunzip it if it is compressed.  The buffer itself is not copied.
+ * Returns the result of xc_dom_try_gunzip(): 0 on success, -1 on error.
+ */
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+                      size_t memsize)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+    dom->kernel_blob = (void *)mem;
+    dom->kernel_size = memsize;
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use the caller-provided buffer 'mem' (memsize bytes) as ramdisk.  The
+ * buffer is neither copied nor gunzipped here.  Always returns 0.
+ */
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+                       size_t memsize)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    dom->ramdisk_size = memsize;
+    dom->ramdisk_blob = (void *)mem;
+    return 0;
+}
+
+/*
+ * Find a loader for the kernel image, let it parse the image and merge
+ * the requested and required features into dom->f_active.
+ *
+ * Returns 0 on success, -1 if no loader fits, parsing fails, the guest
+ * type is left unset, or an active feature is not supported.
+ */
+int xc_dom_parse_image(struct xc_dom_image *dom)
+{
+    int idx;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* parse kernel image */
+    dom->kernel_loader = xc_dom_find_loader(dom);
+    if (dom->kernel_loader == NULL)
+        return -1;
+    if (dom->kernel_loader->parser(dom) != 0)
+        return -1;
+    if (dom->guest_type == NULL)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: guest_type not set\n", __FUNCTION__);
+        return -1;
+    }
+
+    /* check features */
+    for (idx = 0; idx < XENFEAT_NR_SUBMAPS; idx++)
+    {
+        /* active = requested on the command line + required by kernel */
+        dom->f_active[idx] |= dom->f_requested[idx];
+        dom->f_active[idx] |= dom->parms.f_required[idx];
+
+        /* every active bit must also be set in the supported map */
+        if ((dom->f_active[idx] & dom->parms.f_supported[idx]) ==
+            dom->f_active[idx])
+            continue;
+
+        xc_dom_panic(XC_INVALID_PARAM,
+                     "%s: unsupported feature requested\n", __FUNCTION__);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Select the arch hooks for the guest type, compute the number of guest
+ * pages for 'mem_mb' megabytes and set up the initial host p2m table
+ * as an identity map.
+ *
+ * Returns 0 on success, -1 (after raising a panic) if no arch hooks
+ * match or the p2m table cannot be allocated.
+ */
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb)
+{
+    unsigned int page_shift;
+    xen_pfn_t nr_pages, pfn;
+
+    dom->arch_hooks = xc_dom_find_arch_hooks(dom->guest_type);
+    if (NULL == dom->arch_hooks)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+                     __FUNCTION__);
+        return -1;
+    }
+
+    page_shift = XC_DOM_PAGE_SHIFT(dom);
+    /* widen first so that the shift is not done in unsigned int */
+    nr_pages = (xen_pfn_t) mem_mb << (20 - page_shift);
+
+    xc_dom_printf("%s: mem %u MB, pages 0x%" PRIpfn " pages, %dk each\n",
+                  __FUNCTION__, mem_mb, nr_pages, 1 << (page_shift-10));
+    dom->total_pages = nr_pages;
+
+    xc_dom_printf("%s: 0x%" PRIpfn " pages\n",
+                  __FUNCTION__, dom->total_pages);
+
+    /* setup initial p2m */
+    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
+    if (NULL == dom->p2m_host)
+    {
+        /* the original wrote through the NULL pointer in the loop below */
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: can't allocate p2m table\n", __FUNCTION__);
+        return -1;
+    }
+    for (pfn = 0; pfn < dom->total_pages; pfn++)
+        dom->p2m_host[pfn] = pfn;
+    return 0;
+}
+
+/*
+ * Copy the host p2m table into the guest's p2m table, converting each
+ * entry to the guest's pfn width (arch_hooks->sizeof_pfn, 4 or 8 bytes).
+ * Invalid entries become all-ones in the guest's width.
+ *
+ * Does nothing if the guest has no p2m table.  Returns 0 on success,
+ * -1 (after raising a panic) for an unsupported pfn width.
+ */
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom)
+{
+    xen_pfn_t i;
+
+    if (!dom->p2m_guest)
+        return 0;
+
+    switch (dom->arch_hooks->sizeof_pfn)
+    {
+    case 4:
+    {
+        uint32_t *p2m_32 = dom->p2m_guest;
+
+        xc_dom_printf("%s: dst 32bit, pages 0x%" PRIpfn " \n",
+                      __FUNCTION__, dom->total_pages);
+        for (i = 0; i < dom->total_pages; i++)
+        {
+            if (INVALID_P2M_ENTRY == dom->p2m_host[i])
+                p2m_32[i] = (uint32_t) - 1;
+            else
+                p2m_32[i] = dom->p2m_host[i];
+        }
+        return 0;
+    }
+    case 8:
+    {
+        uint64_t *p2m_64 = dom->p2m_guest;
+
+        xc_dom_printf("%s: dst 64bit, pages 0x%" PRIpfn " \n",
+                      __FUNCTION__, dom->total_pages);
+        for (i = 0; i < dom->total_pages; i++)
+        {
+            if (INVALID_P2M_ENTRY == dom->p2m_host[i])
+                p2m_64[i] = (uint64_t) - 1;
+            else
+                p2m_64[i] = dom->p2m_host[i];
+        }
+        return 0;
+    }
+    default:
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "sizeof_pfn is invalid (is %d, can be 4 or 8)",
+                     dom->arch_hooks->sizeof_pfn);
+        return -1;
+    }
+}
+
+/*
+ * Lay out and fill the guest's initial memory image.
+ *
+ * Segments are allocated in this order, each one following the previous
+ * one in guest virtual address space: kernel, ramdisk (if any), the
+ * arch's magic pages, page tables (if the arch counts any) and an
+ * optional boot stack page.
+ *
+ * Requires dom->arch_hooks to be set.  Returns 0 on success, -1 on
+ * error.
+ */
+int xc_dom_build_image(struct xc_dom_image *dom)
+{
+    unsigned int page_size;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* check for arch hooks */
+    if (NULL == dom->arch_hooks)
+    {
+       xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+                    __FUNCTION__);
+       goto err;
+    }
+    page_size = XC_DOM_PAGE_SIZE(dom);
+
+    /* load kernel */
+    if (0 != xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
+                                 dom->kernel_seg.vstart,
+                                 dom->kernel_seg.vend -
+                                 dom->kernel_seg.vstart))
+       goto err;
+    if (0 != dom->kernel_loader->loader(dom))
+       goto err;
+
+    /* load ramdisk */
+    if (dom->ramdisk_blob)
+    {
+       size_t unziplen, ramdisklen;
+       void *ramdiskmap;
+
+       /* a gzipped ramdisk is inflated directly into the segment,
+          so size the segment for the unzipped data */
+       unziplen = xc_dom_check_gzip(dom->ramdisk_blob, dom->ramdisk_size);
+       ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
+       if (0 != xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
+                                     ramdisklen))
+           goto err;
+       ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg);
+       if (unziplen)
+       {
+           if (-1 == xc_dom_do_gunzip(dom->ramdisk_blob, dom->ramdisk_size,
+                                      ramdiskmap, ramdisklen))
+               goto err;
+       }
+       else
+           memcpy(ramdiskmap, dom->ramdisk_blob, dom->ramdisk_size);
+    }
+
+    /* allocate other pages */
+    if (0 != dom->arch_hooks->alloc_magic_pages(dom))
+       goto err;
+    if (dom->arch_hooks->count_pgtables)
+    {
+       /* the hook is expected to set dom->pgtables -- TODO confirm */
+       dom->arch_hooks->count_pgtables(dom);
+       if (dom->pgtables > 0)
+           if (0 !=
+               xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
+                                    dom->pgtables * page_size))
+               goto err;
+    }
+    if (dom->alloc_bootstack)
+       dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
+    xc_dom_printf("%-20s: virt_alloc_end : 0x%" PRIx64 "\n",
+                 __FUNCTION__, dom->virt_alloc_end);
+    xc_dom_printf("%-20s: virt_pgtab_end : 0x%" PRIx64 "\n",
+                 __FUNCTION__, dom->virt_pgtab_end);
+    return 0;
+
+  err:
+    return -1;
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_elfloader.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_elfloader.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,283 @@
+/*
+ * Xen domain builder -- ELF bits.
+ *
+ * Parse and load ELF kernel images.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define XEN_VER "xen-3.0"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Pick the Xen guest type string for a parsed kernel image.
+ *
+ * The choice is driven by the ELF header's e_machine field.  For i386
+ * kernels it additionally depends on dom->parms.pae, and for bimodal
+ * kernels on whether dom->xen_caps mentions "xen-3.0-x86_32p".
+ * Machines not handled here map to "xen-3.0-unknown".
+ */
+static char *xc_dom_guest_type(struct xc_dom_image *dom,
+                              struct elf_binary *elf)
+{
+    uint64_t machine = elf_uval(elf, elf->ehdr, e_machine);
+
+    if (EM_X86_64 == machine)
+       return "xen-3.0-x86_64";
+    if (EM_IA_64 == machine)
+       return elf_msb(elf) ? "xen-3.0-ia64be" : "xen-3.0-ia64";
+    if (EM_PPC64 == machine)
+       return "xen-3.0-powerpc64";
+    if (EM_386 != machine)
+       return "xen-3.0-unknown";
+
+    /* i386 from here on: non-PAE unless the PAE mode says otherwise */
+    switch (dom->parms.pae) {
+    case PAEKERN_extended_cr3:
+    case PAEKERN_yes:
+       return "xen-3.0-x86_32p";
+    case 3 /* PAEKERN_bimodal */:
+       /* go PAE only when xen_caps lists the PAE guest type */
+       if (strstr(dom->xen_caps, "xen-3.0-x86_32p"))
+           return "xen-3.0-x86_32p";
+       break;
+    default:
+       break;
+    }
+    return "xen-3.0-x86_32";
+}
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary                                                         */
+
+/*
+ * Verify that a kernel blob is present and looks like an ELF binary.
+ *
+ * Returns 0 when both hold, -EINVAL otherwise.  Failures are reported
+ * through xc_dom_panic() only when verbose is non-zero.
+ */
+static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
+{
+    const int have_blob = (NULL != dom->kernel_blob);
+
+    if (have_blob && elf_is_elfbinary(dom->kernel_blob))
+       return 0;
+
+    if (verbose)
+    {
+       if (have_blob)
+       {
+           xc_dom_panic(XC_INVALID_KERNEL, "%s: kernel is not an ELF image\n",
+                        __FUNCTION__);
+       }
+       else
+       {
+           xc_dom_panic(XC_INTERNAL_ERROR, "%s: no kernel image loaded\n",
+                        __FUNCTION__);
+       }
+    }
+    return -EINVAL;
+}
+
+/* Loader probe hook: the same check as check_elf_kernel(), but silent. */
+static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
+{
+    const int verbose = 0;
+
+    return check_elf_kernel(dom, verbose);
+}
+
+/*
+ * Lay out (load == 0) or write (load != 0) a BSD-style symbol table that
+ * sits right behind the kernel image.
+ *
+ * The table consists of an int, a copy of the ELF header, a copy of the
+ * section headers and then the contents of the symtab/strtab sections.
+ * The copied headers are rewritten so that section offsets are relative
+ * to the copied ELF header.
+ *
+ * load == 0: work on a scratch buffer, set dom->bsd_symtab_start and move
+ *            dom->kernel_seg.vend to the end of the tables.
+ * load != 0: work on the memory xc_dom_vaddr_to_ptr() returns for
+ *            dom->bsd_symtab_start, store the size in the leading int and
+ *            copy the section data.
+ *
+ * Returns 0 on success and whenever the symtab is skipped (byte-swapped
+ * image, allocation failure, no tables found); -1 if elf_init() rejects
+ * the rebuilt headers.
+ */
+static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
+                                 struct elf_binary *elf, int load)
+{
+    struct elf_binary syms;
+    const elf_shdr *shdr, *shdr2;
+    xen_vaddr_t symtab, maxaddr;
+    char *hdr;
+    size_t size;
+    int h, count, type, i, tables = 0;
+
+    if (elf_swap(elf)) {
+       xc_dom_printf("%s: non-native byte order, bsd symtab not supported\n",
+                     __FUNCTION__);
+       return 0;
+    }
+
+    if (load) {
+       if (!dom->bsd_symtab_start)
+           return 0;
+       size = dom->kernel_seg.vend - dom->bsd_symtab_start;
+       hdr  = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start);
+       *(int *)hdr = size - sizeof(int);
+    } else {
+       size = sizeof(int) + elf_size(elf, elf->ehdr) +
+           elf_shdr_count(elf) * elf_size(elf, shdr);
+       hdr = xc_dom_malloc(dom, size);
+       if (hdr == NULL)
+           return 0;
+       /*
+        * Align using the kernel's elf handle: syms is not initialised
+        * until the elf_init() call further down.
+        */
+       dom->bsd_symtab_start = elf_round_up(elf, dom->kernel_seg.vend);
+    }
+
+    /* copy ELF header and section headers behind the leading int */
+    memcpy(hdr + sizeof(int),
+          elf->image,
+          elf_size(elf, elf->ehdr));
+    memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr),
+          elf->image + elf_uval(elf, elf->ehdr, e_shoff),
+          elf_shdr_count(elf) * elf_size(elf, shdr));
+    /* the copy has no program headers; section headers follow the ehdr */
+    if (elf_64bit(elf)) {
+       Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int));
+       ehdr->e_phoff = 0;
+       ehdr->e_phentsize = 0;
+       ehdr->e_phnum = 0;
+       ehdr->e_shoff = elf_size(elf, elf->ehdr);
+       ehdr->e_shstrndx = SHN_UNDEF;
+    } else {
+       Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int));
+       ehdr->e_phoff = 0;
+       ehdr->e_phentsize = 0;
+       ehdr->e_phnum = 0;
+       ehdr->e_shoff = elf_size(elf, elf->ehdr);
+       ehdr->e_shstrndx = SHN_UNDEF;
+    }
+    if (0 != elf_init(&syms, hdr + sizeof(int), size - sizeof(int)))
+       return -1;
+    if (xc_dom_logfile)
+       elf_set_logfile(&syms, xc_dom_logfile, 1);
+
+    symtab = dom->bsd_symtab_start + sizeof(int);
+    maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) +
+                          elf_shdr_count(&syms) * elf_size(&syms, shdr));
+
+    xc_dom_printf("%s/%s: bsd_symtab_start=%" PRIx64 ", kernel.end=0x%" PRIx64
+                 " -- symtab=0x%" PRIx64 ", maxaddr=0x%" PRIx64 "\n",
+                 __FUNCTION__, load ? "load" : "parse",
+                 dom->bsd_symtab_start, dom->kernel_seg.vend, symtab, maxaddr);
+
+    count = elf_shdr_count(&syms);
+    for (h = 0; h < count; h++)
+    {
+       shdr = elf_shdr_by_index(&syms, h);
+       type = elf_uval(&syms, shdr, sh_type);
+       if (type == SHT_STRTAB)
+       {
+           /* Look for a symtab @i linked to strtab @h. */
+           for (i = 0; i < count; i++) {
+               shdr2 = elf_shdr_by_index(&syms, i);
+               if ((elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) &&
+                   (elf_uval(&syms, shdr2, sh_link) == h))
+                   break;
+           }
+           /* Skip strtab @h if we found no corresponding symtab @i. */
+           if (i == count)
+           {
+               if (elf_64bit(&syms))
+                   *(Elf64_Off*)(&shdr->e64.sh_offset) = 0;
+               else
+                   *(Elf32_Off*)(&shdr->e32.sh_offset) = 0;
+               continue;
+           }
+       }
+
+       if ((type == SHT_STRTAB) || (type == SHT_SYMTAB))
+       {
+           /* Mangled to be based on ELF header location. */
+           if (elf_64bit(&syms))
+               *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab;
+           else
+               *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab;
+           size = elf_uval(&syms, shdr, sh_size);
+           maxaddr = elf_round_up(&syms, maxaddr + size);
+           tables++;
+           xc_dom_printf("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "\n",
+                         __FUNCTION__, h,
+                         type == SHT_SYMTAB ? "symtab" : "strtab",
+                         size, maxaddr);
+
+           if (load) {
+               /* section data comes from the same index in the kernel */
+               shdr2 = elf_shdr_by_index(elf, h);
+               memcpy((void*)elf_section_start(&syms, shdr),
+                      elf_section_start(elf, shdr2),
+                      size);
+           }
+       }
+
+       /* Name is NULL. */
+       if (elf_64bit(&syms))
+           *(Elf64_Half*)(&shdr->e64.sh_name) = 0;
+       else
+           *(Elf32_Word*)(&shdr->e32.sh_name) = 0;
+    }
+
+    if (0 == tables)
+    {
+       xc_dom_printf("%s: no symbol table present\n", __FUNCTION__);
+       dom->bsd_symtab_start = 0;
+       return 0;
+    }
+    if (!load)
+       dom->kernel_seg.vend = maxaddr;
+    return 0;
+}
+
+/*
+ * Loader parser hook: initialise libelf on the kernel blob, extract the
+ * Xen meta information into dom->parms and derive the kernel segment
+ * boundaries and the guest type.
+ *
+ * The elf handle is stored in dom->private_loader for the load hook.
+ * Returns 0 on success, a non-zero error code otherwise.  A failure while
+ * laying out the optional BSD symbol table is not treated as an error.
+ */
+static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf;
+    int rc;
+
+    rc = check_elf_kernel(dom, 1);
+    if (0 != rc)
+       return rc;
+
+    elf = xc_dom_malloc(dom, sizeof(*elf));
+    if (NULL == elf)
+       return -ENOMEM;         /* elf_init() must not see a NULL handle */
+    dom->private_loader = elf;
+    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
+    if (xc_dom_logfile)
+       elf_set_logfile(elf, xc_dom_logfile, 1);
+    if (0 != rc)
+    {
+       xc_dom_panic(XC_INVALID_KERNEL, "%s: corrupted ELF image\n",
+                    __FUNCTION__);
+       return rc;
+    }
+
+    /* Find the section-header strings table. */
+    if (NULL == elf->sec_strtab)
+    {
+       xc_dom_panic(XC_INVALID_KERNEL, "%s: ELF image has no shstrtab\n",
+                    __FUNCTION__);
+       return -EINVAL;
+    }
+
+    /* parse binary and get xen meta info */
+    elf_parse_binary(elf);
+    if (0 != (rc = elf_xen_parse(elf, &dom->parms)))
+       return rc;
+
+    /* find kernel segment */
+    dom->kernel_seg.vstart = dom->parms.virt_kstart;
+    dom->kernel_seg.vend   = dom->parms.virt_kend;
+
+    /* parse pass: may extend kernel_seg.vend to make room for the symtab */
+    if (dom->parms.bsd_symtab)
+       xc_dom_load_elf_symtab(dom, elf, 0);
+
+    dom->guest_type = xc_dom_guest_type(dom, elf);
+    xc_dom_printf("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+                 __FUNCTION__, dom->guest_type,
+                 dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    return 0;
+}
+
+/*
+ * Loader load hook: copy the kernel into the kernel segment using the elf
+ * handle saved by the parser, then write the BSD symbol table if the
+ * kernel asked for one.  Always returns 0.
+ */
+static int xc_dom_load_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *kernel = dom->private_loader;
+
+    kernel->dest = xc_dom_seg_to_ptr(dom, &dom->kernel_seg);
+    elf_load_binary(kernel);
+
+    if (!dom->parms.bsd_symtab)
+       return 0;
+
+    xc_dom_load_elf_symtab(dom, kernel, 1);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Hook table that ties the ELF probe/parse/load functions together. */
+static struct xc_dom_loader elf_loader = {
+    .name = "ELF-generic",
+    .probe = xc_dom_probe_elf_kernel,
+    .parser = xc_dom_parse_elf_kernel,
+    .loader = xc_dom_load_elf_kernel,
+};
+
+/*
+ * Hand the ELF loader to the domain builder core.
+ * NOTE(review): __init presumably marks this as a constructor that runs
+ * when the library is loaded -- confirm against its definition.
+ */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&elf_loader);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_ia64.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_ia64.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,118 @@
+/*
+ * Xen domain builder -- ia64 bits.
+ *
+ * Most architecture-specific code for ia64 goes here.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/ia64.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Put the special pages on the last three pfns of the domain: start info,
+ * xenstore and console, in ascending pfn order.  Always returns 0.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    dom->start_info_pfn = dom->total_pages - 3;
+    dom->xenstore_pfn = dom->total_pages - 2;
+    dom->console_pfn = dom->total_pages - 1;
+    return 0;
+}
+
+/*
+ * Fill the ia64 start_info page and the boot parameter block that
+ * directly follows the start_info structure in the same mapping.
+ *
+ * Ramdisk and command line fields are only written when the domain has a
+ * ramdisk blob / command line.  Always returns 0.
+ */
+static int start_info_ia64(struct xc_dom_image *dom)
+{
+    start_info_ia64_t *start_info =
+       xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    struct xen_ia64_boot_param_ia64 *bp =
+       (struct xen_ia64_boot_param_ia64 *)(start_info + 1);
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    /* guest_type is data, not a format string; also bound the copy */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+            dom->guest_type);
+    start_info->flags = dom->flags;
+    start_info->nr_pages = dom->total_pages;
+    start_info->store_mfn = dom->xenstore_pfn;
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = dom->console_pfn;
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+       start_info->mod_start = dom->ramdisk_seg.vstart;
+       start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+       bp->initrd_start = start_info->mod_start;
+       bp->initrd_size = start_info->mod_len;
+    }
+    if (dom->cmdline)
+    {
+       /* strncpy may leave the buffer unterminated, so terminate it */
+       strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+       start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+       /* address of cmd_line inside the start_info page */
+       bp->command_line = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+           + offsetof(start_info_t, cmd_line);
+    }
+    return 0;
+}
+
+/*
+ * Initialise the ia64 shared info page: zero it, mask event upcalls on
+ * every vcpu and record the start_info pfn.  Always returns 0.
+ */
+static int shared_info_ia64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_ia64_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+       shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+       cpu++;
+    }
+    shinfo->arch.start_info_pfn = dom->start_info_pfn;
+    return 0;
+}
+
+extern unsigned long xc_ia64_fpsr_default(void);
+
+/*
+ * Set up the initial ia64 vcpu context: everything zero except the entry
+ * point (cr_iip), cr_ifs, the default fpsr (ia64 hosts only) and r28,
+ * which gets the address right behind the start_info structure.
+ * Always returns 0.
+ */
+static int vcpu_ia64(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_ia64_t *ctxt = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->flags = 0;
+    /* all necessary bits filled by hypervisor */
+    ctxt->user_regs.cr_ipsr = 0;
+    ctxt->user_regs.cr_iip = dom->parms.virt_entry;
+    ctxt->user_regs.cr_ifs = (uint64_t) 1 << 63;
+#ifdef __ia64__                        /* FIXME */
+    ctxt->user_regs.ar_fpsr = xc_ia64_fpsr_default();
+#endif
+    ctxt->user_regs.r28 = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+       + sizeof(start_info_ia64_t);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Architecture hook table for "xen-3.0-ia64" guests (no page table hooks). */
+static struct xc_dom_arch xc_dom_arch = {
+    .guest_type = "xen-3.0-ia64",
+    .page_shift = PAGE_SHIFT_IA64,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info_ia64,
+    .shared_info = shared_info_ia64,
+    .vcpu = vcpu_ia64,
+};
+
+/*
+ * Hand the ia64 hook table to the domain builder core.
+ * NOTE(review): __init presumably marks this as a constructor that runs
+ * when the library is loaded -- confirm against its definition.
+ */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_powerpc64.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_powerpc64.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,100 @@
+/*
+ * Xen domain builder -- powerpc64 bits.
+ *
+ * Most architecture-specific code for powerpc64 goes here.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Carve the special pages off the top of the low memory area by stepping
+ * dom->low_top down one page at a time: shared_info (pfn not recorded
+ * here), xenstore, console and start info.  Always returns 0.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    /* shared_info */
+    dom->low_top -= 1;
+
+    dom->low_top -= 1;
+    dom->xenstore_pfn = dom->low_top;
+
+    dom->low_top -= 1;
+    dom->console_pfn = dom->low_top;
+
+    dom->low_top -= 1;
+    dom->start_info_pfn = dom->low_top;
+    return 0;
+}
+
+/*
+ * Fill the powerpc64 start_info page: magic string, page count, shared
+ * info address (last page of the domain) and the xenstore/console page
+ * and event channel numbers.  Always returns 0.
+ */
+static int start_info(struct xc_dom_image *dom)
+{
+    start_info_t *si =
+       xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0);
+
+    si->nr_pages = dom->total_pages;
+    si->shared_info = (dom->total_pages - 1) << PAGE_SHIFT;
+    si->store_mfn = dom->xenstore_pfn;
+    /* same field the ia64 and x86 hooks read for the xenstore channel */
+    si->store_evtchn = dom->xenstore_evtchn;
+    si->console.domU.mfn = dom->console_pfn;
+    si->console.domU.evtchn = dom->console_evtchn;
+    return 0;
+}
+
+/* Initialise the powerpc64 shared info page to all zeroes.  Returns 0. */
+static int shared_info(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_t *shared_info = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    return 0;
+}
+
+/*
+ * Set up the initial register state of the powerpc64 boot vcpu.
+ *
+ * FIXME: devtree_addr and kern_addr are not declared anywhere in this
+ * function; they have to be provided before this file can build.
+ *
+ * Always returns 0.
+ */
+static int vcpu(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_t *ctxt = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* fill the register file with a 0x55 pattern, then set what matters */
+    memset(&ctxt->user_regs, 0x55, sizeof(ctxt->user_regs));
+    /* entry point as parsed from the kernel image */
+    ctxt->user_regs.pc = dom->parms.virt_entry;
+    ctxt->user_regs.msr = 0;
+    ctxt->user_regs.gprs[1] = 0; /* Linux uses its own stack */
+    ctxt->user_regs.gprs[3] = devtree_addr;
+    ctxt->user_regs.gprs[4] = kern_addr;
+    ctxt->user_regs.gprs[5] = 0;
+
+    /* There is a buggy kernel that does not zero the "local_paca", so
+     * we must make sure this register is 0 */
+    ctxt->user_regs.gprs[13] = 0;
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Architecture hook table for "xen-3.0-powerpc64" guests.
+ * NOTE(review): page_shift is still the placeholder FIXME and needs a
+ * real value before this file can build.
+ */
+static struct xc_dom_arch xc_dom_arch = {
+    .guest_type = "xen-3.0-powerpc64",
+    .page_shift = FIXME,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info,
+    .shared_info = shared_info,
+    .vcpu = vcpu,
+};
+
+/*
+ * Hand the powerpc64 hook table to the domain builder core.
+ * NOTE(review): __init presumably marks this as a constructor that runs
+ * when the library is loaded -- confirm against its definition.
+ */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_x86.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_x86.c  Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,559 @@
+/*
+ * Xen domain builder -- i386 and x86_64 bits.
+ *
+ * Most architecture-specific code for x86 goes here.
+ *   - prepare page tables.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/e820.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Mask with the low "bits" bits set. */
+#define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
+/* Clear the masked low bits: first address of the aligned region. */
+#define round_down(addr, mask)   ((addr) & ~(mask))
+/* Set the masked low bits: LAST address of the aligned region (inclusive). */
+#define round_up(addr, mask)     ((addr) | (mask))
+
+/*
+ * Count the page tables of one level needed to cover [start, end].
+ *
+ * bits is the number of address bits a single table of that level spans.
+ * 0 means the level is unused (result 0); a value equal to the width of
+ * unsigned long is taken to be the top level, which needs exactly one
+ * table.  Otherwise the range is widened to table-sized boundaries and
+ * the number of tables in it is returned.
+ */
+static inline unsigned long
+nr_page_tables(xen_vaddr_t start, xen_vaddr_t end, unsigned long bits)
+{
+    xen_vaddr_t mask = bits_to_mask(bits);
+    int tables;
+
+    if (0 == bits)
+       return 0;               /* unused */
+
+    if (8 * sizeof(unsigned long) != bits)
+    {
+       /* widen to whole tables, then count them (end is inclusive) */
+       start = round_down(start, mask);
+       end = round_up(end, mask);
+       tables = ((end - start) >> bits) + 1;
+    }
+    else
+    {
+       /* must be pgd, need one */
+       start = 0;
+       end = -1;
+       tables = 1;
+    }
+
+    xc_dom_printf("%s: 0x%016" PRIx64 "/%ld: 0x%016" PRIx64
+                 " -> 0x%016" PRIx64 ", %d table(s)\n",
+                 __FUNCTION__, mask, bits, start, end, tables);
+    return tables;
+}
+
+/*
+ * Work out how many page table pages the initial mapping needs.
+ *
+ * The mapping has to cover everything from dom->parms.virt_base up to the
+ * end of the allocations, plus the page tables themselves and the extra
+ * pages (boot stack if requested, dom->extra_pages).  Because the page
+ * tables are part of the mapped range, the count is iterated until the
+ * tables counted for a candidate end address also fit below that address.
+ *
+ * lN_bits is the number of address bits one table of level N spans
+ * (0 = level unused).  With pae set, one more L2 table is reserved when
+ * the mapping ends below 0xc0000000.
+ *
+ * Results go to dom->pg_l1..pg_l4, dom->pgtables and dom->virt_pgtab_end.
+ * Always returns 0.
+ */
+static int count_pgtables(struct xc_dom_image *dom, int pae,
+                         int l4_bits, int l3_bits, int l2_bits, int l1_bits)
+{
+    int pages, extra_pages;
+    xen_vaddr_t try_virt_end;
+
+    extra_pages = dom->alloc_bootstack ? 1 : 0;
+    extra_pages += dom->extra_pages;
+    pages = extra_pages;
+    for (;;)
+    {
+       /* candidate end: last byte covered by the final L1 table */
+       try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
+                               bits_to_mask(l1_bits));
+       dom->pg_l4 =
+           nr_page_tables(dom->parms.virt_base, try_virt_end, l4_bits);
+       dom->pg_l3 =
+           nr_page_tables(dom->parms.virt_base, try_virt_end, l3_bits);
+       dom->pg_l2 =
+           nr_page_tables(dom->parms.virt_base, try_virt_end, l2_bits);
+       dom->pg_l1 =
+           nr_page_tables(dom->parms.virt_base, try_virt_end, l1_bits);
+       if (pae && try_virt_end < 0xc0000000)
+       {
+           xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n",
+                         __FUNCTION__);
+           dom->pg_l2++;
+       }
+       dom->pgtables = dom->pg_l4 + dom->pg_l3 + dom->pg_l2 + dom->pg_l1;
+       pages = dom->pgtables + extra_pages;
+       /* done once tables + extra pages fit below the candidate end */
+       if (dom->virt_alloc_end + pages * PAGE_SIZE_X86 <= try_virt_end + 1)
+           break;
+    }
+    dom->virt_pgtab_end = try_virt_end + 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* i386 pagetables                                                          */
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
+
+/*
+ * Page table count for non-PAE i386: a top level table spanning all 32
+ * address bits plus the tables below it; the two upper levels are unused.
+ */
+static int count_pgtables_x86_32(struct xc_dom_image *dom)
+{
+    const int pae = 0;
+
+    return count_pgtables(dom, pae, 0, 0, 32, L2_PAGETABLE_SHIFT_I386);
+}
+
+/*
+ * Page table count for PAE i386: a top level table spanning all 32
+ * address bits plus two levels below it; the uppermost level is unused.
+ */
+static int count_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    const int pae = 1;
+
+    return count_pgtables(dom, pae, 0, 32,
+                         L3_PAGETABLE_SHIFT_PAE, L2_PAGETABLE_SHIFT_PAE);
+}
+
+#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
+
+/*
+ * Build the initial two-level (non-PAE) i386 page tables.
+ *
+ * Walks every page from dom->parms.virt_base up to dom->virt_pgtab_end and
+ * maps it to the frame xc_dom_p2m_guest() returns for the pfn at the same
+ * offset from virt_base.  The L2 table is the first page of
+ * dom->pgtables_seg; L1 tables are taken from the pages behind the
+ * dom->pg_l2 L2 pages, a new one each time the previous one is full.
+ * Pages inside the page table segment itself are mapped read-only.
+ * Always returns 0.
+ */
+static int setup_pgtables_x86_32(struct xc_dom_image *dom)
+{
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l2;
+    l2_pgentry_32_t *l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+    l1_pgentry_32_t *l1tab = NULL;
+    unsigned long l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* l1tab == NULL: first page, or the previous L1 table is full */
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_i386(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_i386(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       /* last slot used: force a fresh L1 table on the next page */
+       if (L1_PAGETABLE_ENTRIES_I386 - 1 == l1off)
+           l1tab = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Build the initial three-level PAE i386 page tables.
+ *
+ * Walks every page from dom->parms.virt_base up to dom->virt_pgtab_end and
+ * maps it to the frame xc_dom_p2m_guest() returns for the pfn at the same
+ * offset from virt_base.  The tables live in dom->pgtables_seg in the
+ * order L3, L2 tables, L1 tables; a new L2/L1 table is started whenever
+ * the previous one has had its last slot filled.  Pages inside the page
+ * table segment itself are mapped read-only.
+ *
+ * If the mapping ends at or below 0xc0000000, L3 slot 3 is additionally
+ * pointed at the next unused L2 page.  Always returns 0.
+ */
+static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l3;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l3 + dom->pg_l2;
+    l3_pgentry_64_t *l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+    l2_pgentry_64_t *l2tab = NULL;
+    l1_pgentry_64_t *l1tab = NULL;
+    unsigned long l3off, l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* a NULL table pointer means "start a new table of that level" */
+       if (NULL == l2tab)
+       {
+           /* get L2 tab, make L3 entry */
+           l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+           l3off = l3_table_offset_pae(addr);
+           l3tab[l3off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+           l2pfn++;
+       }
+
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_pae(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           /* last L2 slot used: next L1 table needs a new L2 table */
+           if (L2_PAGETABLE_ENTRIES_PAE - 1 == l2off)
+               l2tab = NULL;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_pae(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       if (L1_PAGETABLE_ENTRIES_PAE - 1 == l1off)
+           l1tab = NULL;
+    }
+
+    /* matches the extra L2 table count_pgtables() reserves for PAE */
+    if (dom->virt_pgtab_end <= 0xc0000000)
+    {
+       xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n", __FUNCTION__);
+       l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+    }
+    return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+
+/* ------------------------------------------------------------------------ */
+/* x86_64 pagetables                                                        */
+
+/*
+ * Page table count for x86_64: four levels, the top one spanning 9 more
+ * address bits than a single L4 entry does.
+ */
+static int count_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    const int pae = 0;
+    const int top_bits = L4_PAGETABLE_SHIFT_X86_64 + 9;
+
+    return count_pgtables(dom, pae, top_bits,
+                         L4_PAGETABLE_SHIFT_X86_64,
+                         L3_PAGETABLE_SHIFT_X86_64,
+                         L2_PAGETABLE_SHIFT_X86_64);
+}
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+/*
+ * Build the initial four-level x86_64 page tables.
+ *
+ * Walks every page from dom->parms.virt_base up to dom->virt_pgtab_end and
+ * maps it to the frame xc_dom_p2m_guest() returns for the pfn at the same
+ * offset from virt_base.  The tables live in dom->pgtables_seg in the
+ * order L4, L3 tables, L2 tables, L1 tables; a new table of a level is
+ * started whenever the previous one has had its last slot filled.  Pages
+ * inside the page table segment itself are mapped read-only.
+ * Always returns 0.
+ */
+static int setup_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    xen_pfn_t l4pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn + dom->pg_l4;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3;
+    xen_pfn_t l1pfn =
+       dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3 + dom->pg_l2;
+    l4_pgentry_64_t *l4tab = xc_dom_pfn_to_ptr(dom, l4pfn, 1);
+    l3_pgentry_64_t *l3tab = NULL;
+    l2_pgentry_64_t *l2tab = NULL;
+    l1_pgentry_64_t *l1tab = NULL;
+    uint64_t l4off, l3off, l2off, l1off;
+    uint64_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* a NULL table pointer means "start a new table of that level" */
+       if (NULL == l3tab)
+       {
+           /* get L3 tab, make L4 entry */
+           l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+           l4off = l4_table_offset_x86_64(addr);
+           l4tab[l4off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+           l3pfn++;
+       }
+
+       if (NULL == l2tab)
+       {
+           /* get L2 tab, make L3 entry */
+           l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+           l3off = l3_table_offset_x86_64(addr);
+           l3tab[l3off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+           /* last L3 slot used: next L2 table needs a new L3 table */
+           if (L3_PAGETABLE_ENTRIES_X86_64 - 1 == l3off)
+               l3tab = NULL;
+           l2pfn++;
+       }
+
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_x86_64(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           /* last L2 slot used: next L1 table needs a new L2 table */
+           if (L2_PAGETABLE_ENTRIES_X86_64 - 1 == l2off)
+               l2tab = NULL;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_x86_64(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       if (L1_PAGETABLE_ENTRIES_X86_64 - 1 == l1off)
+           l1tab = NULL;
+    }
+    return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+#undef L4_PROT
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Allocate the x86 special areas: the phys2mach table (one entry of
+ * arch_hooks->sizeof_pfn bytes per page of the domain), then the start
+ * info, xenstore and console pages, plus a shared info page for
+ * translated guests.  Also requests a boot stack page.
+ *
+ * Returns 0 on success, -1 if the phys2mach segment cannot be allocated.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    size_t p2m_size = dom->total_pages * dom->arch_hooks->sizeof_pfn;
+
+    /* allocate phys2mach table */
+    if (0 != xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0,
+                                 p2m_size))
+       return -1;
+    dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
+
+    /* allocate special pages */
+    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
+    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
+    dom->console_pfn = xc_dom_alloc_page(dom, "console");
+    if (xc_dom_feature_translated(dom))
+       dom->shared_info_pfn = xc_dom_alloc_page(dom, "shared info");
+    dom->alloc_bootstack = 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Fill the start_info page of a 32-bit x86 guest.
+ *
+ * The shared info address is built from dom->shared_info_pfn for
+ * translated guests and from dom->shared_info_mfn otherwise.  Ramdisk and
+ * command line fields are only written when the domain has a ramdisk
+ * blob / command line.  Always returns 0.
+ */
+static int start_info_x86_32(struct xc_dom_image *dom)
+{
+    start_info_x86_32_t *start_info =
+       xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+       xc_dom_feature_translated(dom) ? dom->shared_info_pfn :
+       dom->shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* guest_type is data, not a format string; also bound the copy */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+            dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+       start_info->mod_start = dom->ramdisk_seg.vstart;
+       start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+       /* strncpy may leave the buffer unterminated, so terminate it */
+       strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+       start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Fill the start_info page of a 64-bit x86 guest.
+ *
+ * The shared info address is built from dom->shared_info_pfn for
+ * translated guests and from dom->shared_info_mfn otherwise.  Ramdisk and
+ * command line fields are only written when the domain has a ramdisk
+ * blob / command line.  Always returns 0.
+ */
+static int start_info_x86_64(struct xc_dom_image *dom)
+{
+    start_info_x86_64_t *start_info =
+       xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+       xc_dom_feature_translated(dom) ? dom->shared_info_pfn :
+       dom->shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* guest_type is data, not a format string; also bound the copy */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+            dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+       start_info->mod_start = dom->ramdisk_seg.vstart;
+       start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+       /* strncpy may leave the buffer unterminated, so terminate it */
+       strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+       start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Initialise the shared info page of a 32-bit x86 guest: zero it and mask
+ * event upcalls on every vcpu.  Always returns 0.
+ */
+static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_32_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+       shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+       cpu++;
+    }
+    return 0;
+}
+
+/*
+ * Initialise the shared info page of a 64-bit x86 guest: zero it and mask
+ * event upcalls on every vcpu.  Always returns 0.
+ */
+static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_64_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+       shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+       cpu++;
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Set up the initial vcpu context of a 32-bit x86 guest.
+ *
+ * Starting from an all-zero context this fills in the trap table
+ * selectors, flat kernel segments, entry point, boot stack pointer (top
+ * of the boot stack page), the start info address in esi, the interrupt
+ * flag and cr3.  Kernels with PAE mode 2 or 3 additionally get the
+ * pae_extended_cr3 vm assist.  Always returns 0.
+ */
+static int vcpu_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_32_t *ctxt = ptr;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* start from an all-zero context */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for (vec = 0; vec < 256; vec++)
+    {
+       ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_32;
+       ctxt->trap_ctxt[vec].vector = vec;
+    }
+
+    /* No callback handlers. */
+    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->event_callback_cs = FLAT_KERNEL_CS_X86_32;
+
+    /* segment registers */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
+
+    /* entry point, stack top, start info pointer, flags */
+    ctxt->user_regs.eip = dom->parms.virt_entry;
+    ctxt->user_regs.esp =
+       dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->user_regs.esi =
+       dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+    ctxt->user_regs.eflags = 1 << 9;   /* Interrupt Enable */
+
+    /* the kernel stack is the boot stack as well */
+    ctxt->kernel_sp =
+       dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_32;
+
+    ctxt->flags = VGCF_in_kernel_X86_32;
+    if (dom->parms.pae == 2 /* extended_cr3 */ ||
+       dom->parms.pae == 3 /* bimodal */)
+    {
+       ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+    }
+
+    /* cr3 points at the first page of the page table segment */
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                 __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/*
+ * Set up the initial vcpu context of a 64-bit x86 guest.
+ *
+ * Starting from an all-zero context this fills in the trap table
+ * selectors, flat kernel segments, entry point, boot stack pointer (top
+ * of the boot stack page), the start info address in rsi, the interrupt
+ * flag and cr3.  Always returns 0.
+ */
+static int vcpu_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_64_t *ctxt = ptr;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* start from an all-zero context */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for (vec = 0; vec < 256; vec++)
+    {
+       ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_64;
+       ctxt->trap_ctxt[vec].vector = vec;
+    }
+
+    /* segment registers */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
+
+    /* entry point, stack top, start info pointer, flags */
+    ctxt->user_regs.rip = dom->parms.virt_entry;
+    ctxt->user_regs.rsp =
+       dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->user_regs.rsi =
+       dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+    ctxt->user_regs.rflags = 1 << 9;   /* Interrupt Enable */
+
+    /* the kernel stack is the boot stack as well */
+    ctxt->kernel_sp =
+       dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_64;
+
+    ctxt->flags = VGCF_in_kernel_X86_64;
+
+    /* cr3 points at the first page of the page table segment */
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                 __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/*
+ * Arch hook table for guest type "xen-3.0-x86_32".  Uses the plain
+ * x86_32 page-table hooks together with the x86_32 start_info,
+ * shared_info and vcpu hooks; sizeof_pfn is 4 (presumably bytes).
+ */
+static struct xc_dom_arch xc_dom_32 = {
+    .guest_type = "xen-3.0-x86_32",
+    .page_shift = PAGE_SHIFT_X86,
+    .sizeof_pfn = 4,
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_32,
+    .setup_pgtables = setup_pgtables_x86_32,
+    .start_info = start_info_x86_32,
+    .shared_info = shared_info_x86_32,
+    .vcpu = vcpu_x86_32,
+};
+static struct xc_dom_arch xc_dom_32_pae = {   /* hook table for guest type "xen-3.0-x86_32p" */
+    .guest_type = "xen-3.0-x86_32p",
+    .page_shift = PAGE_SHIFT_X86,
+    .sizeof_pfn = 4,
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_32_pae,   /* _pae page-table hooks ... */
+    .setup_pgtables = setup_pgtables_x86_32_pae,   /* ... are the only hooks that differ from xc_dom_32 */
+    .start_info = start_info_x86_32,
+    .shared_info = shared_info_x86_32,
+    .vcpu = vcpu_x86_32,
+};
+/*
+ * Arch hook table for guest type "xen-3.0-x86_64".  Uses the x86_64
+ * page-table, start_info, shared_info and vcpu hooks; sizeof_pfn is 8
+ * (presumably bytes).  alloc_magic_pages is shared with the 32-bit tables.
+ */
+static struct xc_dom_arch xc_dom_64 = {
+    .guest_type = "xen-3.0-x86_64",
+    .page_shift = PAGE_SHIFT_X86,
+    .sizeof_pfn = 8,
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_64,
+    .setup_pgtables = setup_pgtables_x86_64,
+    .start_info = start_info_x86_64,
+    .shared_info = shared_info_x86_64,
+    .vcpu = vcpu_x86_64,
+};
+/*
+ * Hand the three x86 hook tables (xc_dom_32, xc_dom_32_pae, xc_dom_64)
+ * to xc_dom_register_arch_hooks(), in that order.
+ * NOTE(review): marked __init, so presumably run automatically at load
+ * time rather than called explicitly -- confirm against the __init macro.
+ */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_32);
+    xc_dom_register_arch_hooks(&xc_dom_32_pae);
+    xc_dom_register_arch_hooks(&xc_dom_64);
+}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] libxc domain builder rewrite, core bits., Xen patchbot-unstable <=