# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1233280453 -32400
# Node ID 254021201b1bb8a5d937a84d5184ceacdf169a98
# Parent  79f259a26a11cb57617982ce3bc829cdd76fff46
# Parent  916ca93a8658084f041793bf51944e5d97f907ed
merge with xen-unstable.hg
---
 stubdom/stubdom-dm                            |    7 
 tools/python/xen/xend/image.py                |   13 +
 tools/python/xen/xend/server/blkif.py         |   14 +
 tools/python/xen/xm/create.py                 |    4 
 xen/arch/ia64/xen/domain.c                    |   15 +
 xen/arch/ia64/xen/mm.c                        |    2 
 xen/arch/ia64/xen/xenmem.c                    |    2 
 xen/arch/x86/acpi/power.c                     |   91 ++++-----
 xen/arch/x86/bzimage.c                        |    4 
 xen/arch/x86/domain.c                         |   47 +++-
 xen/arch/x86/e820.c                           |  105 ++++++----
 xen/arch/x86/hvm/svm/vmcb.c                   |    2 
 xen/arch/x86/mm/p2m.c                         |  259 +++++++++++++-------------
 xen/arch/x86/setup.c                          |   13 -
 xen/arch/x86/smpboot.c                        |    4 
 xen/arch/x86/tboot.c                          |  220 +++++++++++++++++++---
 xen/common/domain.c                           |   10 -
 xen/common/grant_table.c                      |   27 --
 xen/common/page_alloc.c                       |   19 -
 xen/common/trace.c                            |    2 
 xen/common/xenoprof.c                         |    2 
 xen/common/xmalloc_tlsf.c                     |    8 
 xen/drivers/char/console.c                    |    2 
 xen/drivers/char/serial.c                     |    2 
 xen/drivers/passthrough/vtd/dmar.c            |   11 +
 xen/include/asm-x86/e820.h                    |    3 
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |    2 
 xen/include/asm-x86/mm.h                      |    9 
 xen/include/asm-x86/tboot.h                   |   58 ++++-
 xen/include/xen/domain.h                      |    4 
 xen/include/xen/grant_table.h                 |    3 
 xen/include/xen/mm.h                          |    4 
 32 files changed, 618 insertions(+), 350 deletions(-)
diff -r 79f259a26a11 -r 254021201b1b stubdom/stubdom-dm
--- a/stubdom/stubdom-dm        Wed Jan 28 13:06:45 2009 +0900
+++ b/stubdom/stubdom-dm        Fri Jan 30 10:54:13 2009 +0900
@@ -15,6 +15,7 @@ vncviewer=0
 vncviewer=0
 vncpid=
 extra=
+videoram=4
 while [ "$#" -gt 0 ];
 do
     if [ "$#" -ge 2 ];
@@ -38,6 +39,10 @@ do
                 extra="$extra -loadvm $2";
                 shift
                 ;;
+           -videoram)
+               videoram="$2"
+               shift
+               ;;
        esac
     fi
     case "$1" in
@@ -72,7 +77,7 @@ do
        sleep 1
 done
 
-creation="xm create -c $domname-dm target=$domid memory=32 extra=\"$extra\""
+creation="xm create -c $domname-dm target=$domid memory=32 videoram=$videoram extra=\"$extra\""
 
 (while true ; do sleep 60 ; done) | /bin/sh -c "$creation" &
 #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
diff -r 79f259a26a11 -r 254021201b1b tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Wed Jan 28 13:06:45 2009 +0900
+++ b/tools/python/xen/xend/image.py    Fri Jan 30 10:54:13 2009 +0900
@@ -633,6 +633,8 @@ class LinuxImageHandler(ImageHandler):
 
     def configure(self, vmConfig):
         ImageHandler.configure(self, vmConfig)
+        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
+        self.is_stubdom = (self.kernel.find('stubdom') >= 0)
 
     def buildDomain(self):
         store_evtchn = self.vm.getStorePort()
@@ -663,6 +665,17 @@ class LinuxImageHandler(ImageHandler):
                               features       = self.vm.getFeatures(),
                               flags          = self.flags,
                               vhpt           = self.vhpt)
+
+    def getRequiredAvailableMemory(self, mem_kb):
+        if self.is_stubdom :
+            mem_kb += self.vramsize
+        return mem_kb
+
+    def getRequiredInitialReservation(self):
+        return self.vm.getMemoryTarget()
+
+    def getRequiredMaximumReservation(self):
+        return self.vm.getMemoryMaximum()
 
     def parseDeviceModelArgs(self, vmConfig):
         ret = ImageHandler.parseDeviceModelArgs(self, vmConfig)
diff -r 79f259a26a11 -r 254021201b1b tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Jan 28 13:06:45 2009 +0900
+++ b/tools/python/xen/xend/server/blkif.py     Fri Jan 30 10:54:13 2009 +0900
@@ -18,6 +18,7 @@
 
 import re
 import string
+import os
 
 from xen.util import blkif
 import xen.util.xsm.xsm as security
@@ -34,6 +35,13 @@ class BlkifController(DevController):
         """Create a block device controller.
         """
         DevController.__init__(self, vm)
+
+    def _isValidProtocol(self, protocol):
+        if protocol in ('phy', 'file', 'tap'):
+            return True
+
+        return os.access('/etc/xen/scripts/block-%s' % protocol, os.X_OK)
+
 
     def getDeviceDetails(self, config):
         """@see DevController.getDeviceDetails"""
@@ -56,10 +64,8 @@ class BlkifController(DevController):
         else:
             try:
                 (typ, params) = string.split(uname, ':', 1)
-                if typ not in ('phy', 'file', 'tap'):
-                    raise VmError(
-                        'Block device must have "phy", "file" or "tap" '
-                        'specified to type')
+                if not self._isValidProtocol(typ):
+                    raise VmError('Block device type "%s" is invalid.' % typ)
             except ValueError:
                 raise VmError(
                     'Block device must have physical details specified')
diff -r 79f259a26a11 -r 254021201b1b tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Wed Jan 28 13:06:45 2009 +0900
+++ b/tools/python/xen/xm/create.py     Fri Jan 30 10:54:13 2009 +0900
@@ -635,6 +635,8 @@ def configure_image(vals):
     if vals.root:
         cmdline_root = strip('root=', vals.root)
         config_image.append(['root', cmdline_root])
+    if vals.videoram:
+        config_image.append(['videoram', vals.videoram])
     if vals.extra:
         config_image.append(['args', vals.extra])
 
@@ -884,7 +886,7 @@ def configure_hvm(config_image, vals):
     """Create the config for HVM devices.
     """
     args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
-             'localtime', 'serial', 'stdvga', 'videoram', 'isa', 'nographic', 'soundhw',
+             'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
diff -r 79f259a26a11 -r 254021201b1b xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/ia64/xen/domain.c        Fri Jan 30 10:54:13 2009 +0900
@@ -405,6 +405,16 @@ void relinquish_vcpu_resources(struct vc
        kill_timer(&v->arch.hlt_timer);
 }
 
+struct domain *alloc_domain_struct(void)
+{
+    return xmalloc(struct domain);
+}
+
+void free_domain_struct(struct domain *d)
+{
+    xfree(d);
+}
+
 struct vcpu *alloc_vcpu_struct(void)
 {
        struct page_info *page;
@@ -509,7 +519,7 @@ int vcpu_late_initialise(struct vcpu *v)
 
        /* Create privregs page. */
        order = get_order_from_shift(XMAPPEDREGS_SHIFT);
-       v->arch.privregs = alloc_xenheap_pages(order);
+       v->arch.privregs = alloc_xenheap_pages(order, 0);
        if (v->arch.privregs == NULL)
                return -ENOMEM;
        BUG_ON(v->arch.privregs == NULL);
@@ -578,7 +588,8 @@ int arch_domain_create(struct domain *d,
 #endif
        if (tlb_track_create(d) < 0)
                goto fail_nomem1;
-       d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
+       d->shared_info = alloc_xenheap_pages(
+               get_order_from_shift(XSI_SHIFT), 0);
        if (d->shared_info == NULL)
                goto fail_nomem;
        BUG_ON(d->shared_info == NULL);
diff -r 79f259a26a11 -r 254021201b1b xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/ia64/xen/mm.c    Fri Jan 30 10:54:13 2009 +0900
@@ -3076,7 +3076,7 @@ void *pgtable_quicklist_alloc(void)
         clear_page(p);
         return p;
     }
-    p = alloc_xenheap_pages(0);
+    p = alloc_xenheap_page();
     if (p) {
         clear_page(p);
         /*
diff -r 79f259a26a11 -r 254021201b1b xen/arch/ia64/xen/xenmem.c
--- a/xen/arch/ia64/xen/xenmem.c        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/ia64/xen/xenmem.c        Fri Jan 30 10:54:13 2009 +0900
@@ -64,7 +64,7 @@ paging_init (void)
        mpt_table_size = max_page * sizeof(unsigned long);
        mpt_order = get_order(mpt_table_size);
        ASSERT(mpt_order <= MAX_ORDER);
-       if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
+       if ((mpt_table = alloc_xenheap_pages(mpt_order, 0)) == NULL)
                panic("Not enough memory to bootstrap Xen.\n");
 
        printk("machine to physical table: 0x%lx mpt_table_size 0x%lx\n"
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/acpi/power.c Fri Jan 30 10:54:13 2009 +0900
@@ -129,20 +129,15 @@ static void acpi_sleep_prepare(u32 state
 
     wakeup_vector_va = __acpi_map_table(
         acpi_sinfo.wakeup_vector, sizeof(uint64_t));
+
+    /* TBoot will set resume vector itself (when it is safe to do so). */
+    if ( tboot_in_measured_env() )
+        return;
+
     if ( acpi_sinfo.vector_width == 32 )
-    {
-            *(uint32_t *)wakeup_vector_va =
-                tboot_in_measured_env() ?
-                (uint32_t)g_tboot_shared->s3_tb_wakeup_entry :
-                (uint32_t)bootsym_phys(wakeup_start);
-    }
+        *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
     else
-    {
-            *(uint64_t *)wakeup_vector_va =
-                tboot_in_measured_env() ?
-                (uint64_t)g_tboot_shared->s3_tb_wakeup_entry :
-                (uint64_t)bootsym_phys(wakeup_start);
-    }
+        *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
 }
 
 static void acpi_sleep_post(u32 state) {}
@@ -279,37 +274,47 @@ static int acpi_get_wake_status(void)
 
 static void tboot_sleep(u8 sleep_state)
 {
-   uint32_t shutdown_type;
-
-   g_tboot_shared->acpi_sinfo.pm1a_cnt =
-                           (uint16_t)acpi_sinfo.pm1a_cnt_blk.address;
-   g_tboot_shared->acpi_sinfo.pm1b_cnt =
-                           (uint16_t)acpi_sinfo.pm1b_cnt_blk.address;
-   g_tboot_shared->acpi_sinfo.pm1a_evt =
-                           (uint16_t)acpi_sinfo.pm1a_evt_blk.address;
-   g_tboot_shared->acpi_sinfo.pm1b_evt =
-                           (uint16_t)acpi_sinfo.pm1b_evt_blk.address;
-   g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
-   g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
-
-   switch ( sleep_state )
-   {
-       case ACPI_STATE_S3:
-           shutdown_type = TB_SHUTDOWN_S3;
-           g_tboot_shared->s3_k_wakeup_entry =
-               (uint32_t)bootsym_phys(wakeup_start);
-           break;
-       case ACPI_STATE_S4:
-           shutdown_type = TB_SHUTDOWN_S4;
-           break;
-       case ACPI_STATE_S5:
-           shutdown_type = TB_SHUTDOWN_S5;
-           break;
-       default:
-           return;
-   }
-
-   tboot_shutdown(shutdown_type);
+    uint32_t shutdown_type;
+
+#define TB_COPY_GAS(tbg, g)             \
+    tbg.space_id = g.space_id;          \
+    tbg.bit_width = g.bit_width;        \
+    tbg.bit_offset = g.bit_offset;      \
+    tbg.access_width = g.access_width;  \
+    tbg.address = g.address;
+
+    /* sizes are not same (due to packing) so copy each one */
+    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_cnt_blk,
+                acpi_sinfo.pm1a_cnt_blk);
+    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_cnt_blk,
+                acpi_sinfo.pm1b_cnt_blk);
+    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_evt_blk,
+                acpi_sinfo.pm1a_evt_blk);
+    TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_evt_blk,
+                acpi_sinfo.pm1b_evt_blk);
+    g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
+    g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
+    g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector;
+    g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width;
+    g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector =
+                                              bootsym_phys(wakeup_start);
+
+    switch ( sleep_state )
+    {
+        case ACPI_STATE_S3:
+            shutdown_type = TB_SHUTDOWN_S3;
+            break;
+        case ACPI_STATE_S4:
+            shutdown_type = TB_SHUTDOWN_S4;
+            break;
+        case ACPI_STATE_S5:
+            shutdown_type = TB_SHUTDOWN_S5;
+            break;
+        default:
+            return;
+    }
+
+    tboot_shutdown(shutdown_type);
 }
          
 /* System is really put into sleep state by this stub */
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/bzimage.c
--- a/xen/arch/x86/bzimage.c    Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/bzimage.c    Fri Jan 30 10:54:13 2009 +0900
@@ -110,7 +110,7 @@ static  __init int perform_gunzip(char *
 
     window = (unsigned char *)output;
 
-    free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER);
+    free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER, 0);
     free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER);
 
     inbuf = (unsigned char *)image;
@@ -198,7 +198,7 @@ int __init bzimage_headroom(char *image_
 
     err = bzimage_check(hdr, image_length);
     if (err < 1)
-        return err;
+        return 0;
 
     img = image_start + (hdr->setup_sects+1) * 512;
     img += hdr->payload_offset;
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/domain.c     Fri Jan 30 10:54:13 2009 +0900
@@ -162,17 +162,43 @@ void dump_pageframe_info(struct domain *
     }
 }
 
+struct domain *alloc_domain_struct(void)
+{
+    struct domain *d;
+    /*
+     * We pack the MFN of the domain structure into a 32-bit field within
+     * the page_info structure. Hence the MEMF_bits() restriction.
+     */
+    d = alloc_xenheap_pages(
+        get_order_from_bytes(sizeof(*d)), MEMF_bits(32 + PAGE_SHIFT));
+    if ( d != NULL )
+        memset(d, 0, sizeof(*d));
+    return d;
+}
+
+void free_domain_struct(struct domain *d)
+{
+    free_xenheap_pages(d, get_order_from_bytes(sizeof(*d)));
+}
+
 struct vcpu *alloc_vcpu_struct(void)
 {
     struct vcpu *v;
-    if ( (v = xmalloc(struct vcpu)) != NULL )
+    /*
+     * This structure contains embedded PAE PDPTEs, used when an HVM guest
+     * runs on shadow pagetables outside of 64-bit mode. In this case the CPU
+     * may require that the shadow CR3 points below 4GB, and hence the whole
+     * structure must satisfy this restriction. Thus we specify MEMF_bits(32).
+     */
+    v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), MEMF_bits(32));
+    if ( v != NULL )
         memset(v, 0, sizeof(*v));
     return v;
 }
 
 void free_vcpu_struct(struct vcpu *v)
 {
-    xfree(v);
+    free_xenheap_pages(v, get_order_from_bytes(sizeof(*v)));
 }
 
 #ifdef CONFIG_COMPAT
@@ -357,7 +383,7 @@ int arch_domain_create(struct domain *d,
     INIT_LIST_HEAD(&d->arch.relmem_list);
 
     pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
-    d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
+    d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
     if ( d->arch.mm_perdomain_pt == NULL )
         goto fail;
     memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
@@ -405,17 +431,12 @@ int arch_domain_create(struct domain *d,
         if ( d->arch.ioport_caps == NULL )
             goto fail;
 
-#ifdef __i386__
-        if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+        /*
+         * The shared_info machine address must fit in a 32-bit field within a
+         * 32-bit guest's start_info structure. Hence we specify MEMF_bits(32).
+         */
+        if ( (d->shared_info = alloc_xenheap_pages(0, MEMF_bits(32))) == NULL )
             goto fail;
-#else
-        pg = alloc_domheap_page(
-            NULL, MEMF_node(domain_to_node(d)) | MEMF_bits(32));
-        if ( pg == NULL )
-            goto fail;
-        pg->count_info |= PGC_xen_heap;
-        d->shared_info = page_to_virt(pg);
-#endif
 
         clear_page(d->shared_info);
         share_xen_page_with_guest(
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/e820.c
--- a/xen/arch/x86/e820.c       Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/e820.c       Fri Jan 30 10:54:13 2009 +0900
@@ -391,8 +391,9 @@ static void __init machine_specific_memo
     reserve_dmi_region();
 }
 
-/* Reserve RAM area (@s,@e) in the specified e820 map. */
-int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
+int __init e820_change_range_type(
+    struct e820map *e820, uint64_t s, uint64_t e,
+    uint32_t orig_type, uint32_t new_type)
 {
     uint64_t rs = 0, re = 0;
     int i;
@@ -406,55 +407,79 @@ int __init reserve_e820_ram(struct e820m
             break;
     }
 
-    if ( (i == e820->nr_map) || (e820->map[i].type != E820_RAM) )
+    if ( (i == e820->nr_map) || (e820->map[i].type != orig_type) )
         return 0;
 
     if ( (s == rs) && (e == re) )
     {
-        /* Complete excision. */
-        memmove(&e820->map[i], &e820->map[i+1],
-                (e820->nr_map-i-1) * sizeof(e820->map[0]));
-        e820->nr_map--;
-    }
-    else if ( s == rs )
-    {
-        /* Truncate start. */
-        e820->map[i].addr += e - s;
-        e820->map[i].size -= e - s;
-    }
-    else if ( e == re )
-    {
-        /* Truncate end. */
-        e820->map[i].size -= e - s;
-    }
-    else if ( e820->nr_map < ARRAY_SIZE(e820->map) )
-    {
-        /* Split in two. */
+        e820->map[i].type = new_type;
+    }
+    else if ( (s == rs) || (e == re) )
+    {
+        if ( (e820->nr_map + 1) > ARRAY_SIZE(e820->map) )
+            goto overflow;
+
         memmove(&e820->map[i+1], &e820->map[i],
                 (e820->nr_map-i) * sizeof(e820->map[0]));
         e820->nr_map++;
+
+        if ( s == rs )
+        {
+            e820->map[i].size = e - s;
+            e820->map[i].type = new_type;
+            e820->map[i+1].addr = e;
+            e820->map[i+1].size = re - e;
+        }
+        else
+        {
+            e820->map[i].size = s - rs;
+            e820->map[i+1].addr = s;
+            e820->map[i+1].size = e - s;
+            e820->map[i+1].type = new_type;
+        }
+    }
+    else if ( e820->nr_map+1 < ARRAY_SIZE(e820->map) )
+    {
+        if ( (e820->nr_map + 2) > ARRAY_SIZE(e820->map) )
+            goto overflow;
+
+        memmove(&e820->map[i+2], &e820->map[i],
+                (e820->nr_map-i) * sizeof(e820->map[0]));
+        e820->nr_map += 2;
+
         e820->map[i].size = s - rs;
-        i++;
-        e820->map[i].addr = e;
-        e820->map[i].size = re - e;
-    }
-    else
-    {
-        /* e820map is at maximum size. We have to leak some space. */
-        if ( (s - rs) > (re - e) )
-        {
-            printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", e, re);
-            e820->map[i].size = s - rs;
-        }
-        else
-        {
-            printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", rs, s);
-            e820->map[i].addr = e;
-            e820->map[i].size = re - e;
-        }
+        e820->map[i+1].addr = s;
+        e820->map[i+1].size = e - s;
+        e820->map[i+1].type = new_type;
+        e820->map[i+2].addr = e;
+        e820->map[i+2].size = re - e;
+    }
+
+    /* Finally, look for any opportunities to merge adjacent e820 entries. */
+    for ( i = 0; i < (e820->nr_map - 1); i++ )
+    {
+        if ( (e820->map[i].type != e820->map[i+1].type) ||
+             ((e820->map[i].addr + e820->map[i].size) != e820->map[i+1].addr) )
+            continue;
+        e820->map[i].size += e820->map[i+1].size;
+        memmove(&e820->map[i+1], &e820->map[i+2],
+                (e820->nr_map-i-2) * sizeof(e820->map[0]));
+        e820->nr_map--;
+        i--;
     }
 
     return 1;
+
+ overflow:
+    printk("Overflow in e820 while reserving region %"PRIx64"-%"PRIx64"\n",
+           s, e);
+    return 0;
+}
+
+/* Set E820_RAM area (@s,@e) as RESERVED in specified e820 map. */
+int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
+{
+    return e820_change_range_type(e820, s, e, E820_RAM, E820_RESERVED);
 }
 
 unsigned long __init init_e820(
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Fri Jan 30 10:54:13 2009 +0900
@@ -138,7 +138,7 @@ static int construct_vmcb(struct vcpu *v
                             CR_INTERCEPT_CR8_WRITE);
 
     /* I/O and MSR permission bitmaps. */
-    arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
+    arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
     if ( arch_svm->msrpm == NULL )
         return -ENOMEM;
     memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/mm/p2m.c     Fri Jan 30 10:54:13 2009 +0900
@@ -713,155 +713,167 @@ p2m_pod_dump_data(struct domain *d)
 
 #define superpage_aligned(_x)  (((_x)&((1<<9)-1))==0)
 
-/* Must be called w/ p2m lock held, page_alloc lock not held */
+/* Search for all-zero superpages to be reclaimed as superpages for the
+ * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
 static int
 p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
 {
-    mfn_t mfns[1<<9];
-    p2m_type_t types[1<<9];
-    unsigned long * map[1<<9] = { NULL };
-    int ret=0, reset = 0, reset_max = 0;
+    mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
+    p2m_type_t type, type0 = 0;
+    unsigned long * map = NULL;
+    int ret=0, reset = 0;
     int i, j;
+    int max_ref = 1;
 
     if ( !superpage_aligned(gfn) )
         goto out;
+
+    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
+    if ( paging_mode_shadow(d) )
+        max_ref++;
 
     /* Look up the mfns, checking to make sure they're the same mfn
      * and aligned, and mapping them. */
     for ( i=0; i<(1<<9); i++ )
     {
-        mfns[i] = gfn_to_mfn_query(d, gfn + i, types + i);
+        
+        mfn = gfn_to_mfn_query(d, gfn + i, &type);
+
+        if ( i == 0 )
+        {
+            mfn0 = mfn;
+            type0 = type;
+        }
 
         /* Conditions that must be met for superpage-superpage:
          * + All gfns are ram types
          * + All gfns have the same type
+         * + All of the mfns are allocated to a domain
          * + None of the mfns are used as pagetables
          * + The first mfn is 2-meg aligned
          * + All the other mfns are in sequence
+         * Adding for good measure:
+         * + None of the mfns are likely to be mapped elsewhere (refcount
+         *   2 or less for shadow, 1 for hap)
          */
+        if ( !p2m_is_ram(type)
+             || type != type0
+             || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
+             || ( (mfn_to_page(mfn)->count_info & PGC_page_table) != 0 )
+             || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
+             || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
+                   || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
+            goto out;
+    }
+
+    /* Now, do a quick check to see if it may be zero before unmapping. */
+    for ( i=0; i<(1<<9); i++ )
+    {
+        /* Quick zero-check */
+        map = map_domain_page(mfn_x(mfn0) + i);
+
+        for ( j=0; j<16; j++ )
+            if( *(map+j) != 0 )
+                break;
+
+        unmap_domain_page(map);
+
+        if ( j < 16 )
+            goto out;
+
+    }
+
+    /* Try to remove the page, restoring old mapping if it fails. */
+    set_p2m_entry(d, gfn,
+                  _mfn(POPULATE_ON_DEMAND_MFN), 9,
+                  p2m_populate_on_demand);
+
+    /* Make none of the MFNs are used elsewhere... for example, mapped
+     * via the grant table interface, or by qemu.  Allow one refcount for
+     * being allocated to the domain. */
+    for ( i=0; i < (1<<9); i++ )
+    {
+        mfn = _mfn(mfn_x(mfn0) + i);
+        if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
+        {
+            reset = 1;
+            goto out_reset;
+        }
+    }
+
+    /* Finally, do a full zero-check */
+    for ( i=0; i < (1<<9); i++ )
+    {
+        map = map_domain_page(mfn_x(mfn0) + i);
+
+        for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
+            if( *(map+j) != 0 )
+            {
+                reset = 1;
+                break;
+            }
+
+        unmap_domain_page(map);
+
+        if ( reset )
+            goto out_reset;
+    }
+
+    /* Finally!  We've passed all the checks, and can add the mfn superpage
+     * back on the PoD cache, and account for the new p2m PoD entries */
+    p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
+    d->arch.p2m->pod.entry_count += (1<<9);
+
+out_reset:
+    if ( reset )
+        set_p2m_entry(d, gfn, mfn0, 9, type0);
+    
+out:
+    return ret;
+}
+
+static void
+p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
+{
+    mfn_t mfns[count];
+    p2m_type_t types[count];
+    unsigned long * map[count];
+
+    int i, j;
+    int max_ref = 1;
+
+    /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
+    if ( paging_mode_shadow(d) )
+        max_ref++;
+
+    /* First, get the gfn list, translate to mfns, and map the pages. */
+    for ( i=0; i<count; i++ )
+    {
+        mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
+        /* If this is ram, and not a pagetable, and probably not mapped
+           elsewhere, map it; otherwise, skip. */
         if ( p2m_is_ram(types[i])
-             && types[i] == types[0]
-             && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 )
-             && ( ( i == 0 && superpage_aligned(mfn_x(mfns[0])) )
-                  || ( i != 0 && mfn_x(mfns[i]) == mfn_x(mfns[0]) + i ) ) )
+             && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 ) 
+             && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 ) 
+             && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
             map[i] = map_domain_page(mfn_x(mfns[i]));
         else
-            goto out_unmap;
-    }
-
-    /* Now, do a quick check to see if it may be zero before unmapping. */
-    for ( i=0; i<(1<<9); i++ )
-    {
+            map[i] = NULL;
+    }
+
+    /* Then, go through and check for zeroed pages, removing write permission
+     * for those with zeroes. */
+    for ( i=0; i<count; i++ )
+    {
+        if(!map[i])
+            continue;
+
         /* Quick zero-check */
         for ( j=0; j<16; j++ )
             if( *(map[i]+j) != 0 )
                 break;
 
         if ( j < 16 )
-            goto out_unmap;
-
-    }
-
-    /* Try to remove the page, restoring old mapping if it fails. */
-    reset_max = 1<<9;
-    set_p2m_entry(d, gfn,
-                  _mfn(POPULATE_ON_DEMAND_MFN), 9,
-                  p2m_populate_on_demand);
-
-    if ( (mfn_to_page(mfns[0])->u.inuse.type_info & PGT_count_mask) != 0 )
-    {
-        reset = 1;
-        goto out_reset;
-    }
-
-    /* Timing here is important.  We need to make sure not to reclaim
-     * a page which has been grant-mapped to another domain.  But we
-     * can't grab the grant table lock, because we may be invoked from
-     * the grant table code!  So we first remove the page from the
-     * p2m, then check to see if the gpfn has been granted.  Once this
-     * gpfn is marked PoD, any future gfn_to_mfn() call will block
-     * waiting for the p2m lock.  If we find that it has been granted, we
-     * simply restore the old value.
-     */
-    if ( gnttab_is_granted(d, gfn, 9) )
-    {
-        printk("gfn contains grant table %lx\n", gfn);
-        reset = 1;
-        goto out_reset;
-    }
-
-    /* Finally, do a full zero-check */
-    for ( i=0; i < (1<<9); i++ )
-    {
-        for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
-            if( *(map[i]+j) != 0 )
-            {
-                reset = 1;
-                break;
-            }
-
-        if ( reset )
-            goto out_reset;
-    }
-
-    /* Finally!  We've passed all the checks, and can add the mfn superpage
-     * back on the PoD cache, and account for the new p2m PoD entries */
-    p2m_pod_cache_add(d, mfn_to_page(mfns[0]), 9);
-    d->arch.p2m->pod.entry_count += (1<<9);
-
-out_reset:
-    if ( reset )
-    {
-        if (reset_max == (1<<9) )
-            set_p2m_entry(d, gfn, mfns[0], 9, types[0]);
-        else
-            for ( i=0; i<reset_max; i++)
-                set_p2m_entry(d, gfn + i, mfns[i], 0, types[i]);
-    }
-    
-out_unmap:
-    for ( i=0; i<(1<<9); i++ )
-        if ( map[i] )
-            unmap_domain_page(map[i]);
-out:
-    return ret;
-}
-
-static void
-p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
-{
-    mfn_t mfns[count];
-    p2m_type_t types[count];
-    unsigned long * map[count];
-
-    int i, j;
-
-    /* First, get the gfn list, translate to mfns, and map the pages. */
-    for ( i=0; i<count; i++ )
-    {
-        mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
-        /* If this is ram, and not a pagetable, map it; otherwise,
-         * skip. */
-        if ( p2m_is_ram(types[i])
-             && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 ) )
-            map[i] = map_domain_page(mfn_x(mfns[i]));
-        else
-            map[i] = NULL;
-    }
-
-    /* Then, go through and check for zeroed pages, removing write permission
-     * for those with zeroes. */
-    for ( i=0; i<count; i++ )
-    {
-        if(!map[i])
-            continue;
-
-        /* Quick zero-check */
-        for ( j=0; j<16; j++ )
-            if( *(map[i]+j) != 0 )
-                break;
-
-        if ( j < 16 )
         {
             unmap_domain_page(map[i]);
             map[i] = NULL;
@@ -873,7 +885,9 @@ p2m_pod_zero_check(struct domain *d, uns
                       _mfn(POPULATE_ON_DEMAND_MFN), 0,
                       p2m_populate_on_demand);
 
-        if ( (mfn_to_page(mfns[i])->u.inuse.type_info & PGT_count_mask) != 0 )
+        /* See if the page was successfully unmapped.  (Allow one refcount
+         * for being allocated to a domain.) */
+        if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
         {
             unmap_domain_page(map[i]);
             map[i] = NULL;
@@ -896,8 +910,7 @@ p2m_pod_zero_check(struct domain *d, uns
 
         /* See comment in p2m_pod_zero_check_superpage() re gnttab
          * check timing.  */
-        if ( j < PAGE_SIZE/sizeof(*map[i])
-             || gnttab_is_granted(d, gfns[i], 0) )
+        if ( j < PAGE_SIZE/sizeof(*map[i]) )
         {
             set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
             continue;
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/setup.c      Fri Jan 30 10:54:13 2009 +0900
@@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb
     unsigned int initrdidx = 1;
     multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
-    unsigned long nr_pages, modules_length, modules_headroom;
+    unsigned long nr_pages, modules_length, modules_headroom = -1;
     unsigned long allocator_bitmap_end;
     int i, e820_warn = 0, bytes = 0;
     struct ns16550_defaults ns16550 = {
@@ -617,9 +617,6 @@ void __init __start_xen(unsigned long mb
      * x86/64, we relocate Xen to higher memory.
      */
     modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
-    modules_headroom = bzimage_headroom(
-        (char *)(unsigned long)mod[0].mod_start,
-        (unsigned long)(mod[0].mod_end - mod[0].mod_start));
 
     for ( i = boot_e820.nr_map-1; i >= 0; i-- )
     {
@@ -724,6 +721,11 @@ void __init __start_xen(unsigned long mb
         }
 #endif
 
+        if ( modules_headroom == -1 )
+            modules_headroom = bzimage_headroom(
+                      (char *)(unsigned long)mod[0].mod_start,
+                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+
         /* Is the region suitable for relocating the multiboot modules? */
         if ( !initial_images_start && (s < e) &&
              ((e-s) >= (modules_length+modules_headroom)) )
@@ -1033,6 +1035,9 @@ void __init __start_xen(unsigned long mb
     if ( xen_cpuidle )
         xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
 
+    if ( !tboot_protect_mem_regions() )
+        panic("Could not protect TXT memory regions\n");
+
     /*
      * We're going to setup domain0 using the module(s) that we stashed safely
      * above our heap. The second module, if present, is an initrd ramdisk.
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/smpboot.c    Fri Jan 30 10:54:13 2009 +0900
@@ -804,7 +804,7 @@ static void *prepare_idle_stack(unsigned
 static void *prepare_idle_stack(unsigned int cpu)
 {
        if (!stack_base[cpu])
-               stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER);
+               stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0);
 
        return stack_base[cpu];
 }
@@ -867,7 +867,7 @@ static int __devinit do_boot_cpu(int api
                                           MEMF_node(cpu_to_node(cpu)));
                per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
 #else
-               per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order);
+               per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0);
 #endif
                memcpy(gdt, boot_cpu_gdt_table,
                       NR_RESERVED_GDT_PAGES * PAGE_SIZE);
diff -r 79f259a26a11 -r 254021201b1b xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c      Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/arch/x86/tboot.c      Fri Jan 30 10:54:13 2009 +0900
@@ -6,6 +6,7 @@
 #include <asm/fixmap.h>
 #include <asm/page.h>
 #include <asm/processor.h>
+#include <asm/e820.h>
 #include <asm/tboot.h>
 
 /* tboot=<physical address of shared page> */
@@ -17,10 +18,56 @@ tboot_shared_t *g_tboot_shared;
 
 static const uuid_t tboot_shared_uuid = TBOOT_SHARED_UUID;
 
+/* used by tboot_protect_mem_regions() and/or tboot_parse_dmar_table() */
+static uint64_t txt_heap_base, txt_heap_size;
+static uint64_t sinit_base, sinit_size;
+
+/*
+ * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
+ */
+
+#define TXT_PUB_CONFIG_REGS_BASE       0xfed30000
+#define TXT_PRIV_CONFIG_REGS_BASE      0xfed20000
+
+/* # pages for each config regs space - used by fixmap */
+#define NR_TXT_CONFIG_PAGES     ((TXT_PUB_CONFIG_REGS_BASE -                \
+                                  TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)
+
+/* offsets from pub/priv config space */
+#define TXTCR_SINIT_BASE            0x0270
+#define TXTCR_SINIT_SIZE            0x0278
+#define TXTCR_HEAP_BASE             0x0300
+#define TXTCR_HEAP_SIZE             0x0308
+
+extern char __init_begin[], __per_cpu_start[], __per_cpu_end[], __bss_start[];
+
+#define SHA1_SIZE      20
+typedef uint8_t   sha1_hash_t[SHA1_SIZE];
+
+typedef struct __packed {
+    uint32_t     version;             /* currently 6 */
+    sha1_hash_t  bios_acm_id;
+    uint32_t     edx_senter_flags;
+    uint64_t     mseg_valid;
+    sha1_hash_t  sinit_hash;
+    sha1_hash_t  mle_hash;
+    sha1_hash_t  stm_hash;
+    sha1_hash_t  lcp_policy_hash;
+    uint32_t     lcp_policy_control;
+    uint32_t     rlp_wakeup_addr;
+    uint32_t     reserved;
+    uint32_t     num_mdrs;
+    uint32_t     mdrs_off;
+    uint32_t     num_vtd_dmars;
+    uint32_t     vtd_dmars_off;
+} sinit_mle_data_t;
+
 void __init tboot_probe(void)
 {
     tboot_shared_t *tboot_shared;
     unsigned long p_tboot_shared;
+    uint32_t map_base, map_size;
+    unsigned long map_addr;
 
     /* Look for valid page-aligned address for shared page. */
     p_tboot_shared = simple_strtoul(opt_tboot, NULL, 0);
@@ -30,24 +77,48 @@ void __init tboot_probe(void)
     /* Map and check for tboot UUID. */
     set_fixmap(FIX_TBOOT_SHARED_BASE, p_tboot_shared);
     tboot_shared = (tboot_shared_t *)fix_to_virt(FIX_TBOOT_SHARED_BASE);
+    if ( tboot_shared == NULL )
+        return;
     if ( memcmp(&tboot_shared_uuid, (uuid_t *)tboot_shared, sizeof(uuid_t)) )
         return;
+
+    /* new tboot_shared (w/ GAS support) is not backwards compatible */
+    if ( tboot_shared->version < 3 ) {
+        printk("unsupported version of tboot (%u)\n", tboot_shared->version);
+        return;
+    }
 
     g_tboot_shared = tboot_shared;
     printk("TBOOT: found shared page at phys addr %lx:\n", p_tboot_shared);
     printk("  version: %d\n", tboot_shared->version);
     printk("  log_addr: 0x%08x\n", tboot_shared->log_addr);
-    printk("  shutdown_entry32: 0x%08x\n", tboot_shared->shutdown_entry32);
-    printk("  shutdown_entry64: 0x%08x\n", tboot_shared->shutdown_entry64);
-    printk("  shutdown_type: %d\n", tboot_shared->shutdown_type);
-    printk("  s3_tb_wakeup_entry: 0x%08x\n", tboot_shared->s3_tb_wakeup_entry);
-    printk("  s3_k_wakeup_entry: 0x%08x\n", tboot_shared->s3_k_wakeup_entry);
-    printk("  &acpi_sinfo: 0x%p\n", &tboot_shared->acpi_sinfo);
-    if ( tboot_shared->version >= 0x02 )
-    {
-        printk("  tboot_base: 0x%08x\n", tboot_shared->tboot_base);
-        printk("  tboot_size: 0x%x\n", tboot_shared->tboot_size);
-    }
+    printk("  shutdown_entry: 0x%08x\n", tboot_shared->shutdown_entry);
+    printk("  tboot_base: 0x%08x\n", tboot_shared->tboot_base);
+    printk("  tboot_size: 0x%x\n", tboot_shared->tboot_size);
+
+    /* these will be needed by tboot_protect_mem_regions() and/or
+       tboot_parse_dmar_table(), so get them now */
+
+    map_base = PFN_DOWN(TXT_PUB_CONFIG_REGS_BASE);
+    map_size = PFN_UP(NR_TXT_CONFIG_PAGES * PAGE_SIZE);
+    map_addr = (unsigned long)__va(map_base << PAGE_SHIFT);
+    if ( map_pages_to_xen(map_addr, map_base, map_size, __PAGE_HYPERVISOR) )
+        return;
+
+    /* TXT Heap */
+    txt_heap_base =
+        *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_HEAP_BASE);
+    txt_heap_size =
+        *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_HEAP_SIZE);
+
+    /* SINIT */
+    sinit_base =
+        *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_BASE);
+    sinit_size =
+        *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_SIZE);
+
+    destroy_xen_mappings((unsigned long)__va(map_base << PAGE_SHIFT),
+                         (unsigned long)__va((map_base + map_size) << PAGE_SHIFT));
 }
 
 void tboot_shutdown(uint32_t shutdown_type)
@@ -59,17 +130,28 @@ void tboot_shutdown(uint32_t shutdown_ty
 
     local_irq_disable();
 
+    /* if this is S3 then set regions to MAC */
+    if ( shutdown_type == TB_SHUTDOWN_S3 ) {
+        g_tboot_shared->num_mac_regions = 4;
+        /* S3 resume code (and other real mode trampoline code) */
+        g_tboot_shared->mac_regions[0].start =
+            (uint64_t)bootsym_phys(trampoline_start);
+        g_tboot_shared->mac_regions[0].end =
+            (uint64_t)bootsym_phys(trampoline_end);
+        /* hypervisor code + data */
+        g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
+        g_tboot_shared->mac_regions[1].end = (uint64_t)__pa(&__init_begin);
+        /* per-cpu data */
+        g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
+        g_tboot_shared->mac_regions[2].end = (uint64_t)__pa(&__per_cpu_end);
+        /* bss */
+        g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start);
+        g_tboot_shared->mac_regions[3].end = (uint64_t)__pa(&_end);
+    }
+
     /* Create identity map for tboot shutdown code. */
-    if ( g_tboot_shared->version >= 0x02 )
-    {
-        map_base = PFN_DOWN(g_tboot_shared->tboot_base);
-        map_size = PFN_UP(g_tboot_shared->tboot_size);
-    }
-    else
-    {
-        map_base = 0;
-        map_size = PFN_UP(0xa0000);
-    }
+    map_base = PFN_DOWN(g_tboot_shared->tboot_base);
+    map_size = PFN_UP(g_tboot_shared->tboot_size);
 
     err = map_pages_to_xen(map_base << PAGE_SHIFT, map_base, map_size,
                            __PAGE_HYPERVISOR);
@@ -82,11 +164,7 @@ void tboot_shutdown(uint32_t shutdown_ty
 
     write_ptbase(idle_vcpu[0]);
 
-#ifdef __x86_64__
-    asm volatile ( "call *%%rdi" :: "D" (g_tboot_shared->shutdown_entry64) );
-#else
-    asm volatile ( "call *%0" :: "r" (g_tboot_shared->shutdown_entry32) );
-#endif
+    ((void(*)(void))(unsigned long)g_tboot_shared->shutdown_entry)();
 
     BUG(); /* should not reach here */
 }
@@ -94,6 +172,96 @@ int tboot_in_measured_env(void)
 int tboot_in_measured_env(void)
 {
     return (g_tboot_shared != NULL);
+}
+
+int __init tboot_protect_mem_regions(void)
+{
+    int rc;
+
+    if ( !tboot_in_measured_env() )
+        return 1;
+
+    /* TXT Heap */
+    if ( txt_heap_base == 0 )
+        return 0;
+    rc = e820_change_range_type(
+        &e820, txt_heap_base, txt_heap_base + txt_heap_size,
+        E820_RESERVED, E820_UNUSABLE);
+    if ( !rc )
+        return 0;
+
+    /* SINIT */
+    if ( sinit_base == 0 )
+        return 0;
+    rc = e820_change_range_type(
+        &e820, sinit_base, sinit_base + sinit_size,
+        E820_RESERVED, E820_UNUSABLE);
+    if ( !rc )
+        return 0;
+
+    /* TXT Private Space */
+    rc = e820_change_range_type(
+        &e820, TXT_PRIV_CONFIG_REGS_BASE,
+        TXT_PRIV_CONFIG_REGS_BASE + NR_TXT_CONFIG_PAGES * PAGE_SIZE,
+        E820_RESERVED, E820_UNUSABLE);
+    if ( !rc )
+        return 0;
+
+    return 1;
+}
+
+int __init tboot_parse_dmar_table(acpi_table_handler dmar_handler)
+{
+    uint32_t map_base, map_size;
+    unsigned long map_vaddr;
+    void *heap_ptr;
+    struct acpi_table_header *dmar_table;
+    int rc;
+
+    if ( !tboot_in_measured_env() )
+        return acpi_table_parse(ACPI_SIG_DMAR, dmar_handler);
+
+    /* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
+    /* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
+
+    if ( txt_heap_base == 0 )
+        return 1;
+
+    /* map TXT heap into Xen addr space */
+    map_base = PFN_DOWN(txt_heap_base);
+    map_size = PFN_UP(txt_heap_size);
+    map_vaddr = (unsigned long)__va(map_base << PAGE_SHIFT);
+    if ( map_pages_to_xen(map_vaddr, map_base, map_size, __PAGE_HYPERVISOR) )
+        return 1;
+
+    /* walk heap to SinitMleData */
+    heap_ptr = __va(txt_heap_base);
+    /* skip BiosData */
+    heap_ptr += *(uint64_t *)heap_ptr;
+    /* skip OsMleData */
+    heap_ptr += *(uint64_t *)heap_ptr;
+    /* skip OsSinitData */
+    heap_ptr += *(uint64_t *)heap_ptr;
+    /* now points to SinitMleDataSize; set to SinitMleData */
+    heap_ptr += sizeof(uint64_t);
+    /* get addr of DMAR table */
+    dmar_table = (struct acpi_table_header *)(heap_ptr +
+            ((sinit_mle_data_t *)heap_ptr)->vtd_dmars_off - sizeof(uint64_t));
+
+    rc = dmar_handler(dmar_table);
+
+    destroy_xen_mappings(
+        (unsigned long)__va(map_base << PAGE_SHIFT),
+        (unsigned long)__va((map_base + map_size) << PAGE_SHIFT));
+  
+    /* acpi_parse_dmar() zaps APCI DMAR signature in TXT heap table */
+    /* but dom0 will read real table, so must zap it there too */
+    dmar_table = NULL;
+    acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_table);
+    if ( dmar_table != NULL )
+        ((struct acpi_table_dmar *)dmar_table)->header.signature[0] = '\0';
+
+    return rc;
 }
 
 /*
diff -r 79f259a26a11 -r 254021201b1b xen/common/domain.c
--- a/xen/common/domain.c       Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/domain.c       Fri Jan 30 10:54:13 2009 +0900
@@ -102,16 +102,6 @@ int current_domain_id(void)
     return current->domain->domain_id;
 }
 
-static struct domain *alloc_domain_struct(void)
-{
-    return xmalloc(struct domain);
-}
-
-static void free_domain_struct(struct domain *d)
-{
-    xfree(d);
-}
-
 static void __domain_finalise_shutdown(struct domain *d)
 {
     struct vcpu *v;
diff -r 79f259a26a11 -r 254021201b1b xen/common/grant_table.c
--- a/xen/common/grant_table.c  Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/grant_table.c  Fri Jan 30 10:54:13 2009 +0900
@@ -111,33 +111,6 @@ static unsigned inline int max_nr_maptra
 #define active_entry(t, e) \
     ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
 
-/* The p2m emergency sweep code should not reclaim a frame that is currenlty
- * grant mapped by another domain.  That would involve checking all other
- * domains grant maps, which is impractical.  Instead, we check the active
- * grant table for this domain to see if it's been granted.  Since this
- * may be called as a result of a grant table op, we can't grab the lock. */
-int
-gnttab_is_granted(struct domain *d, xen_pfn_t gfn, int order)
-{
-    int i, found=0;
-    struct active_grant_entry *act;
-
-    /* We need to compare with active grant entries to make sure that
-     * pinned (== currently mapped) entries don't disappear under our
-     * feet. */
-    for ( i=0; i<nr_grant_entries(d->grant_table); i++ )
-    {
-        act = &active_entry(d->grant_table, i);
-        if ( act->gfn >> order == gfn >> order )
-        {
-            found = 1;
-            break;
-        }
-    }
-
-    return found;
-}
-
 static inline int
 __get_maptrack_handle(
     struct grant_table *t)
diff -r 79f259a26a11 -r 254021201b1b xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/page_alloc.c   Fri Jan 30 10:54:13 2009 +0900
@@ -655,7 +655,7 @@ void init_xenheap_pages(paddr_t ps, padd
 }
 
 
-void *alloc_xenheap_pages(unsigned int order)
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
 {
     struct page_info *pg;
 
@@ -664,15 +664,11 @@ void *alloc_xenheap_pages(unsigned int o
     pg = alloc_heap_pages(
         MEMZONE_XEN, MEMZONE_XEN, cpu_to_node(smp_processor_id()), order);
     if ( unlikely(pg == NULL) )
-        goto no_memory;
+        return NULL;
 
     memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
 
     return page_to_virt(pg);
-
- no_memory:
-    printk("Cannot handle page request order %d!\n", order);
-    return NULL;
 }
 
 
@@ -695,26 +691,21 @@ void init_xenheap_pages(paddr_t ps, padd
     init_domheap_pages(ps, pe);
 }
 
-void *alloc_xenheap_pages(unsigned int order)
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
 {
     struct page_info *pg;
     unsigned int i;
 
     ASSERT(!in_irq());
 
-    pg = alloc_heap_pages(
-        MEMZONE_XEN+1, NR_ZONES-1, cpu_to_node(smp_processor_id()), order);
+    pg = alloc_domheap_pages(NULL, order, memflags);
     if ( unlikely(pg == NULL) )
-        goto no_memory;
+        return NULL;
 
     for ( i = 0; i < (1u << order); i++ )
         pg[i].count_info |= PGC_xen_heap;
 
     return page_to_virt(pg);
-
- no_memory:
-    printk("Cannot handle page request order %d!\n", order);
-    return NULL;
 }
 
 void free_xenheap_pages(void *v, unsigned int order)
diff -r 79f259a26a11 -r 254021201b1b xen/common/trace.c
--- a/xen/common/trace.c        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/trace.c        Fri Jan 30 10:54:13 2009 +0900
@@ -94,7 +94,7 @@ static int alloc_trace_bufs(void)
     order    = get_order_from_pages(nr_pages);
     data_size  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
     
-    if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
+    if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
     {
         printk("Xen trace buffers: memory allocation failed\n");
         opt_tbuf_size = 0;
diff -r 79f259a26a11 -r 254021201b1b xen/common/xenoprof.c
--- a/xen/common/xenoprof.c     Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/xenoprof.c     Fri Jan 30 10:54:13 2009 +0900
@@ -225,7 +225,7 @@ static int alloc_xenoprof_struct(
     bufsize += (max_samples - 1) * i;
     npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
 
-    d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages));
+    d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages), 0);
     if ( d->xenoprof->rawbuf == NULL )
     {
         xfree(d->xenoprof);
diff -r 79f259a26a11 -r 254021201b1b xen/common/xmalloc_tlsf.c
--- a/xen/common/xmalloc_tlsf.c Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/common/xmalloc_tlsf.c Fri Jan 30 10:54:13 2009 +0900
@@ -300,7 +300,7 @@ struct xmem_pool *xmem_pool_create(
     pool_bytes = ROUNDUP_SIZE(sizeof(*pool));
     pool_order = get_order_from_bytes(pool_bytes);
 
-    pool = (void *)alloc_xenheap_pages(pool_order);
+    pool = (void *)alloc_xenheap_pages(pool_order, 0);
     if ( pool == NULL )
         return NULL;
     memset(pool, 0, pool_bytes);
@@ -505,12 +505,12 @@ static void *xmalloc_pool_get(unsigned l
 static void *xmalloc_pool_get(unsigned long size)
 {
     ASSERT(size == PAGE_SIZE);
-    return alloc_xenheap_pages(0);
+    return alloc_xenheap_page();
 }
 
 static void xmalloc_pool_put(void *p)
 {
-    free_xenheap_pages(p,0);
+    free_xenheap_page(p);
 }
 
 static void *xmalloc_whole_pages(unsigned long size)
@@ -518,7 +518,7 @@ static void *xmalloc_whole_pages(unsigne
     struct bhdr *b;
     unsigned int pageorder = get_order_from_bytes(size + BHDR_OVERHEAD);
 
-    b = alloc_xenheap_pages(pageorder);
+    b = alloc_xenheap_pages(pageorder, 0);
     if ( b == NULL )
         return NULL;
 
diff -r 79f259a26a11 -r 254021201b1b xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/drivers/char/console.c        Fri Jan 30 10:54:13 2009 +0900
@@ -885,7 +885,7 @@ static int __init debugtrace_init(void)
         return 0;
 
     order = get_order_from_bytes(bytes);
-    debugtrace_buf = alloc_xenheap_pages(order);
+    debugtrace_buf = alloc_xenheap_pages(order, 0);
     ASSERT(debugtrace_buf != NULL);
 
     memset(debugtrace_buf, '\0', bytes);
diff -r 79f259a26a11 -r 254021201b1b xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/drivers/char/serial.c Fri Jan 30 10:54:13 2009 +0900
@@ -495,7 +495,7 @@ void serial_async_transmit(struct serial
     BUG_ON(!port->driver->tx_empty);
     if ( port->txbuf == NULL )
         port->txbuf = alloc_xenheap_pages(
-            get_order_from_bytes(serial_txbufsz));
+            get_order_from_bytes(serial_txbufsz), 0);
 }
 
 /*
diff -r 79f259a26a11 -r 254021201b1b xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c        Fri Jan 30 10:54:13 2009 +0900
@@ -506,6 +506,15 @@ static int __init acpi_parse_dmar(struct
     return ret;
 }
 
+#ifdef CONFIG_X86
+#include <asm/tboot.h>
+/* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
+/* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
+#define parse_dmar_table(h) tboot_parse_dmar_table(h)
+#else
+#define parse_dmar_table(h) acpi_table_parse(ACPI_SIG_DMAR, h)
+#endif
+
 int acpi_dmar_init(void)
 {
     int rc;
@@ -519,7 +528,7 @@ int acpi_dmar_init(void)
     if ( !iommu_enabled )
         goto fail;
 
-    rc = acpi_table_parse(ACPI_SIG_DMAR, acpi_parse_dmar);
+    rc = parse_dmar_table(acpi_parse_dmar);
     if ( rc )
         goto fail;
 
diff -r 79f259a26a11 -r 254021201b1b xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h        Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/asm-x86/e820.h        Fri Jan 30 10:54:13 2009 +0900
@@ -24,6 +24,9 @@ struct e820map {
 };
 
 extern int reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e);
+extern int e820_change_range_type(
+    struct e820map *e820, uint64_t s, uint64_t e,
+    uint32_t orig_type, uint32_t new_type);
 extern unsigned long init_e820(const char *, struct e820entry *, int *);
 extern struct e820map e820;
 
diff -r 79f259a26a11 -r 254021201b1b xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Fri Jan 30 10:54:13 2009 +0900
@@ -140,7 +140,7 @@ static inline void* __alloc_amd_iommu_ta
 static inline void* __alloc_amd_iommu_tables(int order)
 {
     void *buf;
-    buf = alloc_xenheap_pages(order);
+    buf = alloc_xenheap_pages(order, 0);
     return buf;
 }
 
diff -r 79f259a26a11 -r 254021201b1b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/asm-x86/mm.h  Fri Jan 30 10:54:13 2009 +0900
@@ -31,7 +31,7 @@ struct page_info
         /* Page is in use: ((count_info & PGC_count_mask) != 0). */
         struct {
             /* Owner of this page (NULL if page is anonymous). */
-            unsigned long _domain; /* pickled format */
+            u32 _domain; /* pickled format */
             /* Type reference count and various PGT_xxx flags and fields. */
             unsigned long type_info;
         } inuse;
@@ -173,8 +173,11 @@ struct page_info
 /* OOS fixup entries */
 #define SHADOW_OOS_FIXUPS 2
 
-#define page_get_owner(_p)    ((struct domain *)(_p)->u.inuse._domain)
-#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = (unsigned long)(_d))
+#define page_get_owner(_p)                                              \
+    ((struct domain *)((_p)->u.inuse._domain ?                          \
+                       mfn_to_virt((_p)->u.inuse._domain) : NULL))
+#define page_set_owner(_p,_d)                                           \
+    ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
 
 #define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
 #define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))
diff -r 79f259a26a11 -r 254021201b1b xen/include/asm-x86/tboot.h
--- a/xen/include/asm-x86/tboot.h       Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/asm-x86/tboot.h       Fri Jan 30 10:54:13 2009 +0900
@@ -37,7 +37,13 @@
 #ifndef __TBOOT_H__
 #define __TBOOT_H__
 
-typedef struct __attribute__ ((__packed__)) {
+#include <xen/acpi.h>
+
+#ifndef __packed
+#define __packed   __attribute__ ((packed))
+#endif
+
+typedef struct __packed {
   uint32_t    data1;
   uint16_t    data2;
   uint16_t    data3;
@@ -47,31 +53,47 @@ typedef struct __attribute__ ((__packed_
 
 /* used to communicate between tboot and the launched kernel (i.e. Xen) */
 
-typedef struct __attribute__ ((__packed__)) {
-    uint16_t pm1a_cnt;
-    uint16_t pm1b_cnt;
-    uint16_t pm1a_evt;
-    uint16_t pm1b_evt;
+#define MAX_TB_MAC_REGIONS      32
+typedef struct __packed {
+    uint64_t  start;
+    uint64_t  end;
+} tboot_mac_region_t;
+
+/* GAS - Generic Address Structure (ACPI 2.0+) */
+typedef struct __packed {
+       uint8_t  space_id;
+       uint8_t  bit_width;
+       uint8_t  bit_offset;
+       uint8_t  access_width;
+       uint64_t address;
+} tboot_acpi_generic_address_t;
+
+typedef struct __packed {
+    tboot_acpi_generic_address_t pm1a_cnt_blk;
+    tboot_acpi_generic_address_t pm1b_cnt_blk;
+    tboot_acpi_generic_address_t pm1a_evt_blk;
+    tboot_acpi_generic_address_t pm1b_evt_blk;
     uint16_t pm1a_cnt_val;
     uint16_t pm1b_cnt_val;
-} tboot_acpi_sleep_info;
+    uint64_t wakeup_vector;
+    uint32_t vector_width;
+    uint64_t kernel_s3_resume_vector;
+} tboot_acpi_sleep_info_t;
 
-typedef struct __attribute__ ((__packed__)) {
-    /* version 0x01+ fields: */
+typedef struct __packed {
+    /* version 3+ fields: */
     uuid_t    uuid;              /* {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} */
-    uint32_t  version;           /* Version number: 0x01, 0x02, ... */
+    uint32_t  version;           /* Version number; currently supports 0.3 */
     uint32_t  log_addr;          /* physical addr of tb_log_t log */
-    uint32_t  shutdown_entry32;  /* entry point for tboot shutdown from 32b */
-    uint32_t  shutdown_entry64;  /* entry point for tboot shutdown from 64b */
+    uint32_t  shutdown_entry;    /* entry point for tboot shutdown */
     uint32_t  shutdown_type;     /* type of shutdown (TB_SHUTDOWN_*) */
-    uint32_t  s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */
-    uint32_t  s3_k_wakeup_entry; /* entry point for xen s3 wake up */
-    tboot_acpi_sleep_info
+    tboot_acpi_sleep_info_t
               acpi_sinfo;        /* where kernel put acpi sleep info in Sx */
-    uint8_t   reserved[52];      /* this pad is for compat with old field */
-    /* version 0x02+ fields: */
     uint32_t  tboot_base;        /* starting addr for tboot */
     uint32_t  tboot_size;        /* size of tboot */
+    uint8_t   num_mac_regions;   /* number mem regions to MAC on S3 */
+                                 /* contig regions memory to MAC on S3 */
+    tboot_mac_region_t mac_regions[MAX_TB_MAC_REGIONS];
 } tboot_shared_t;
 
 #define TB_SHUTDOWN_REBOOT      0
@@ -89,6 +111,8 @@ void tboot_probe(void);
 void tboot_probe(void);
 void tboot_shutdown(uint32_t shutdown_type);
 int tboot_in_measured_env(void);
+int tboot_protect_mem_regions(void);
+int tboot_parse_dmar_table(acpi_table_handler dmar_handler);
 
 #endif /* __TBOOT_H__ */
 
diff -r 79f259a26a11 -r 254021201b1b xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/xen/domain.h  Fri Jan 30 10:54:13 2009 +0900
@@ -22,6 +22,10 @@ void getdomaininfo(struct domain *d, str
 /*
  * Arch-specifics.
  */
+
+/* Allocate/free a domain structure. */
+struct domain *alloc_domain_struct(void);
+void free_domain_struct(struct domain *d);
 
 /* Allocate/free a VCPU structure. */
 struct vcpu *alloc_vcpu_struct(void);
diff -r 79f259a26a11 -r 254021201b1b xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h     Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/xen/grant_table.h     Fri Jan 30 10:54:13 2009 +0900
@@ -147,7 +147,4 @@ nr_active_grant_frames(struct grant_tabl
     return num_act_frames_from_sha_frames(nr_grant_frames(gt));
 }
 
-int
-gnttab_is_granted(struct domain *d, xen_pfn_t gfn, int order);
-
 #endif /* __XEN_GRANT_TABLE_H__ */
diff -r 79f259a26a11 -r 254021201b1b xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Wed Jan 28 13:06:45 2009 +0900
+++ b/xen/include/xen/mm.h      Fri Jan 30 10:54:13 2009 +0900
@@ -45,9 +45,9 @@ void end_boot_allocator(void);
 
 /* Xen suballocator. These functions are interrupt-safe. */
 void init_xenheap_pages(paddr_t ps, paddr_t pe);
-void *alloc_xenheap_pages(unsigned int order);
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
 void free_xenheap_pages(void *v, unsigned int order);
-#define alloc_xenheap_page() (alloc_xenheap_pages(0))
+#define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
 #define free_xenheap_page(v) (free_xenheap_pages(v,0))
 
 /* Domain suballocator. These functions are *not* interrupt-safe.*/
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
 |